139 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
			
		
		
	
	
			139 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
/* xml_driver.pl : Contains xml_parse/[2,3] a bi-directional XML parser written in
 | 
						|
 * Prolog.
 | 
						|
 *
 | 
						|
 * Copyright (C) 2001-2005 Binding Time Limited
 | 
						|
 * Copyright (C) 2005-2011 John Fletcher
 | 
						|
 *
 | 
						|
 * Current Release: $Revision: 3.3 $
 | 
						|
 * 
 | 
						|
 * TERMS AND CONDITIONS:
 | 
						|
 *
 | 
						|
 * This program is offered free of charge, as unsupported source code. You may
 | 
						|
 * use it, copy it, distribute it, modify it or sell it without restriction,
 | 
						|
 * but entirely at your own risk.
 | 
						|
 *
 | 
						|
 * xml_parse( {+Controls}, +?Chars, ?+Document ) parses Chars to/from a data
 | 
						|
 * structure of the form xml(<atts>, <content>). <atts> is a list of
 | 
						|
 * <atom>=<string> attributes from the (possibly implicit) XML signature of the
 | 
						|
 * document. <content> is a (possibly empty) list comprising occurrences of:
 * ~~~
 | 
						|
 *    pcdata(<string>)                    :  Text
 | 
						|
 *    comment(<string>)                   :  An xml comment;
 | 
						|
 *    element(<tag>,<atts>,<content>)     :  <tag>..</tag> encloses <content>
 | 
						|
 *                                        :  <tag /> if empty
 | 
						|
 *    instructions(<atom>, <string>)      :  Processing <? <atom> <params> ?>
 | 
						|
 *    cdata( <string> )                   :  <![CDATA[ <string> ]]>
 | 
						|
 *    doctype(<atom>, <doctype id>)       :  DTD <!DOCTYPE .. >
 | 
						|
 * ~~~~
 | 
						|
 * The conversions are not completely symmetrical, in that weaker XML is
 | 
						|
 * accepted than can be generated. Specifically, in-bound (Chars -> Document)
 | 
						|
 * does not  require strictly well-formed XML. Document is instantiated to the
 | 
						|
 * term malformed(Attributes, Content) if Chars does not represent well-formed
 | 
						|
 * XML. The Content of a malformed/2 structure can contain:
 | 
						|
 *
 | 
						|
 *    unparsed( <string> )                :  Text which has not been parsed
 | 
						|
 *    out_of_context( <tag> )             :  <tag> is not closed
 | 
						|
 *
 | 
						|
 * in addition to the standard term types.
 | 
						|
 *
 | 
						|
 * Out-bound (Document -> Chars) parsing _does_ require that Document defines
 | 
						|
 * strictly well-formed XML. If an error is detected a 'domain' exception is
 | 
						|
 * raised.
 | 
						|
 *
 | 
						|
 * The domain exception will attempt to identify the particular sub-term in
 | 
						|
 * error and the message will show a list of its ancestor elements in the form
 | 
						|
 * <tag>{(id)}* where <id> is the value of any attribute _named_ id.
 | 
						|
 *
 | 
						|
 * At this release, the Controls applying to in-bound (Chars -> Document)
 | 
						|
 * parsing are:
 | 
						|
 *
 | 
						|
 *    extended_characters(<bool>)         :  Use the extended character
 | 
						|
 *                                        :  entities for XHTML (default true)
 | 
						|
 *
 | 
						|
 *    format(<bool>)                      :  Strip layouts when no character data
 | 
						|
 *                                        :  appears between elements.
 | 
						|
 *                                        :  (default true)
 | 
						|
 *
 | 
						|
 *    remove_attribute_prefixes(<bool>)   :  Remove namespace prefixes from
 | 
						|
 *                                        :  attributes when it's the same as the
 | 
						|
 *                                        :  prefix of the parent element
 | 
						|
 *                                        :  (default false).
 | 
						|
 *
 | 
						|
 *    allow_ampersand(<bool>)             :  Allow unescaped ampersand
 | 
						|
 *                                        :  characters (&) to occur in PCDATA.
 | 
						|
 *                                        :  (default false).
 | 
						|
 *
 | 
						|
 *    [<bool> is one of 'true' or 'false']
 | 
						|
 *
 | 
						|
 * For out-bound (Document -> Chars) parsing, the only available option is:
 | 
						|
 *
 | 
						|
 *    format(<Bool>)                      :  Indent the element content
 | 
						|
 *                                        :  (default true)
 | 
						|
 *
 | 
						|
 * Different DCGs for input and output are used because input parsing is
 | 
						|
 * more flexible than output parsing. Errors in input are recorded as part
 | 
						|
 * of the data structure. Output parsing throws an exception if the document
 | 
						|
 * is not well-formed, diagnosis tries to identify the specific culprit term.
 | 
						|
 */
 | 
						|
 | 
						|
/* Module xml: public interface of the bi-directional XML parser. */
:- module( xml,
	[ xml_parse/2,
	  xml_parse/3,
	  document_to_xml/3,
	  xml_subterm/2
	] ).
 | 
						|
 | 
						|
/* xml_parse( +?Chars, ?+Document )
 * is xml_parse/3 called with an empty list of Controls.
 */
xml_parse( Chars, Document ) :-
	NoControls = [],
	xml_parse( NoControls, Chars, Document ).
 | 
						|
 | 
						|
/* xml_parse( +Controls, +?Chars, ?+Document )
 * chooses the direction of the conversion from the instantiation of Chars:
 * when Chars is ground it is parsed into Document by xml_to_document/3;
 * in every other case Chars is generated from Document by document_to_xml/3.
 */
xml_parse( Controls, Chars, Document ) :-
	ground( Chars ),
	!,
	xml_to_document( Controls, Chars, Document ).
xml_parse( Controls, Chars, Document ) :-
	document_to_xml( Controls, Document, Chars ).
 | 
						|
 | 
						|
/* document_to_xml( +Controls, +Document, ?Chars )
 * generates Chars from Document. The formatting flag passed to
 * document_generation/4 is false only when format(false) can be found in
 * Controls, and true otherwise.
 *
 * When Document is not ground, or document_generation/4 does not succeed,
 * xml_fault/5 is called to find the Culprit sub-term, its Path and a Message,
 * and these are handed to xml_exception/4.
 */
document_to_xml( Controls, Document, Chars ) :-
	(	member( format(false), Controls )
	->	Indent = false
	;	Indent = true
	),
	(	ground( Document ),
		document_generation( Indent, Document, Generated, [] )
	->	/* Unify only after generation has committed to its first solution. */
		Chars = Generated
	;	xml_fault( Document, [], Culprit, Path, Message ),
		xml_exception( Message, Document, Culprit, Path )
	).
 | 
						|
 | 
						|
/** xml_subterm( +XMLTerm, ?Subterm )
 * unifies Subterm with a sub-term of XMLTerm, enumerating alternatives on
 * backtracking. XMLTerm itself is the first solution. The search descends
 * into the content of xml/2, element/3 and namespace/3 terms, and into both
 * the head and the tail of a list, so every list tail (including []) is
 * also offered as a sub-term.
 */
xml_subterm( XMLTerm, XMLTerm ).
xml_subterm( xml(_Atts, Children), Subterm ) :-
	xml_subterm( Children, Subterm ).
xml_subterm( [First|_Rest], Subterm ) :-
	xml_subterm( First, Subterm ).
xml_subterm( [_First|Rest], Subterm ) :-
	xml_subterm( Rest, Subterm ).
xml_subterm( element(_Tag, _Atts, Children), Subterm ) :-
	xml_subterm( Children, Subterm ).
xml_subterm( namespace(_URI, _Prefix, Children), Subterm ) :-
	xml_subterm( Children, Subterm ).
 | 
						|
 | 
						|
/* xml is intended to be a rather modular module: it should be easy to
 | 
						|
 * build a program that can output XML, but not read it, or vice versa.
 | 
						|
 * Similarly, you may be happy to dispense with diagnosis once you are
 | 
						|
 * sure that your code will only try to make valid calls to xml_parse/2.
 | 
						|
 *
 | 
						|
 * It is intended that the code should be very portable too. Clearly,
 | 
						|
 * some small changes will be needed between platforms, but these should
 | 
						|
 * be limited to xml_utilities. xml_utilities contains most of the shared
 | 
						|
 * code and most of the potentially non-portable code.
 | 
						|
 */
 | 
						|
:- ensure_loaded( xml_acquisition ).
 | 
						|
:- ensure_loaded( xml_diagnosis ).
 | 
						|
:- ensure_loaded( xml_generation ).
 | 
						|
:- ensure_loaded( xml_pp ).
 | 
						|
:- ensure_loaded( xml_utilities ).
 |