390 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			390 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
|   | /* xml_generation.pl : Document -> XML translation | ||
|  |  * | ||
|  |  * Copyright (C) 2001-2005 Binding Time Limited | ||
|  |  * Copyright (C) 2005-2011 John Fletcher | ||
|  |  * | ||
|  |  * Current Release: $Revision: 3.7 $ | ||
|  |  *  | ||
|  |  * TERMS AND CONDITIONS: | ||
|  |  * | ||
|  |  * This program is offered free of charge, as unsupported source code. You may | ||
|  |  * use it, copy it, distribute it, modify it or sell it without restriction, | ||
|  |  * but entirely at your own risk. | ||
|  |  */ | ||
|  |  | ||
|  | :- ensure_loaded( xml_utilities ). | ||
|  |  | ||
/* document_generation( +Format, +Document ) is a DCG that renders Document,
 * a term of the form xml(Attributes, Content), as a list of character codes.
 *
 * Format is true or false. When true, layout (a newline plus indentation) is
 * inserted between the element content of the generated text. Formatting is
 * switched off inside elements whose content is mixed with pcdata/1 terms
 * (for instance XHTML 'inline' elements), and an explicit
 * 'xml:space'="preserve" attribute overrides Format for that element.
 */
document_generation( Layout, xml(DeclAttributes, Content) ) -->
	document_generation_body( DeclAttributes, Layout, Content ).
|  |  | ||
/* document_generation_body( +Attributes, +Format, +Document ) is a DCG that
 * emits the optional XML declaration followed by Document.
 *
 * An empty attribute list means no "<?xml ... ?>" declaration is written.
 * Otherwise the attributes must satisfy xml_declaration_attributes_valid/1;
 * the declaration is always followed by a newline before the content.
 */
document_generation_body( [], Layout, Content ) -->
	generation( Content, "", Layout, [], _Layout1 ).
document_generation_body( [Attr|Attrs], Layout0, Content ) -->
	{xml_declaration_attributes_valid( [Attr|Attrs] )},
	"<?xml",
	generated_attributes( [Attr|Attrs], Layout0, Layout1 ),
	"?>",
	indent( true, [] ), % The declaration always sits on its own line.
	generation( Content, "", Layout1, [], _Layout2 ).
|  |  | ||
/* generation( +Term, +Prefix, +Format0, +Indent, -Format ) is a DCG that
 * renders Term (a document term or a list of them) as character codes.
 *
 * Prefix is the namespace prefix in force, Indent is the list of codes
 * written after each newline when formatting is on, and Format0/Format are
 * the formatting status before and after Term has been written.
 */

% A list: render each member in order, threading the formatting status.
generation( [], _Prefix, Format, _Indent, Format ) --> [].
generation( [Node|Nodes], Prefix, Format0, Indent, Format ) -->
	generation( Node, Prefix, Format0, Indent, Format1 ),
	generation( Nodes, Prefix, Format1, Indent, Format ).

% Document type declaration; only matched at the outermost (empty) indent.
generation( doctype(Root, ExternalId), _Prefix, Format, [], Format ) -->
	"<!DOCTYPE ",
	generated_name( Root ),
	generated_external_id( ExternalId ),
	">".

% Processing instruction.
generation( instructions(Target, Body), _Prefix, Format, Indent, Format ) -->
	indent( Format, Indent ),
	"<?",
	generated_name( Target ),
	" ",
	chars( Body ),
	"?>".

% Parsed character data; non-empty text turns formatting off.
generation( pcdata(Text), _Prefix, Format0, _Indent, Format1 ) -->
	pcdata_generation( Text ),
	{character_data_format( Text, Format0, Format1 )}.

% Comment.
generation( comment(Text), _Prefix, Format, Indent, Format ) -->
	indent( Format, Indent ),
	"<!--",
	chars( Text ),
	"-->".

% Element that introduces a namespace: its own prefix replaces the current one.
generation( namespace(URI, NsPrefix, element(Tag, Attributes, Children)),
		_Prefix0, Format, Indent, Format ) -->
	indent( Format, Indent ),
	"<",
	generated_prefixed_name( NsPrefix, Tag ),
	generated_prefixed_attributes( NsPrefix, URI, Attributes, Format, Inner ),
	generated_content( Children, Inner, Indent, NsPrefix, Tag ).

% Ordinary element, written with the prefix currently in force.
generation( element(Tag, Attributes, Children), Prefix, Format, Indent, Format ) -->
	indent( Format, Indent ),
	"<",
	generated_prefixed_name( Prefix, Tag ),
	generated_attributes( Attributes, Format, Inner ),
	generated_content( Children, Inner, Indent, Prefix, Tag ).

% CDATA section; non-empty text turns formatting off.
generation( cdata(Text), _Prefix, Format0, _Indent, Format1 ) -->
	"<![CDATA[",
	cdata_generation( Text ),
	"]]>",
	{character_data_format( Text, Format0, Format1 )}.
|  |  | ||
/* generated_attributes( +Attributes, +Format0, -Format ) is a DCG that writes
 * each Name=Value pair as  Name="Value"  preceded by a single space.
 *
 * Format is Format0 unless one of the attributes is 'xml:space'="preserve",
 * in which case formatting is switched off (false) from that point on.
 */
generated_attributes( [], Format, Format ) --> [].
generated_attributes( [Name=Value|Rest], Format0, Format ) -->
	{(	Name == 'xml:space',
		Value = "preserve" ->
			Format1 = false
	  ; otherwise ->
			Format1 = Format0
	  )},
	" ", generated_name( Name ),
	"=\"", quoted_string( Value ), "\"",
	generated_attributes( Rest, Format1, Format ).
|  |  | ||
/* generated_prefixed_name( +Prefix, +Name ) is a DCG that writes Name,
 * qualified as Prefix:Name when Prefix is a non-empty list of codes.
 */
generated_prefixed_name( [], Name ) -->
	generated_name( Name ).
generated_prefixed_name( [Code|Codes], Name ) -->
	chars( [Code|Codes] ),
	":",
	generated_name( Name ).
|  |  | ||
/* generated_content( +Content, +Format, +Indent, +Prefix, +Name ) is a DCG
 * that finishes an element whose start tag has been written up to, but not
 * including, the closing '>'.
 *
 * Empty content yields a self-closing tag. Otherwise the children are
 * written one space deeper than Indent, followed by the end tag for Name.
 */
generated_content( [], _Format, _Indent, _Prefix, _Name ) -->
	" />". % Leave an extra space for XHTML output.
generated_content( [Child|Children], Format0, Indent, Prefix, Name ) -->
	{Deeper = [32|Indent]}, % 32 is the space character.
	">",
	generation( Child, Prefix, Format0, Deeper, Format1 ),
	generation( Children, Prefix, Format1, Deeper, Format2 ),
	indent( Format2, Indent ),
	"</",
	generated_prefixed_name( Prefix, Name ),
	">".
|  |  | ||
/* generated_prefixed_attributes( +Prefix, +URI, +Atts, +Format0, -Format )
 * is a DCG that writes the attributes of a namespace-introducing element.
 *
 * With a non-empty Prefix the attributes are written unchanged. With an
 * empty Prefix any existing xmlns attribute is dropped and a single
 * xmlns attribute holding URI is written first.
 */
generated_prefixed_attributes( [_|_], _URI, Atts, Format0, Format ) -->
	generated_attributes( Atts, Format0, Format ).
generated_prefixed_attributes( [], URI, Atts, Format0, Format ) -->
	{atom_codes( URI, UriCodes ),
	 findall( Att, (member(Att, Atts), Att \= (xmlns=_)), Others )
	},
	generated_attributes( [xmlns=UriCodes|Others], Format0, Format ).
|  |  | ||
/* generated_name( +Name, ?Plus, ?Minus ) holds when the difference list
 * Plus-Minus is the character codes of the atom Name. Written with explicit
 * difference-list arguments so that it can be called as generated_name//1.
 */
generated_name( Name, Codes0, Codes ) :-
	atom_codes( Name, NameCodes ),
	append( NameCodes, Codes, Codes0 ).
|  |  | ||
/* generated_external_id( +External ) is a DCG that writes the part of a
 * <!DOCTYPE ...> declaration that follows the root element name.
 *
 * External is one of:
 *   local                      - nothing is written;
 *   local(Literals)            - an internal subset only;
 *   system(URL)                - SYSTEM "URL";
 *   system(URL, Literals)      - SYSTEM "URL" plus an internal subset;
 *   public(URN, URL)           - PUBLIC "URN" "URL";
 *   public(URN, URL, Literals) - PUBLIC "URN" "URL" plus an internal subset.
 *
 * The line breaks inside the internal subset are written with explicit
 * \n and \t escapes, so that no quoted string spans a physical source line.
 */
generated_external_id( local ) --> [].
generated_external_id( local(Literals) ) -->
	" [",
	generated_doctype_literals( Literals ),
	"\n\t]".
generated_external_id( system(URL) ) -->
	" SYSTEM \"",
	chars( URL ),
	"\"".
generated_external_id( system(URL,Literals) ) -->
	" SYSTEM \"",
	chars( URL ),
	"\" [",
	generated_doctype_literals( Literals ),
	"\n\t]".
generated_external_id( public(URN,URL) ) -->
	" PUBLIC \"",
	chars( URN ),
	"\" \"",
	chars( URL ),
	"\"".
generated_external_id( public(URN,URL,Literals) ) -->
	" PUBLIC \"",
	chars( URN ),
	"\" \"",
	chars( URL ),
	"\" [",
	generated_doctype_literals( Literals ),
	"\n\t]".

/* generated_doctype_literals( +Literals ) is a DCG that writes each
 * dtd_literal(String) in Literals as "<!String>" on a new, tab-indented line.
 */
generated_doctype_literals( [] ) --> [].
generated_doctype_literals( [dtd_literal(String)|Literals] ) -->
	"\n\t<!",
	cdata_generation( String ),
	">",
	generated_doctype_literals( Literals ).
|  |  | ||
/* quoted_string( +Chars ) is a DCG representing Chars, a list of character
 * codes, as a legal XML attribute value. Leading and trailing layout
 * characters are removed. The characters &, " and < are intended to be
 * written as the entity references &amp;, &quot; and &lt; respectively;
 * the escaping itself is delegated to escaped_quote//0, apos//0 and
 * pcdata_7bit//1.
 *
 * Written with explicit difference-list arguments Plus and Minus.
 */
quoted_string( Raw, Plus, Minus ) :-
	quoted_string1( Raw, Trimmed ), % strip leading layout
	quoted_string2( Trimmed, Pending, Pending, Plus, Minus ). % no layout pending yet
|  |  | ||
/* quoted_string1( +Chars, -NoLeadingLayouts ) holds when NoLeadingLayouts is
 * Chars without its leading run of layout characters, i.e. codes that are
 * not greater than 32 (space).
 */
quoted_string1( [], [] ).
quoted_string1( [Code|Codes], Trimmed ) :-
	( Code > 32 ->
		% First visible character: keep it and everything after it.
		Trimmed = [Code|Codes]
	; otherwise ->
		quoted_string1( Codes, Trimmed )
	).
|  |  | ||
/* quoted_string2( +Chars, ?LayoutPlus, ?LayoutMinus, ?Plus, ?Minus ) holds
 * when the difference list Plus-Minus is the attribute-value encoding of
 * Chars with trailing layout removed.
 *
 * LayoutPlus-LayoutMinus is a difference list of layout characters that have
 * been read but not yet written. They are spliced into the output only when
 * a later visible character arrives, so a run of layout at the very end of
 * Chars is discarded.
 *
 * The layout test compares against the integer 32 (space), matching
 * quoted_string1/2, rather than evaluating a quoted string arithmetically,
 * which depends on the double_quotes flag.
 */
quoted_string2( [], _LayoutPlus, _LayoutMinus, List, List ).
quoted_string2( [Char|Chars], LayoutPlus, LayoutMinus, Plus, Minus ) :-
	( Char =< 32 ->
		% Layout: hold it back in the pending list, write nothing yet.
		Plus = Plus1,
		LayoutMinus = [Char|LayoutMinus1],
		LayoutPlus = LayoutPlus1
	; Char == 34 ->
		% Double quote: flush pending layout, then the escaped quote.
		Plus = LayoutPlus,
		escaped_quote( LayoutMinus, Plus1 ),
		LayoutPlus1 = LayoutMinus1
	; Char == 39 ->
		% Apostrophe: flush pending layout, then the apostrophe form.
		Plus = LayoutPlus,
		apos( LayoutMinus, Plus1 ),
		LayoutPlus1 = LayoutMinus1
	; Char =< 127 ->
		% Other 7-bit character: flush pending layout, then its PCDATA form.
		Plus = LayoutPlus,
		pcdata_7bit( Char, LayoutMinus, Plus1 ),
		LayoutPlus1 = LayoutMinus1
	; legal_xml_unicode( Char ) ->
		% Beyond 7 bits: flush pending layout, then a decimal character reference.
		Plus = LayoutPlus,
		number_codes( Char, Codes ),
		pcdata_8bits_plus( Codes, LayoutMinus, Plus1 ),
		LayoutPlus1 = LayoutMinus1
	; otherwise ->
		% Not a legal XML character: skip it, leaving all state unchanged.
		LayoutPlus = LayoutPlus1,
		LayoutMinus = LayoutMinus1,
		Plus = Plus1
	),
	quoted_string2( Chars, LayoutPlus1, LayoutMinus1, Plus1, Minus ).
|  |  | ||
/* indent( +Format, +Indent ) is a DCG that writes a newline (code 10)
 * followed by the codes in Indent when Format is true, and nothing when
 * Format is false.
 */
indent( false, _Indent ) --> [].
indent( true, Indent ) --> [10], chars( Indent ).
|  |  | ||
/* apos is a DCG that writes the predefined XML entity reference for an
 * apostrophe, used when an apostrophe occurs inside an attribute value.
 */
apos --> "&apos;".
|  |  | ||
/* escaped_quote is a DCG that writes the predefined XML entity reference
 * for a double quote, so that a quote inside an attribute value cannot
 * terminate the surrounding "..." delimiters.
 */
escaped_quote --> "&quot;".
|  |  | ||
/* pcdata_generation( +Chars ) is a DCG representing Chars, a list of
 * character codes, as a legal XML "Parsed character data" (PCDATA) string.
 *
 * Codes up to 127 are written by pcdata_7bit//1. Any other code accepted by
 * legal_xml_unicode/1 is written as a decimal numeric character reference,
 * e.g. code 160 (non-breaking space) becomes &#160;. Codes that the XML
 * specification disallows are silently omitted.
 */
pcdata_generation( [] ) --> [].
pcdata_generation( [Code|Codes] ) -->
	( {Code =< 127} ->
		pcdata_7bit( Code )
	; {legal_xml_unicode( Code )} ->
		{number_codes( Code, Digits )},
		pcdata_8bits_plus( Digits )
	; {otherwise} ->
		[]
	),
	pcdata_generation( Codes ).
|  |  | ||
/* pcdata_7bit( +Char ) is a DCG representing the 7-bit ASCII code Char in
 * its simplest legal PCDATA form:
 *
 *   - control codes other than tab (9), line feed (10) and carriage
 *     return (13), and DEL (127), are written as nothing;
 *   - & < and > are written as the entity references &amp; &lt; and &gt;
 *   - every other code is written as itself.
 *
 * The table has one clause per code, keyed on the first argument.
 * Codes 34, 92 and 126 are given as numeric code lists, not quoted strings.
 */
pcdata_7bit( 0 ) --> [].
pcdata_7bit( 1 ) --> [].
pcdata_7bit( 2 ) --> [].
pcdata_7bit( 3 ) --> [].
pcdata_7bit( 4 ) --> [].
pcdata_7bit( 5 ) --> [].
pcdata_7bit( 6 ) --> [].
pcdata_7bit( 7 ) --> [].
pcdata_7bit( 8 ) --> [].
pcdata_7bit( 9 ) --> [9].
pcdata_7bit( 10 ) --> [10].
pcdata_7bit( 11 ) --> [].
pcdata_7bit( 12 ) --> [].
pcdata_7bit( 13 ) --> [13].
pcdata_7bit( 14 ) --> [].
pcdata_7bit( 15 ) --> [].
pcdata_7bit( 16 ) --> [].
pcdata_7bit( 17 ) --> [].
pcdata_7bit( 18 ) --> [].
pcdata_7bit( 19 ) --> [].
pcdata_7bit( 20 ) --> [].
pcdata_7bit( 21 ) --> [].
pcdata_7bit( 22 ) --> [].
pcdata_7bit( 23 ) --> [].
pcdata_7bit( 24 ) --> [].
pcdata_7bit( 25 ) --> [].
pcdata_7bit( 26 ) --> [].
pcdata_7bit( 27 ) --> [].
pcdata_7bit( 28 ) --> [].
pcdata_7bit( 29 ) --> [].
pcdata_7bit( 30 ) --> [].
pcdata_7bit( 31 ) --> [].
pcdata_7bit( 32 ) --> " ".
pcdata_7bit( 33 ) --> "!".
pcdata_7bit( 34 ) --> [34].
pcdata_7bit( 35 ) --> "#".
pcdata_7bit( 36 ) --> "$".
pcdata_7bit( 37 ) --> "%".
pcdata_7bit( 38 ) --> "&amp;". % a bare '&' would start an entity reference.
pcdata_7bit( 39 ) --> "'".
pcdata_7bit( 40 ) --> "(".
pcdata_7bit( 41 ) --> ")".
pcdata_7bit( 42 ) --> "*".
pcdata_7bit( 43 ) --> "+".
pcdata_7bit( 44 ) --> ",".
pcdata_7bit( 45 ) --> "-".
pcdata_7bit( 46 ) --> ".".
pcdata_7bit( 47 ) --> "/".
pcdata_7bit( 48 ) --> "0".
pcdata_7bit( 49 ) --> "1".
pcdata_7bit( 50 ) --> "2".
pcdata_7bit( 51 ) --> "3".
pcdata_7bit( 52 ) --> "4".
pcdata_7bit( 53 ) --> "5".
pcdata_7bit( 54 ) --> "6".
pcdata_7bit( 55 ) --> "7".
pcdata_7bit( 56 ) --> "8".
pcdata_7bit( 57 ) --> "9".
pcdata_7bit( 58 ) --> ":".
pcdata_7bit( 59 ) --> ";".
pcdata_7bit( 60 ) --> "&lt;". % a bare '<' would start markup.
pcdata_7bit( 61 ) --> "=".
pcdata_7bit( 62 ) --> "&gt;". % escaping necessary to prevent ']]>' sequences in pcdata.
pcdata_7bit( 63 ) --> "?".
pcdata_7bit( 64 ) --> "@".
pcdata_7bit( 65 ) --> "A".
pcdata_7bit( 66 ) --> "B".
pcdata_7bit( 67 ) --> "C".
pcdata_7bit( 68 ) --> "D".
pcdata_7bit( 69 ) --> "E".
pcdata_7bit( 70 ) --> "F".
pcdata_7bit( 71 ) --> "G".
pcdata_7bit( 72 ) --> "H".
pcdata_7bit( 73 ) --> "I".
pcdata_7bit( 74 ) --> "J".
pcdata_7bit( 75 ) --> "K".
pcdata_7bit( 76 ) --> "L".
pcdata_7bit( 77 ) --> "M".
pcdata_7bit( 78 ) --> "N".
pcdata_7bit( 79 ) --> "O".
pcdata_7bit( 80 ) --> "P".
pcdata_7bit( 81 ) --> "Q".
pcdata_7bit( 82 ) --> "R".
pcdata_7bit( 83 ) --> "S".
pcdata_7bit( 84 ) --> "T".
pcdata_7bit( 85 ) --> "U".
pcdata_7bit( 86 ) --> "V".
pcdata_7bit( 87 ) --> "W".
pcdata_7bit( 88 ) --> "X".
pcdata_7bit( 89 ) --> "Y".
pcdata_7bit( 90 ) --> "Z".
pcdata_7bit( 91 ) --> "[".
pcdata_7bit( 92 ) --> [92].
pcdata_7bit( 93 ) --> "]".
pcdata_7bit( 94 ) --> "^".
pcdata_7bit( 95 ) --> "_".
pcdata_7bit( 96 ) --> "`".
pcdata_7bit( 97 ) --> "a".
pcdata_7bit( 98 ) --> "b".
pcdata_7bit( 99 ) --> "c".
pcdata_7bit( 100 ) --> "d".
pcdata_7bit( 101 ) --> "e".
pcdata_7bit( 102 ) --> "f".
pcdata_7bit( 103 ) --> "g".
pcdata_7bit( 104 ) --> "h".
pcdata_7bit( 105 ) --> "i".
pcdata_7bit( 106 ) --> "j".
pcdata_7bit( 107 ) --> "k".
pcdata_7bit( 108 ) --> "l".
pcdata_7bit( 109 ) --> "m".
pcdata_7bit( 110 ) --> "n".
pcdata_7bit( 111 ) --> "o".
pcdata_7bit( 112 ) --> "p".
pcdata_7bit( 113 ) --> "q".
pcdata_7bit( 114 ) --> "r".
pcdata_7bit( 115 ) --> "s".
pcdata_7bit( 116 ) --> "t".
pcdata_7bit( 117 ) --> "u".
pcdata_7bit( 118 ) --> "v".
pcdata_7bit( 119 ) --> "w".
pcdata_7bit( 120 ) --> "x".
pcdata_7bit( 121 ) --> "y".
pcdata_7bit( 122 ) --> "z".
pcdata_7bit( 123 ) --> "{".
pcdata_7bit( 124 ) --> "|".
pcdata_7bit( 125 ) --> "}".
pcdata_7bit( 126 ) --> [126].
pcdata_7bit( 127 ) --> [].
|  |  | ||
/* pcdata_8bits_plus( +Codes ) is a DCG that writes a numeric character
 * reference: "&#", then Codes (the digits of the character's code, as
 * produced by number_codes/2 in the callers), then ";".
 */
pcdata_8bits_plus( Digits ) -->
	"&#",
	chars( Digits ),
	";".
|  |  | ||
/* character_data_format( +Chars, +Format0, ?Format1 ) relates the XML
 * formatting status before (Format0) and after (Format1) the character
 * data Chars. Empty character data leaves the status unchanged; any
 * non-empty character data switches formatting off.
 */
character_data_format( [], Status, Status ).
character_data_format( [_|_], _Status, false ).
|  |  | ||
/* cdata_generation( +Chars ) is a DCG representing Chars, a list of
 * character codes, as the text of an XML CDATA section. Each code accepted
 * by legal_xml_unicode/1 is written unchanged; every other code is omitted.
 */
cdata_generation( [] ) --> [].
cdata_generation( [Code|Codes] ) -->
	( {legal_xml_unicode( Code )} ->
		[Code]
	; []
	),
	cdata_generation( Codes ).
|  |  | ||
/* legal_xml_unicode( +Code ) holds when Code is a character code that may
 * appear in an XML document: tab, line feed, carriage return, or a code in
 * one of the ranges 0x20-0xD7FF, 0xE000-0xFFFD or 0x10000-0x10FFFF.
 */
legal_xml_unicode( 0x9 ).
legal_xml_unicode( 0xA ).
legal_xml_unicode( 0xD ).
legal_xml_unicode( Code ) :-
	Code >= 0x20,
	Code =< 0xD7FF.
legal_xml_unicode( Code ) :-
	Code >= 0xE000,
	Code =< 0xFFFD.
legal_xml_unicode( Code ) :-
	Code >= 0x10000,
	Code =< 0x10FFFF.