/* xml_acquisition.pl : XML -> Document translation.

 *

 * Copyright (C) 2001-2005 Binding Time Limited

 * Copyright (C) 2005-2011 John Fletcher

 *

 * Current Release: $Revision: 3.4 $

 * 

 * TERMS AND CONDITIONS:

 *

 * This program is offered free of charge, as unsupported source code. You may

 * use it, copy it, distribute it, modify it or sell it without restriction,

 * but entirely at your own risk.

 */



:- ensure_loaded( xml_utilities ).



/* xml_to_document( +Controls, +XML, ?Document ) parses XML, a list of
 * character codes, and unifies Document with the resulting Prolog term.
 * Controls is passed to initial_context/2 to build the starting parse
 * context. If XML begins with an XML declaration, its attributes become
 * the document attributes; otherwise the attribute list is empty and the
 * whole input is treated as content. Document is xml(Attributes, Terms)
 * when parsing reports well-formed input and malformed(Attributes, Terms)
 * when it does not.
 */
xml_to_document( Controls, XML, Document ) :-
	initial_context( Controls, Context ),
	( xml_declaration( Attributes, XML, Body ) ->
		true
	; Attributes = [],
	  Body = XML
	),
	xml_to_document( Body, Context, Content, [], Status ),
	xml_to_document1( Status, Attributes, Content, Document ).



/* xml_to_document1( +WellFormed, +Attributes, +Terms, ?Document ) wraps the
 * parsed content in xml/2 when WellFormed is true and in malformed/2 when
 * it is false.
 */
xml_to_document1( true,  Prolog, Content, xml(Prolog, Content) ).
xml_to_document1( false, Prolog, Content, malformed(Prolog, Content) ).



/* unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed ) abandons
 * parsing: the remaining input Unparsed is returned verbatim as the single
 * term unparsed(Unparsed), nothing is left as Residue, and the document is
 * flagged as not well-formed. Context is ignored.
 */
unparsed( Text, _Context, Terms, Residue, WellFormed ) :-
	Terms = [unparsed(Text)],
	Residue = [],
	WellFormed = false.



/* xml_declaration( ?Attributes ) parses an optional run of layout followed
 * by <?xml ... ?>, returning the declaration's Name=Value pairs.
 */
xml_declaration( DeclAttributes ) -->
	spaces, "<?", nmtoken( xml ),
	xml_declaration_attributes( DeclAttributes ),
	spaces, "?>".



/* xml_to_document( +Chars, +Context, ?Terms, ?Residue, ?WF ) is the main
 * content loop. It inspects the next character code and hands over to the
 * routine for markup ("<"), references ("&"), layout, or character data.
 * At end of input close_context/3 decides the remaining Terms and WF.
 */
xml_to_document( [], Context, Terms, [], WF ) :-
	close_context( Context, Terms, WF ).
xml_to_document( [Code|Codes], Context, Terms, Residue, WF ) :-
	( Code =:= "<" ->
		xml_markup_structure( Codes, Context, Terms, Residue, WF )
	; Code =:= "&" ->
		entity_reference( Codes, Context, Terms, Residue, WF )
	; Code =< " ", \+ space_preserve( Context ) ->
		% Layout is collected in a difference list until we know
		% whether text follows it.
		layouts( Codes, Context, [Code|Hole], Hole, Terms, Residue, WF )
	; void_context( Context ) ->
		unparsed( [Code|Codes], Context, Terms, Residue, WF )
	; Terms = [pcdata([Code|Text])|Terms1],
	  acquire_pcdata( Codes, Context, Text, Terms1, Residue, WF )
	).



/* layouts( +Chars, +Context, +Plus, ?Minus, ?Terms, ?Residue, ?WF ) skips
 * layout characters while remembering them in the difference list
 * Plus-Minus. If markup or end of input follows, the layout is discarded;
 * if ordinary text follows, the layout becomes the start of a pcdata/1
 * term and space_preserve is switched on for the rest of that text.
 */
layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :-
	close_context( Context, Terms, WF ).
layouts( [Code|Codes], Context, Plus, Minus, Terms, Residue, WF ) :-
	( Code =:= "<" ->
		xml_markup_structure( Codes, Context, Terms, Residue, WF )
	; Code =:= "&" ->
		reference_in_layout( Codes, Context, Plus, Minus, Terms, Residue, WF )
	; Code =< " " ->
		Minus = [Code|Hole],
		layouts( Codes, Context, Plus, Hole, Terms, Residue, WF )
	; void_context( Context ) ->
		unparsed( [Code|Codes], Context, Terms, Residue, WF )
	; Terms = [pcdata(Plus)|Terms1],
	  Minus = [Code|Text],
	  context_update( space_preserve, Context, true, Context1 ),
	  acquire_pcdata( Codes, Context1, Text, Terms1, Residue, WF )
	).



/* acquire_pcdata( +Chars, +Context, ?Text, ?Terms, ?Residue, ?WF ) copies
 * character data into Text until markup ("<") or the end of input closes
 * it; "&" is delegated to reference_in_pcdata/6.
 */
acquire_pcdata( [], Context, [], Terms, [], WF ) :-
	close_context( Context, Terms, WF ).
acquire_pcdata( [Code|Codes], Context, Text, Terms, Residue, WF ) :-
	( Code =:= "<" ->
		Text = [],
		xml_markup_structure( Codes, Context, Terms, Residue, WF )
	; Code =:= "&" ->
		reference_in_pcdata( Codes, Context, Text, Terms, Residue, WF )
	; Text = [Code|Text1],
	  acquire_pcdata( Codes, Context, Text1, Terms, Residue, WF )
	).



/* xml_markup_structure( +Chars, +Context, ?Terms, ?Residue, ?WF ) is
 * entered just after a "<" has been consumed. It recognises closing tags
 * ("/"), processing instructions ("?"), declarations ("!") and opening
 * tags; anything else, including a "<" at the very end of the input, is
 * returned as unparsed text with the "<" restored.
 */
xml_markup_structure( [], Context, Terms, Residue, WF ) :-
	unparsed( "<", Context, Terms, Residue, WF ).
xml_markup_structure( Chars, Context, Terms, Residue, WF ) :-
	Chars = [Code|Codes],
	( Code =:= "/" ->
		closing_tag( Context, Codes, Terms, Residue, WF )
	; Code =:= "?" ->
		pi_acquisition( Codes, Context, Terms, Residue, WF )
	; Code =:= "!" ->
		declaration_acquisition( Codes, Context, Terms, Residue, WF )
	; open_tag( Tag, Context, Attributes, Type, Chars, AfterTag ) ->
		push_tag( Tag, AfterTag, Context, Attributes, Type, Terms, Residue, WF )
	; unparsed( [0'<|Chars], Context, Terms, Residue, WF ) %'
	).



/* push_tag( +Tag, +Chars, +Context, +Attributes, +Type, ?Terms, ?Residue, ?WF )
 * builds the element for an opening tag via new_element/8 and then lets
 * push_tag1/7 decide whether parsing of the siblings continues.
 */
push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :-
	new_element( Tag, Chars, Context, Attributes, Type, Element, After, ElementWF ),
	push_tag1( ElementWF, Context, Element, After, Terms, Residue, WF ).



/* push_tag1( +ElementWF, +Context, +Term, +Chars, ?Terms, ?Residue, ?WF )
 * continues parsing after an element. A well-formed element is followed by
 * whatever xml_to_document/5 finds in Chars; a malformed one ends the
 * list, leaving Chars as the residue and WF false.
 */
push_tag1( true, Context, Element, Chars, [Element|Siblings], Residue, WF ) :-
	xml_to_document( Chars, Context, Siblings, Residue, WF ).
push_tag1( false, _Context, Element, Chars, [Element], Chars, false ).



/* new_element( +TagChars, +Chars, +Context, +Attributes0, +Type, ?Term, ?Residue, ?WF )
 * constructs the term for an element whose opening tag has been read.
 * Namespace declarations among the attributes update the context first.
 * A tag with a prefix known to specific_namespace/3 takes that namespace;
 * otherwise the whole tag is the name and the default namespace applies.
 * When the element's namespace differs from the current one, the element
 * is wrapped in namespace/3 and the context's current namespace changes
 * for its contents. close_tag/6 then supplies the Contents.
 */
new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :-
	namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
	( append( Prefix, [0':|LocalChars], TagChars ), %'
	  specific_namespace( Prefix, Context1, Namespace ) ->
		true
	; Prefix = "",
	  LocalChars = TagChars,
	  default_namespace( Context1, Namespace )
	),
	current_namespace( Context1, Enclosing ),
	( Namespace == Enclosing ->
		Term = element(Tag, Attributes, Contents),
		Context2 = Context1
	; Term = namespace( Namespace, Prefix, element(Tag, Attributes, Contents) ),
	  context_update( current_namespace, Context1, Namespace, Context2 )
	),
	input_attributes( Attributes1, Context2, Attributes ),
	atom_codes( Tag, LocalChars ),
	close_tag( Type, Chars, Context2, Contents, Residue, WF ).



/* close_tag( +Type, +Chars, +Context, ?Contents, ?Residue, ?WF ) yields the
 * contents of an element. An empty element (<tag/>) has no contents and
 * consumes nothing; for push(Tag) the context records Tag as the open
 * element and the contents are parsed by xml_to_document/5.
 */
close_tag( empty, Residue, _Context, [], Residue, true ).
close_tag( push(Tag), Chars, Context, Contents, Residue, WF ) :-
	context_update( element, Context, Tag, Inner ),
	xml_to_document( Chars, Inner, Contents, Residue, WF ).



/* pi_acquisition( +Chars, +Context, ?Terms, ?Residue, ?WellFormed ) is
 * entered after "<?". A processing instruction whose target is not xml
 * becomes instructions(Target, Processing); anything else is returned as
 * unparsed text with the "<?" restored.
 */
pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
	( inline_instruction( Target, Processing, Chars, Rest ),
	  Target \== xml ->
		Terms = [instructions(Target, Processing)|Terms1],
		xml_to_document( Rest, Context, Terms1, Residue, WellFormed )
	; unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed )
	).



/* declaration_acquisition( +Chars, +Context, ?Terms, ?Residue, ?WF ) is
 * entered after "<!". The declaration is classified by declaration_type/3
 * and parsed by declaration_parse//4, which may also update the context.
 * On failure the input is returned as unparsed text with "<!" restored.
 */
declaration_acquisition( Chars, Context, Terms, Residue, WF ) :-
	( declaration_type( Chars, Type, Body ),
	  declaration_parse( Type, Context, Term, Context1, Body, Rest ) ->
		Terms = [Term|Terms1],
		xml_to_document( Rest, Context1, Terms1, Residue, WF )
	; unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WF )
	).



/* open_tag( ?Tag, +Namespaces, ?Attributes, ?Termination ) parses the
 * remainder of an opening tag: its name (as codes), its attributes, and
 * the terminator, which decides whether the element is empty or opens
 * a new level.
 */
open_tag( Tag, Namespaces, Attributes, Termination ) -->
	nmtoken_chars( Tag ),
	attributes( Attributes, [], Namespaces ),
	spaces,
	open_tag_terminator( Tag, Termination ).



/* open_tag_terminator( +Tag, ?Termination ) parses the end of an opening
 * tag: ">" gives push(Tag), "/>" gives empty.
 */
open_tag_terminator( Tag, push(Tag) ) --> ">".
open_tag_terminator( _Tag, empty ) --> "/>".



/* declaration_parse( +Type, +Namespaces0, ?Term, ?Namespaces ) parses the
 * body of a "<!" declaration of the given Type. Comments and CDATA
 * sections leave the context unchanged; a DOCTYPE may change it and must
 * be closed by ">".
 */
declaration_parse( comment, Context, comment(Text), Context ) -->
	comment( Text ).
declaration_parse( cdata, Context, cdata(Text), Context ) -->
	cdata( Text ).
declaration_parse( doctype, Context0, doctype(Name, Names), Context ) -->
	doctype( Name, Names, Context0, Context ),
	spaces, ">".



/* inline_instruction( ?Target, ?Processing, +Plus, ?Minus ) parses a
 * processing instruction body from Plus: a name token Target, optional
 * layout, then everything up to the first "?>" as Processing. Minus is
 * the input after "?>". Only the first (shortest) match is returned.
 */
inline_instruction( Target, Processing, Plus, Minus  ) :-
	nmtoken( Target, Plus, AfterTarget ),
	spaces( AfterTarget, Body ),
	append( Processing, [0'?,0'>|Minus], Body ),
	!.



/* entity_reference_name( ?Reference ) parses the name of an entity
 * reference (as codes) followed by its terminating ";".
 */
entity_reference_name( Name ) -->
	nmtoken_chars( Name ), ";".



/* declaration_type( +Chars, ?Class, ?Rest ) classifies the text following
 * "<!" from its first two characters. Recognised openers give comment,
 * cdata or doctype with Rest being the text after the opener; anything
 * else is generic and Rest is the whole of Chars. Fails if Chars has
 * fewer than two characters.
 */
declaration_type( [First,Second|Tail], Class, Rest ) :-
	( declaration_type1( First, Second, Tail, Known, After ) ->
		Class = Known,
		Rest = After
	; Class = generic,
	  Rest = [First,Second|Tail]
	).



/* declaration_type1( +Char1, +Char2, +Chars, ?Class, ?Residue ) recognises
 * the openers "--" (comment), "[CDATA[" (cdata) and "DOCTYPE" (doctype),
 * given the first two characters separately from the rest. Residue is
 * the input following the opener.
 */
declaration_type1( 0'-, 0'-, Residue, comment, Residue ).
declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :-
	append( "DATA[", Residue, Chars ).
declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :-
	append( "CTYPE", Residue, Chars ).



/* closing_tag( +Context, +Chars, ?Terms, ?Residue, ?WellFormed ) is entered
 * after "</". If the tag name matches the element currently open in
 * Context, the element's content list ends here and the input after the
 * tag is handed back as Residue. Any other closing tag is returned as
 * unparsed text with "</" restored.
 */
closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
	( closing_tag_name( Tag, Chars, Rest ),
	  current_tag( Context, Tag ) ->
		Terms = [],
		Residue = Rest,
		WellFormed = true
	; unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed )
	).



/* closing_tag_name( ?Tag ) parses the name of a closing tag (as codes),
 * optional layout, and the final ">".
 */
closing_tag_name( Tag ) -->
	nmtoken_chars( Tag ), spaces, ">".



/* entity_reference( +Chars, +Context, ?Terms, ?Residue, ?WF ) handles a "&"
 * met at the start of content. It is the layout case with no layout
 * collected so far, i.e. an empty difference list.
 */
entity_reference( Chars, Context, Terms, Residue, WF ) :-
	reference_in_layout( Chars, Context, Empty, Empty, Terms, Residue, WF ).



/* reference_in_layout( +Chars, +Context, +Plus, ?Minus, ?Terms, ?Residue, ?WF )
 * handles the text after a "&" that follows only layout (held in the
 * difference list Plus-Minus).
 *  - A character entity starts pcdata consisting of the layout plus the
 *    decoded character.
 *  - A defined entity is expanded in place and parsing restarts on the
 *    expansion; the collected layout is dropped.
 *  - Otherwise, if the context allows a bare ampersand, "&" is kept as
 *    text; if not, the rest of the input is unparsed.
 */
reference_in_layout( Chars, Context, Plus, Minus, Terms, Residue, WF ) :-
	( standard_character_entity( Code, Chars, Rest ) ->
		Minus = [Code|Text],
		Terms = [pcdata(Plus)|Terms1],
		acquire_pcdata( Rest, Context, Text, Terms1, Residue, WF )
	; entity_reference_name( Name, Chars, Rest ),
	  defined_entity( Name, Context, Expansion ) ->
		append( Expansion, Rest, Expanded ),
		xml_to_document( Expanded, Context, Terms, Residue, WF )
	; allow_ampersand( Context ) ->
		Minus = [0'&|Text], %'
		Terms = [pcdata(Plus)|Terms1],
		acquire_pcdata( Chars, Context, Text, Terms1, Residue, WF )
	; unparsed( [0'&|Chars], Context, Terms, Residue, WF ) %'
	).



/* reference_in_pcdata( +Chars0, +Context, ?Chars1, ?Terms, ?Residue, ?WF )
 * handles the text after a "&" met inside character data. Chars1 is the
 * open tail of the text being collected.
 *  - A character entity contributes its decoded character.
 *  - A defined entity is expanded and scanning continues in the expansion.
 *  - Otherwise, if the context allows a bare ampersand, "&" is kept;
 *    if not, the text is closed and the rest of the input is unparsed.
 */
reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :-
	( standard_character_entity( Code, Chars0, Rest ) ->
		Chars1 = [Code|Text],
		acquire_pcdata( Rest, Context, Text, Terms, Residue, WF )
	; entity_reference_name( Name, Chars0, Rest ),
	  defined_entity( Name, Context, Expansion ) ->
		append( Expansion, Rest, Expanded ),
		acquire_pcdata( Expanded, Context, Chars1, Terms, Residue, WF )
	; allow_ampersand( Context ) ->
		Chars1 = [0'&|Text],
		acquire_pcdata( Chars0, Context, Text, Terms, Residue, WF )
	; Chars1 = [],
	  unparsed( [0'&|Chars0], Context, Terms, Residue, WF )
	).



/* namespace_attributes( +Attributes0, +Context0, ?Context, ?Attributes )
 * applies the namespace-related attributes of an element to the context,
 * handling one attribute per step:
 *  - xmlns="URI" sets the default namespace and is removed from the list;
 *  - xmlns:Prefix="URI" records the prefix and is kept in the list;
 *  - xml:space="preserve" switches space_preserve on (attributes are left
 *    as they are and processing stops there).
 * With none of these present, context and attributes are unchanged.
 */
namespace_attributes( [], Context, Context, [] ).
namespace_attributes( Attributes0, Context0, Context, Attributes ) :-
	Attributes0 = [_|_],
	append( "xmlns:", Prefix, PrefixedName ),
	( select( "xmlns"=Value, Attributes0, Others ) ->
		atom_codes( URI, Value ),
		context_update( default_namespace, Context0, URI, Context1 ),
		namespace_attributes( Others, Context1, Context, Attributes )
	; select( PrefixedName=Value, Attributes0, Others ) ->
		Attributes = [PrefixedName=Value|Attributes1],
		atom_codes( URI, Value ),
		context_update( ns_prefix(Prefix), Context0, URI, Context1 ),
		namespace_attributes( Others, Context1, Context, Attributes1 )
	; member( "xml:space"="preserve", Attributes0 ) ->
		Attributes = Attributes0,
		context_update( space_preserve, Context0, true, Context )
	; Context = Context0,
	  Attributes = Attributes0
	).



/* input_attributes( +Attributes0, +Context, ?Attributes ) converts each
 * attribute name from a code list to an atom. When the context asks for
 * prefixes to be removed, a prefix (other than xmlns) that denotes the
 * current namespace is stripped from the name first.
 */
input_attributes( [], _Context, [] ).
input_attributes( [NameChars=Value|Rest0], Context, [Name=Value|Rest] ) :-
	( remove_attribute_prefixes( Context ),
	  append( Prefix, [0':|LocalChars], NameChars ), %'
	  Prefix \== "xmlns",
	  specific_namespace( Prefix, Context, Namespace ),
	  current_namespace( Context, Namespace ) ->
		atom_codes( Name, LocalChars )
	; atom_codes( Name, NameChars )
	),
	input_attributes( Rest0, Context, Rest ).



/* attributes( ?Attributes, +Seen, +Namespaces ) parses zero or more
 * Name="Value" pairs inside a tag. Seen lists the names already parsed
 * in this tag; a repeated name is rejected, which ends the attribute
 * list at that point.
 */
attributes( [Name=Value|More], Seen, Namespaces ) -->
	spaces,
	nmtoken_chars( Name ),
	{\+ member(Name, Seen)},
	spaces, "=", spaces,
	attribute_value( Value, Namespaces ),
	attributes( More, [Name|Seen], Namespaces ).
attributes( [], _Seen, _Namespaces ) --> "".



/* xml_declaration_attributes( ?Attributes ) parses the Name="Value" pairs
 * of the XML declaration. Each pair must be accepted by
 * xml_declaration_attribute_valid/2. The empty list is tried first, so
 * longer parses are reached on backtracking.
 */
xml_declaration_attributes( [] ) --> "".
xml_declaration_attributes( [Name=Value|More] ) -->
	spaces,
	nmtoken( Name ),
	spaces, "=", spaces,
	xml_string( Value ),
	{xml_declaration_attribute_valid(Name, Value)},
	xml_declaration_attributes( More ),
	spaces.



/* doctype( ?Name, ?External, +Namespaces0, ?Namespaces1 ) parses the body
 * of a DOCTYPE declaration: the document type name, an optional
 * SYSTEM/PUBLIC identifier and an optional internal subset. Literals
 * found in the internal subset are attached to the identifier term by
 * doctype_extension/3.
 */
doctype( Name, External, Namespaces0, Namespaces1 ) -->
	spaces,
	nmtoken( Name ),
	spaces,
	doctype_id( Id ),
	spaces,
	doctype1( Namespaces0, Literals, Namespaces1 ),
	{doctype_extension(Literals, Id, External)}.



/* doctype_extension( +Literals, +External0, ?External ) leaves the
 * identifier term unchanged when there are no DTD literals and otherwise
 * extends it with them via extended_doctype/3.
 */
doctype_extension( [], External, External ).
doctype_extension( [First|Others], External0, External ) :-
	extended_doctype( External0, [First|Others], External ).



/* extended_doctype( +External0, +Literals, ?External ) adds the list of
 * DTD literals as an extra, final argument of the identifier term.
 */
extended_doctype( system(URL),     DTD, system(URL, DTD) ).
extended_doctype( public(URN,URL), DTD, public(URN, URL, DTD) ).
extended_doctype( local,           DTD, local(DTD) ).



/* doctype1( +Namespaces0, ?Literals, ?Namespaces1 ) parses the optional
 * internal subset "[ ... ]" of a DOCTYPE. Once "[" has been seen the
 * subset must parse; without it there are no literals and the context
 * is unchanged.
 */
doctype1( Namespaces0, Literals, Namespaces1 ) -->
	"[", !,
	dtd( Namespaces0, Literals, Namespaces1 ),
	"]".
doctype1( Namespaces, [], Namespaces ) --> "".



/* doctype_id( ?Id ) parses the external identifier of a DOCTYPE:
 * SYSTEM "url" gives system(URL), PUBLIC "urn" "url" gives
 * public(URN,URL), and no identifier at all gives local.
 */
doctype_id( system(URL) ) -->
	"SYSTEM", spaces, uri( URL ).
doctype_id( public(URN,URL) ) -->
	"PUBLIC", spaces, uri( URN ), spaces, uri( URL ).
doctype_id( local ) --> "".



/* dtd( +Namespaces0, ?Literals, ?Namespaces1 ) parses the declarations of
 * a DOCTYPE internal subset.
 *  - <!ENTITY name "value"> adds the entity to the context;
 *  - <!-- ... --> comments are skipped;
 *  - any other <! ... > declaration is returned as dtd_literal(Chars).
 * Each clause commits once its opening keyword has been recognised.
 */
dtd( Namespaces0, Literals, Namespaces1 ) -->
	spaces,
	"<!ENTITY",
	!,
	spaces,
	nmtoken_chars( Name ),
	spaces,
	quote( Quote ),
	entity_value( Quote, Namespaces0, Value ),
	spaces,
	">",
	{ % Don't allow &lt; &quot; etc. to be redefined: a predefined
	  % entity is only accepted with its standard one-character value.
	  ( \+ character_entity( Name, Standard )
	  ; Value = [Standard], character_entity( Name, Standard )
	  ),
	  context_update( entity(Name), Namespaces0, Value, Namespaces2 )
	},
	dtd( Namespaces2, Literals, Namespaces1 ).
dtd( Namespaces0, Literals, Namespaces1 ) -->
	spaces,
	"<!--",
	!,
	dtd_comment,
	">",
	dtd( Namespaces0, Literals, Namespaces1 ).
dtd( Namespaces0, [dtd_literal(Literal)|Literals], Namespaces1 ) -->
	spaces,
	"<!",
	!,
	dtd_literal( Literal ),
	dtd( Namespaces0, Literals, Namespaces1 ).
dtd( Namespaces, [], Namespaces ) --> spaces.



/* dtd_literal( ?Chars ) collects the text of a DTD declaration up to its
 * closing ">", which is consumed but not included. Comments embedded
 * between "--" pairs are skipped and do not appear in Chars.
 */
dtd_literal( [] ) --> ">", !.
dtd_literal( Text ) -->
	"--", !,
	dtd_comment,
	dtd_literal( Text ).
dtd_literal( [Code|Text] ) -->
	[Code],
	dtd_literal( Text ).



/* dtd_comment( +Plus, ?Minus ) skips input up to and including the first
 * "--"; Minus is what follows it. Usable as a DCG non-terminal.
 */
dtd_comment( Input, AfterDashes ) :-
	append( _Skipped, [0'-,0'-|AfterDashes], Input ),
	!.



/* nmtokens( ?Names ) parses a sequence of name tokens, each optionally
 * preceded by layout, returning them as atoms.
 */
nmtokens( [First|Others] ) -->
	spaces,
	nmtoken( First ),
	nmtokens( Others ).
nmtokens( [] ) --> [].



/* entity_value( +Quote, +Namespaces, ?String, +Plus, ?Minus ) reads the
 * replacement text of an <!ENTITY> declaration up to the closing Quote.
 * References inside the value are handled by reference_in_entity/5.
 * Fails if the input ends before the closing quote.
 */
entity_value( Quote, Namespaces, String, [Code|Plus], Minus ) :-
	( Code == Quote ->
		String = [],
		Minus = Plus
	; Code =:= "&" ->
		reference_in_entity( Namespaces, Quote, String, Plus, Minus )
	; String = [Code|String1],
	  entity_value( Quote, Namespaces, String1, Plus, Minus )
	).



/* attribute_value( ?String, +Namespaces ) parses a quoted attribute value.
 * Whichever quote character opens the value must also close it.
 */
attribute_value( Value, Namespaces ) -->
	quote( Quote ),
	attribute_leading_layouts( Quote, Namespaces, Value ).



/* attribute_leading_layouts( +Quote, +Namespaces, ?String, +Plus, ?Minus )
 * reads an attribute value while no significant character has been seen
 * yet: layout (codes up to 32, and 160) is dropped. The first significant
 * character switches to attribute_layouts/6. The value ends at Quote, or
 * at the end of the input.
 */
attribute_leading_layouts( _Quote, _Namespace, [], [], [] ).
attribute_leading_layouts( Quote, Namespaces, String, [Code|Plus], Minus ) :-
	( Code == Quote ->
		String = [],
		Minus = Plus
	; Code =:= "&" ->
		ref_in_attribute_layout( Namespaces, Quote, String, Plus, Minus )
	; Code > 32, Code \== 160 ->
		String = [Code|String1],
		attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus )
	; attribute_leading_layouts( Quote, Namespaces, String, Plus, Minus )
	).



/* attribute_layouts( +Quote, +Namespaces, +Layout, ?String, +Plus, ?Minus )
 * reads the rest of an attribute value, normalising layout: a run of
 * layout characters (codes up to 32, and 160) between significant
 * characters becomes one space, and trailing layout is dropped. Layout
 * is true while a pending run of layout has been seen.
 */
attribute_layouts( _Quote, _Namespaces, _Layout, [], [], [] ).
attribute_layouts( Quote, Namespaces, Layout, String, [Code|Plus], Minus ) :-
	( Code == Quote ->
		String = [],
		Minus = Plus
	; Code =:= "&" ->
		reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus )
	; Code > 32, Code \== 160 ->
		( Layout == true ->
			String = [0' ,Code|String1] %'
		; String = [Code|String1]
		),
		attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus )
	; attribute_layouts( Quote, Namespaces, true, String, Plus, Minus )
	).



/* ref_in_attribute_layout( +NS, +Quote, ?String, +Plus, ?Minus ) handles
 * the text after a "&" met before any significant character of an
 * attribute value. A character entity contributes its character; a
 * defined entity is expanded and scanned as if still at the start of the
 * value; any other "&" is kept literally.
 */
ref_in_attribute_layout( NS, Quote, String, Plus, Minus ) :-
	( standard_character_entity( Code, Plus, Mid ) ->
		String = [Code|String1],
		attribute_layouts( Quote, NS, false, String1, Mid, Minus )
	; entity_reference_name( Name, Plus, Suffix ),
	  defined_entity( Name, NS, Expansion ) ->
		append( Expansion, Suffix, Mid ),
		attribute_leading_layouts( Quote, NS, String, Mid, Minus )
	; % Just & is okay in a value
	  String = [0'&|String1], %'
	  attribute_layouts( Quote, NS, false, String1, Plus, Minus )
	).



/* reference_in_value( +Namespaces, +Quote, +Layout, ?String, +Plus, ?Minus )
 * handles the text after a "&" met inside an attribute value. A character
 * entity contributes its character (preceded by one space if layout was
 * pending); a defined entity is expanded into the input with the pending
 * layout state carried over; any other "&" is kept literally.
 */
reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :-
	( standard_character_entity( Code, Plus, Mid ) ->
		( Layout == true ->
			String = [0' ,Code|String1] %'
		; String = [Code|String1]
		),
		Layout1 = false
	; entity_reference_name( Name, Plus, Suffix ),
	  defined_entity( Name, Namespaces, Expansion ) ->
		String = String1,
		append( Expansion, Suffix, Mid ),
		Layout1 = Layout
	; % Just & is okay in a value
	  Mid = Plus,
	  String = [0'&|String1], %'
	  Layout1 = false
	),
	attribute_layouts( Quote, Namespaces, Layout1, String1, Mid, Minus ).



/* reference_in_entity( +Namespaces, +Quote, ?String, +Plus, ?Minus )
 * handles the text after a "&" met inside an entity definition.
 * References are resolved backwards in entity definitions, i.e. against
 * the entities in Namespaces at the point of definition, so that
 * circularity is avoided. A character entity is left unexpanded (the
 * "&" is copied and the reference text follows as ordinary characters);
 * a defined entity is replaced by its text. Any other reference makes
 * the predicate fail.
 */
reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
	( standard_character_entity( _Code, Plus, _Rest ) ->
		String = [0'&|Tail], % ' Character entities are unparsed
		Mid = Plus
	; entity_reference_name( Name, Plus, Suffix ),
	  defined_entity( Name, Namespaces, Expansion ) ->
		String = Tail,
		append( Expansion, Suffix, Mid )
	),
	entity_value( Quote, Namespaces, Tail, Mid, Minus ).



/* standard_character_entity( ?Char ) parses the text following "&" of a
 * character reference and returns the character code:
 *  - "#x" hex digits ";"   (hexadecimal reference),
 *  - "#" decimal digits ";" (decimal reference),
 *  - name ";" where name is one of the entities in character_entity/2.
 * NOTE(review): number_chars/2 is given a list of character codes here;
 * this presumably relies on the host Prolog accepting codes - confirm
 * when porting.
 */
standard_character_entity( Code ) -->
	"#x", hex_character_reference( Code ), ";".
standard_character_entity( Code ) -->
	"#", digit( First ), digits( More ), ";",
	{number_chars( Code, [First|More])}.
standard_character_entity( Code ) -->
	chars( Name ),
	";",
	!,
	{character_entity(Name, Code)}.



/* uri( ?URI ) parses a quoted string and returns the codes between the
 * quotes. The closing quote must be the same character as the opening one.
 */
uri( Text ) -->
	quote( Quote ),
	uri1( Quote, Text ).



/* uri1( +Quote, ?Chars ) collects codes up to the first occurrence of
 * Quote, which is consumed but not included in Chars.
 */
uri1( Quote, [] ) -->
	quote( Quote ), !.
uri1( Quote, [Code|Codes] ) -->
	[Code],
	uri1( Quote, Codes ).



/* comment( ?Chars, +Plus, ?Minus ) splits Plus at the first "-->": Chars
 * is the comment text before it and Minus the input after it.
 */
comment( Text, Input, Rest ) :-
	append( Text, [0'-,0'-,0'>|Rest], Input ), %'
	!.



/* cdata( ?Chars, +Plus, ?Minus ) splits Plus at the first "]]>": Chars is
 * the CDATA text before it and Minus the input after it.
 */
cdata( Text, Input, Rest ) :-
	append( Text, [0'],0'],0'>|Rest], Input ), %'
	!.

% Syntax Components



/* hex_character_reference( ?Code ) parses one or more hexadecimal digits
 * (either case) and returns their value as the character code.
 *
 * At least one digit is required, as in the XML CharRef production
 * ('&#x' [0-9a-fA-F]+ ';'). Previously an empty digit sequence was
 * accepted, so the malformed reference "&#x;" was decoded as the
 * character with code 0.
 */
hex_character_reference( Code ) -->
	hex_digit_char( First ),
	hex_character_reference1( First, Code ).



/* hex_character_reference1( +Current, ?Code ) consumes as many hexadecimal
 * digits as are available, folding each into the value accumulated so
 * far (Current); Code is the final value.
 */
hex_character_reference1( Accumulated, Code ) -->
	hex_digit_char( Digit ),
	!,
	{Next is (Accumulated << 4) + Digit},
	hex_character_reference1( Next, Code ).
hex_character_reference1( Code, Code ) --> "".



/* hex_digit_char( ?Value ) parses a single hexadecimal digit and returns
 * its numeric value (0..15). Letters are accepted in either case.
 */
% Decimal digits
hex_digit_char(  0 ) --> "0".
hex_digit_char(  1 ) --> "1".
hex_digit_char(  2 ) --> "2".
hex_digit_char(  3 ) --> "3".
hex_digit_char(  4 ) --> "4".
hex_digit_char(  5 ) --> "5".
hex_digit_char(  6 ) --> "6".
hex_digit_char(  7 ) --> "7".
hex_digit_char(  8 ) --> "8".
hex_digit_char(  9 ) --> "9".
% Upper-case letters
hex_digit_char( 10 ) --> "A".
hex_digit_char( 11 ) --> "B".
hex_digit_char( 12 ) --> "C".
hex_digit_char( 13 ) --> "D".
hex_digit_char( 14 ) --> "E".
hex_digit_char( 15 ) --> "F".
% Lower-case letters
hex_digit_char( 10 ) --> "a".
hex_digit_char( 11 ) --> "b".
hex_digit_char( 12 ) --> "c".
hex_digit_char( 13 ) --> "d".
hex_digit_char( 14 ) --> "e".
hex_digit_char( 15 ) --> "f".



/* quote( ?Quote ) parses a single double-quote or single-quote character
 * and returns its character code.
 */
quote( 0'" ) --> """". %'
quote( 0'' ) --> "'".



/* spaces( +Chars0, ?Chars1 ) skips leading layout: every code up to and
 * including 32 at the front of Chars0 is dropped and Chars1 is the rest.
 * It always succeeds, so it can be used as an optional-layout DCG
 * non-terminal.
 */
spaces( [], [] ).
spaces( [Code|Codes], Rest ) :-
	( Code > 32 ->
		Rest = [Code|Codes]
	; spaces( Codes, Rest )
	).



/* nmtoken( ?Name ) parses a name token and returns it as an atom. */
nmtoken( Name ) -->
	nmtoken_chars( Codes ),
	{atom_codes(Name, Codes)}.



/* nmtoken_chars( ?Chars ) parses a name token as a code list: one
 * character accepted by nmtoken_first/1 followed by the longest run of
 * characters accepted by nmtoken_char/1.
 */
nmtoken_chars( [First|Others] ) -->
	[First],
	{nmtoken_first( First )},
	nmtoken_chars_tail( Others ).



/* nmtoken_chars_tail( ?Chars ) consumes the longest possible run of name
 * characters; the cut prevents backtracking into a shorter run.
 */
nmtoken_chars_tail( [Code|Codes] ) -->
	[Code],
	{nmtoken_char(Code)},
	!,
	nmtoken_chars_tail( Codes ).
nmtoken_chars_tail([]) --> "".



/* nmtoken_first( ?Char ) holds for the codes that may start a name token:
 * colon, underscore and the letters listed in alphabet/1.
 */
nmtoken_first( 0': ).
nmtoken_first( 0'_ ).
nmtoken_first( Code ) :-
	alphabet( Code ).



/* nmtoken_char( ?Char ) holds for the codes that may appear after the
 * first character of a name token: the ASCII letters and digits, and the
 * punctuation characters full stop, hyphen, underscore and colon.
 */
% Lower-case letters
nmtoken_char( 0'a ). nmtoken_char( 0'b ). nmtoken_char( 0'c ). nmtoken_char( 0'd ).
nmtoken_char( 0'e ). nmtoken_char( 0'f ). nmtoken_char( 0'g ). nmtoken_char( 0'h ).
nmtoken_char( 0'i ). nmtoken_char( 0'j ). nmtoken_char( 0'k ). nmtoken_char( 0'l ).
nmtoken_char( 0'm ). nmtoken_char( 0'n ). nmtoken_char( 0'o ). nmtoken_char( 0'p ).
nmtoken_char( 0'q ). nmtoken_char( 0'r ). nmtoken_char( 0's ). nmtoken_char( 0't ).
nmtoken_char( 0'u ). nmtoken_char( 0'v ). nmtoken_char( 0'w ). nmtoken_char( 0'x ).
nmtoken_char( 0'y ). nmtoken_char( 0'z ).
% Upper-case letters
nmtoken_char( 0'A ). nmtoken_char( 0'B ). nmtoken_char( 0'C ). nmtoken_char( 0'D ).
nmtoken_char( 0'E ). nmtoken_char( 0'F ). nmtoken_char( 0'G ). nmtoken_char( 0'H ).
nmtoken_char( 0'I ). nmtoken_char( 0'J ). nmtoken_char( 0'K ). nmtoken_char( 0'L ).
nmtoken_char( 0'M ). nmtoken_char( 0'N ). nmtoken_char( 0'O ). nmtoken_char( 0'P ).
nmtoken_char( 0'Q ). nmtoken_char( 0'R ). nmtoken_char( 0'S ). nmtoken_char( 0'T ).
nmtoken_char( 0'U ). nmtoken_char( 0'V ). nmtoken_char( 0'W ). nmtoken_char( 0'X ).
nmtoken_char( 0'Y ). nmtoken_char( 0'Z ).
% Digits
nmtoken_char( 0'0 ). nmtoken_char( 0'1 ). nmtoken_char( 0'2 ). nmtoken_char( 0'3 ).
nmtoken_char( 0'4 ). nmtoken_char( 0'5 ). nmtoken_char( 0'6 ). nmtoken_char( 0'7 ).
nmtoken_char( 0'8 ). nmtoken_char( 0'9 ).
% Punctuation
nmtoken_char( 0'. ).
nmtoken_char( 0'- ).
nmtoken_char( 0'_ ).
nmtoken_char( 0': ).



/* xml_string( ?String ) parses a quoted string and returns the codes
 * between the quotes, without any entity or layout processing.
 */
xml_string( Text ) -->
	quote( Quote ),
	xml_string1( Quote, Text ).



/* xml_string1( +Quote, ?Chars ) collects codes up to the first occurrence
 * of Quote, which is consumed but not included in Chars.
 */
xml_string1( Quote, [] ) -->
	quote( Quote ), !.
xml_string1( Quote, [Code|Codes] ) -->
	[Code],
	xml_string1( Quote, Codes ).



/* alphabet( ?Char ) holds for the codes of the 26 lower-case and 26
 * upper-case ASCII letters.
 */
% Lower-case letters
alphabet( 0'a ). alphabet( 0'b ). alphabet( 0'c ). alphabet( 0'd ).
alphabet( 0'e ). alphabet( 0'f ). alphabet( 0'g ). alphabet( 0'h ).
alphabet( 0'i ). alphabet( 0'j ). alphabet( 0'k ). alphabet( 0'l ).
alphabet( 0'm ). alphabet( 0'n ). alphabet( 0'o ). alphabet( 0'p ).
alphabet( 0'q ). alphabet( 0'r ). alphabet( 0's ). alphabet( 0't ).
alphabet( 0'u ). alphabet( 0'v ). alphabet( 0'w ). alphabet( 0'x ).
alphabet( 0'y ). alphabet( 0'z ).
% Upper-case letters
alphabet( 0'A ). alphabet( 0'B ). alphabet( 0'C ). alphabet( 0'D ).
alphabet( 0'E ). alphabet( 0'F ). alphabet( 0'G ). alphabet( 0'H ).
alphabet( 0'I ). alphabet( 0'J ). alphabet( 0'K ). alphabet( 0'L ).
alphabet( 0'M ). alphabet( 0'N ). alphabet( 0'O ). alphabet( 0'P ).
alphabet( 0'Q ). alphabet( 0'R ). alphabet( 0'S ). alphabet( 0'T ).
alphabet( 0'U ). alphabet( 0'V ). alphabet( 0'W ). alphabet( 0'X ).
alphabet( 0'Y ). alphabet( 0'Z ).



/* digit( ?C ) parses one decimal digit and returns its character code. */
digit( Code ) -->
	[Code],
	{digit_table( Code )}.



/* digit_table( ?Char ) holds for the codes of the decimal digits 0..9. */
digit_table( 0'0 ). digit_table( 0'1 ). digit_table( 0'2 ). digit_table( 0'3 ).
digit_table( 0'4 ). digit_table( 0'5 ). digit_table( 0'6 ). digit_table( 0'7 ).
digit_table( 0'8 ). digit_table( 0'9 ).



/* digits( ?Digits ) parses a possibly empty run of decimal digits,
 * longest run first, returning their character codes.
 */
digits( [First|Others] ) -->
	digit( First ),
	digits( Others ).
digits( [] ) --> [].



/* character_entity( ?Name, ?Char ) relates the name of each predefined
 * XML entity to the code of the character it stands for.
 */
character_entity( "quot", 0'" ). %'
character_entity( "amp",  0'&  ). %'
character_entity( "lt",   0'< ). %'
character_entity( "gt",   0'> ). %'
character_entity( "apos", 0'' ).



end_of_file.



/* For reference, this is a comprehensive recognizer for namechar, based on

 * the definition in http://www.w3.org/TR/2000/REC-xml-20001006 .

 */

/* namechar parses one name character: a letter, a digit, one of the
 * punctuation characters ".", "-", "_" or ":", a combining character or
 * an extender. (Reference only: it follows the end_of_file marker.)
 */
namechar -->
	( letter
	; unicode_digit
	; "."
	; "-"
	; "_"
	; ":"
	; combiningchar
	; extender
	).



/* letter parses one base character or one ideographic character. */
letter -->
	( basechar
	; ideographic
	).



/* basechar parses one character whose code lies in any of the ranges, or
 * equals any of the single code points, listed below. The table is
 * written in hexadecimal and in ascending order.
 */
basechar -->
	( range( 16'0041, 16'005A )
	; range( 16'0061, 16'007A )
	; range( 16'00C0, 16'00D6 )
	; range( 16'00D8, 16'00F6 )
	; range( 16'00F8, 16'00FF )
	; range( 16'0100, 16'0131 )
	; range( 16'0134, 16'013E )
	; range( 16'0141, 16'0148 )
	; range( 16'014A, 16'017E )
	; range( 16'0180, 16'01C3 )
	; range( 16'01CD, 16'01F0 )
	; range( 16'01F4, 16'01F5 )
	; range( 16'01FA, 16'0217 )
	; range( 16'0250, 16'02A8 )
	; range( 16'02BB, 16'02C1 )
	; [16'0386]
	; range( 16'0388, 16'038A )
	; [16'038C]
	; range( 16'038E, 16'03A1 )
	; range( 16'03A3, 16'03CE )
	; range( 16'03D0, 16'03D6 )
	; [16'03DA]
	; [16'03DC]
	; [16'03DE]
	; [16'03E0]
	; range( 16'03E2, 16'03F3 )
	; range( 16'0401, 16'040C )
	; range( 16'040E, 16'044F )
	; range( 16'0451, 16'045C )
	; range( 16'045E, 16'0481 )
	; range( 16'0490, 16'04C4 )
	; range( 16'04C7, 16'04C8 )
	; range( 16'04CB, 16'04CC )
	; range( 16'04D0, 16'04EB )
	; range( 16'04EE, 16'04F5 )
	; range( 16'04F8, 16'04F9 )
	; range( 16'0531, 16'0556 )
	; [16'0559]
	; range( 16'0561, 16'0586 )
	; range( 16'05D0, 16'05EA )
	; range( 16'05F0, 16'05F2 )
	; range( 16'0621, 16'063A )
	; range( 16'0641, 16'064A )
	; range( 16'0671, 16'06B7 )
	; range( 16'06BA, 16'06BE )
	; range( 16'06C0, 16'06CE )
	; range( 16'06D0, 16'06D3 )
	; [16'06D5]
	; range( 16'06E5, 16'06E6 )
	; range( 16'0905, 16'0939 )
	; [16'093D]
	; range( 16'0958, 16'0961 )
	; range( 16'0985, 16'098C )
	; range( 16'098F, 16'0990 )
	; range( 16'0993, 16'09A8 )
	; range( 16'09AA, 16'09B0 )
	; [16'09B2]
	; range( 16'09B6, 16'09B9 )
	; range( 16'09DC, 16'09DD )
	; range( 16'09DF, 16'09E1 )
	; range( 16'09F0, 16'09F1 )
	; range( 16'0A05, 16'0A0A )
	; range( 16'0A0F, 16'0A10 )
	; range( 16'0A13, 16'0A28 )
	; range( 16'0A2A, 16'0A30 )
	; range( 16'0A32, 16'0A33 )
	; range( 16'0A35, 16'0A36 )
	; range( 16'0A38, 16'0A39 )
	; range( 16'0A59, 16'0A5C )
	; [16'0A5E]
	; range( 16'0A72, 16'0A74 )
	; range( 16'0A85, 16'0A8B )
	; [16'0A8D]
	; range( 16'0A8F, 16'0A91 )
	; range( 16'0A93, 16'0AA8 )
	; range( 16'0AAA, 16'0AB0 )
	; range( 16'0AB2, 16'0AB3 )
	; range( 16'0AB5, 16'0AB9 )
	; [16'0ABD]
	; [16'0AE0]
	; range( 16'0B05, 16'0B0C )
	; range( 16'0B0F, 16'0B10 )
	; range( 16'0B13, 16'0B28 )
	; range( 16'0B2A, 16'0B30 )
	; range( 16'0B32, 16'0B33 )
	; range( 16'0B36, 16'0B39 )
	; [16'0B3D]
	; range( 16'0B5C, 16'0B5D )
	; range( 16'0B5F, 16'0B61 )
	; range( 16'0B85, 16'0B8A )
	; range( 16'0B8E, 16'0B90 )
	; range( 16'0B92, 16'0B95 )
	; range( 16'0B99, 16'0B9A )
	; [16'0B9C]
	; range( 16'0B9E, 16'0B9F )
	; range( 16'0BA3, 16'0BA4 )
	; range( 16'0BA8, 16'0BAA )
	; range( 16'0BAE, 16'0BB5 )
	; range( 16'0BB7, 16'0BB9 )
	; range( 16'0C05, 16'0C0C )
	; range( 16'0C0E, 16'0C10 )
	; range( 16'0C12, 16'0C28 )
	; range( 16'0C2A, 16'0C33 )
	; range( 16'0C35, 16'0C39 )
	; range( 16'0C60, 16'0C61 )
	; range( 16'0C85, 16'0C8C )
	; range( 16'0C8E, 16'0C90 )
	; range( 16'0C92, 16'0CA8 )
	; range( 16'0CAA, 16'0CB3 )
	; range( 16'0CB5, 16'0CB9 )
	; [16'0CDE]
	; range( 16'0CE0, 16'0CE1 )
	; range( 16'0D05, 16'0D0C )
	; range( 16'0D0E, 16'0D10 )
	; range( 16'0D12, 16'0D28 )
	; range( 16'0D2A, 16'0D39 )
	; range( 16'0D60, 16'0D61 )
	; range( 16'0E01, 16'0E2E )
	; [16'0E30]
	; range( 16'0E32, 16'0E33 )
	; range( 16'0E40, 16'0E45 )
	; range( 16'0E81, 16'0E82 )
	; [16'0E84]
	; range( 16'0E87, 16'0E88 )
	; [16'0E8A]
	; [16'0E8D]
	; range( 16'0E94, 16'0E97 )
	; range( 16'0E99, 16'0E9F )
	; range( 16'0EA1, 16'0EA3 )
	; [16'0EA5]
	; [16'0EA7]
	; range( 16'0EAA, 16'0EAB )
	; range( 16'0EAD, 16'0EAE )
	; [16'0EB0]
	; range( 16'0EB2, 16'0EB3 )
	; [16'0EBD]
	; range( 16'0EC0, 16'0EC4 )
	; range( 16'0F40, 16'0F47 )
	; range( 16'0F49, 16'0F69 )
	; range( 16'10A0, 16'10C5 )
	; range( 16'10D0, 16'10F6 )
	; [16'1100]
	; range( 16'1102, 16'1103 )
	; range( 16'1105, 16'1107 )
	; [16'1109]
	; range( 16'110B, 16'110C )
	; range( 16'110E, 16'1112 )
	; [16'113C]
	; [16'113E]
	; [16'1140]
	; [16'114C]
	; [16'114E]
	; [16'1150]
	; range( 16'1154, 16'1155 )
	; [16'1159]
	; range( 16'115F, 16'1161 )
	; [16'1163]
	; [16'1165]
	; [16'1167]
	; [16'1169]
	; range( 16'116D, 16'116E )
	; range( 16'1172, 16'1173 )
	; [16'1175]
	; [16'119E]
	; [16'11A8]
	; [16'11AB]
	; range( 16'11AE, 16'11AF )
	; range( 16'11B7, 16'11B8 )
	; [16'11BA]
	; range( 16'11BC, 16'11C2 )
	; [16'11EB]
	; [16'11F0]
	; [16'11F9]
	; range( 16'1E00, 16'1E9B )
	; range( 16'1EA0, 16'1EF9 )
	; range( 16'1F00, 16'1F15 )
	; range( 16'1F18, 16'1F1D )
	; range( 16'1F20, 16'1F45 )
	; range( 16'1F48, 16'1F4D )
	; range( 16'1F50, 16'1F57 )
	; [16'1F59]
	; [16'1F5B]
	; [16'1F5D]
	; range( 16'1F5F, 16'1F7D )
	; range( 16'1F80, 16'1FB4 )
	; range( 16'1FB6, 16'1FBC )
	; [16'1FBE]
	; range( 16'1FC2, 16'1FC4 )
	; range( 16'1FC6, 16'1FCC )
	; range( 16'1FD0, 16'1FD3 )
	; range( 16'1FD6, 16'1FDB )
	; range( 16'1FE0, 16'1FEC )
	; range( 16'1FF2, 16'1FF4 )
	; range( 16'1FF6, 16'1FFC )
	; [16'2126]
	; range( 16'212A, 16'212B )
	; [16'212E]
	; range( 16'2180, 16'2182 )
	; range( 16'3041, 16'3094 )
	; range( 16'30A1, 16'30FA )
	; range( 16'3105, 16'312C )
	; range( 16'AC00, 16'D7A3 )
	).

/* ideographic parses one character whose code is 16'3007 or lies in
 * 16'4E00..16'9FA5 or 16'3021..16'3029.
 */
ideographic -->
	( range( 16'4E00, 16'9FA5 )
	; [16'3007]
	; range( 16'3021, 16'3029 )
	).

/* combiningchar parses one character whose code lies in any of the
 * ranges, or equals any of the single code points, listed below.
 */
combiningchar -->
	( range( 16'0300, 16'0345 )
	; range( 16'0360, 16'0361 )
	; range( 16'0483, 16'0486 )
	; range( 16'0591, 16'05A1 )
	; range( 16'05A3, 16'05B9 )
	; range( 16'05BB, 16'05BD )
	; [16'05BF]
	; range( 16'05C1, 16'05C2 )
	; [16'05C4]
	; range( 16'064B, 16'0652 )
	; [16'0670]
	; range( 16'06D6, 16'06DC )
	; range( 16'06DD, 16'06DF )
	; range( 16'06E0, 16'06E4 )
	; range( 16'06E7, 16'06E8 )
	; range( 16'06EA, 16'06ED )
	; range( 16'0901, 16'0903 )
	; [16'093C]
	; range( 16'093E, 16'094C )
	; [16'094D]
	; range( 16'0951, 16'0954 )
	; range( 16'0962, 16'0963 )
	; range( 16'0981, 16'0983 )
	; [16'09BC]
	; [16'09BE]
	; [16'09BF]
	; range( 16'09C0, 16'09C4 )
	; range( 16'09C7, 16'09C8 )
	; range( 16'09CB, 16'09CD )
	; [16'09D7]
	; range( 16'09E2, 16'09E3 )
	; [16'0A02]
	; [16'0A3C]
	; [16'0A3E]
	; [16'0A3F]
	; range( 16'0A40, 16'0A42 )
	; range( 16'0A47, 16'0A48 )
	; range( 16'0A4B, 16'0A4D )
	; range( 16'0A70, 16'0A71 )
	; range( 16'0A81, 16'0A83 )
	; [16'0ABC]
	; range( 16'0ABE, 16'0AC5 )
	; range( 16'0AC7, 16'0AC9 )
	; range( 16'0ACB, 16'0ACD )
	; range( 16'0B01, 16'0B03 )
	; [16'0B3C]
	; range( 16'0B3E, 16'0B43 )
	; range( 16'0B47, 16'0B48 )
	; range( 16'0B4B, 16'0B4D )
	; range( 16'0B56, 16'0B57 )
	; range( 16'0B82, 16'0B83 )
	; range( 16'0BBE, 16'0BC2 )
	; range( 16'0BC6, 16'0BC8 )
	; range( 16'0BCA, 16'0BCD )
	; [16'0BD7]
	; range( 16'0C01, 16'0C03 )
	; range( 16'0C3E, 16'0C44 )
	; range( 16'0C46, 16'0C48 )
	; range( 16'0C4A, 16'0C4D )
	; range( 16'0C55, 16'0C56 )
	; range( 16'0C82, 16'0C83 )
	; range( 16'0CBE, 16'0CC4 )
	; range( 16'0CC6, 16'0CC8 )
	; range( 16'0CCA, 16'0CCD )
	; range( 16'0CD5, 16'0CD6 )
	; range( 16'0D02, 16'0D03 )
	; range( 16'0D3E, 16'0D43 )
	; range( 16'0D46, 16'0D48 )
	; range( 16'0D4A, 16'0D4D )
	; [16'0D57]
	; [16'0E31]
	; range( 16'0E34, 16'0E3A )
	; range( 16'0E47, 16'0E4E )
	; [16'0EB1]
	; range( 16'0EB4, 16'0EB9 )
	; range( 16'0EBB, 16'0EBC )
	; range( 16'0EC8, 16'0ECD )
	; range( 16'0F18, 16'0F19 )
	; [16'0F35]
	; [16'0F37]
	; [16'0F39]
	; [16'0F3E]
	; [16'0F3F]
	; range( 16'0F71, 16'0F84 )
	; range( 16'0F86, 16'0F8B )
	; range( 16'0F90, 16'0F95 )
	; [16'0F97]
	; range( 16'0F99, 16'0FAD )
	; range( 16'0FB1, 16'0FB7 )
	; [16'0FB9]
	; range( 16'20D0, 16'20DC )
	; [16'20E1]
	; range( 16'302A, 16'302F )
	; [16'3099]
	; [16'309A]
	).



/* unicode_digit parses one character whose code lies in any of the
 * ranges listed below, the first of which is the ASCII digits.
 */
unicode_digit -->
	( range( 16'0030, 16'0039 )
	; range( 16'0660, 16'0669 )
	; range( 16'06F0, 16'06F9 )
	; range( 16'0966, 16'096F )
	; range( 16'09E6, 16'09EF )
	; range( 16'0A66, 16'0A6F )
	; range( 16'0AE6, 16'0AEF )
	; range( 16'0B66, 16'0B6F )
	; range( 16'0BE7, 16'0BEF )
	; range( 16'0C66, 16'0C6F )
	; range( 16'0CE6, 16'0CEF )
	; range( 16'0D66, 16'0D6F )
	; range( 16'0E50, 16'0E59 )
	; range( 16'0ED0, 16'0ED9 )
	; range( 16'0F20, 16'0F29 )
	).



/* extender parses one character whose code equals any of the single code
 * points, or lies in any of the ranges, listed below.
 */
extender -->
	( [16'00B7]
	; [16'02D0]
	; [16'02D1]
	; [16'0387]
	; [16'0640]
	; [16'0E46]
	; [16'0EC6]
	; [16'3005]
	; range( 16'3031, 16'3035 )
	; range( 16'309D, 16'309E )
	; range( 16'30FC, 16'30FE )
	).



/* range( +Low, +High ) parses one character whose code lies between Low
 * and High, both bounds included.
 */
range( Low, High ) -->
	[Code],
	{Low =< Code, Code =< High}.