/* xml_acquisition.pl : XML -> Document translation. * * Copyright (C) 2001-2005 Binding Time Limited * Copyright (C) 2005-2011 John Fletcher * * Current Release: $Revision: 3.4 $ * * TERMS AND CONDITIONS: * * This program is offered free of charge, as unsupported source code. You may * use it, copy it, distribute it, modify it or sell it without restriction, * but entirely at your own risk. */ :- ensure_loaded( xml_utilities ). /* xml_to_document( +Controls, +XML, ?Document ) translates the list of * character codes XML into the Prolog term Document. Controls is a list * of terms controlling the treatment of layout characters and character * entities. */ xml_to_document( Controls, XML, Document ) :- initial_context( Controls, Context ), ( xml_declaration( Attributes0, XML, XML1 ) -> Attributes = Attributes0 ; otherwise -> XML1 = XML, Attributes = [] ), xml_to_document( XML1, Context, Terms, [], WellFormed ), xml_to_document1( WellFormed, Attributes, Terms, Document ). xml_to_document1( true, Attributes, Terms, xml(Attributes, Terms) ). xml_to_document1( false, Attributes, Terms, malformed(Attributes, Terms) ). % unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed ) unparsed( Unparsed, _Context, [unparsed(Unparsed)], [], false ). xml_declaration( Attributes ) --> spaces, "<?", nmtoken( xml ), xml_declaration_attributes( Attributes ), spaces, "?>". xml_to_document( [], Context, Terms, [], WF ) :- close_context( Context, Terms, WF ). xml_to_document( [Char|Chars], Context, Terms, Residue, WF ) :- ( Char =:= "<" -> xml_markup_structure( Chars, Context, Terms, Residue, WF ) ; Char =:= "&" -> entity_reference( Chars, Context, Terms, Residue, WF ) ; Char =< " ", \+ space_preserve( Context ) -> layouts( Chars, Context, [Char|T], T, Terms, Residue, WF ) ; void_context( Context ) -> unparsed( [Char|Chars], Context, Terms, Residue, WF ) ; otherwise -> Terms = [pcdata([Char|Chars1])|Terms1], acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF ) ). layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :- close_context( Context, Terms, WF ). layouts( [Char|Chars], Context, Plus, Minus, Terms, Residue, WF ) :- ( Char =:= "<" -> xml_markup_structure( Chars, Context, Terms, Residue, WF ) ; Char =:= "&" -> reference_in_layout( Chars, Context, Plus, Minus, Terms, Residue, WF ) ; Char =< " " -> Minus = [Char|Minus1], layouts( Chars, Context, Plus, Minus1, Terms, Residue, WF ) ; void_context( Context ) -> unparsed( [Char|Chars], Context, Terms, Residue, WF ) ; otherwise -> Terms = [pcdata(Plus)|Terms1], Minus = [Char|Chars1], context_update( space_preserve, Context, true, Context1 ), acquire_pcdata( Chars, Context1, Chars1, Terms1, Residue, WF ) ). acquire_pcdata( [], Context, [], Terms, [], WF ) :- close_context( Context, Terms, WF ). acquire_pcdata( [Char|Chars], Context, Chars1, Terms, Residue, WF ) :- ( Char =:= "<" -> Chars1 = [], xml_markup_structure( Chars, Context, Terms, Residue, WF ) ; Char =:= "&" -> reference_in_pcdata( Chars, Context, Chars1, Terms, Residue, WF ) ; otherwise -> Chars1 = [Char|Chars2], acquire_pcdata( Chars, Context, Chars2, Terms, Residue, WF ) ). xml_markup_structure( [], Context, Terms, Residue, WF ) :- unparsed( "<", Context, Terms, Residue, WF ). xml_markup_structure( Chars, Context, Terms, Residue, WF ) :- Chars = [Char|Chars1], ( Char =:= "/" -> closing_tag( Context, Chars1, Terms, Residue, WF ) ; Char =:= "?" -> pi_acquisition( Chars1, Context, Terms, Residue, WF ) ; Char =:= "!" -> declaration_acquisition( Chars1, Context, Terms, Residue, WF ) ; open_tag(Tag,Context,Attributes,Type, Chars, Chars2 ) -> push_tag( Tag, Chars2, Context, Attributes, Type, Terms, Residue, WF ) ; otherwise -> unparsed( [0'<|Chars], Context, Terms, Residue, WF ) %' ). push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :- new_element(Tag, Chars, Context, Attributes, Type, Term, Rest, WF0), push_tag1( WF0, Context, Term, Rest, Terms, Residue, WF ). push_tag1( true, Context, Term, Chars, [Term|Terms], Residue, WF ) :- xml_to_document( Chars, Context, Terms, Residue, WF ). push_tag1( false, _Context, Term, Chars, [Term], Chars, false ). new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :- namespace_attributes( Attributes0, Context, Context1, Attributes1 ), ( append( NSChars, [0':|TagChars1], TagChars ), %' specific_namespace( NSChars, Context1, SpecificNamespace ) -> Namespace0 = SpecificNamespace ; otherwise -> NSChars = "", TagChars1 = TagChars, default_namespace( Context1, Namespace0 ) ), current_namespace( Context1, CurrentNamespace ), ( Namespace0 == CurrentNamespace -> Term = element(Tag, Attributes, Contents), Context2 = Context1 ; otherwise -> Term = namespace( Namespace0, NSChars, element(Tag, Attributes, Contents) ), context_update( current_namespace, Context1, Namespace0, Context2 ) ), input_attributes( Attributes1, Context2, Attributes ), atom_codes( Tag, TagChars1 ), close_tag( Type, Chars, Context2, Contents, Residue, WF ). close_tag( empty, Residue, _Context, [], Residue, true ). close_tag( push(Tag), Chars, Context0, Contents, Residue, WF ) :- context_update( element, Context0, Tag, Context1 ), xml_to_document( Chars, Context1, Contents, Residue, WF ). pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :- ( inline_instruction(Target, Processing, Chars, Rest ), Target \== xml -> Terms = [instructions(Target, Processing)|Terms1], xml_to_document( Rest, Context, Terms1, Residue, WellFormed ) ; otherwise -> unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed ) ). declaration_acquisition( Chars, Context, Terms, Residue, WF ) :- ( declaration_type( Chars, Type, Chars1 ), declaration_parse( Type, Context, Term, Context1, Chars1, Rest ) -> Terms = [Term|Terms1], xml_to_document( Rest, Context1, Terms1, Residue, WF ) ; otherwise -> unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WF ) ). open_tag( Tag, Namespaces, Attributes, Termination ) --> nmtoken_chars( Tag ), attributes( Attributes, [], Namespaces ), spaces, open_tag_terminator( Tag, Termination ). open_tag_terminator( Tag, push(Tag) ) --> ">". open_tag_terminator( _Tag, empty ) --> "/>". declaration_parse( comment, Namespaces, comment(Comment), Namespaces ) --> comment(Comment). declaration_parse( cdata, Namespaces, cdata(CData), Namespaces ) --> cdata( CData ). declaration_parse( doctype, Namespaces0, doctype(Name, Names), Namespaces ) --> doctype( Name, Names, Namespaces0, Namespaces ), spaces, ">". inline_instruction( Target, Processing, Plus, Minus ) :- nmtoken(Target, Plus, Mid0 ), spaces( Mid0, Mid1 ), append( Processing, [0'?,0'>|Minus], Mid1 ), !. entity_reference_name( Reference ) --> nmtoken_chars( Reference ), ";". declaration_type( [Char1,Char2|Chars1], Class, Rest ) :- Chars = [Char1,Char2|Chars1], ( declaration_type1( Char1, Char2, Chars1, Class0, Residue ) -> Class = Class0, Rest = Residue ; otherwise -> Class = generic, Rest = Chars ). declaration_type1( 0'-, 0'-, Chars, comment, Chars ). declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :- append( "DATA[", Residue, Chars ). declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :- append( "CTYPE", Residue, Chars ). closing_tag( Context, Chars, Terms, Residue, WellFormed ) :- ( closing_tag_name( Tag, Chars, Rest ), current_tag( Context, Tag ) -> Terms = [], Residue = Rest, WellFormed = true ; otherwise -> unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed ) ). closing_tag_name( Tag ) --> nmtoken_chars( Tag ), spaces, ">". entity_reference( Chars, Context, Terms, Residue, WF ) :- reference_in_layout( Chars, Context, L, L, Terms, Residue, WF ). reference_in_layout( Chars, Context, Plus, Minus, Terms, Residue, WF ) :- ( standard_character_entity( Char, Chars, Rest ) -> Minus = [Char|Chars1], Terms = [pcdata(Plus)|Terms1], acquire_pcdata( Rest, Context, Chars1, Terms1, Residue, WF ) ; entity_reference_name( Reference, Chars, Rest ), defined_entity( Reference, Context, String ) -> append( String, Rest, Full ), xml_to_document( Full, Context, Terms, Residue, WF ) ; allow_ampersand( Context ) -> Minus = [0'&|Chars1], %' Terms = [pcdata(Plus)|Terms1], acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF ) ; otherwise -> unparsed( [0'&|Chars], Context, Terms, Residue, WF ) %' ). reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :- ( standard_character_entity( Char, Chars0, Rest ) -> Chars1 = [Char|Chars2], acquire_pcdata( Rest, Context, Chars2, Terms, Residue, WF ) ; entity_reference_name( Reference, Chars0, Rest ), defined_entity( Reference, Context, String ) -> append( String, Rest, Full ), acquire_pcdata( Full, Context, Chars1, Terms, Residue, WF ) ; allow_ampersand( Context ) -> Chars1 = [0'&|Chars2], acquire_pcdata( Chars0, Context, Chars2, Terms, Residue, WF ) ; otherwise -> Chars1 = [], unparsed( [0'&|Chars0], Context, Terms, Residue, WF ) ). namespace_attributes( [], Context, Context, [] ). namespace_attributes( Attributes0, Context0, Context, Attributes ) :- Attributes0 = [_|_], append( "xmlns:", Unqualified, QualifiedNameChars ), ( select( "xmlns"=Value, Attributes0, Attributes1 ) -> atom_codes( URI, Value ), context_update( default_namespace, Context0, URI, Context1 ), namespace_attributes( Attributes1, Context1, Context, Attributes ) ; select( QualifiedNameChars=Value, Attributes0, Attributes1 ) -> Attributes = [QualifiedNameChars=Value|Attributes2], atom_codes( URI, Value ), context_update( ns_prefix(Unqualified), Context0, URI, Context1 ), namespace_attributes( Attributes1, Context1, Context, Attributes2 ) ; member( "xml:space"="preserve", Attributes0 ) -> Attributes = Attributes0, context_update( space_preserve, Context0, true, Context ) ; otherwise -> Context = Context0, Attributes = Attributes0 ). input_attributes( [], _Context, [] ). input_attributes( [NameChars=Value|Attributes0], Context, [Name=Value|Attributes] ) :- ( remove_attribute_prefixes( Context ), append( NSChars, [0':|NameChars1], NameChars ), %' NSChars \== "xmlns", specific_namespace( NSChars, Context, Namespace ), current_namespace( Context, Namespace ) -> atom_codes( Name, NameChars1 ) ; otherwise -> atom_codes( Name, NameChars ) ), input_attributes( Attributes0, Context, Attributes ). attributes( [Name=Value|Attributes], Seen, Namespaces ) --> spaces, nmtoken_chars( Name ), {\+ member(Name, Seen)}, spaces, "=", spaces, attribute_value( Value, Namespaces ), attributes( Attributes, [Name|Seen], Namespaces ). attributes( [], _Seen, _Namespaces ) --> "". xml_declaration_attributes( [] ) --> "". xml_declaration_attributes( [Name=Value|Attributes] ) --> spaces, nmtoken( Name ), spaces, "=", spaces, xml_string( Value ), {xml_declaration_attribute_valid(Name, Value)}, xml_declaration_attributes( Attributes ), spaces. doctype( Name, External, Namespaces0, Namespaces1 ) --> spaces, nmtoken( Name ), spaces, doctype_id( External0 ), spaces, doctype1( Namespaces0, Literals, Namespaces1 ), {doctype_extension(Literals, External0, External)}. doctype_extension( [], External, External ). doctype_extension( [Literal|Literals], External0, External ) :- extended_doctype( External0, [Literal|Literals], External ). extended_doctype( system(URL), Literals, system(URL,Literals) ). extended_doctype( public(URN,URL), Literals, public(URN,URL,Literals) ). extended_doctype( local, Literals, local(Literals) ). doctype1( Namespaces0, Literals, Namespaces1 ) --> "[", !, dtd( Namespaces0, Literals, Namespaces1 ), "]". doctype1( Namespaces, [], Namespaces ) --> "". doctype_id( system(URL) ) --> "SYSTEM", spaces, uri( URL ). doctype_id( public(URN,URL) ) --> "PUBLIC", spaces, uri( URN ), spaces, uri( URL ). doctype_id( local ) --> "". dtd( Namespaces0, Literals, Namespaces1 ) --> spaces, "<!ENTITY", !, spaces, nmtoken_chars( Name ), spaces, quote( Quote ), entity_value( Quote, Namespaces0, String ), spaces, ">", {(\+ character_entity( Name, StandardChar ) ; String = [StandardChar], character_entity( Name, StandardChar ) ), % Don't allow < "e; etc. to be updated context_update( entity(Name), Namespaces0, String, Namespaces2 ) }, dtd( Namespaces2, Literals, Namespaces1 ). dtd( Namespaces0, Literals, Namespaces1 ) --> spaces, "<!--", !, dtd_comment, ">", dtd( Namespaces0, Literals, Namespaces1 ). dtd( Namespaces0, [dtd_literal(Literal)|Literals], Namespaces1 ) --> spaces, "<!", !, dtd_literal( Literal ), dtd( Namespaces0, Literals, Namespaces1 ). dtd( Namespaces, [], Namespaces ) --> spaces. dtd_literal( [] ) --> ">", !. dtd_literal( Chars ) --> "--", !, dtd_comment, dtd_literal( Chars ). dtd_literal( [Char|Chars] ) --> [Char], dtd_literal( Chars ). dtd_comment( Plus, Minus ) :- append( _Chars, [0'-,0'-|Minus], Plus ), !. nmtokens( [Name|Names] ) --> spaces, nmtoken( Name ), nmtokens( Names ). nmtokens( [] ) --> []. entity_value( Quote, Namespaces, String, [Char|Plus], Minus ) :- ( Char == Quote -> String = [], Minus = Plus ; Char =:= "&" -> reference_in_entity( Namespaces, Quote, String, Plus, Minus ) ; otherwise -> String = [Char|String1], entity_value( Quote, Namespaces, String1, Plus, Minus ) ). attribute_value( String, Namespaces ) --> quote( Quote ), attribute_leading_layouts( Quote, Namespaces, String ). attribute_leading_layouts( _Quote, _Namespace, [], [], [] ). attribute_leading_layouts( Quote, Namespaces, String, [Char|Plus], Minus ) :- ( Char == Quote -> String = [], Minus = Plus ; Char =:= "&" -> ref_in_attribute_layout( Namespaces, Quote, String, Plus, Minus ) ; Char > 32, Char \== 160 -> String = [Char|String1], attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus ) ; otherwise -> attribute_leading_layouts( Quote, Namespaces, String, Plus, Minus ) ). attribute_layouts( _Quote, _Namespaces, _Layout, [], [], [] ). attribute_layouts( Quote, Namespaces, Layout, String, [Char|Plus], Minus ) :- ( Char == Quote -> String = [], Minus = Plus ; Char =:= "&" -> reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) ; Char > 32, Char \== 160 -> ( Layout == true -> String = [0' ,Char|String1] %' ; otherwise -> String = [Char|String1] ), attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus ) ; otherwise -> attribute_layouts( Quote, Namespaces, true, String, Plus, Minus ) ). ref_in_attribute_layout( NS, Quote, String, Plus, Minus ) :- ( standard_character_entity( Char, Plus, Mid ) -> String = [Char|String1], attribute_layouts( Quote, NS, false, String1, Mid, Minus ) ; entity_reference_name( Name, Plus, Suffix ), defined_entity( Name, NS, Text ) -> append( Text, Suffix, Mid ), attribute_leading_layouts( Quote, NS, String, Mid, Minus ) ; otherwise -> % Just & is okay in a value String = [0'&|String1], %' attribute_layouts( Quote, NS, false, String1, Plus, Minus ) ). reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :- ( standard_character_entity( Char, Plus, Mid ) -> ( Layout == true -> String = [0' ,Char|String1] %' ; otherwise -> String = [Char|String1] ), Layout1 = false ; entity_reference_name( Name, Plus, Suffix ), defined_entity( Name, Namespaces, Text ) -> String = String1, append( Text, Suffix, Mid ), Layout1 = Layout ; otherwise -> % Just & is okay in a value Mid = Plus, String = [0'&|String1], %' Layout1 = false ), attribute_layouts( Quote, Namespaces, Layout1, String1, Mid, Minus ). /* References are resolved backwards in Entity defintions so that * circularity is avoided. */ reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :- ( standard_character_entity( _SomeChar, Plus, _Rest ) -> String = [0'&|String1], % ' Character entities are unparsed Mid = Plus ; entity_reference_name( Name, Plus, Suffix ), defined_entity( Name, Namespaces, Text ) -> String = String1, append( Text, Suffix, Mid ) ), entity_value( Quote, Namespaces, String1, Mid, Minus ). standard_character_entity( Char ) --> "#x", hex_character_reference( Char ), ";". standard_character_entity( Char ) --> "#", digit( Digit ), digits( Digits ), ";", {number_chars( Char, [Digit|Digits])}. standard_character_entity( C ) --> chars( String ), ";", !, {character_entity(String, C)}. uri( URI ) --> quote( Quote ), uri1( Quote, URI ). uri1( Quote, [] ) --> quote( Quote ), !. uri1( Quote, [Char|Chars] ) --> [Char], uri1( Quote, Chars ). comment( Chars, Plus, Minus ) :- append( Chars, [0'-,0'-,0'>|Minus], Plus ), %' !. cdata( Chars, Plus, Minus ) :- append( Chars, [0'],0'],0'>|Minus], Plus ), %' !. % Syntax Components hex_character_reference( Code ) --> hex_character_reference1( 0, Code ). hex_character_reference1( Current, Code ) --> hex_digit_char( Value ), !, {New is (Current << 4) + Value}, hex_character_reference1( New, Code ). hex_character_reference1( Code, Code ) --> "". hex_digit_char( 0 ) --> "0". hex_digit_char( 1 ) --> "1". hex_digit_char( 2 ) --> "2". hex_digit_char( 3 ) --> "3". hex_digit_char( 4 ) --> "4". hex_digit_char( 5 ) --> "5". hex_digit_char( 6 ) --> "6". hex_digit_char( 7 ) --> "7". hex_digit_char( 8 ) --> "8". hex_digit_char( 9 ) --> "9". hex_digit_char( 10 ) --> "A". hex_digit_char( 11 ) --> "B". hex_digit_char( 12 ) --> "C". hex_digit_char( 13 ) --> "D". hex_digit_char( 14 ) --> "E". hex_digit_char( 15 ) --> "F". hex_digit_char( 10 ) --> "a". hex_digit_char( 11 ) --> "b". hex_digit_char( 12 ) --> "c". hex_digit_char( 13 ) --> "d". hex_digit_char( 14 ) --> "e". hex_digit_char( 15 ) --> "f". quote( 0'" ) --> %' """". quote( 0'' ) --> "'". spaces( [], [] ). spaces( [Char|Chars0], Chars1 ) :- ( Char =< 32 -> spaces( Chars0, Chars1 ) ; otherwise -> Chars1 = [Char|Chars0] ). nmtoken( Name ) --> nmtoken_chars( Chars ), {atom_codes(Name, Chars)}. nmtoken_chars( [Char|Chars] ) --> [Char], {nmtoken_first( Char )}, nmtoken_chars_tail( Chars ). nmtoken_chars_tail( [Char|Chars] ) --> [Char], {nmtoken_char(Char)}, !, nmtoken_chars_tail( Chars ). nmtoken_chars_tail([]) --> "". nmtoken_first( 0': ). nmtoken_first( 0'_ ). nmtoken_first( Char ) :- alphabet( Char ). nmtoken_char( 0'a ). nmtoken_char( 0'b ). nmtoken_char( 0'c ). nmtoken_char( 0'd ). nmtoken_char( 0'e ). nmtoken_char( 0'f ). nmtoken_char( 0'g ). nmtoken_char( 0'h ). nmtoken_char( 0'i ). nmtoken_char( 0'j ). nmtoken_char( 0'k ). nmtoken_char( 0'l ). nmtoken_char( 0'm ). nmtoken_char( 0'n ). nmtoken_char( 0'o ). nmtoken_char( 0'p ). nmtoken_char( 0'q ). nmtoken_char( 0'r ). nmtoken_char( 0's ). nmtoken_char( 0't ). nmtoken_char( 0'u ). nmtoken_char( 0'v ). nmtoken_char( 0'w ). nmtoken_char( 0'x ). nmtoken_char( 0'y ). nmtoken_char( 0'z ). nmtoken_char( 0'A ). nmtoken_char( 0'B ). nmtoken_char( 0'C ). nmtoken_char( 0'D ). nmtoken_char( 0'E ). nmtoken_char( 0'F ). nmtoken_char( 0'G ). nmtoken_char( 0'H ). nmtoken_char( 0'I ). nmtoken_char( 0'J ). nmtoken_char( 0'K ). nmtoken_char( 0'L ). nmtoken_char( 0'M ). nmtoken_char( 0'N ). nmtoken_char( 0'O ). nmtoken_char( 0'P ). nmtoken_char( 0'Q ). nmtoken_char( 0'R ). nmtoken_char( 0'S ). nmtoken_char( 0'T ). nmtoken_char( 0'U ). nmtoken_char( 0'V ). nmtoken_char( 0'W ). nmtoken_char( 0'X ). nmtoken_char( 0'Y ). nmtoken_char( 0'Z ). nmtoken_char( 0'0 ). nmtoken_char( 0'1 ). nmtoken_char( 0'2 ). nmtoken_char( 0'3 ). nmtoken_char( 0'4 ). nmtoken_char( 0'5 ). nmtoken_char( 0'6 ). nmtoken_char( 0'7 ). nmtoken_char( 0'8 ). nmtoken_char( 0'9 ). nmtoken_char( 0'. ). nmtoken_char( 0'- ). nmtoken_char( 0'_ ). nmtoken_char( 0': ). xml_string( String ) --> quote( Quote ), xml_string1( Quote, String ). xml_string1( Quote, [] ) --> quote( Quote ), !. xml_string1( Quote, [Char|Chars] ) --> [Char], xml_string1( Quote, Chars ). alphabet( 0'a ). alphabet( 0'b ). alphabet( 0'c ). alphabet( 0'd ). alphabet( 0'e ). alphabet( 0'f ). alphabet( 0'g ). alphabet( 0'h ). alphabet( 0'i ). alphabet( 0'j ). alphabet( 0'k ). alphabet( 0'l ). alphabet( 0'm ). alphabet( 0'n ). alphabet( 0'o ). alphabet( 0'p ). alphabet( 0'q ). alphabet( 0'r ). alphabet( 0's ). alphabet( 0't ). alphabet( 0'u ). alphabet( 0'v ). alphabet( 0'w ). alphabet( 0'x ). alphabet( 0'y ). alphabet( 0'z ). alphabet( 0'A ). alphabet( 0'B ). alphabet( 0'C ). alphabet( 0'D ). alphabet( 0'E ). alphabet( 0'F ). alphabet( 0'G ). alphabet( 0'H ). alphabet( 0'I ). alphabet( 0'J ). alphabet( 0'K ). alphabet( 0'L ). alphabet( 0'M ). alphabet( 0'N ). alphabet( 0'O ). alphabet( 0'P ). alphabet( 0'Q ). alphabet( 0'R ). alphabet( 0'S ). alphabet( 0'T ). alphabet( 0'U ). alphabet( 0'V ). alphabet( 0'W ). alphabet( 0'X ). alphabet( 0'Y ). alphabet( 0'Z ). digit( C ) --> [C], {digit_table( C )}. digit_table( 0'0 ). digit_table( 0'1 ). digit_table( 0'2 ). digit_table( 0'3 ). digit_table( 0'4 ). digit_table( 0'5 ). digit_table( 0'6 ). digit_table( 0'7 ). digit_table( 0'8 ). digit_table( 0'9 ). digits( [Digit|Digits] ) --> digit( Digit ), digits( Digits ). digits( [] ) --> []. character_entity( "quot", 0'" ). %' character_entity( "amp", 0'& ). %' character_entity( "lt", 0'< ). %' character_entity( "gt", 0'> ). %' character_entity( "apos", 0'' ). end_of_file. /* For reference, this is a comprehensive recognizer for namechar, based on * the definition of in http://www.w3.org/TR/2000/REC-xml-20001006 . */ namechar --> ( letter | unicode_digit | "." | "-" | "_" | ":" | combiningchar | extender ). letter --> (basechar | ideographic). basechar --> ( range( 16'0041, 16'005A ) | range( 16'0061, 16'007A ) | range( 16'00C0, 16'00D6 ) | range( 16'00D8, 16'00F6 ) | range( 16'00F8, 16'00FF ) | range( 16'0100, 16'0131 ) | range( 16'0134, 16'013E ) | range( 16'0141, 16'0148 ) | range( 16'014A, 16'017E ) | range( 16'0180, 16'01C3 ) | range( 16'01CD, 16'01F0 ) | range( 16'01F4, 16'01F5 ) | range( 16'01FA, 16'0217 ) | range( 16'0250, 16'02A8 ) | range( 16'02BB, 16'02C1 ) | [16'0386] | range( 16'0388, 16'038A ) | [16'038C] | range( 16'038E, 16'03A1 ) | range( 16'03A3, 16'03CE ) | range( 16'03D0, 16'03D6 ) | [16'03DA] | [16'03DC] | [16'03DE] | [16'03E0] | range( 16'03E2, 16'03F3 ) | range( 16'0401, 16'040C ) | range( 16'040E, 16'044F ) | range( 16'0451, 16'045C ) | range( 16'045E, 16'0481 ) | range( 16'0490, 16'04C4 ) | range( 16'04C7, 16'04C8 ) | range( 16'04CB, 16'04CC ) | range( 16'04D0, 16'04EB ) | range( 16'04EE, 16'04F5 ) | range( 16'04F8, 16'04F9 ) | range( 16'0531, 16'0556 ) | [16'0559] | range( 16'0561, 16'0586 ) | range( 16'05D0, 16'05EA ) | range( 16'05F0, 16'05F2 ) | range( 16'0621, 16'063A ) | range( 16'0641, 16'064A ) | range( 16'0671, 16'06B7 ) | range( 16'06BA, 16'06BE ) | range( 16'06C0, 16'06CE ) | range( 16'06D0, 16'06D3 ) | [16'06D5] | range( 16'06E5, 16'06E6 ) | range( 16'0905, 16'0939 ) | [16'093D] | range( 16'0958, 16'0961 ) | range( 16'0985, 16'098C ) | range( 16'098F, 16'0990 ) | range( 16'0993, 16'09A8 ) | range( 16'09AA, 16'09B0 ) | [16'09B2] | range( 16'09B6, 16'09B9 ) | range( 16'09DC, 16'09DD ) | range( 16'09DF, 16'09E1 ) | range( 16'09F0, 16'09F1 ) | range( 16'0A05, 16'0A0A ) | range( 16'0A0F, 16'0A10 ) | range( 16'0A13, 16'0A28 ) | range( 16'0A2A, 16'0A30 ) | range( 16'0A32, 16'0A33 ) | range( 16'0A35, 16'0A36 ) | range( 16'0A38, 16'0A39 ) | range( 16'0A59, 16'0A5C ) | [16'0A5E] | range( 16'0A72, 16'0A74 ) | range( 16'0A85, 16'0A8B ) | [16'0A8D] | range( 16'0A8F, 16'0A91 ) | range( 16'0A93, 16'0AA8 ) | range( 16'0AAA, 16'0AB0 ) | range( 16'0AB2, 16'0AB3 ) | range( 16'0AB5, 16'0AB9 ) | [16'0ABD] | [16'0AE0] | range( 16'0B05, 16'0B0C ) | range( 16'0B0F, 16'0B10 ) | range( 16'0B13, 16'0B28 ) | range( 16'0B2A, 16'0B30 ) | range( 16'0B32, 16'0B33 ) | range( 16'0B36, 16'0B39 ) | [16'0B3D] | range( 16'0B5C, 16'0B5D ) | range( 16'0B5F, 16'0B61 ) | range( 16'0B85, 16'0B8A ) | range( 16'0B8E, 16'0B90 ) | range( 16'0B92, 16'0B95 ) | range( 16'0B99, 16'0B9A ) | [16'0B9C] | range( 16'0B9E, 16'0B9F ) | range( 16'0BA3, 16'0BA4 ) | range( 16'0BA8, 16'0BAA ) | range( 16'0BAE, 16'0BB5 ) | range( 16'0BB7, 16'0BB9 ) | range( 16'0C05, 16'0C0C ) | range( 16'0C0E, 16'0C10 ) | range( 16'0C12, 16'0C28 ) | range( 16'0C2A, 16'0C33 ) | range( 16'0C35, 16'0C39 ) | range( 16'0C60, 16'0C61 ) | range( 16'0C85, 16'0C8C ) | range( 16'0C8E, 16'0C90 ) | range( 16'0C92, 16'0CA8 ) | range( 16'0CAA, 16'0CB3 ) | range( 16'0CB5, 16'0CB9 ) | [16'0CDE] | range( 16'0CE0, 16'0CE1 ) | range( 16'0D05, 16'0D0C ) | range( 16'0D0E, 16'0D10 ) | range( 16'0D12, 16'0D28 ) | range( 16'0D2A, 16'0D39 ) | range( 16'0D60, 16'0D61 ) | range( 16'0E01, 16'0E2E ) | [16'0E30] | range( 16'0E32, 16'0E33 ) | range( 16'0E40, 16'0E45 ) | range( 16'0E81, 16'0E82 ) | [16'0E84] | range( 16'0E87, 16'0E88 ) | [16'0E8A] | [16'0E8D] | range( 16'0E94, 16'0E97 ) | range( 16'0E99, 16'0E9F ) | range( 16'0EA1, 16'0EA3 ) | [16'0EA5] | [16'0EA7] | range( 16'0EAA, 16'0EAB ) | range( 16'0EAD, 16'0EAE ) | [16'0EB0] | range( 16'0EB2, 16'0EB3 ) | [16'0EBD] | range( 16'0EC0, 16'0EC4 ) | range( 16'0F40, 16'0F47 ) | range( 16'0F49, 16'0F69 ) | range( 16'10A0, 16'10C5 ) | range( 16'10D0, 16'10F6 ) | [16'1100] | range( 16'1102, 16'1103 ) | range( 16'1105, 16'1107 ) | [16'1109] | range( 16'110B, 16'110C ) | range( 16'110E, 16'1112 ) | [16'113C] | [16'113E] | [16'1140] | [16'114C] | [16'114E] | [16'1150] | range( 16'1154, 16'1155 ) | [16'1159] | range( 16'115F, 16'1161 ) | [16'1163] | [16'1165] | [16'1167] | [16'1169] | range( 16'116D, 16'116E ) | range( 16'1172, 16'1173 ) | [16'1175] | [16'119E] | [16'11A8] | [16'11AB] | range( 16'11AE, 16'11AF ) | range( 16'11B7, 16'11B8 ) | [16'11BA] | range( 16'11BC, 16'11C2 ) | [16'11EB] | [16'11F0] | [16'11F9] | range( 16'1E00, 16'1E9B ) | range( 16'1EA0, 16'1EF9 ) | range( 16'1F00, 16'1F15 ) | range( 16'1F18, 16'1F1D ) | range( 16'1F20, 16'1F45 ) | range( 16'1F48, 16'1F4D ) | range( 16'1F50, 16'1F57 ) | [16'1F59] | [16'1F5B] | [16'1F5D] | range( 16'1F5F, 16'1F7D ) | range( 16'1F80, 16'1FB4 ) | range( 16'1FB6, 16'1FBC ) | [16'1FBE] | range( 16'1FC2, 16'1FC4 ) | range( 16'1FC6, 16'1FCC ) | range( 16'1FD0, 16'1FD3 ) | range( 16'1FD6, 16'1FDB ) | range( 16'1FE0, 16'1FEC ) | range( 16'1FF2, 16'1FF4 ) | range( 16'1FF6, 16'1FFC ) | [16'2126] | range( 16'212A, 16'212B ) | [16'212E] | range( 16'2180, 16'2182 ) | range( 16'3041, 16'3094 ) | range( 16'30A1, 16'30FA ) | range( 16'3105, 16'312C ) | range( 16'AC00, 16'D7A3 ) ). ideographic --> ( range( 16'4E00, 16'9FA5 ) | [16'3007] | range( 16'3021, 16'3029 ) ). combiningchar --> ( range( 16'0300, 16'0345 ) | range( 16'0360, 16'0361 ) | range( 16'0483, 16'0486 ) | range( 16'0591, 16'05A1 ) | range( 16'05A3, 16'05B9 ) | range( 16'05BB, 16'05BD ) | [16'05BF] | range( 16'05C1, 16'05C2 ) | [16'05C4] | range( 16'064B, 16'0652 ) | [16'0670] | range( 16'06D6, 16'06DC ) | range( 16'06DD, 16'06DF ) | range( 16'06E0, 16'06E4 ) | range( 16'06E7, 16'06E8 ) | range( 16'06EA, 16'06ED ) | range( 16'0901, 16'0903 ) | [16'093C] | range( 16'093E, 16'094C ) | [16'094D] | range( 16'0951, 16'0954 ) | range( 16'0962, 16'0963 ) | range( 16'0981, 16'0983 ) | [16'09BC] | [16'09BE] | [16'09BF] | range( 16'09C0, 16'09C4 ) | range( 16'09C7, 16'09C8 ) | range( 16'09CB, 16'09CD ) | [16'09D7] | range( 16'09E2, 16'09E3 ) | [16'0A02] | [16'0A3C] | [16'0A3E] | [16'0A3F] | range( 16'0A40, 16'0A42 ) | range( 16'0A47, 16'0A48 ) | range( 16'0A4B, 16'0A4D ) | range( 16'0A70, 16'0A71 ) | range( 16'0A81, 16'0A83 ) | [16'0ABC] | range( 16'0ABE, 16'0AC5 ) | range( 16'0AC7, 16'0AC9 ) | range( 16'0ACB, 16'0ACD ) | range( 16'0B01, 16'0B03 ) | [16'0B3C] | range( 16'0B3E, 16'0B43 ) | range( 16'0B47, 16'0B48 ) | range( 16'0B4B, 16'0B4D ) | range( 16'0B56, 16'0B57 ) | range( 16'0B82, 16'0B83 ) | range( 16'0BBE, 16'0BC2 ) | range( 16'0BC6, 16'0BC8 ) | range( 16'0BCA, 16'0BCD ) | [16'0BD7] | range( 16'0C01, 16'0C03 ) | range( 16'0C3E, 16'0C44 ) | range( 16'0C46, 16'0C48 ) | range( 16'0C4A, 16'0C4D ) | range( 16'0C55, 16'0C56 ) | range( 16'0C82, 16'0C83 ) | range( 16'0CBE, 16'0CC4 ) | range( 16'0CC6, 16'0CC8 ) | range( 16'0CCA, 16'0CCD ) | range( 16'0CD5, 16'0CD6 ) | range( 16'0D02, 16'0D03 ) | range( 16'0D3E, 16'0D43 ) | range( 16'0D46, 16'0D48 ) | range( 16'0D4A, 16'0D4D ) | [16'0D57] | [16'0E31] | range( 16'0E34, 16'0E3A ) | range( 16'0E47, 16'0E4E ) | [16'0EB1] | range( 16'0EB4, 16'0EB9 ) | range( 16'0EBB, 16'0EBC ) | range( 16'0EC8, 16'0ECD ) | range( 16'0F18, 16'0F19 ) | [16'0F35] | [16'0F37] | [16'0F39] | [16'0F3E] | [16'0F3F] | range( 16'0F71, 16'0F84 ) | range( 16'0F86, 16'0F8B ) | range( 16'0F90, 16'0F95 ) | [16'0F97] | range( 16'0F99, 16'0FAD ) | range( 16'0FB1, 16'0FB7 ) | [16'0FB9] | range( 16'20D0, 16'20DC ) | [16'20E1] | range( 16'302A, 16'302F ) | [16'3099] | [16'309A] ). unicode_digit --> ( range( 16'0030, 16'0039 ) | range( 16'0660, 16'0669 ) | range( 16'06F0, 16'06F9 ) | range( 16'0966, 16'096F ) | range( 16'09E6, 16'09EF ) | range( 16'0A66, 16'0A6F ) | range( 16'0AE6, 16'0AEF ) | range( 16'0B66, 16'0B6F ) | range( 16'0BE7, 16'0BEF ) | range( 16'0C66, 16'0C6F ) | range( 16'0CE6, 16'0CEF ) | range( 16'0D66, 16'0D6F ) | range( 16'0E50, 16'0E59 ) | range( 16'0ED0, 16'0ED9 ) | range( 16'0F20, 16'0F29 ) ). extender --> ( [16'00B7] | [16'02D0] | [16'02D1] | [16'0387] | [16'0640] | [16'0E46] | [16'0EC6] | [16'3005] | range( 16'3031, 16'3035 ) | range( 16'309D, 16'309E ) | range( 16'30FC, 16'30FE ) ). range( Low, High ) --> [Char], {Char >= Low, Char =< High}.