/* xml_acquisition.pl : XML -> Document translation.
 *
 * Copyright (C) 2001-2005 Binding Time Limited
 * Copyright (C) 2005-2011 John Fletcher
 *
 * Current Release: $Revision: 3.4 $
 *
 * TERMS AND CONDITIONS:
 *
 * This program is offered free of charge, as unsupported source code. You may
 * use it, copy it, distribute it, modify it or sell it without restriction,
 * but entirely at your own risk.
 */

:- ensure_loaded( xml_utilities ).

/* xml_to_document( +Controls, +XML, ?Document ) parses the list of character
 * codes XML into the Prolog term Document. Controls is a list of terms
 * governing the treatment of layout characters and character entities.
 *
 * Document is xml(Attributes, Content) when the input was well-formed and
 * malformed(Attributes, Content) otherwise. Attributes are those of a
 * leading XML declaration, or [] when the input has none.
 */
xml_to_document( Controls, XML, Document ) :-
    initial_context( Controls, Context ),
    ( xml_declaration( Attributes, XML, Body ) ->
        true
    ; Body = XML,
      Attributes = []
    ),
    xml_to_document( Body, Context, Content, [], WellFormed ),
    xml_to_document1( WellFormed, Attributes, Content, Document ).

% xml_to_document1( +WellFormed, +Attributes, +Content, ?Document ) selects
% the document wrapper according to the well-formedness flag.
xml_to_document1( true,  Attributes, Content, xml(Attributes, Content) ).
xml_to_document1( false, Attributes, Content, malformed(Attributes, Content) ).

% unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed ) wraps the
% text that could not be parsed in a single unparsed/1 term, leaves no
% residue and reports the input as not well-formed.
unparsed( Text, _Context, Terms, Residue, WellFormed ) :-
    Terms = [unparsed(Text)],
    Residue = [],
    WellFormed = false.

% xml_declaration( ?Attributes ) parses optional layout followed by an
% "<?xml ... ?>" declaration, returning the declaration's attributes.
xml_declaration( Attributes ) -->
    spaces,
    [0'<, 0'?],
    nmtoken( xml ),
    xml_declaration_attributes( Attributes ),
    spaces,
    [0'?, 0'>].

% xml_to_document( +Chars, +Context, ?Terms, ?Residue, ?WellFormed ) parses
% Chars into Terms, dispatching on the next character: markup, an entity
% reference, layout, or character data. At the end of the input the current
% context is closed.
xml_to_document( [], Context, Terms, [], WellFormed ) :-
    close_context( Context, Terms, WellFormed ).
xml_to_document( [Code|Codes], Context, Terms, Residue, WellFormed ) :-
    ( Code =:= 0'< ->
        xml_markup_structure( Codes, Context, Terms, Residue, WellFormed )
    ; Code =:= 0'& ->
        entity_reference( Codes, Context, Terms, Residue, WellFormed )
    ; Code =< 32,
      \+ space_preserve( Context ) ->
        % The layout is collected in an open-ended list [Code|Tail].
        layouts( Codes, Context, [Code|Tail], Tail, Terms, Residue, WellFormed )
    ; void_context( Context ) ->
        unparsed( [Code|Codes], Context, Terms, Residue, WellFormed )
    ; Terms = [pcdata([Code|Text])|Terms1],
      acquire_pcdata( Codes, Context, Text, Terms1, Residue, WellFormed )
    ).

% layouts( +Chars, +Context, +Plus, ?Minus, ?Terms, ?Residue, ?WellFormed )
% consumes a run of layout characters. Plus-Minus is a difference list of the
% layout read so far: it is not used when markup or the end of the input
% follows, and becomes the start of a pcdata/1 term when text follows.
layouts( [], Context, _Plus, _Minus, Terms, [], WellFormed ) :-
    close_context( Context, Terms, WellFormed ).
layouts( [Code|Codes], Context, Plus, Minus, Terms, Residue, WellFormed ) :-
    ( Code =:= 0'< ->
        xml_markup_structure( Codes, Context, Terms, Residue, WellFormed )
    ; Code =:= 0'& ->
        reference_in_layout( Codes, Context, Plus, Minus, Terms, Residue, WellFormed )
    ; Code =< 32 ->
        Minus = [Code|Minus1],
        layouts( Codes, Context, Plus, Minus1, Terms, Residue, WellFormed )
    ; void_context( Context ) ->
        unparsed( [Code|Codes], Context, Terms, Residue, WellFormed )
    ; Terms = [pcdata(Plus)|Terms1],
      Minus = [Code|Text],
      % Text has started: mark the context as space-preserving from here on.
      context_update( space_preserve, Context, true, Context1 ),
      acquire_pcdata( Codes, Context1, Text, Terms1, Residue, WellFormed )
    ).

% acquire_pcdata( +Chars, +Context, ?Text, ?Terms, ?Residue, ?WellFormed )
% copies character data from Chars into Text until markup, an entity
% reference or the end of the input is reached.
acquire_pcdata( [], Context, [], Terms, [], WellFormed ) :-
    close_context( Context, Terms, WellFormed ).
acquire_pcdata( [Code|Codes], Context, Text, Terms, Residue, WellFormed ) :-
    ( Code =:= 0'< ->
        Text = [],
        xml_markup_structure( Codes, Context, Terms, Residue, WellFormed )
    ; Code =:= 0'& ->
        reference_in_pcdata( Codes, Context, Text, Terms, Residue, WellFormed )
    ; Text = [Code|Text1],
      acquire_pcdata( Codes, Context, Text1, Terms, Residue, WellFormed )
    ).

% xml_markup_structure( +Chars, +Context, ?Terms, ?Residue, ?WellFormed )
% handles the input that follows a "<": a closing tag, a processing
% instruction, a declaration or an opening tag. Anything else, including a
% "<" at the very end of the input, is returned unparsed.
xml_markup_structure( [], Context, Terms, Residue, WellFormed ) :-
    unparsed( "<", Context, Terms, Residue, WellFormed ).
xml_markup_structure( [Lead|Rest], Context, Terms, Residue, WellFormed ) :-
    Chars = [Lead|Rest],
    ( Lead =:= 0'/ ->
        closing_tag( Context, Rest, Terms, Residue, WellFormed )
    ; Lead =:= 0'? ->
        pi_acquisition( Rest, Context, Terms, Residue, WellFormed )
    ; Lead =:= 0'! ->
        declaration_acquisition( Rest, Context, Terms, Residue, WellFormed )
    ; open_tag( Tag, Context, Attributes, Type, Chars, AfterTag ) ->
        push_tag( Tag, AfterTag, Context, Attributes, Type, Terms, Residue, WellFormed )
    ; unparsed( [0'<|Chars], Context, Terms, Residue, WellFormed )
    ).

% push_tag( +Tag, +Chars, +Context, +Attributes, +Type, ?Terms, ?Residue, ?WF )
% builds the element opened by Tag and, when that element was well-formed,
% carries on parsing what follows it in the same Context.
push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :-
    new_element( Tag, Chars, Context, Attributes, Type, Element, After, ElementWF ),
    push_tag1( ElementWF, Context, Element, After, Terms, Residue, WF ).

% push_tag1( +ElementWF, +Context, +Element, +Chars, ?Terms, ?Residue, ?WF )
% A malformed element ends the parse: it is the last term and the remaining
% input is handed back as the residue.
push_tag1( true, Context, Element, Chars, [Element|Siblings], Residue, WF ) :-
    xml_to_document( Chars, Context, Siblings, Residue, WF ).
push_tag1( false, _Context, Element, Chars, [Element], Chars, false ).

% new_element( +TagChars, +Chars, +Context, +Attributes0, +Type, ?Term,
%              ?Residue, ?WF )
% builds the term for an element whose opening tag has just been read.
% Namespace declarations among the attributes update the context first. The
% tag is split at the first ":" whose prefix names a known namespace;
% otherwise the default namespace applies and the whole tag is the name.
% When the element's namespace differs from the current one, the element is
% wrapped in namespace/3 and the current namespace is updated for its content.
new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :-
    namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
    ( append( Prefix, [0':|LocalChars], TagChars ),
      specific_namespace( Prefix, Context1, Namespace ) ->
        true
    ; Prefix = "",
      LocalChars = TagChars,
      default_namespace( Context1, Namespace )
    ),
    Element = element(Tag, Attributes, Contents),
    current_namespace( Context1, Current ),
    ( Namespace == Current ->
        Term = Element,
        Context2 = Context1
    ; Term = namespace( Namespace, Prefix, Element ),
      context_update( current_namespace, Context1, Namespace, Context2 )
    ),
    input_attributes( Attributes1, Context2, Attributes ),
    atom_codes( Tag, LocalChars ),
    close_tag( Type, Chars, Context2, Contents, Residue, WF ).

% close_tag( +Type, +Chars, +Context, ?Contents, ?Residue, ?WellFormed )
% An empty element has no contents and consumes no input. For push(Tag) the
% contents are parsed with Tag recorded as the context's element.
close_tag( empty, Chars, _Context, [], Chars, true ).
close_tag( push(Tag), Chars, Context, Contents, Residue, WellFormed ) :-
    context_update( element, Context, Tag, Inner ),
    xml_to_document( Chars, Inner, Contents, Residue, WellFormed ).

% pi_acquisition( +Chars, +Context, ?Terms, ?Residue, ?WellFormed ) handles
% the input that follows "<?". A processing instruction whose target is not
% xml becomes instructions(Target, Processing); anything else is unparsed.
pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
    ( inline_instruction( Target, Processing, Chars, After ),
      Target \== xml ->
        Terms = [instructions(Target, Processing)|Following],
        xml_to_document( After, Context, Following, Residue, WellFormed )
    ; unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed )
    ).

% declaration_acquisition( +Chars, +Context, ?Terms, ?Residue, ?WF ) handles
% the input that follows "<!". A declaration that can be classified and
% parsed contributes one term and may update the context; anything else is
% unparsed.
declaration_acquisition( Chars, Context, Terms, Residue, WF ) :-
    ( declaration_type( Chars, Type, Body ),
      declaration_parse( Type, Context, Term, Context1, Body, After ) ->
        Terms = [Term|Following],
        xml_to_document( After, Context1, Following, Residue, WF )
    ; unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WF )
    ).

% open_tag( ?Tag, +Namespaces, ?Attributes, ?Termination ) parses a tag name,
% its attributes and the tag terminator.
open_tag( Tag, Namespaces, Attributes, Termination ) -->
    nmtoken_chars( Tag ),
    attributes( Attributes, [], Namespaces ),
    spaces,
    open_tag_terminator( Tag, Termination ).

% open_tag_terminator( +Tag, ?Termination ): ">" opens an element with
% content, giving push(Tag); "/>" marks an empty element.
open_tag_terminator( Tag, push(Tag) ) --> [0'>].
open_tag_terminator( _Tag, empty ) --> [0'/, 0'>].

% declaration_parse( +Type, +Namespaces0, ?Term, ?Namespaces ) parses the
% body of a declaration of the given Type. Only a doctype can change the
% namespaces/context argument.
declaration_parse( comment, Namespaces, comment(Text), Namespaces ) -->
    comment( Text ).
declaration_parse( cdata, Namespaces, cdata(Text), Namespaces ) -->
    cdata( Text ).
declaration_parse( doctype, Namespaces0, doctype(Name, Id), Namespaces ) -->
    doctype( Name, Id, Namespaces0, Namespaces ),
    spaces,
    [0'>].

% inline_instruction( ?Target, ?Processing, +Plus, ?Minus ) reads a
% processing instruction's target name and then takes everything after any
% layout, up to the first "?>", as Processing. Minus is the input after "?>".
inline_instruction( Target, Processing, Plus, Minus ) :-
    nmtoken( Target, Plus, AfterTarget ),
    spaces( AfterTarget, Body ),
    Terminated = [0'?,0'>|Minus],
    append( Processing, Terminated, Body ),
    !.

% entity_reference_name( ?Reference ) parses a name token terminated by ";".
entity_reference_name( Reference ) -->
    nmtoken_chars( Reference ),
    [0';].

% declaration_type( +Chars, ?Class, ?Rest ) classifies the declaration that
% follows "<!" and needs at least two characters to do so. Rest is the input
% after the recognised keyword; for the fallback class generic nothing is
% consumed.
declaration_type( [First,Second|Tail], Class, Rest ) :-
    ( declaration_type1( First, Second, Tail, Known, After ) ->
        Class = Known,
        Rest = After
    ; Class = generic,
      Rest = [First,Second|Tail]
    ).

% declaration_type1( +Char1, +Char2, +Chars, ?Class, ?Residue ) recognises
% "--" (comment), "[CDATA[" (cdata) and "DOCTYPE" (doctype).
declaration_type1( 0'-, 0'-, Chars, comment, Chars ).
declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :-
    append( "DATA[", Residue, Chars ).
declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :-
    append( "CTYPE", Residue, Chars ).

% closing_tag( +Context, +Chars, ?Terms, ?Residue, ?WellFormed ) handles the
% input that follows "</". A tag matching the context's current tag ends the
% element's content, leaving the rest of the input as the residue; any other
% closing tag is unparsed.
closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
    ( closing_tag_name( Tag, Chars, After ),
      current_tag( Context, Tag ) ->
        Terms = [],
        Residue = After,
        WellFormed = true
    ; unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed )
    ).

% closing_tag_name( ?Tag ) parses a tag name, optional layout and ">".
closing_tag_name( Tag ) -->
    nmtoken_chars( Tag ),
    spaces,
    [0'>].

% entity_reference( +Chars, +Context, ?Terms, ?Residue, ?WF ) handles the
% input that follows "&" when no layout is pending: it is treated as a
% reference in layout whose difference list is empty.
entity_reference( Chars, Context, Terms, Residue, WF ) :-
    reference_in_layout( Chars, Context, Empty, Empty, Terms, Residue, WF ).

% reference_in_layout( +Chars, +Context, +Plus, ?Minus, ?Terms, ?Residue, ?WF )
% handles the input that follows "&" while Plus-Minus holds pending layout.
%   - A standard character entity starts pcdata after the pending layout.
%   - A defined entity is expanded in front of the remaining input, which is
%     then parsed afresh.
%   - A bare "&" is kept as text when the context allows ampersands.
%   - Otherwise the input from "&" onwards is unparsed.
reference_in_layout( Chars, Context, Plus, Minus, Terms, Residue, WF ) :-
    ( standard_character_entity( Code, Chars, After ) ->
        Minus = [Code|Text],
        Terms = [pcdata(Plus)|Following],
        acquire_pcdata( After, Context, Text, Following, Residue, WF )
    ; entity_reference_name( Name, Chars, After ),
      defined_entity( Name, Context, Expansion ) ->
        append( Expansion, After, Expanded ),
        xml_to_document( Expanded, Context, Terms, Residue, WF )
    ; allow_ampersand( Context ) ->
        Minus = [0'&|Text],
        Terms = [pcdata(Plus)|Following],
        acquire_pcdata( Chars, Context, Text, Following, Residue, WF )
    ; unparsed( [0'&|Chars], Context, Terms, Residue, WF )
    ).

% reference_in_pcdata( +Chars0, +Context, ?Text, ?Terms, ?Residue, ?WF )
% handles the input that follows "&" inside character data. Text is the
% open tail of the pcdata being collected.
%   - A standard character entity contributes its character.
%   - A defined entity is expanded in front of the remaining input and
%     collection continues.
%   - A bare "&" is kept when the context allows ampersands.
%   - Otherwise the text ends here and the input from "&" is unparsed.
reference_in_pcdata( Chars0, Context, Text, Terms, Residue, WF ) :-
    ( standard_character_entity( Code, Chars0, After ) ->
        Text = [Code|Text1],
        acquire_pcdata( After, Context, Text1, Terms, Residue, WF )
    ; entity_reference_name( Name, Chars0, After ),
      defined_entity( Name, Context, Expansion ) ->
        append( Expansion, After, Expanded ),
        acquire_pcdata( Expanded, Context, Text, Terms, Residue, WF )
    ; allow_ampersand( Context ) ->
        Text = [0'&|Text1],
        acquire_pcdata( Chars0, Context, Text1, Terms, Residue, WF )
    ; Text = [],
      unparsed( [0'&|Chars0], Context, Terms, Residue, WF )
    ).

% namespace_attributes( +Attributes0, +Context0, ?Context, ?Attributes )
% applies namespace-related attributes to the context.
%   - xmlns="URI" sets the default namespace and is removed from Attributes.
%   - xmlns:Prefix="URI" records the prefix and stays in Attributes.
%   - xml:space="preserve" switches on space preservation.
% The first two are handled one attribute at a time, recursing on the rest.
namespace_attributes( [], Context, Context, [] ).
namespace_attributes( [First|Others], Context0, Context, Attributes ) :-
    All = [First|Others],
    append( "xmlns:", Prefix, PrefixedName ),
    ( select( "xmlns"=Value, All, Remaining ) ->
        atom_codes( URI, Value ),
        context_update( default_namespace, Context0, URI, Context1 ),
        namespace_attributes( Remaining, Context1, Context, Attributes )
    ; select( PrefixedName=Value, All, Remaining ) ->
        Attributes = [PrefixedName=Value|Attributes1],
        atom_codes( URI, Value ),
        context_update( ns_prefix(Prefix), Context0, URI, Context1 ),
        namespace_attributes( Remaining, Context1, Context, Attributes1 )
    ; member( "xml:space"="preserve", All ) ->
        Attributes = All,
        context_update( space_preserve, Context0, true, Context )
    ; Context = Context0,
      Attributes = All
    ).

% input_attributes( +Attributes0, +Context, ?Attributes ) converts each
% attribute name from a code list to an atom. When the context asks for
% attribute prefixes to be removed, a prefix other than "xmlns" that denotes
% the current namespace is stripped from the name first.
input_attributes( [], _Context, [] ).
input_attributes( [NameChars=Value|Rest0], Context, [Name=Value|Rest] ) :-
    ( remove_attribute_prefixes( Context ),
      append( Prefix, [0':|LocalChars], NameChars ),
      Prefix \== "xmlns",
      specific_namespace( Prefix, Context, Namespace ),
      current_namespace( Context, Namespace ) ->
        Chosen = LocalChars
    ; Chosen = NameChars
    ),
    atom_codes( Name, Chosen ),
    input_attributes( Rest0, Context, Rest ).

% attributes( ?Attributes, +Seen, +Namespaces ) parses zero or more
% Name="Value" attributes. Seen holds the names already read; a repeated
% name is not accepted as a further attribute.
attributes( [Name=Value|Rest], Seen, Namespaces ) -->
    spaces,
    nmtoken_chars( Name ),
    { \+ member( Name, Seen ) },
    spaces,
    [0'=],
    spaces,
    attribute_value( Value, Namespaces ),
    attributes( Rest, [Name|Seen], Namespaces ).
attributes( [], _Seen, _Namespaces ) --> [].

% xml_declaration_attributes( ?Attributes ) parses the Name="Value"
% attributes of an XML declaration. Each pair must be accepted by
% xml_declaration_attribute_valid/2.
xml_declaration_attributes( [] ) --> [].
xml_declaration_attributes( [Name=Value|Rest] ) -->
    spaces,
    nmtoken( Name ),
    spaces,
    [0'=],
    spaces,
    xml_string( Value ),
    { xml_declaration_attribute_valid( Name, Value ) },
    xml_declaration_attributes( Rest ),
    spaces.

% doctype( ?Name, ?External, +Namespaces0, ?Namespaces1 ) parses the body of
% a DOCTYPE declaration: the document type name, an optional external
% identifier and an optional internal subset. Literals collected from the
% internal subset are attached to the identifier.
doctype( Name, External, Namespaces0, Namespaces1 ) -->
    spaces,
    nmtoken( Name ),
    spaces,
    doctype_id( Id ),
    spaces,
    doctype1( Namespaces0, Literals, Namespaces1 ),
    { doctype_extension( Literals, Id, External ) }.

% doctype_extension( +Literals, +External0, ?External ) leaves the identifier
% unchanged when there are no DTD literals and otherwise appends the
% literals to it as an extra argument.
doctype_extension( [], External, External ).
doctype_extension( Literals, External0, External ) :-
    Literals = [_|_],
    extended_doctype( External0, Literals, External ).

% extended_doctype( +External0, +Literals, ?External )
extended_doctype( system(URL),     Literals, system(URL, Literals) ).
extended_doctype( public(URN,URL), Literals, public(URN, URL, Literals) ).
extended_doctype( local,           Literals, local(Literals) ).

% doctype1( +Namespaces0, ?Literals, ?Namespaces1 ) parses an optional
% internal subset "[ ... ]". Once "[" has been seen there is no fallback to
% the empty alternative.
doctype1( Namespaces0, Literals, Namespaces1 ) -->
    [0'[],
    !,
    dtd( Namespaces0, Literals, Namespaces1 ),
    [0']].
doctype1( Namespaces, [], Namespaces ) --> [].

% doctype_id( ?Id ) parses a document type's external identifier:
% SYSTEM "url", PUBLIC "urn" "url", or nothing at all (local).
doctype_id( system(URL) ) -->
    "SYSTEM",
    spaces,
    uri( URL ).
doctype_id( public(URN,URL) ) -->
    "PUBLIC",
    spaces,
    uri( URN ),
    spaces,
    uri( URL ).
doctype_id( local ) --> [].

% dtd( +Namespaces0, ?Literals, ?Namespaces1 ) parses the contents of an
% internal DTD subset.
%   - <!ENTITY Name "Value"> records the entity in the context.
%   - <!-- ... --> comments are skipped.
%   - Any other <! ... > declaration is returned as dtd_literal(Chars).
% Each keyword commits the parse to its clause.
dtd( Context0, Literals, Context ) -->
    spaces,
    "<!ENTITY",
    !,
    spaces,
    nmtoken_chars( Name ),
    spaces,
    quote( Quote ),
    entity_value( Quote, Context0, Value ),
    spaces,
    [0'>],
    { % Don't allow &lt; &quot; etc. to be redefined: a standard entity
      % name is accepted only with its standard one-character value.
      ( \+ character_entity( Name, Standard )
      ; Value = [Standard], character_entity( Name, Standard )
      ),
      context_update( entity(Name), Context0, Value, Context1 )
    },
    dtd( Context1, Literals, Context ).
dtd( Context0, Literals, Context ) -->
    spaces,
    "<!--",
    !,
    dtd_comment,
    [0'>],
    dtd( Context0, Literals, Context ).
dtd( Context0, [dtd_literal(Literal)|Literals], Context ) -->
    spaces,
    "<!",
    !,
    dtd_literal( Literal ),
    dtd( Context0, Literals, Context ).
dtd( Context, [], Context ) --> spaces.

% dtd_literal( ?Chars ) collects the characters of a DTD declaration up to
% its closing ">". Embedded "-- ... --" comments are skipped and are not
% part of Chars.
dtd_literal( [] ) --> [0'>], !.
dtd_literal( Chars ) -->
    [0'-, 0'-],
    !,
    dtd_comment,
    dtd_literal( Chars ).
dtd_literal( [Code|Codes] ) -->
    [Code],
    dtd_literal( Codes ).

% dtd_comment( +Plus, ?Minus ) skips input up to and including the first
% "--"; Minus is what follows it.
dtd_comment( Plus, Minus ) :-
    once( append( _Skipped, [0'-,0'-|Minus], Plus ) ).

% nmtokens( ?Names ) parses a sequence of name tokens, each optionally
% preceded by layout.
nmtokens( [First|Others] ) -->
    spaces,
    nmtoken( First ),
    nmtokens( Others ).
nmtokens( [] ) --> [].

% entity_value( +Quote, +Namespaces, ?String, +Plus, ?Minus ) reads an entity
% definition's value up to the closing Quote, resolving the references it
% contains. It fails if the input ends before the closing Quote.
entity_value( Quote, Namespaces, String, [Code|Codes], Minus ) :-
    ( Code == Quote ->
        String = [],
        Minus = Codes
    ; Code =:= 0'& ->
        reference_in_entity( Namespaces, Quote, String, Codes, Minus )
    ; String = [Code|String1],
      entity_value( Quote, Namespaces, String1, Codes, Minus )
    ).

% attribute_value( ?String, +Namespaces ) parses a quoted attribute value;
% the opening quote character determines the closing one.
attribute_value( String, Namespaces ) -->
    quote( Opening ),
    attribute_leading_layouts( Opening, Namespaces, String ).

% attribute_leading_layouts( +Quote, +Namespaces, ?String, +Plus, ?Minus )
% skips layout (codes up to 32, and 160) at the start of an attribute value,
% then hands over to attribute_layouts/6 at the first significant character.
attribute_leading_layouts( _Quote, _Namespace, [], [], [] ).
attribute_leading_layouts( Quote, Namespaces, String, [Code|Codes], Minus ) :-
    ( Code == Quote ->
        String = [],
        Minus = Codes
    ; Code =:= 0'& ->
        ref_in_attribute_layout( Namespaces, Quote, String, Codes, Minus )
    ; Code > 32, Code \== 160 ->
        String = [Code|String1],
        attribute_layouts( Quote, Namespaces, false, String1, Codes, Minus )
    ; attribute_leading_layouts( Quote, Namespaces, String, Codes, Minus )
    ).

% attribute_layouts( +Quote, +Namespaces, +Layout, ?String, +Plus, ?Minus )
% reads the rest of an attribute value up to the closing Quote. Layout is
% true when layout has been skipped since the last significant character; a
% run of layout between significant characters becomes one space, and
% trailing layout is dropped.
attribute_layouts( _Quote, _Namespaces, _Layout, [], [], [] ).
attribute_layouts( Quote, Namespaces, Layout, String, [Code|Codes], Minus ) :-
    ( Code == Quote ->
        String = [],
        Minus = Codes
    ; Code =:= 0'& ->
        reference_in_value( Namespaces, Quote, Layout, String, Codes, Minus )
    ; Code > 32, Code \== 160 ->
        ( Layout == true ->
            String = [32,Code|String1]      % 32 is the space character
        ; String = [Code|String1]
        ),
        attribute_layouts( Quote, Namespaces, false, String1, Codes, Minus )
    ; attribute_layouts( Quote, Namespaces, true, String, Codes, Minus )
    ).

% ref_in_attribute_layout( +NS, +Quote, ?String, +Plus, ?Minus ) handles the
% input that follows "&" while still in the leading layout of an attribute
% value. A defined entity's text is expanded in place and is itself subject
% to leading-layout removal.
ref_in_attribute_layout( NS, Quote, String, Plus, Minus ) :-
    ( standard_character_entity( Code, Plus, After ) ->
        String = [Code|String1],
        attribute_layouts( Quote, NS, false, String1, After, Minus )
    ; entity_reference_name( Name, Plus, Suffix ),
      defined_entity( Name, NS, Expansion ) ->
        append( Expansion, Suffix, Expanded ),
        attribute_leading_layouts( Quote, NS, String, Expanded, Minus )
    ; % Just & is okay in a value
      String = [0'&|String1],
      attribute_layouts( Quote, NS, false, String1, Plus, Minus )
    ).

% reference_in_value( +Namespaces, +Quote, +Layout, ?String, +Plus, ?Minus )
% handles the input that follows "&" inside an attribute value. Layout says
% whether a separating space is pending; a defined entity's expansion leaves
% that flag as it was.
reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :-
    ( standard_character_entity( Code, Plus, After ) ->
        ( Layout == true ->
            String = [32,Code|String1]      % 32 is the space character
        ; String = [Code|String1]
        ),
        Layout1 = false
    ; entity_reference_name( Name, Plus, Suffix ),
      defined_entity( Name, Namespaces, Expansion ) ->
        String = String1,
        append( Expansion, Suffix, After ),
        Layout1 = Layout
    ; % Just & is okay in a value
      After = Plus,
      String = [0'&|String1],
      Layout1 = false
    ),
    attribute_layouts( Quote, Namespaces, Layout1, String1, After, Minus ).

/* References are resolved backwards in Entity definitions so that
 * circularity is avoided.
 *
 * reference_in_entity( +Namespaces, +Quote, ?String, +Plus, ?Minus ) handles
 * the input that follows "&" inside an entity value. A character entity is
 * left unparsed (the "&" is kept and its name is read as ordinary text); a
 * reference to an already defined entity is expanded in place. Any other
 * "&" makes the entity value fail to parse.
 */
reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
    ( standard_character_entity( _Code, Plus, _After ) ->
        String = [0'&|String1],
        Next = Plus
    ; entity_reference_name( Name, Plus, Suffix ),
      defined_entity( Name, Namespaces, Expansion ) ->
        String = String1,
        append( Expansion, Suffix, Next )
    ),
    entity_value( Quote, Namespaces, String1, Next, Minus ).

% standard_character_entity( ?Char ) parses the part of a character
% reference that follows "&": a hexadecimal reference "#x...;", a decimal
% reference "#...;", or the name of one of the character_entity/2 entries.
standard_character_entity( Code ) -->
    [0'#, 0'x],
    hex_character_reference( Code ),
    [0';].
standard_character_entity( Code ) -->
    [0'#],
    digit( First ),
    digits( Others ),
    [0';],
    % NOTE(review): number_chars/2 is given character codes here; confirm
    % that every target Prolog accepts codes in this position.
    { number_chars( Code, [First|Others] ) }.
standard_character_entity( Code ) -->
    chars( Name ),
    [0';],
    !,
    { character_entity( Name, Code ) }.

% uri( ?URI ) parses a quoted literal, returning the codes between the
% quotes. The closing quote must match the opening one.
uri( URI ) -->
    quote( Opening ),
    uri1( Opening, URI ).

% uri1( +Quote, ?Codes ) collects codes up to the first matching quote.
uri1( Quote, [] ) -->
    quote( Quote ),
    !.
uri1( Quote, [Code|Codes] ) -->
    [Code],
    uri1( Quote, Codes ).

% comment( ?Chars, +Plus, ?Minus ): Chars is the comment text, i.e. the input
% before the first "-->"; Minus is the input after it.
comment( Chars, Plus, Minus ) :-
    once( append( Chars, [0'-,0'-,0'>|Minus], Plus ) ).

% cdata( ?Chars, +Plus, ?Minus ): Chars is the CDATA text, i.e. the input
% before the first "]]>"; Minus is the input after it.
cdata( Chars, Plus, Minus ) :-
    once( append( Chars, [0'],0'],0'>|Minus], Plus ) ).

% Syntax Components

/* hex_character_reference( ?Code ) parses the hexadecimal digits of a
 * character reference and returns the character code they denote.
 *
 * At least one hexadecimal digit is required, as in the XML CharRef
 * production ('&#x' [0-9a-fA-F]+ ';'). Previously an empty digit string was
 * accepted, so the malformed reference "&#x;" yielded character code 0.
 */
hex_character_reference( Code ) -->
    hex_digit_char( First ),
    hex_character_reference1( First, Code ).

% hex_character_reference1( +Current, ?Code ) folds any further hexadecimal
% digits into the value accumulated so far, consuming as many as possible.
hex_character_reference1( Current, Code ) -->
    hex_digit_char( Value ),
    !,
    { New is (Current << 4) + Value },
    hex_character_reference1( New, Code ).
hex_character_reference1( Code, Code ) --> [].

% hex_digit_char( ?Value ) parses one hexadecimal digit, in either case.
hex_digit_char( 0 ) --> [0'0].
hex_digit_char( 1 ) --> [0'1].
hex_digit_char( 2 ) --> [0'2].
hex_digit_char( 3 ) --> [0'3].
hex_digit_char( 4 ) --> [0'4].
hex_digit_char( 5 ) --> [0'5].
hex_digit_char( 6 ) --> [0'6].
hex_digit_char( 7 ) --> [0'7].
hex_digit_char( 8 ) --> [0'8].
hex_digit_char( 9 ) --> [0'9].
hex_digit_char( 10 ) --> [0'A].
hex_digit_char( 11 ) --> [0'B].
hex_digit_char( 12 ) --> [0'C].
hex_digit_char( 13 ) --> [0'D].
hex_digit_char( 14 ) --> [0'E].
hex_digit_char( 15 ) --> [0'F].
hex_digit_char( 10 ) --> [0'a].
hex_digit_char( 11 ) --> [0'b].
hex_digit_char( 12 ) --> [0'c].
hex_digit_char( 13 ) --> [0'd].
hex_digit_char( 14 ) --> [0'e].
hex_digit_char( 15 ) --> [0'f].

% quote( ?Quote ) parses a double quote (code 34) or an apostrophe (code 39)
% and returns its character code.
quote( 34 ) --> [34].
quote( 39 ) --> [39].

% spaces( +Chars0, ?Chars1 ) skips leading layout: Chars1 is Chars0 without
% its leading run of character codes up to and including 32.
spaces( [], [] ).
spaces( [Code|Codes], Rest ) :-
    ( Code > 32 ->
        Rest = [Code|Codes]
    ; spaces( Codes, Rest )
    ).

% nmtoken( ?Name, +Plus, ?Minus ) parses a name token and returns it as an
% atom. Written out in difference-list form; it is called as nmtoken//1.
nmtoken( Name, Plus, Minus ) :-
    nmtoken_chars( Codes, Plus, Minus ),
    atom_codes( Name, Codes ).

% nmtoken_chars( ?Chars ) parses a name token as a list of character codes:
% one character accepted by nmtoken_first/1 followed by the longest possible
% run of characters accepted by nmtoken_char/1.
nmtoken_chars( [First|Others] ) -->
    [First],
    { nmtoken_first( First ) },
    nmtoken_chars_tail( Others ).

nmtoken_chars_tail( [Code|Codes] ) -->
    [Code],
    { nmtoken_char( Code ) },
    !,
    nmtoken_chars_tail( Codes ).
nmtoken_chars_tail( [] ) --> [].

% nmtoken_first( ?Char ) holds for the characters that may start a name
% token: ":" (code 58), "_" (code 95) and the letters of alphabet/1.
nmtoken_first( 58 ).
nmtoken_first( 95 ).
nmtoken_first( Letter ) :-
    alphabet( Letter ).

% nmtoken_char( ?Char ) holds for the characters that may continue a name
% token: the letters of alphabet/1, the digits of digit_table/1 and the
% punctuation characters ".", "-", "_" and ":". Solutions are enumerated in
% that order.
nmtoken_char( Letter ) :-
    alphabet( Letter ).
nmtoken_char( Digit ) :-
    digit_table( Digit ).
nmtoken_char( 0'. ).
nmtoken_char( 0'- ).
nmtoken_char( 0'_ ).
nmtoken_char( 0': ).

% xml_string( ?String ) parses a quoted string, returning the codes between
% the quotes. The closing quote must match the opening one.
xml_string( String ) -->
    quote( Opening ),
    xml_string1( Opening, String ).

% xml_string1( +Quote, ?Codes ) collects codes up to the first matching quote.
xml_string1( Quote, [] ) -->
    quote( Quote ),
    !.
xml_string1( Quote, [Code|Codes] ) -->
    [Code],
    xml_string1( Quote, Codes ).

% alphabet( ?Char ) holds for the character codes of the unaccented letters
% a-z and A-Z, enumerated in that order. The literal is read as a list of
% character codes, like the other string literals in this file.
alphabet( Char ) :-
    member( Char, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" ).

% digit( ?Code ) parses one decimal digit and returns its character code.
digit( Code ) -->
    [Code],
    { digit_table( Code ) }.

% digit_table( ?Code ) holds for the character codes of 0-9, enumerated in
% that order. The literal is read as a list of character codes.
digit_table( Code ) :-
    member( Code, "0123456789" ).

% digits( ?Codes ) parses a possibly empty run of decimal digits; the
% longest run is the first solution.
digits( [First|Others] ) -->
    digit( First ),
    digits( Others ).
digits( [] ) --> [].

% character_entity( ?Name, ?Code ) relates the name of each predefined XML
% entity to the code of the character it stands for.
character_entity( "quot", 34 ).     % double quote
character_entity( "amp",  38 ).     % ampersand
character_entity( "lt",   60 ).     % less-than sign
character_entity( "gt",   62 ).     % greater-than sign
character_entity( "apos", 39 ).     % apostrophe

end_of_file.

/* For reference, this is a comprehensive recognizer for namechar, based on
 * the definition in http://www.w3.org/TR/2000/REC-xml-20001006 .
 */
% namechar recognises one XML name character. Reference material only: it
% follows end_of_file and is therefore not loaded.
namechar --> letter.
namechar --> unicode_digit.
namechar --> [0'.].
namechar --> [0'-].
namechar --> [0'_].
namechar --> [0':].
namechar --> combiningchar.
namechar --> extender.

% letter recognises a base character or an ideographic character.
letter --> basechar.
letter --> ideographic.

% basechar recognises one character in any of the listed code ranges or
% single code points. Reference material only (not loaded).
basechar -->
    ( range( 16'0041, 16'005A )
    ; range( 16'0061, 16'007A )
    ; range( 16'00C0, 16'00D6 )
    ; range( 16'00D8, 16'00F6 )
    ; range( 16'00F8, 16'00FF )
    ; range( 16'0100, 16'0131 )
    ; range( 16'0134, 16'013E )
    ; range( 16'0141, 16'0148 )
    ; range( 16'014A, 16'017E )
    ; range( 16'0180, 16'01C3 )
    ; range( 16'01CD, 16'01F0 )
    ; range( 16'01F4, 16'01F5 )
    ; range( 16'01FA, 16'0217 )
    ; range( 16'0250, 16'02A8 )
    ; range( 16'02BB, 16'02C1 )
    ; [16'0386]
    ; range( 16'0388, 16'038A )
    ; [16'038C]
    ; range( 16'038E, 16'03A1 )
    ; range( 16'03A3, 16'03CE )
    ; range( 16'03D0, 16'03D6 )
    ; [16'03DA]
    ; [16'03DC]
    ; [16'03DE]
    ; [16'03E0]
    ; range( 16'03E2, 16'03F3 )
    ; range( 16'0401, 16'040C )
    ; range( 16'040E, 16'044F )
    ; range( 16'0451, 16'045C )
    ; range( 16'045E, 16'0481 )
    ; range( 16'0490, 16'04C4 )
    ; range( 16'04C7, 16'04C8 )
    ; range( 16'04CB, 16'04CC )
    ; range( 16'04D0, 16'04EB )
    ; range( 16'04EE, 16'04F5 )
    ; range( 16'04F8, 16'04F9 )
    ; range( 16'0531, 16'0556 )
    ; [16'0559]
    ; range( 16'0561, 16'0586 )
    ; range( 16'05D0, 16'05EA )
    ; range( 16'05F0, 16'05F2 )
    ; range( 16'0621, 16'063A )
    ; range( 16'0641, 16'064A )
    ; range( 16'0671, 16'06B7 )
    ; range( 16'06BA, 16'06BE )
    ; range( 16'06C0, 16'06CE )
    ; range( 16'06D0, 16'06D3 )
    ; [16'06D5]
    ; range( 16'06E5, 16'06E6 )
    ; range( 16'0905, 16'0939 )
    ; [16'093D]
    ; range( 16'0958, 16'0961 )
    ; range( 16'0985, 16'098C )
    ; range( 16'098F, 16'0990 )
    ; range( 16'0993, 16'09A8 )
    ; range( 16'09AA, 16'09B0 )
    ; [16'09B2]
    ; range( 16'09B6, 16'09B9 )
    ; range( 16'09DC, 16'09DD )
    ; range( 16'09DF, 16'09E1 )
    ; range( 16'09F0, 16'09F1 )
    ; range( 16'0A05, 16'0A0A )
    ; range( 16'0A0F, 16'0A10 )
    ; range( 16'0A13, 16'0A28 )
    ; range( 16'0A2A, 16'0A30 )
    ; range( 16'0A32, 16'0A33 )
    ; range( 16'0A35, 16'0A36 )
    ; range( 16'0A38, 16'0A39 )
    ; range( 16'0A59, 16'0A5C )
    ; [16'0A5E]
    ; range( 16'0A72, 16'0A74 )
    ; range( 16'0A85, 16'0A8B )
    ; [16'0A8D]
    ; range( 16'0A8F, 16'0A91 )
    ; range( 16'0A93, 16'0AA8 )
    ; range( 16'0AAA, 16'0AB0 )
    ; range( 16'0AB2, 16'0AB3 )
    ; range( 16'0AB5, 16'0AB9 )
    ; [16'0ABD]
    ; [16'0AE0]
    ; range( 16'0B05, 16'0B0C )
    ; range( 16'0B0F, 16'0B10 )
    ; range( 16'0B13, 16'0B28 )
    ; range( 16'0B2A, 16'0B30 )
    ; range( 16'0B32, 16'0B33 )
    ; range( 16'0B36, 16'0B39 )
    ; [16'0B3D]
    ; range( 16'0B5C, 16'0B5D )
    ; range( 16'0B5F, 16'0B61 )
    ; range( 16'0B85, 16'0B8A )
    ; range( 16'0B8E, 16'0B90 )
    ; range( 16'0B92, 16'0B95 )
    ; range( 16'0B99, 16'0B9A )
    ; [16'0B9C]
    ; range( 16'0B9E, 16'0B9F )
    ; range( 16'0BA3, 16'0BA4 )
    ; range( 16'0BA8, 16'0BAA )
    ; range( 16'0BAE, 16'0BB5 )
    ; range( 16'0BB7, 16'0BB9 )
    ; range( 16'0C05, 16'0C0C )
    ; range( 16'0C0E, 16'0C10 )
    ; range( 16'0C12, 16'0C28 )
    ; range( 16'0C2A, 16'0C33 )
    ; range( 16'0C35, 16'0C39 )
    ; range( 16'0C60, 16'0C61 )
    ; range( 16'0C85, 16'0C8C )
    ; range( 16'0C8E, 16'0C90 )
    ; range( 16'0C92, 16'0CA8 )
    ; range( 16'0CAA, 16'0CB3 )
    ; range( 16'0CB5, 16'0CB9 )
    ; [16'0CDE]
    ; range( 16'0CE0, 16'0CE1 )
    ; range( 16'0D05, 16'0D0C )
    ; range( 16'0D0E, 16'0D10 )
    ; range( 16'0D12, 16'0D28 )
    ; range( 16'0D2A, 16'0D39 )
    ; range( 16'0D60, 16'0D61 )
    ; range( 16'0E01, 16'0E2E )
    ; [16'0E30]
    ; range( 16'0E32, 16'0E33 )
    ; range( 16'0E40, 16'0E45 )
    ; range( 16'0E81, 16'0E82 )
    ; [16'0E84]
    ; range( 16'0E87, 16'0E88 )
    ; [16'0E8A]
    ; [16'0E8D]
    ; range( 16'0E94, 16'0E97 )
    ; range( 16'0E99, 16'0E9F )
    ; range( 16'0EA1, 16'0EA3 )
    ; [16'0EA5]
    ; [16'0EA7]
    ; range( 16'0EAA, 16'0EAB )
    ; range( 16'0EAD, 16'0EAE )
    ; [16'0EB0]
    ; range( 16'0EB2, 16'0EB3 )
    ; [16'0EBD]
    ; range( 16'0EC0, 16'0EC4 )
    ; range( 16'0F40, 16'0F47 )
    ; range( 16'0F49, 16'0F69 )
    ; range( 16'10A0, 16'10C5 )
    ; range( 16'10D0, 16'10F6 )
    ; [16'1100]
    ; range( 16'1102, 16'1103 )
    ; range( 16'1105, 16'1107 )
    ; [16'1109]
    ; range( 16'110B, 16'110C )
    ; range( 16'110E, 16'1112 )
    ; [16'113C]
    ; [16'113E]
    ; [16'1140]
    ; [16'114C]
    ; [16'114E]
    ; [16'1150]
    ; range( 16'1154, 16'1155 )
    ; [16'1159]
    ; range( 16'115F, 16'1161 )
    ; [16'1163]
    ; [16'1165]
    ; [16'1167]
    ; [16'1169]
    ; range( 16'116D, 16'116E )
    ; range( 16'1172, 16'1173 )
    ; [16'1175]
    ; [16'119E]
    ; [16'11A8]
    ; [16'11AB]
    ; range( 16'11AE, 16'11AF )
    ; range( 16'11B7, 16'11B8 )
    ; [16'11BA]
    ; range( 16'11BC, 16'11C2 )
    ; [16'11EB]
    ; [16'11F0]
    ; [16'11F9]
    ; range( 16'1E00, 16'1E9B )
    ; range( 16'1EA0, 16'1EF9 )
    ; range( 16'1F00, 16'1F15 )
    ; range( 16'1F18, 16'1F1D )
    ; range( 16'1F20, 16'1F45 )
    ; range( 16'1F48, 16'1F4D )
    ; range( 16'1F50, 16'1F57 )
    ; [16'1F59]
    ; [16'1F5B]
    ; [16'1F5D]
    ; range( 16'1F5F, 16'1F7D )
    ; range( 16'1F80, 16'1FB4 )
    ; range( 16'1FB6, 16'1FBC )
    ; [16'1FBE]
    ; range( 16'1FC2, 16'1FC4 )
    ; range( 16'1FC6, 16'1FCC )
    ; range( 16'1FD0, 16'1FD3 )
    ; range( 16'1FD6, 16'1FDB )
    ; range( 16'1FE0, 16'1FEC )
    ; range( 16'1FF2, 16'1FF4 )
    ; range( 16'1FF6, 16'1FFC )
    ; [16'2126]
    ; range( 16'212A, 16'212B )
    ; [16'212E]
    ; range( 16'2180, 16'2182 )
    ; range( 16'3041, 16'3094 )
    ; range( 16'30A1, 16'30FA )
    ; range( 16'3105, 16'312C )
    ; range( 16'AC00, 16'D7A3 )
    ).

/* ideographic//0 consumes one character code that is an ideographic
 * character. The alternatives mirror the Ideographic production of the
 * XML 1.0 character classes: one contiguous block, one isolated code and
 * one short range. Each alternative is tried in the order written.
 */
ideographic -->
    ( range( 16'4E00, 16'9FA5 )
    | [16'3007]
    | range( 16'3021, 16'3029 )
    ).

/* combiningchar//0 consumes one character code that is a combining
 * character. The alternatives mirror the CombiningChar production of the
 * XML 1.0 character classes and are deliberately kept in the same order
 * and granularity as that table (adjacent ranges are not merged), so the
 * clause can be checked against it line by line.
 * A singleton code is written as a one-element terminal list; a span of
 * codes is delegated to range//2, whose bounds are inclusive.
 */
combiningchar -->
    ( range( 16'0300, 16'0345 )
    | range( 16'0360, 16'0361 )
    | range( 16'0483, 16'0486 )
    | range( 16'0591, 16'05A1 )
    | range( 16'05A3, 16'05B9 )
    | range( 16'05BB, 16'05BD )
    | [16'05BF]
    | range( 16'05C1, 16'05C2 )
    | [16'05C4]
    | range( 16'064B, 16'0652 )
    | [16'0670]
    | range( 16'06D6, 16'06DC )
    | range( 16'06DD, 16'06DF )
    | range( 16'06E0, 16'06E4 )
    | range( 16'06E7, 16'06E8 )
    | range( 16'06EA, 16'06ED )
    | range( 16'0901, 16'0903 )
    | [16'093C]
    | range( 16'093E, 16'094C )
    | [16'094D]
    | range( 16'0951, 16'0954 )
    | range( 16'0962, 16'0963 )
    | range( 16'0981, 16'0983 )
    | [16'09BC]
    | [16'09BE]
    | [16'09BF]
    | range( 16'09C0, 16'09C4 )
    | range( 16'09C7, 16'09C8 )
    | range( 16'09CB, 16'09CD )
    | [16'09D7]
    | range( 16'09E2, 16'09E3 )
    | [16'0A02]
    | [16'0A3C]
    | [16'0A3E]
    | [16'0A3F]
    | range( 16'0A40, 16'0A42 )
    | range( 16'0A47, 16'0A48 )
    | range( 16'0A4B, 16'0A4D )
    | range( 16'0A70, 16'0A71 )
    | range( 16'0A81, 16'0A83 )
    | [16'0ABC]
    | range( 16'0ABE, 16'0AC5 )
    | range( 16'0AC7, 16'0AC9 )
    | range( 16'0ACB, 16'0ACD )
    | range( 16'0B01, 16'0B03 )
    | [16'0B3C]
    | range( 16'0B3E, 16'0B43 )
    | range( 16'0B47, 16'0B48 )
    | range( 16'0B4B, 16'0B4D )
    | range( 16'0B56, 16'0B57 )
    | range( 16'0B82, 16'0B83 )
    | range( 16'0BBE, 16'0BC2 )
    | range( 16'0BC6, 16'0BC8 )
    | range( 16'0BCA, 16'0BCD )
    | [16'0BD7]
    | range( 16'0C01, 16'0C03 )
    | range( 16'0C3E, 16'0C44 )
    | range( 16'0C46, 16'0C48 )
    | range( 16'0C4A, 16'0C4D )
    | range( 16'0C55, 16'0C56 )
    | range( 16'0C82, 16'0C83 )
    | range( 16'0CBE, 16'0CC4 )
    | range( 16'0CC6, 16'0CC8 )
    | range( 16'0CCA, 16'0CCD )
    | range( 16'0CD5, 16'0CD6 )
    | range( 16'0D02, 16'0D03 )
    | range( 16'0D3E, 16'0D43 )
    | range( 16'0D46, 16'0D48 )
    | range( 16'0D4A, 16'0D4D )
    | [16'0D57]
    | [16'0E31]
    | range( 16'0E34, 16'0E3A )
    | range( 16'0E47, 16'0E4E )
    | [16'0EB1]
    | range( 16'0EB4, 16'0EB9 )
    | range( 16'0EBB, 16'0EBC )
    | range( 16'0EC8, 16'0ECD )
    | range( 16'0F18, 16'0F19 )
    | [16'0F35]
    | [16'0F37]
    | [16'0F39]
    | [16'0F3E]
    | [16'0F3F]
    | range( 16'0F71, 16'0F84 )
    | range( 16'0F86, 16'0F8B )
    | range( 16'0F90, 16'0F95 )
    | [16'0F97]
    | range( 16'0F99, 16'0FAD )
    | range( 16'0FB1, 16'0FB7 )
    | [16'0FB9]
    | range( 16'20D0, 16'20DC )
    | [16'20E1]
    | range( 16'302A, 16'302F )
    | [16'3099]
    | [16'309A]
    ).

/* unicode_digit//0 consumes one character code that is a decimal digit in
 * one of the scripts listed below. The alternatives mirror the Digit
 * production of the XML 1.0 character classes; every alternative is an
 * inclusive span handled by range//2, starting with ASCII 0-9.
 */
unicode_digit -->
    ( range( 16'0030, 16'0039 )
    | range( 16'0660, 16'0669 )
    | range( 16'06F0, 16'06F9 )
    | range( 16'0966, 16'096F )
    | range( 16'09E6, 16'09EF )
    | range( 16'0A66, 16'0A6F )
    | range( 16'0AE6, 16'0AEF )
    | range( 16'0B66, 16'0B6F )
    | range( 16'0BE7, 16'0BEF )
    | range( 16'0C66, 16'0C6F )
    | range( 16'0CE6, 16'0CEF )
    | range( 16'0D66, 16'0D6F )
    | range( 16'0E50, 16'0E59 )
    | range( 16'0ED0, 16'0ED9 )
    | range( 16'0F20, 16'0F29 )
    ).

/* extender//0 consumes one character code that is an extender character.
 * The alternatives mirror the Extender production of the XML 1.0
 * character classes: eight isolated codes followed by three short
 * inclusive spans handled by range//2.
 */
extender -->
    ( [16'00B7]
    | [16'02D0]
    | [16'02D1]
    | [16'0387]
    | [16'0640]
    | [16'0E46]
    | [16'0EC6]
    | [16'3005]
    | range( 16'3031, 16'3035 )
    | range( 16'309D, 16'309E )
    | range( 16'30FC, 16'30FE )
    ).

/* range( +Low, +High ) consumes exactly one element, Char, from the input
 * list and succeeds only when Low =< Char =< High; both bounds are
 * inclusive. It fails on an empty input list and when Char lies outside
 * the bounds. The comparison is arithmetic, so Low, High and the consumed
 * element must all be numeric by the time it is evaluated.
 */
range( Low, High ) -->
    [Char],
    {Char >= Low, Char =< High}.