update to most recent sgml pl files.

This commit is contained in:
Vitor Santos Costa 2010-05-06 11:37:40 +01:00
parent 9301b67724
commit 515f7eafe2
6 changed files with 118 additions and 84 deletions

View File

@ -55,7 +55,7 @@ iso_639(Code, Lang) :-
% l3(?Code, ?Lang)
%
%
% ISO-639 3-letter codes
l3(abk, 'Abkhazian').
@ -465,7 +465,7 @@ l3(uzb, 'Uzbek').
l3(vai, 'Vai').
l3(ven, 'Venda').
l3(vie, 'Vietnamese').
l3(vol, 'Volapük').
l3(vol, 'Volap\u00fck'). % Use \uxxxx for portability. (= \"u)
l3(vot, 'Votic').
l3(wak, 'Wakashan languages').
l3(wal, 'Walamo').
@ -487,7 +487,7 @@ l3(zul, 'Zulu').
l3(zun, 'Zuni').
% l2(?Code, ?Lang)
%
%
% ISO-639 2 letter codes
l2(aa, 'Afar').

View File

@ -3,9 +3,9 @@
Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: wielemak@science.uva.nl
E-mail: J.Wielemaker@cs.vu.nl
WWW: http://www.swi-prolog.org
Copyright (C): 1985-2005, University of Amsterdam
Copyright (C): 1985-2009, University of Amsterdam
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@ -58,10 +58,13 @@
xml_quote_attribute/2, % +In, -Quoted
xml_quote_cdata/2, % +In, -Quoted
xml_name/1, % +In
xml_name/2, % +In, +Encoding
iri_xml_namespace/2, % +IRI, -Namespace
iri_xml_namespace/3, % +IRI, -Namespace, -LocalName
xml_is_dom/1 % +Term
]).
:- expects_dialect(swi).
:- assert(system:swi_io).
:- use_module(library(lists)).
:- use_module(library(option)).
@ -73,17 +76,15 @@ user:file_search_path(dtd, '.').
:- if(current_prolog_flag(version_data, swi(_,_,_,_))).
user:file_search_path(dtd, swi('library/DTD')).
:- else.
user:file_search_path(dtd, library('DTD')).
user:file_search_path(dtd, swi('DTD')).
:- endif.
sgml_register_catalog_file(File, Location) :-
prolog_to_os_filename(File, OsFile),
'_sgml_register_catalog_file'(OsFile, Location).
load_foreign :-
current_predicate(_, _:sgml_parse(_,_)), !.
load_foreign :-
load_foreign_library(foreign(sgml2pl)).
:- use_foreign_library(foreign(sgml2pl)).
register_catalog(Base) :-
absolute_file_name(dtd(Base),
@ -94,15 +95,9 @@ register_catalog(Base) :-
SocFile),
sgml_register_catalog_file(SocFile, end).
% make sure this is loaded in the current module, not user.
:- load_foreign.
init :-
:- initialization
ignore(register_catalog('HTML4')).
:- initialization
init.
/*******************************
* DTD HANDLING *
@ -146,9 +141,9 @@ dtd(Type, DTD) :-
asserta(current_dtd(Type, DTD)).
%% load_dtd(+DTD, +DtdFile, +Options)
%
%
% Load file into a DTD. Defined options are:
%
%
% * dialect(+Dialect)
% Dialect to use (xml, xmlns, sgml)
%
@ -160,10 +155,10 @@ load_dtd(DTD, DtdFile) :-
load_dtd(DTD, DtdFile, Options) :-
split_dtd_options(Options, DTDOptions, FileOptions),
open_dtd(DTD, DTDOptions, DtdOut),
system:swi_open(DtdFile, read, DtdIn, FileOptions),
system:swi_copy_stream_data(DtdIn, DtdOut),
system:swi_close(DtdIn),
system:swi_close(DtdOut).
open(DtdFile, read, DtdIn, FileOptions),
copy_stream_data(DtdIn, DtdOut),
close(DtdIn),
close(DtdOut).
split_dtd_options([], [], []).
split_dtd_options([H|T], [H|TD], S) :-
@ -176,7 +171,7 @@ dtd_option(dialect(_)).
%% destroy_dtds
%
%
% Destroy DTDs cached by this thread as they will become
% unreachable anyway.
@ -270,7 +265,10 @@ set_parser_options(Parser, Options, RestOptions) :-
set_parser_options(_, Options, Options).
load_structure(stream(In), Term, Options) :- !,
:- meta_predicate
load_structure(+, -, :).
load_structure(stream(In), Term, M:Options) :- !,
( select_option(offset(Offset), Options, Options1)
-> seek(In, Offset, bof, _)
; Options1 = Options
@ -283,8 +281,8 @@ load_structure(stream(In), Term, Options) :- !,
new_sgml_parser(Parser,
[ dtd(DTD)
]),
def_entities(Options2, DTD, Options3),
call_cleanup(parse(Parser, Options3, TermRead, In),
def_entities(Options2, Parser, Options3),
call_cleanup(parse(Parser, M:Options3, TermRead, In),
free_sgml_parser(Parser)),
( ExplicitDTD == true
-> ( DTD = dtd(_, DocType),
@ -296,35 +294,49 @@ load_structure(stream(In), Term, Options) :- !,
),
Term = TermRead.
load_structure(Stream, Term, Options) :-
system:swi_is_stream(Stream), !,
is_stream(Stream), !,
load_structure(stream(Stream), Term, Options).
load_structure(File, Term, Options) :-
system:swi_open(File, read, In, [type(binary)]),
load_structure(stream(In), Term, [file(File)|Options]),
system:swi_close(In).
load_structure(File, Term, M:Options) :-
open(File, read, In, [type(binary)]),
load_structure(stream(In), Term, M:[file(File)|Options]),
close(In).
parse(Parser, Options, Document, In) :-
parse(Parser, M:Options, Document, In) :-
set_parser_options(Parser, Options, Options1),
parser_meta_options(Options1, M, Options2),
sgml_parse(Parser,
[ document(Document),
source(In)
| Options1
| Options2
]).
def_entities([], _, []).
def_entities([entity(Name, Value)|T], DTD, Opts) :- !,
def_entity(DTD, Name, Value),
def_entities(T, DTD, Opts).
def_entities([H|T0], DTD, [H|T]) :-
def_entities(T0, DTD, T).
parser_meta_options([], _, []).
parser_meta_options([call(When, Closure)|T0], M, [call(When, M:Closure)|T]) :- !,
parser_meta_options(T0, M, T).
parser_meta_options([H|T0], M, [H|T]) :-
parser_meta_options(T0, M, T).
def_entity(DTD, Name, Value) :-
open_dtd(DTD, [], Stream),
def_entities([], _, []).
def_entities([H|T], Parser, Opts) :-
def_entity(H, Parser), !,
def_entities(T, Parser, Opts).
def_entities([H|T0], Parser, [H|T]) :-
def_entities(T0, Parser, T).
def_entity(entity(Name, Value), Parser) :-
get_sgml_parser(Parser, dtd(DTD)),
xml_quote_attribute(Value, QValue),
system:swi_format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]),
system:close(Stream).
setup_call_cleanup(open_dtd(DTD, [], Stream),
format(Stream, '<!ENTITY ~w "~w">~n',
[Name, QValue]),
close(Stream)).
def_entity(xmlns(URI), Parser) :-
set_sgml_parser(Parser, xmlns(URI)).
def_entity(xmlns(NS, URI), Parser) :-
set_sgml_parser(Parser, xmlns(NS, URI)).
/*******************************
* UTILITIES *
*******************************/
@ -350,7 +362,7 @@ load_html_file(File, Term) :-
% xml_quote_attribute(+In, -Quoted)
% xml_quote_cdata(+In, -Quoted)
%
%
% Backward compatibility for versions that allow to specify
% encoding. All characters that cannot fit the encoding are mapped
% to XML character entities (&#dd;). Using ASCII is the safest
@ -371,7 +383,7 @@ xml_name(In) :-
*******************************/
% xml_is_dome(@Term)
%
%
% True if term statisfies the structure as returned by
% load_structure/3 and friends.
@ -434,3 +446,5 @@ prolog:called_by(sgml_parse(_, Options), Called) :-
callable(G)
),
Called).
:- retract(system:swi_io).

View File

@ -422,7 +422,7 @@ colourise(M, Parser, Options) :-
E,
show_message(M, E)),
erase(Ref).
on_begin(_Tag, _Attributes, Parser) :-
get_sgml_parser(Parser, file(File)),
current_tb(TB, File),
@ -503,7 +503,7 @@ colour_item(Class, TB, From, To, Fragment) :-
Len > 0,
new(Fragment, sgml_mode_fragment(TB, From, Len, Name)).
colour_item(_, _, _, _, @nil).
/*******************************
* STYLES *
@ -680,7 +680,7 @@ fill_attribute_menu([A|T], DTD, E, P, Mode) :-
dtd_property(DTD, attribute(E, A, Type, Default)),
add_attribute_menu(Type, Default, A, P, Mode),
fill_attribute_menu(T, DTD, E, P, Mode).
add_attribute_menu(nameof(List), Def, A, P, Mode) :- !,
send(P, append, new(P2, popup(A, message(Mode, insert_attribute,
A, @arg1)))),
@ -797,7 +797,7 @@ tag_region(M, Tag:[name], From:int, To:int,
-> send(M, tag_region, Tag, From, To, block)
; send(M, tag_region, Tag, From, To, inline)
).
insert_element(M, Tag:element=name, Style:[{inline,shorttag,block}]) :->
"Insert a new empty element"::
@ -1002,7 +1002,7 @@ feed(In, Len, Parser) :-
report_allowed(M) :-> % DEBUGGING
"Report allowed elements at point"::
get(M, allowed_elements, Allowed),
concat_atom(Allowed, ', ', Atom),
atomic_list_concat(Allowed, ', ', Atom),
send(M, report, status, 'Allowed: %s', Atom).
show_message(M, E) :-

View File

@ -70,10 +70,10 @@ stage.
%% xml_write(+Stream, +Data, +Options) is det.
%% sgml_write(+Stream, +Data, +Options) is det.
%% html_write(+Stream, +Data, +Options) is det.
%
%
% Write a term as created by the SGML/XML parser to a stream in
% SGML or XML format. Options:
%
%
% * dtd(DTD)
% The DTD. This is needed for SGML documents that contain
% elements with content model EMPTY. Characters which may
@ -96,35 +96,35 @@ stage.
% The public identifier to be written in the <!DOCTYPE> line.
%
% * system(SysId)
% The system identifier to be written in the <!DOCTYPE> line.
%
% The system identifier to be written in the <!DOCTYPE> line.
%
% * header(Bool)
% If Bool is 'false', do not emit the <xml ...> header
% line. (xml_write/3 only)
%
%
% * nsmap(Map:list(Id=URI))
% When emitting embedded XML, assume these namespaces
% are already defined from the environment. (xml_write/3
% only).
%
%
% * indent(Indent)
% Indentation of the document (for embedding)
%
%
% * layout(Bool)
% Emit/do not emit layout characters to make output
% readable.
%
%
% * net(Bool)
% Use/do not use Null End Tags.
% For XML, this applies only to empty elements, so you get
%
%
% ==
% <foo/> (default, net(true))
% <foo></foo> (net(false))
% ==
%
%
% For SGML, this applies to empty elements, so you get
%
%
% ==
% <foo> (if foo is declared to be EMPTY in the DTD)
% <foo></foo> (default, net(false))
@ -132,7 +132,7 @@ stage.
% ==
%
% and also to elements with character content not containing /
%
%
% ==
% <b>xxx</b> (default, net(false))
% <b/xxx/ (net(true)).
@ -229,7 +229,7 @@ update_state(Option, _) :-
domain_error(xml_write_option, Option).
% emit_xml_encoding(+Stream, +Options)
%
%
% Emit the XML fileheader with encoding information. Setting the
% right encoding on the output stream must be done before calling
% xml_write/3.
@ -238,7 +238,9 @@ emit_xml_encoding(Out, Options) :-
option(header(Hdr), Options, true),
Hdr == true, !,
stream_property(Out, encoding(Encoding)),
( Encoding == utf8
( ( Encoding == utf8
; Encoding == wchar_t
)
-> format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', [])
; Encoding == iso_latin_1
-> format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', [])
@ -248,7 +250,7 @@ emit_xml_encoding(_, _).
%% emit_doctype(+Options, +Data, +Stream)
%
%
% Emit the document-type declaration.
% There is a problem with the first clause if we are emitting SGML:
% the SGML DTDs for HTML up to HTML 4 *do not allow* any 'version'
@ -291,13 +293,16 @@ write_doctype(Out, DocType, PubId, SysId) :-
%% emit(+Element, +Out, +State, +Options)
%
%
% Emit a single element
emit([], _, _) :- !.
emit([H|T], Out, State) :- !,
emit(H, Out, State),
emit(T, Out, State).
emit(CDATA, Out, State) :-
atom(CDATA), !,
sgml_write_content(Out, CDATA, State).
emit(Element, Out, State) :-
\+ \+ emit_element(Element, Out, State).
@ -399,7 +404,7 @@ emit_name(Term, Out, _) :-
write(Out, Term).
%% update_nsmap(+Attributes, !State)
%
%
% Modify the nsmap of State to reflect modifications due to xmlns
% arguments.
@ -425,7 +430,7 @@ set_nsmap(NS, URI, Map, [NS=URI|Map]).
%% content(+Content, +Out, +Element, +State, +Options)
%
%
% Emit the content part of a structure as well as the termination
% for the content. For empty content we have three versions: XML
% style '/>', SGML declared EMPTY element (nothing) or normal SGML
@ -535,7 +540,7 @@ write_element_content([H|T], Out, State) :-
*******************************/
%% add_missing_namespaces(+DOM0, +NsMap, -DOM)
%
%
% Add xmlns:NS=URI definitions to the toplevel element(s) to
% deal with missing namespaces.
@ -571,11 +576,12 @@ generate_ns(_, NS) :-
rdf_db:ns/2.
default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi).
default_ns('http://www.w3.org/1999/xhtml', xhtml).
default_ns(URI, NS) :-
rdf_db:ns(NS, URI).
%% missing_namespaces(+DOM, +NSMap, -Missing)
%
%
% Return a list of URIs appearing in DOM that are not covered
% by xmlns definitions.
@ -650,7 +656,8 @@ writeq([H|T], Out, Escape, EntityMap) :-
( memberchk(H, Escape)
-> write_entity(H, Out, EntityMap)
; H >= 256
-> ( stream_property(Out, encoding(utf8))
-> ( stream_property(Out, encoding(Enc)),
unicode_encoding(Enc)
-> put_code(Out, H)
; write_entity(H, Out, EntityMap)
)
@ -658,6 +665,10 @@ writeq([H|T], Out, Escape, EntityMap) :-
),
writeq(T, Out, Escape, EntityMap).
unicode_encoding(utf8).
unicode_encoding(wchar_t).
unicode_encoding(unicode_le).
unicode_encoding(unicode_be).
write_entity(Code, Out, EntityMap) :-
( get_assoc(Code, EntityMap, EntityName)
@ -689,7 +700,7 @@ emit_indent(Indent, Out) :-
format(Out, '~N', []),
write_n(Tabs, '\t', Out),
write_n(Spaces, ' ', Out).
write_n(N, Char, Out) :-
( N > 0
-> put_char(Out, Char),
@ -697,7 +708,7 @@ write_n(N, Char, Out) :-
write_n(N2, Char, Out)
; true
).
inc_indent(State) :-
inc_indent(State, 2).
@ -713,7 +724,7 @@ inc_indent(State, Inc) :-
*******************************/
%% empty_element(+State, +Element)
%
%
% True if Element is declared with EMPTY content in the (SGML)
% DTD.
@ -721,9 +732,9 @@ empty_element(State, Element) :-
get_state(State, dtd, DTD),
DTD \== (-),
dtd_property(DTD, element(Element, _, empty)).
%% dtd_character_entities(+DTD, -Map)
%
%
% Return an assoc mapping character entities to their name. Note
% that the entity representation is a bit dubious. Entities should
% allow for a wide-character version and avoid the &#..; trick.

View File

@ -74,7 +74,7 @@ mkswitch(List, Indent) :-
mkswitch(High, NextIndent),
indent(Indent),
format('}~n').
end(List, Max) :-
last(List, Last),
( Last = _-Max

View File

@ -31,6 +31,7 @@
:- module(xsdp_type,
[ xsdp_type/1, % ?Type
xsdp_uri_type/2, % ?URI, ?Type
xsdp_numeric_uri/2, % ?URI, ?Primary
xsdp_subtype_of/2, % ?Type, ?Super
xsdp_convert/3 % +Type, +Content, -Value
@ -64,14 +65,22 @@ ns('http://www.w3.org/2001/XMLSchema#').
*******************************/
%% xsdp_type(?Type)
%
%
% Test/generate the names for the XML schema primitive types
xsdp_type(Type) :-
subtype_of(Type, _).
%% xsdp_uri_type(?URI, ?Type)
%
% True if URI is the URI for the the XML-Schema primitive Type.
xsdp_uri_type(URI, Type) :-
xsd_local_id(URI, Type),
subtype_of(Type, _).
%% xsdp_subtype_of(?Type, ?Super)
%
%
% True if Type is a (transitive) subtype of Super.
xsdp_subtype_of(Type, Type).
@ -167,7 +176,7 @@ xsd_local_ids.
numeric_uirs.
%% xsdp_convert(+Type, +Content, -Value)
%
%
% Convert the content model Content to an object of the given XSD
% type and return the Prolog value in Value.