Experiment with porting SGML to YAP, and trying to preserve SWI code as much
as possible.
This commit is contained in:
808
packages/sgml/sgml_write.pl
Normal file
808
packages/sgml/sgml_write.pl
Normal file
@@ -0,0 +1,808 @@
|
||||
/* $Id$
|
||||
|
||||
Part of SWI-Prolog
|
||||
|
||||
Author: Jan Wielemaker & Richard O'Keefe
|
||||
E-mail: wielemaker@science.uva.nl
|
||||
WWW: http://www.swi-prolog.org
|
||||
Copyright (C): 1985-2004, University of Amsterdam
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
As a special exception, if you link this library with other files,
|
||||
compiled with a Free Software compiler, to produce an executable, this
|
||||
library does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however
|
||||
invalidate any other reasons why the executable file might be covered by
|
||||
the GNU General Public License.
|
||||
*/
|
||||
|
||||
:- module(sgml_write,
|
||||
[ html_write/2, % +Data, +Options
|
||||
html_write/3, % +Stream, +Data, +Options
|
||||
sgml_write/2, % +Data, +Options
|
||||
sgml_write/3, % +Stream, +Data, +Options
|
||||
xml_write/2, % +Data, +Options
|
||||
xml_write/3 % +Stream, +Data, +Options
|
||||
]).
|
||||
:- use_module(library(lists)).
|
||||
:- use_module(library(sgml)).
|
||||
:- use_module(library(debug)).
|
||||
:- use_module(library(assoc)).
|
||||
:- use_module(library(option)).
|
||||
:- use_module(library(error)).
|
||||
|
||||
/** <module> XML/SGML writer module
|
||||
|
||||
This library provides the inverse functionality of the sgml.pl parser
|
||||
library, writing XML, SGML and HTML documents from the parsed output. It
|
||||
is intended to allow rewriting in a different dialect or encoding or to
|
||||
perform document transformation in Prolog on the parsed representation.
|
||||
|
||||
The current implementation is particularly keen on getting character
|
||||
encoding and the use of character entities right. Some work has been
|
||||
done providing layout, but space handling in XML and SGML make this a
|
||||
very hazardous area.
|
||||
|
||||
The Prolog-based low-level character and escape handling is the real
|
||||
bottleneck in this library and will probably be moved to C in a later
|
||||
stage.
|
||||
|
||||
@see library(http/html_write) provides a high-level library for
|
||||
emitting HTML and XHTML.
|
||||
*/
|
||||
|
||||
%% xml_write(+Data, +Options) is det.
|
||||
%% sgml_write(+Data, +Options) is det.
|
||||
%% html_write(+Data, +Options) is det.
|
||||
%% xml_write(+Stream, +Data, +Options) is det.
|
||||
%% sgml_write(+Stream, +Data, +Options) is det.
|
||||
%% html_write(+Stream, +Data, +Options) is det.
|
||||
%
|
||||
% Write a term as created by the SGML/XML parser to a stream in
|
||||
% SGML or XML format. Options:
|
||||
%
|
||||
% * dtd(DTD)
|
||||
% The DTD. This is needed for SGML documents that contain
|
||||
% elements with content model EMPTY. Characters which may
|
||||
% not be written directly in the Stream's encoding will be
|
||||
% written using character data entities from the DTD if at
|
||||
% all possible, otherwise as numeric character references.
|
||||
% Note that the DTD will NOT be written out at all; as yet
|
||||
% there is no way to write out an internal subset, though
|
||||
% it would not be hard to add one.
|
||||
%
|
||||
% * doctype(DocType)
|
||||
% Document type for the SGML document type declaration.
|
||||
% If omitted it is taken from the root element. There is
|
||||
% never any point in having this be disagree with the
|
||||
% root element. A <!DOCTYPE> declaration will be written
|
||||
% if and only if at least one of doctype(_), public(_), or
|
||||
% system(_) is provided in Options.
|
||||
%
|
||||
% * public(PubId)
|
||||
% The public identifier to be written in the <!DOCTYPE> line.
|
||||
%
|
||||
% * system(SysId)
|
||||
% The system identifier to be written in the <!DOCTYPE> line.
|
||||
%
|
||||
% * header(Bool)
|
||||
% If Bool is 'false', do not emit the <xml ...> header
|
||||
% line. (xml_write/3 only)
|
||||
%
|
||||
% * nsmap(Map:list(Id=URI))
|
||||
% When emitting embedded XML, assume these namespaces
|
||||
% are already defined from the environment. (xml_write/3
|
||||
% only).
|
||||
%
|
||||
% * indent(Indent)
|
||||
% Indentation of the document (for embedding)
|
||||
%
|
||||
% * layout(Bool)
|
||||
% Emit/do not emit layout characters to make output
|
||||
% readable.
|
||||
%
|
||||
% * net(Bool)
|
||||
% Use/do not use Null End Tags.
|
||||
% For XML, this applies only to empty elements, so you get
|
||||
%
|
||||
% ==
|
||||
% <foo/> (default, net(true))
|
||||
% <foo></foo> (net(false))
|
||||
% ==
|
||||
%
|
||||
% For SGML, this applies to empty elements, so you get
|
||||
%
|
||||
% ==
|
||||
% <foo> (if foo is declared to be EMPTY in the DTD)
|
||||
% <foo></foo> (default, net(false))
|
||||
% <foo// (net(true))
|
||||
% ==
|
||||
%
|
||||
% and also to elements with character content not containing /
|
||||
%
|
||||
% ==
|
||||
% <b>xxx</b> (default, net(false))
|
||||
% <b/xxx/ (net(true)).
|
||||
% ==
|
||||
%
|
||||
% Note that if the stream is UTF-8, the system will write special
|
||||
% characters as UTF-8 sequences, while if it is ISO Latin-1 it
|
||||
% will use (character) entities if there is a DTD that provides
|
||||
% them, otherwise it will use numeric character references.
|
||||
|
||||
xml_write(Data, Options) :-
|
||||
current_output(Stream),
|
||||
xml_write(Stream, Data, Options).
|
||||
|
||||
xml_write(Stream0, Data, Options) :-
|
||||
fix_user_stream(Stream0, Stream),
|
||||
( stream_property(Stream, encoding(text))
|
||||
-> set_stream(Stream, encoding(utf8)),
|
||||
call_cleanup(xml_write(Stream, Data, Options),
|
||||
set_stream(Stream, encoding(text)))
|
||||
; new_state(xml, State),
|
||||
init_state(Options, State),
|
||||
get_state(State, nsmap, NSMap),
|
||||
add_missing_namespaces(Data, NSMap, Data1),
|
||||
emit_xml_encoding(Stream, Options),
|
||||
emit_doctype(Options, Data, Stream),
|
||||
write_initial_indent(State, Stream),
|
||||
emit(Data1, Stream, State)
|
||||
).
|
||||
|
||||
|
||||
sgml_write(Data, Options) :-
|
||||
current_output(Stream),
|
||||
sgml_write(Stream, Data, Options).
|
||||
|
||||
sgml_write(Stream0, Data, Options) :-
|
||||
fix_user_stream(Stream0, Stream),
|
||||
( stream_property(Stream, encoding(text))
|
||||
-> set_stream(Stream, encoding(utf8)),
|
||||
call_cleanup(sgml_write(Stream, Data, Options),
|
||||
set_stream(Stream, encoding(text)))
|
||||
; new_state(sgml, State),
|
||||
init_state(Options, State),
|
||||
write_initial_indent(State, Stream),
|
||||
emit_doctype(Options, Data, Stream),
|
||||
emit(Data, Stream, State)
|
||||
).
|
||||
|
||||
|
||||
html_write(Data, Options) :-
|
||||
current_output(Stream),
|
||||
html_write(Stream, Data, Options).
|
||||
|
||||
html_write(Stream, Data, Options) :-
|
||||
sgml_write(Stream, Data,
|
||||
[ dtd(html)
|
||||
| Options
|
||||
]).
|
||||
|
||||
fix_user_stream(user, user_output) :- !.
|
||||
fix_user_stream(Stream, Stream).
|
||||
|
||||
|
||||
init_state([], _).
|
||||
init_state([H|T], State) :-
|
||||
update_state(H, State),
|
||||
init_state(T, State).
|
||||
|
||||
update_state(dtd(DTD), State) :- !,
|
||||
( atom(DTD)
|
||||
-> dtd(DTD, DTDObj)
|
||||
; DTDObj = DTD
|
||||
),
|
||||
set_state(State, dtd, DTDObj),
|
||||
dtd_character_entities(DTDObj, EntityMap),
|
||||
set_state(State, entity_map, EntityMap).
|
||||
update_state(nsmap(Map), State) :- !,
|
||||
set_state(State, nsmap, Map).
|
||||
update_state(indent(Indent), State) :- !,
|
||||
must_be(integer, Indent),
|
||||
set_state(State, indent, Indent).
|
||||
update_state(layout(Bool), State) :- !,
|
||||
must_be(boolean, Bool),
|
||||
set_state(State, layout, Bool).
|
||||
update_state(doctype(_), _) :- !.
|
||||
update_state(public(_), _) :- !.
|
||||
update_state(system(_), _) :- !.
|
||||
update_state(net(Bool), State) :- !,
|
||||
must_be(boolean, Bool),
|
||||
set_state(State, net, Bool).
|
||||
update_state(header(Bool), _) :- !,
|
||||
must_be(boolean, Bool).
|
||||
update_state(Option, _) :-
|
||||
domain_error(xml_write_option, Option).
|
||||
|
||||
% emit_xml_encoding(+Stream, +Options)
|
||||
%
|
||||
% Emit the XML fileheader with encoding information. Setting the
|
||||
% right encoding on the output stream must be done before calling
|
||||
% xml_write/3.
|
||||
|
||||
emit_xml_encoding(Out, Options) :-
|
||||
option(header(Hdr), Options, true),
|
||||
Hdr == true, !,
|
||||
stream_property(Out, encoding(Encoding)),
|
||||
( Encoding == utf8
|
||||
-> format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', [])
|
||||
; Encoding == iso_latin_1
|
||||
-> format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', [])
|
||||
; domain_error(xml_encoding, Encoding)
|
||||
).
|
||||
emit_xml_encoding(_, _).
|
||||
|
||||
|
||||
%% emit_doctype(+Options, +Data, +Stream)
|
||||
%
|
||||
% Emit the document-type declaration.
|
||||
% There is a problem with the first clause if we are emitting SGML:
|
||||
% the SGML DTDs for HTML up to HTML 4 *do not allow* any 'version'
|
||||
% attribute; so the only time this is useful is when it is illegal!
|
||||
|
||||
emit_doctype(_Options, Data, Out) :-
|
||||
( memberchk(element(html,Att,_), Data)
|
||||
; Data = element(html,Att,_)
|
||||
),
|
||||
memberchk(version=Version, Att),
|
||||
!,
|
||||
format(Out, '<!DOCTYPE HTML PUBLIC "~w">~n~n', [Version]).
|
||||
emit_doctype(Options, Data, Out) :-
|
||||
( memberchk(public(PubId), Options) -> true
|
||||
; PubId = (-)
|
||||
),
|
||||
( memberchk(system(SysId), Options) -> true
|
||||
; SysId = (-)
|
||||
),
|
||||
\+ (PubId == (-),
|
||||
SysId == (-),
|
||||
\+ memberchk(doctype(_), Options)
|
||||
),
|
||||
( Data = element(DocType,_,_)
|
||||
; memberchk(element(DocType,_,_), Data)
|
||||
; memberchk(doctype(DocType), Options)
|
||||
),
|
||||
!,
|
||||
write_doctype(Out, DocType, PubId, SysId).
|
||||
emit_doctype(_, _, _).
|
||||
|
||||
write_doctype(Out, DocType, -, -) :- !,
|
||||
format(Out, '<!DOCTYPE ~w []>~n~n', [DocType]).
|
||||
write_doctype(Out, DocType, -, SysId) :- !,
|
||||
format(Out, '<!DOCTYPE ~w SYSTEM "~w">~n~n', [DocType,SysId]).
|
||||
write_doctype(Out, DocType, PubId, -) :- !,
|
||||
format(Out, '<!DOCTYPE ~w PUBLIC "~w">~n~n', [DocType,PubId]).
|
||||
write_doctype(Out, DocType, PubId, SysId) :-
|
||||
format(Out, '<!DOCTYPE ~w PUBLIC "~w" "~w">~n~n', [DocType,PubId,SysId]).
|
||||
|
||||
|
||||
%% emit(+Element, +Out, +State, +Options)
|
||||
%
|
||||
% Emit a single element
|
||||
|
||||
emit([], _, _) :- !.
|
||||
emit([H|T], Out, State) :- !,
|
||||
emit(H, Out, State),
|
||||
emit(T, Out, State).
|
||||
emit(Element, Out, State) :-
|
||||
\+ \+ emit_element(Element, Out, State).
|
||||
|
||||
emit_element(pi(PI), Out, State) :-
|
||||
get_state(State, entity_map, EntityMap),
|
||||
write(Out, <?),
|
||||
write_quoted(Out, PI, "", EntityMap),
|
||||
( get_state(State, dialect, xml) ->
|
||||
write(Out, ?>)
|
||||
; write(Out, >)
|
||||
).
|
||||
emit_element(element(Name, Attributes, Content), Out, State) :-
|
||||
att_length(Attributes, State, Alen),
|
||||
( Alen > 60,
|
||||
get_state(State, layout, true)
|
||||
-> Sep = nl,
|
||||
AttIndent = 4
|
||||
; Sep = sp,
|
||||
AttIndent = 0
|
||||
),
|
||||
( get_state(State, dialect, xml)
|
||||
-> update_nsmap(Attributes, State)
|
||||
; true
|
||||
),
|
||||
put_char(Out, '<'),
|
||||
emit_name(Name, Out, State),
|
||||
( AttIndent > 0
|
||||
-> \+ \+ ( inc_indent(State, AttIndent),
|
||||
attributes(Attributes, Sep, Out, State)
|
||||
)
|
||||
; attributes(Attributes, Sep, Out, State)
|
||||
),
|
||||
content(Content, Out, Name, State).
|
||||
|
||||
attributes([], _, _, _).
|
||||
attributes([H|T], Sep, Out, State) :-
|
||||
( Sep == nl
|
||||
-> write_indent(State, Out)
|
||||
; put_char(Out, ' ')
|
||||
),
|
||||
attribute(H, Out, State),
|
||||
attributes(T, Sep, Out, State).
|
||||
|
||||
attribute(Name=Value, Out, State) :-
|
||||
emit_name(Name, Out, State),
|
||||
put_char(Out, =),
|
||||
sgml_write_attribute(Out, Value, State).
|
||||
|
||||
att_length(Atts, State, Len) :-
|
||||
att_length(Atts, State, 0, Len).
|
||||
|
||||
att_length([], _, Len, Len).
|
||||
att_length([A0|T], State, Len0, Len) :-
|
||||
alen(A0, State, AL),
|
||||
Len1 is Len0 + 1 + AL,
|
||||
att_length(T, State, Len1, Len).
|
||||
|
||||
alen(URI:Name=Value, State, Len) :- !,
|
||||
atom_length(Value, AL),
|
||||
vlen(Name, NL),
|
||||
get_state(State, nsmap, Nsmap),
|
||||
( memberchk(NS=URI, Nsmap)
|
||||
-> atom_length(NS, NsL)
|
||||
; atom_length(URI, NsL)
|
||||
),
|
||||
Len is AL+NL+NsL+3.
|
||||
alen(Name=Value, _, Len) :-
|
||||
atom_length(Name, NL),
|
||||
vlen(Value, AL),
|
||||
Len is AL+NL+3.
|
||||
|
||||
vlen(Value, Len) :-
|
||||
is_list(Value), !,
|
||||
vlen_list(Value, 0, Len).
|
||||
vlen(Value, Len) :-
|
||||
atom_length(Value, Len).
|
||||
|
||||
vlen_list([], L, L).
|
||||
vlen_list([H|T], L0, L) :-
|
||||
atom_length(H, HL),
|
||||
( L0 == 0
|
||||
-> L1 is L0 + HL
|
||||
; L1 is L0 + HL + 1
|
||||
),
|
||||
vlen_list(T, L1, L).
|
||||
|
||||
|
||||
emit_name(Name, Out, _) :-
|
||||
atom(Name), !,
|
||||
write(Out, Name).
|
||||
emit_name(URI:Name, Out, State) :-
|
||||
get_state(State, nsmap, NSMap),
|
||||
memberchk(NS=URI, NSMap), !,
|
||||
( NS == []
|
||||
-> write(Out, Name)
|
||||
; format(Out, '~w:~w', [NS, Name])
|
||||
).
|
||||
emit_name(Term, Out, _) :-
|
||||
write(Out, Term).
|
||||
|
||||
%% update_nsmap(+Attributes, !State)
|
||||
%
|
||||
% Modify the nsmap of State to reflect modifications due to xmlns
|
||||
% arguments.
|
||||
|
||||
update_nsmap(Attributes, State) :-
|
||||
get_state(State, nsmap, Map0),
|
||||
update_nsmap(Attributes, Map0, Map),
|
||||
set_state(State, nsmap, Map).
|
||||
|
||||
update_nsmap([], Map, Map).
|
||||
update_nsmap([xmlns:NS=URI|T], Map0, Map) :- !,
|
||||
set_nsmap(NS, URI, Map0, Map1),
|
||||
update_nsmap(T, Map1, Map).
|
||||
update_nsmap([xmlns=URI|T], Map0, Map) :- !,
|
||||
set_nsmap([], URI, Map0, Map1),
|
||||
update_nsmap(T, Map1, Map).
|
||||
update_nsmap([_|T], Map0, Map) :- !,
|
||||
update_nsmap(T, Map0, Map).
|
||||
|
||||
set_nsmap(NS, URI, Map0, Map) :-
|
||||
select(NS=_, Map0, Map1), !,
|
||||
Map = [NS=URI|Map1].
|
||||
set_nsmap(NS, URI, Map, [NS=URI|Map]).
|
||||
|
||||
|
||||
%% content(+Content, +Out, +Element, +State, +Options)
|
||||
%
|
||||
% Emit the content part of a structure as well as the termination
|
||||
% for the content. For empty content we have three versions: XML
|
||||
% style '/>', SGML declared EMPTY element (nothing) or normal SGML
|
||||
% element (we must close with the same element name).
|
||||
|
||||
content([], Out, Element, State) :- !, % empty element
|
||||
( get_state(State, net, true)
|
||||
-> ( get_state(State, dialect, xml) ->
|
||||
write(Out, />)
|
||||
; empty_element(State, Element) ->
|
||||
write(Out, >)
|
||||
; write(Out, //)
|
||||
)
|
||||
;/* get_state(State, net, false) */
|
||||
write(Out, >),
|
||||
( get_state(State, dialect, sgml),
|
||||
empty_element(State, Element)
|
||||
-> true
|
||||
; emit_close(Element, Out, State)
|
||||
)
|
||||
).
|
||||
content([Atom], Out, Element, State) :-
|
||||
atom(Atom), !,
|
||||
( get_state(State, dialect, sgml),
|
||||
get_state(State, net, true),
|
||||
\+ sub_atom(Atom, _, _, _, /),
|
||||
atom_length(Atom, Len),
|
||||
Len < 20
|
||||
-> write(Out, /),
|
||||
sgml_write_content(Out, Atom, State),
|
||||
write(Out, /)
|
||||
;/* XML or not NET */
|
||||
write(Out, >),
|
||||
sgml_write_content(Out, Atom, State),
|
||||
emit_close(Element, Out, State)
|
||||
).
|
||||
content(Content, Out, Element, State) :-
|
||||
get_state(State, layout, true),
|
||||
/* If xml:space='preserve' is present, */
|
||||
/* we MUST NOT tamper with white space at all. */
|
||||
\+ (Element = element(_,Atts,_),
|
||||
memberchk('xml:space'=preserve, Atts)
|
||||
),
|
||||
element_content(Content, Elements),
|
||||
!,
|
||||
format(Out, >, []),
|
||||
\+ \+ (
|
||||
inc_indent(State),
|
||||
write_element_content(Elements, Out, State)
|
||||
),
|
||||
write_indent(State, Out),
|
||||
emit_close(Element, Out, State).
|
||||
content(Content, Out, Element, State) :-
|
||||
format(Out, >, []),
|
||||
write_mixed_content(Content, Out, Element, State),
|
||||
emit_close(Element, Out, State).
|
||||
|
||||
|
||||
emit_close(Element, Out, State) :-
|
||||
write(Out, '</'),
|
||||
emit_name(Element, Out, State),
|
||||
write(Out, '>').
|
||||
|
||||
|
||||
write_mixed_content([], _, _, _).
|
||||
write_mixed_content([H|T], Out, Element, State) :-
|
||||
write_mixed_content_element(H, Out, State),
|
||||
write_mixed_content(T, Out, Element, State).
|
||||
|
||||
write_mixed_content_element(H, Out, State) :-
|
||||
( atom(H)
|
||||
-> sgml_write_content(Out, H, State)
|
||||
; functor(H, element, 3)
|
||||
-> emit(H, Out, State)
|
||||
; functor(H, pi, 1)
|
||||
-> emit(H, Out, State)
|
||||
; H = sdata(Data) % cannot be written without entity!
|
||||
-> print_message(warning, sgml_write(sdata_as_cdata(Data))),
|
||||
sgml_write_content(Out, Data, State)
|
||||
; assertion(fail)
|
||||
).
|
||||
|
||||
|
||||
element_content([], []).
|
||||
element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :- !,
|
||||
element_content(T0, T).
|
||||
element_content([Blank|T0], T) :-
|
||||
atom(Blank),
|
||||
atom_codes(Blank, Codes),
|
||||
all_blanks(Codes),
|
||||
element_content(T0, T).
|
||||
|
||||
all_blanks([]).
|
||||
all_blanks([H|T]) :-
|
||||
code_type(H, space),
|
||||
all_blanks(T).
|
||||
|
||||
write_element_content([], _, _).
|
||||
write_element_content([H|T], Out, State) :-
|
||||
write_indent(State, Out),
|
||||
emit(H, Out, State),
|
||||
write_element_content(T, Out, State).
|
||||
|
||||
|
||||
/*******************************
|
||||
* NAMESPACES *
|
||||
*******************************/
|
||||
|
||||
%% add_missing_namespaces(+DOM0, +NsMap, -DOM)
|
||||
%
|
||||
% Add xmlns:NS=URI definitions to the toplevel element(s) to
|
||||
% deal with missing namespaces.
|
||||
|
||||
add_missing_namespaces([], _, []) :- !.
|
||||
add_missing_namespaces([H0|T0], Def, [H|T]) :- !,
|
||||
add_missing_namespaces(H0, Def, H),
|
||||
add_missing_namespaces(T0, Def, T).
|
||||
add_missing_namespaces(Elem0, Def, Elem) :-
|
||||
Elem0 = element(Name, Atts0, Content), !,
|
||||
missing_namespaces(Elem0, Def, Missing),
|
||||
( Missing == []
|
||||
-> Elem = Elem0
|
||||
; add_missing_ns(Missing, Atts0, Atts),
|
||||
Elem = element(Name, Atts, Content)
|
||||
).
|
||||
add_missing_namespaces(DOM, _, DOM). % CDATA, etc.
|
||||
|
||||
add_missing_ns([], Atts, Atts).
|
||||
add_missing_ns([H|T], Atts0, Atts) :-
|
||||
generate_ns(H, NS),
|
||||
add_missing_ns(T, [xmlns:NS=H|Atts0], Atts).
|
||||
|
||||
%% generate_ns(+URI, -NS) is det.
|
||||
%
|
||||
% Generate a namespace (NS) identifier for URI.
|
||||
|
||||
generate_ns(URI, NS) :-
|
||||
default_ns(URI, NS), !.
|
||||
generate_ns(_, NS) :-
|
||||
gensym(xns, NS).
|
||||
|
||||
:- multifile
|
||||
rdf_db:ns/2.
|
||||
|
||||
default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi).
|
||||
default_ns(URI, NS) :-
|
||||
rdf_db:ns(NS, URI).
|
||||
|
||||
%% missing_namespaces(+DOM, +NSMap, -Missing)
|
||||
%
|
||||
% Return a list of URIs appearing in DOM that are not covered
|
||||
% by xmlns definitions.
|
||||
|
||||
missing_namespaces(DOM, Defined, Missing) :-
|
||||
missing_namespaces(DOM, Defined, [], Missing).
|
||||
|
||||
missing_namespaces([], _, L, L) :- !.
|
||||
missing_namespaces([H|T], Def, L0, L) :- !,
|
||||
missing_namespaces(H, Def, L0, L1),
|
||||
missing_namespaces(T, Def, L1, L).
|
||||
missing_namespaces(element(Name, Atts, Content), Def, L0, L) :- !,
|
||||
update_nsmap(Atts, Def, Def1),
|
||||
missing_ns(Name, Def1, L0, L1),
|
||||
missing_att_ns(Atts, Def1, L1, L2),
|
||||
missing_namespaces(Content, Def1, L2, L).
|
||||
missing_namespaces(_, _, L, L).
|
||||
|
||||
missing_att_ns([], _, M, M).
|
||||
missing_att_ns([Name=_|T], Def, M0, M) :-
|
||||
missing_ns(Name, Def, M0, M1),
|
||||
missing_att_ns(T, Def, M1, M).
|
||||
|
||||
missing_ns(URI:_, Def, M0, M) :- !,
|
||||
( ( memberchk(_=URI, Def)
|
||||
; memberchk(URI, M0)
|
||||
; URI = xml % predefined ones
|
||||
; URI = xmlns
|
||||
)
|
||||
-> M = M0
|
||||
; M = [URI|M0]
|
||||
).
|
||||
missing_ns(_, _, M, M).
|
||||
|
||||
/*******************************
|
||||
* QUOTED WRITE *
|
||||
*******************************/
|
||||
|
||||
sgml_write_attribute(Out, Values, State) :-
|
||||
is_list(Values), !,
|
||||
get_state(State, entity_map, EntityMap),
|
||||
put_char(Out, '"'),
|
||||
write_quoted_list(Values, Out, """<&>", EntityMap),
|
||||
put_char(Out, '"').
|
||||
sgml_write_attribute(Out, Value, State) :-
|
||||
get_state(State, entity_map, EntityMap),
|
||||
put_char(Out, '"'),
|
||||
write_quoted(Out, Value, """<&>", EntityMap),
|
||||
put_char(Out, '"').
|
||||
|
||||
write_quoted_list([], _, _, _).
|
||||
write_quoted_list([H|T], Out, Escape, EntityMap) :-
|
||||
write_quoted(Out, H, Escape, EntityMap),
|
||||
( T == []
|
||||
-> true
|
||||
; put_char(Out, ' '),
|
||||
write_quoted_list(T, Out, Escape, EntityMap)
|
||||
).
|
||||
|
||||
|
||||
sgml_write_content(Out, Value, State) :-
|
||||
get_state(State, entity_map, EntityMap),
|
||||
write_quoted(Out, Value, "<&>", EntityMap).
|
||||
|
||||
|
||||
write_quoted(Out, Atom, Escape, EntityMap) :-
|
||||
atom_codes(Atom, Codes),
|
||||
writeq(Codes, Out, Escape, EntityMap).
|
||||
|
||||
|
||||
writeq([], _, _, _).
|
||||
writeq([H|T], Out, Escape, EntityMap) :-
|
||||
( memberchk(H, Escape)
|
||||
-> write_entity(H, Out, EntityMap)
|
||||
; H >= 256
|
||||
-> ( stream_property(Out, encoding(utf8))
|
||||
-> put_code(Out, H)
|
||||
; write_entity(H, Out, EntityMap)
|
||||
)
|
||||
; put_code(Out, H)
|
||||
),
|
||||
writeq(T, Out, Escape, EntityMap).
|
||||
|
||||
|
||||
write_entity(Code, Out, EntityMap) :-
|
||||
( get_assoc(Code, EntityMap, EntityName)
|
||||
-> format(Out, '&~w;', [EntityName])
|
||||
; format(Out, '&#~w;', [Code])
|
||||
).
|
||||
|
||||
|
||||
/*******************************
|
||||
* INDENTATION *
|
||||
*******************************/
|
||||
|
||||
write_initial_indent(State, Out) :-
|
||||
( get_state(State, indent, Indent),
|
||||
Indent > 0
|
||||
-> emit_indent(Indent, Out)
|
||||
; true
|
||||
).
|
||||
|
||||
write_indent(State, _) :-
|
||||
get_state(State, layout, false), !.
|
||||
write_indent(State, Out) :-
|
||||
get_state(State, indent, Indent),
|
||||
emit_indent(Indent, Out).
|
||||
|
||||
emit_indent(Indent, Out) :-
|
||||
Tabs is Indent // 8,
|
||||
Spaces is Indent mod 8,
|
||||
format(Out, '~N', []),
|
||||
write_n(Tabs, '\t', Out),
|
||||
write_n(Spaces, ' ', Out).
|
||||
|
||||
write_n(N, Char, Out) :-
|
||||
( N > 0
|
||||
-> put_char(Out, Char),
|
||||
N2 is N - 1,
|
||||
write_n(N2, Char, Out)
|
||||
; true
|
||||
).
|
||||
|
||||
inc_indent(State) :-
|
||||
inc_indent(State, 2).
|
||||
|
||||
inc_indent(State, Inc) :-
|
||||
state(indent, Arg),
|
||||
arg(Arg, State, I0),
|
||||
I is I0 + Inc,
|
||||
setarg(Arg, State, I).
|
||||
|
||||
|
||||
/*******************************
|
||||
* DTD HANDLING *
|
||||
*******************************/
|
||||
|
||||
%% empty_element(+State, +Element)
|
||||
%
|
||||
% True if Element is declared with EMPTY content in the (SGML)
|
||||
% DTD.
|
||||
|
||||
empty_element(State, Element) :-
|
||||
get_state(State, dtd, DTD),
|
||||
DTD \== (-),
|
||||
dtd_property(DTD, element(Element, _, empty)).
|
||||
|
||||
%% dtd_character_entities(+DTD, -Map)
|
||||
%
|
||||
% Return an assoc mapping character entities to their name. Note
|
||||
% that the entity representation is a bit dubious. Entities should
|
||||
% allow for a wide-character version and avoid the &#..; trick.
|
||||
|
||||
dtd_character_entities(DTD, Map) :-
|
||||
empty_assoc(Empty),
|
||||
dtd_property(DTD, entities(Entities)),
|
||||
fill_entity_map(Entities, DTD, Empty, Map).
|
||||
|
||||
fill_entity_map([], _, Map, Map).
|
||||
fill_entity_map([H|T], DTD, Map0, Map) :-
|
||||
( dtd_property(DTD, entity(H, CharEntity)),
|
||||
atom(CharEntity),
|
||||
( sub_atom(CharEntity, 0, _, _, '&#'),
|
||||
sub_atom(CharEntity, _, _, 0, ';')
|
||||
-> sub_atom(CharEntity, 2, _, 1, Name),
|
||||
atom_number(Name, Code)
|
||||
; atom_length(CharEntity, 1),
|
||||
char_code(CharEntity, Code)
|
||||
)
|
||||
-> put_assoc(Code, Map0, H, Map1),
|
||||
fill_entity_map(T, DTD, Map1, Map)
|
||||
; fill_entity_map(T, DTD, Map0, Map)
|
||||
).
|
||||
|
||||
|
||||
|
||||
/*******************************
|
||||
* FIELDS *
|
||||
*******************************/
|
||||
|
||||
state(indent, 1). % current indentation
|
||||
state(layout, 2). % use layout (true/false)
|
||||
state(dtd, 3). % DTD for entity names
|
||||
state(entity_map, 4). % compiled entity-map
|
||||
state(dialect, 5). % xml/sgml
|
||||
state(nsmap, 6). % defined namespaces
|
||||
state(net, 7). % Should null end-tags be used?
|
||||
|
||||
new_state(Dialect,
|
||||
state(
|
||||
0, % indent
|
||||
true, % layout
|
||||
-, % DTD
|
||||
EntityMap, % entity_map
|
||||
Dialect, % dialect
|
||||
[], % NS=Full map
|
||||
Net % Null End-Tags?
|
||||
)) :-
|
||||
( Dialect == sgml
|
||||
-> Net = false,
|
||||
empty_assoc(EntityMap)
|
||||
; Net = true,
|
||||
xml_entities(EntityMap)
|
||||
).
|
||||
|
||||
get_state(State, Field, Value) :-
|
||||
state(Field, Arg),
|
||||
arg(Arg, State, Value).
|
||||
|
||||
set_state(State, Field, Value) :-
|
||||
state(Field, Arg),
|
||||
setarg(Arg, State, Value).
|
||||
|
||||
xml_entities(Map) :-
|
||||
list_to_assoc([ 60 - lt,
|
||||
61 - amp,
|
||||
62 - gt,
|
||||
39 - apos,
|
||||
34 - quot
|
||||
], Map).
|
||||
|
||||
|
||||
/*******************************
|
||||
* MESSAGES *
|
||||
*******************************/
|
||||
|
||||
:- multifile
|
||||
prolog:message/3.
|
||||
|
||||
prolog:message(sgml_write(sdata_as_cdata(Data))) -->
|
||||
[ 'SGML-write: emitting SDATA as CDATA: "~p"'-[Data] ].
|
Reference in New Issue
Block a user