add missing files.
This commit is contained in:
parent
9ebf1e46f9
commit
8ca1345c09
76
packages/sgml/pltotex.pl
Normal file
76
packages/sgml/pltotex.pl
Normal file
@ -0,0 +1,76 @@
|
||||
:- module(pltotex,
|
||||
[ pltotex/2,
|
||||
pltotex/0
|
||||
]).
|
||||
:- use_module(library(doc_latex)).
|
||||
:- use_module(library(main)).
|
||||
:- use_module(library(error)).
|
||||
:- use_module(library(apply)).
|
||||
:- use_module(library(lists)).
|
||||
|
||||
pltotex(File, Options) :-
|
||||
file_name_extension(_, txt, File), !,
|
||||
tex_file(File, Out, Options),
|
||||
doc_latex(File, Out,
|
||||
[ stand_alone(false)
|
||||
| Options
|
||||
]).
|
||||
pltotex(Lib, Options) :-
|
||||
( file_name_extension(_, pl, Lib)
|
||||
-> Spec = Lib
|
||||
; atom_to_term(Lib, Spec, _)
|
||||
),
|
||||
absolute_file_name(Spec, File,
|
||||
[ access(read),
|
||||
file_type(prolog)
|
||||
]),
|
||||
tex_file(File, Out, Options),
|
||||
user:use_module(File), % we want the operators in user
|
||||
doc_latex(File, Out,
|
||||
[ stand_alone(false)
|
||||
| Options
|
||||
]).
|
||||
|
||||
tex_file(_, TeXFile, Options) :-
|
||||
option(out(Base), Options), !,
|
||||
file_name_extension(Base, tex, TeXFile).
|
||||
tex_file(File, TeXFile, _) :-
|
||||
file_base_name(File, Local),
|
||||
file_name_extension(Base0, _, Local),
|
||||
strip(Base0, 0'_, Base),
|
||||
file_name_extension(Base, tex, TeXFile).
|
||||
|
||||
strip(In, Code, Out) :-
|
||||
atom_codes(In, Codes0),
|
||||
delete(Codes0, Code, Codes),
|
||||
atom_codes(Out, Codes).
|
||||
|
||||
|
||||
%% pltotex
|
||||
%
|
||||
% Usage: pl -q -s pltotex.pl -g pltotex -- file ...
|
||||
|
||||
pltotex :-
|
||||
main.
|
||||
|
||||
main(Argv) :-
|
||||
partition(is_option, Argv, OptArgs, Files),
|
||||
maplist(to_option, OptArgs, Options),
|
||||
maplist(process_file(Options), Files).
|
||||
|
||||
is_option(Arg) :-
|
||||
sub_atom(Arg, 0, _, _, --).
|
||||
|
||||
to_option('--section', section_level(section)) :- !.
|
||||
to_option('--subsection', section_level(subsection)) :- !.
|
||||
to_option('--subsubsection', section_level(subsubsection)) :- !.
|
||||
to_option(Arg, Option) :-
|
||||
atom_concat(--, Opt, Arg),
|
||||
sub_atom(Opt, B, _, A, =), !,
|
||||
sub_atom(Opt, 0, B, _, Name),
|
||||
sub_atom(Opt, _, A, 0, Value),
|
||||
Option =.. [Name, Value].
|
||||
|
||||
process_file(Options, File) :-
|
||||
pltotex(File, Options).
|
||||
|
638
packages/sgml/pwp.pl
Normal file
638
packages/sgml/pwp.pl
Normal file
@ -0,0 +1,638 @@
|
||||
:- module(pwp,
|
||||
[ pwp_files/2, % +FileIn, +FileOut
|
||||
pwp_stream/3, % +StreamIn, +StreamOut, +Context
|
||||
pwp_xml/3 % +DomIn, -DOMOut, +Context
|
||||
]).
|
||||
|
||||
/** <module> Prolog Well-formed Pages
|
||||
|
||||
PWP is an approach to server-side scripting using Prolog
|
||||
which is based on a simple key principle:
|
||||
|
||||
- The source form of a PWP should be WELL-FORMED XML
|
||||
|
||||
Especially when generating XML rather than HTML, this is such an
|
||||
obvious thing to do. We have many kinds of XML checking tools.
|
||||
|
||||
- We can tell whether an XML document is WELL FORMED (all the
|
||||
punctuation is right, all tags balance) using practically
|
||||
any decent parser, including SWI Prolog's 'sgml'.
|
||||
|
||||
- If we can write a Document Type Description then we can check
|
||||
that a document is VALID using tools like Open SP (formerly
|
||||
James Clark's SP) or SWI Prolog's 'sgml'. This does not
|
||||
guarantee that the output will be valid, but it does catch
|
||||
a lot of errors very early.
|
||||
|
||||
- If we can write an XML Schema then we can check that a
|
||||
document is schema-valid. (SWI Prolog's 'sgml' does not
|
||||
yet come with a schema validator, but who knows what the
|
||||
future holds?).
|
||||
|
||||
- Since an XML document is just a data structure, we can use
|
||||
any checking tool that we can write in Prolog, IF the input
|
||||
is well-formed so that we can load a template as a Prolog
|
||||
data structure.
|
||||
|
||||
Having decided that the input should be well formed, that means
|
||||
*|NO NEW SYNTAX|*
|
||||
|
||||
None of the weird and horrible <% ... %> or whatever not-quite-XML
|
||||
stuff you see in other template systems, making checking so very hard
|
||||
(and therefore, making errors so distressingly common).
|
||||
|
||||
That in turns means that PWP "markup" must be based on special
|
||||
elements or special attributes. The fact that an XML parser must
|
||||
allow undeclared attributes on any element even when validating,
|
||||
but must not allow undeclared elements, suggests doing this through
|
||||
attributes. In particular, one should be able to take an existing
|
||||
DTD, such as an XHTML DTD, and just use that without modification.
|
||||
So the design reduces to
|
||||
|
||||
- Allow dynamic data selection, insertion, and transformation
|
||||
just using a small number of extra attributes.
|
||||
|
||||
This description uses the following name space:
|
||||
|
||||
==
|
||||
xmlns:pwp='http://www.cs.otago.ac.nz/staffpriv/ok/pwp.pl'
|
||||
==
|
||||
|
||||
The attributes are
|
||||
|
||||
- pwp:ask = Query
|
||||
- pwp:use = Term
|
||||
- pwp:how = text | xml
|
||||
- pwp:tag = QName or '-'
|
||||
- pwp:att = '' | 'one non-alphanumeric character'
|
||||
|
||||
Here's what they mean. Each element is expanded in the context
|
||||
of a set of variable bindings. After expansion, if the tag is
|
||||
not mapped to '-', all attributes in the pwp: namespace are removed
|
||||
and the children elements are recursively expanded.
|
||||
|
||||
* pwp:ask = Query
|
||||
|
||||
* Query is a Prolog goal. For each solution of Query, the element
|
||||
is further processed with the new variables of Query added to
|
||||
the context.
|
||||
|
||||
* If Query is not a well formed Prolog goal, or if execution of
|
||||
Query throws an exception, page transformation comes to a complete
|
||||
halt and no page is generated.
|
||||
|
||||
* pwp:use = Term
|
||||
* pwp:how = text | xml | text-file | xml-file
|
||||
|
||||
Term is a Prolog term; variables in Term are bound by the context.
|
||||
An empty Term is regarded as a missing value for this attribute.
|
||||
The Prolog variable CONTEXT refers to the entire context, a list
|
||||
of Name = Value, where Name is a Prolog atom holding the name of the context variable and Value is an arbitrary Prolog term.
|
||||
|
||||
- If pwp:how is text,
|
||||
The value of Term is used to define a sequence of characters.
|
||||
|
||||
- A number produces the same characters that write/1 would.
|
||||
- An atom produces the same characters that write/1 would.
|
||||
- A string produces the same characters that write/1 would.
|
||||
- A list of character codes produces those characters.
|
||||
|
||||
- The following terms produce the same sequence of characters
|
||||
that the corresponding goal would have sent to the current
|
||||
output stream:
|
||||
|
||||
- write(Datum)
|
||||
- writeq(Datum)
|
||||
- write_canonical(Datum)
|
||||
- print(Datum)
|
||||
- print(Datum)
|
||||
- format(Format)
|
||||
- format(Format, Arguments)
|
||||
|
||||
- A singleton list [X] defines the characters that X defines.
|
||||
- Any other term F(T1,...,Tn) defines the characters that T1
|
||||
defines, followed by the characters that T2 defines, ...,
|
||||
followed by the characters that Tn defines.
|
||||
|
||||
- If pwp:how is xml,
|
||||
The value of Term must be an XML term as defined in the
|
||||
SGML2PL documentation or a list of such terms. A single
|
||||
term is taken as if it had been [Term]. The resulting
|
||||
list of terms replaces the children of the current element
|
||||
and will not be further processed.
|
||||
|
||||
- If pwp:how is text-file,
|
||||
The value of Term is used to define a sequence of characters.
|
||||
That sequence of characters is used as a file name.
|
||||
The file is read as a sequence of characters, and that
|
||||
sequence used as character data.
|
||||
|
||||
- If pwp:how is xml-file,
|
||||
The value of Term is used to define a sequence of characters.
|
||||
That sequence of characters is used as a file name.
|
||||
The file is loaded as XML, and the sequence of XML items thus
|
||||
obtained used. This means that PWP provides XML inclusion
|
||||
without depending on the parser to support XInclude.
|
||||
|
||||
The default value for pwp:how is text.
|
||||
|
||||
* pwp:tag = QName or '-'
|
||||
|
||||
If pwp:tag is missing or the value is empty, the current element
|
||||
appears in the output (after further processing) with its present
|
||||
tag. If pwp:tag is a QName, the current element appears (...)
|
||||
with that as its tag. That option is most useful in DTDs, where
|
||||
an "authoring" DTD may use one tag and have it automatically mapped
|
||||
to another tag in the output, e.g., <item> -> <li>. Finally, if
|
||||
pwp:tag is '-', the children of the current element (either the
|
||||
result of pwp:use or the transformed original children, whichever
|
||||
applies) appear in the output but there is no element around them.
|
||||
|
||||
A missing or empty pwp:ask is just like pwp:ask = 'true'.
|
||||
|
||||
* pwp:att = '' | 'one non-alphanumeric character'.
|
||||
|
||||
Attributes in the pwp namespace are not affected by this attribute.
|
||||
Such attributes are always stripped out and never substituted into.
|
||||
|
||||
If pwp:att is missing or empty, attributes of the current
|
||||
element are copied over to the output unchanged.
|
||||
|
||||
If pwp:att = 'c' for some non-alphanumeric character c,
|
||||
each attribute is examined for occurrences of c(...)c which
|
||||
are as short as possible.
|
||||
There is no one character which could be used every time, so you
|
||||
have to explicitly choose a substitution marker which is safe
|
||||
for the data you do not want to be altered. None of the pwp
|
||||
attributes are inherited, least of all this one.
|
||||
|
||||
Text outside c(...)c groups is copied unchanged; text inside
|
||||
such a group is parsed as a Prolog term and treated as if by
|
||||
pwp:how = text.
|
||||
|
||||
|
||||
Examples:
|
||||
|
||||
1. *|A "Hello World" like example|*
|
||||
|
||||
==
|
||||
<html
|
||||
xmlns:pwp="http://www.cs.otago.ac.nz/staffpriv/ok/pwp.pl"
|
||||
pwp:ask = "ensure_loaded(msg), once(msg(Greeting))">
|
||||
<head>
|
||||
<title pwp:use="Greeting"/>
|
||||
</head>
|
||||
<body>
|
||||
<p><span pwp:use="Greeting" pwp:tag='-'/></p>
|
||||
</body>
|
||||
</html>
|
||||
==
|
||||
|
||||
where msg.pl contains
|
||||
|
||||
==
|
||||
msg('Hello, World!').
|
||||
==
|
||||
|
||||
This example illustrates an important point. Prolog Well-Formed
|
||||
Pages provide *NO* way to physically incorporate Prolog *clauses*
|
||||
into a page template. Prolog clauses must be put in separate
|
||||
files which can be checked by a Prolog syntax checker, compiler,
|
||||
cross-referencer, &c WITHOUT the Prolog tool in question needing
|
||||
to know anything whatsoever about PWP. You load the files using
|
||||
pwp:ask on the root element.
|
||||
|
||||
2. *|Binding some variables and using them|*
|
||||
|
||||
==
|
||||
<html
|
||||
xmlns:pwp="http://www.cs.otago.ac.nz/staffpriv/ok/pwp.pl">
|
||||
<head><title>Example 2</title></head>
|
||||
<body pwp:ask="Hello = 'Hello world', A = 20, B = 22">
|
||||
<h1 pwp:use="Hello"/>
|
||||
<p>The answer is <span pwp:use="C" pwp:ask="C is A+B"/>.</p>
|
||||
</body>
|
||||
</html>
|
||||
==
|
||||
|
||||
3. *|Making a table|*
|
||||
We are given a Prolog database staff.pl defining
|
||||
staff(NickName, FullName, Office, Phone, E_Mail_Address).
|
||||
status(NickName, full_time | part_time).
|
||||
We want to make a phone list of full time staff.
|
||||
|
||||
==
|
||||
<html
|
||||
xmlns:pwp="http://www.cs.otago.ac.nz/staffpriv/ok/pwp.pl"
|
||||
pwp:ask='ensure_loaded(staff)'>
|
||||
<head>
|
||||
<title>Phone list for Full-Time staff.</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Phone list for Full-Time staff.</h1>
|
||||
<table
|
||||
pwp:ask = "setof(FullName-Phone,
|
||||
N^O^E^(
|
||||
status(N, full_time),
|
||||
staff(N, FullName, O, Phone, E)
|
||||
),
|
||||
Staff_List)">
|
||||
<tr><th>Name</th><th>Phone</th></tr>
|
||||
<tr pwp:ask="member(FullName-Phone, Staff_List)">
|
||||
<td pwp:use="FullName"/>
|
||||
<td pwp:use="Phone"/>
|
||||
</tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
==
|
||||
|
||||
4. *|Substituting into an attribute|*
|
||||
Same data base as before, but now we want to make a mailing list
|
||||
page.
|
||||
|
||||
==
|
||||
<html
|
||||
xmlns:pwp="http://www.cs.otago.ac.nz/staffpriv/ok/pwp.pl"
|
||||
pwp:ask='ensure_loaded(staff)'>
|
||||
<head>
|
||||
<title>Phone list for Full-Time staff.</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Phone list for Full-Time staff.</h1>
|
||||
<table
|
||||
pwp:ask = "setof(FullName-E_Mail,
|
||||
N^O^P^staff(N, FullName, O, P, E_Mail),
|
||||
Staff_List)">
|
||||
<tr><th>Name</th><th>Address</th></tr>
|
||||
<tr pwp:ask="member(FullName-E_Mail, Staff_List)">
|
||||
<td pwp:use="FullName"/>
|
||||
<td><a pwp:use="E_Mail"
|
||||
pwp:att='$' href="mailto:$(E_Mail)$"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
==
|
||||
|
||||
5. *|If-then-else effect|*
|
||||
A page that displays the value of the 'SHELL' environment
|
||||
variable if it has one, otherwise displays 'There is no
|
||||
default shell.'
|
||||
|
||||
==
|
||||
<html
|
||||
xmlns:pwp="http://www.cs.otago.ac.nz/staffpriv/ok/pwp.pl">
|
||||
<head><title>$SHELL</title></head>
|
||||
<body>
|
||||
<p pwp:ask="getenv('SHELL', Shell)"
|
||||
>The default shell is <span pwp:tag="-" pwp:use="Shell"/>.</p>
|
||||
<p pwp:ask="\+getenv('SHELL',_)">There is no default shell.</p>
|
||||
</body>
|
||||
</html>
|
||||
==
|
||||
|
||||
There is one other criterion for a good server-side template
|
||||
language:
|
||||
|
||||
It should be possible to compile templates so as to eliminate
|
||||
most if not all interpretation overhead.
|
||||
|
||||
This particular notation satisfies that criterion with the
|
||||
limitation that the conversion of a term to character data requires
|
||||
run-time traversal of terms (because the terms are not known until
|
||||
run time).
|
||||
|
||||
@author Richard O'Keefe
|
||||
@tbd Support compilation of PWP input files
|
||||
*/
|
||||
|
||||
:- use_module(library(sgml), [load_xml_file/2]).
|
||||
:- use_module(library(sgml_write), [xml_write/3]).
|
||||
:- use_module(library(lists), [append/3]).
|
||||
|
||||
:- meta_predicate
|
||||
pwp_files(:, +),
|
||||
pwp_stream(:, +, +),
|
||||
pwp_xml(:, -, +).
|
||||
|
||||
|
||||
%% pwp_files(:In:atom, +Out:atom) is det.
|
||||
%
|
||||
% loads an Xml document from the file named In,
|
||||
% transforms it using the PWP attributes, and
|
||||
% writes the transformed version to the new file named Out.
|
||||
|
||||
pwp_files(M:In, Out) :-
|
||||
load_xml_file(In, Contents),
|
||||
pwp_xml(M:Contents, Transformed, []), !,
|
||||
setup_call_cleanup(open(Out, write, Output),
|
||||
xml_write(Output, Transformed, []),
|
||||
close(Output)).
|
||||
|
||||
|
||||
%% pwp_stream(:Input:input_stream, +Output:output_stream,
|
||||
%% +Context:list) is det.
|
||||
%
|
||||
% Loads an Xml document from the given Input stream, transforms it
|
||||
% using the PWP attributes, and writes the transformed version to
|
||||
% the given Output stream. Context provides initial contextual
|
||||
% variables and is a list of Name=Value.
|
||||
|
||||
pwp_stream(M:Input, Output, Context) :-
|
||||
load_xml_file(stream(Input), Contents),
|
||||
pwp_xml(M:Contents, Transformed, Context), !,
|
||||
xml_write(Output, Transformed, []).
|
||||
|
||||
|
||||
/* Recall that an XML term is one of
|
||||
|
||||
<atom> Character Data
|
||||
sdata(...) SDATA (SGML only)
|
||||
ndata(...) NDATA
|
||||
pi(...) Processing instruction
|
||||
|
||||
element(Name, [Att...], [Child...])
|
||||
|
||||
where Att is Attribute=Value and Child is an XML term.
|
||||
|
||||
We are only concerned with elements; all other XML terms are
|
||||
left alone. I have given some thought to recognising
|
||||
|
||||
<?pwp ...Command...?>
|
||||
|
||||
processing instructions, executing the Command, and removing
|
||||
the processing instructions, as a debugging tool. But this
|
||||
is a proof-of-concept implementation; debugging features can
|
||||
wait for The Real Thing.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
%% pwp_xml(:In:list(xml), -Out:list(xml), +Context)
|
||||
%
|
||||
% maps down a list of XML items, acting specially on elements and
|
||||
% copying everything else unchanged, including white space.
|
||||
% The Context is a list of 'VariableName'=CurrentValue bindings.
|
||||
|
||||
pwp_xml(M:In, Out, Context) :-
|
||||
pwp_list(In, Out, M, Context).
|
||||
|
||||
pwp_list([], [], _, _).
|
||||
pwp_list([element(Tag0,Atts0,Kids0)|Xs], Ys0, M, Context) :- !,
|
||||
pwp_attributes(Atts0, Ask, Use, How, Att, Tag1, Atts1),
|
||||
( nonvar(Tag1), Tag1 \== '' -> Tag2 = Tag1
|
||||
; Tag2 = Tag0
|
||||
),
|
||||
( nonvar(Ask), Ask \== '', Ask \== 'true'
|
||||
-> atom_to_term(Ask, Query, Bindings),
|
||||
pwp_unite(Bindings, Context, Context1),
|
||||
findall(Xml,
|
||||
( M:Query,
|
||||
pwp_element(Tag2, Atts1, Kids0, Use, How, Att,
|
||||
M, Context1, Xml)),
|
||||
NewContent)
|
||||
; /* Ask is missing, empty, or true */
|
||||
pwp_element(Tag2, Atts1, Kids0, Use, How, Att,
|
||||
M, Context, NewContent)
|
||||
),
|
||||
pwp_attach(NewContent, Ys0, Ys1),
|
||||
pwp_list(Xs, Ys1, M, Context).
|
||||
pwp_list([X|Xs], [X|Ys], M, Context) :-
|
||||
pwp_list(Xs, Ys, M, Context).
|
||||
|
||||
|
||||
%% pwp_attributes(+Atts0:list(=(atom,atom)),
|
||||
%% -Ask:atom, -Use:atom, -How:atom, -Att:atom,
|
||||
%% -Tag:atom, -Atts1:list(=(atom,atom)))
|
||||
%
|
||||
% Walks down a list of AttributeName=ItsValue pairs, stripping out
|
||||
% those whose AttributeName begins with the 'pwp:' prefix, and
|
||||
% copying the rest to Atts1. Along the way, Ask/Use/How/Att/Tag
|
||||
% are bound to the values of the
|
||||
% pwp:ask/pwp:use/pwp:how/pwp:att/pwp:tag attributes, if any. At
|
||||
% the end, any of these variables that are still unbound REMAIN
|
||||
% unbound; they are not bound to default values.
|
||||
|
||||
pwp_attributes([], _, _, _, _, _, []).
|
||||
pwp_attributes([AV|AVs], Ask, Use, How, Att, Tag, New_Atts1) :-
|
||||
AV = (Name=Value),
|
||||
( pwp_attr(Name, PWPName)
|
||||
-> ( pwp_attr(PWPName, Value, Ask, Use, How, Att, Tag)
|
||||
-> New_Atts1 = New_Atts2
|
||||
; New_Atts1 = New_Atts2
|
||||
)
|
||||
; New_Atts1 = [AV|New_Atts2]
|
||||
),
|
||||
pwp_attributes(AVs, Ask, Use, How, Att, Tag, New_Atts2).
|
||||
|
||||
|
||||
pwp_attr(ask, Value, Value, _Use, _How, _Att, _Tag).
|
||||
pwp_attr(use, Value, _Ask, Value, _How, _Att, _Tag).
|
||||
pwp_attr(how, Value, _Ask, _Use, Value, _Att, _Tag).
|
||||
pwp_attr(att, Value, _Ask, _Use, _How, Value, _Tag).
|
||||
pwp_attr(tag, Value, _Ask, _Use, _How, _Att, Value).
|
||||
|
||||
%% pwp_attr(+XMLAttr, -PWPLocal) is semidet.
|
||||
%
|
||||
% True if PWPLocal is the local name of a pwp:Local expression in
|
||||
% XML. This predicate deals with the three different XML
|
||||
% representations: the form is returned of XML namespace
|
||||
% processing is not enabled. The second if it is enabled and the
|
||||
% namespace is properly defined and the last if the namespace is
|
||||
% not defined.
|
||||
|
||||
pwp_attr(Atom, PWP) :-
|
||||
atom(Atom),
|
||||
atom_concat('pwp:', PWP, Atom), !.
|
||||
pwp_attr('http://www.cs.otago.ac.nz/staffpriv/ok/pwp.pl':PWP, PWP) :- !.
|
||||
pwp_attr('pwp':PWP, PWP) :- !.
|
||||
pwp_attr('xmlns:pwp', -).
|
||||
|
||||
%% pwp_unite(+Bindings, +Context0, -Context:list(=(atom,any)))
|
||||
%
|
||||
% merges the new Bindings with the bindings in the outer Context0,
|
||||
% constructing a new list of VariableName=CurrentValue bindings in
|
||||
% Context1. This is only used when the CurrentValue parts of the
|
||||
% new Bindings are known to be distinct new variables, so the
|
||||
% Bindings cannot possibly conflict with any existing binding in
|
||||
% Context0. This is O(|Bindings|.|Context0|), which is not that
|
||||
% efficient, but since we do not expect there to be very many
|
||||
% variables it doesn't matter much.
|
||||
|
||||
pwp_unite(Bindings, Context0, Context) :-
|
||||
pwp_unite(Bindings, Context0, Context0, Context).
|
||||
|
||||
|
||||
pwp_unite([], _, Context, Context).
|
||||
pwp_unite([Binding|Bindings], Context0, Context1, Context) :-
|
||||
memberchk(Binding, Context0), !,
|
||||
pwp_unite(Bindings, Context0, Context1, Context).
|
||||
pwp_unite(['CONTEXT'=Context0|Bindings], Context0, Context1, Context) :- !,
|
||||
pwp_unite(Bindings, Context0, Context1, Context).
|
||||
pwp_unite([Binding|Bindings], Context0, Context1, Context) :-
|
||||
pwp_unite(Bindings, Context0, [Binding|Context1], Context).
|
||||
|
||||
|
||||
|
||||
%% pwp_unite(+Bindings, +Context0: list(=(atom,any)))
|
||||
%
|
||||
% looks up the bindings in Bindings in the outer Context0.
|
||||
% This is only used for 'pwp:use' terms (and the related terms
|
||||
% in $(...)$ attribute value substitutions), so that we have
|
||||
% no interest in forming a new context. (If we did, we'd use
|
||||
% pwp_unite/3 instead.) This is only used when the CurrentValue
|
||||
% parts of the new Bindings are known to be distinct new variables,
|
||||
% so the Bindings cannot possibly conflict with any existing
|
||||
% binding in Context0. However, there _could_ be new variables
|
||||
% in Bindings, and that would cause problems. An XML term may
|
||||
% not contain variables, and a term we want to convert to a list
|
||||
% of character codes had better not contain variables either.
|
||||
% One approach would be to just bind such variables to something,
|
||||
% another is to throw some kind of exception. For the moment we
|
||||
% call functor/3 so as to get an instantiation error.
|
||||
|
||||
pwp_unite([], _).
|
||||
pwp_unite([Binding|Bindings], Context) :-
|
||||
memberchk(Binding, Context), !,
|
||||
pwp_unite(Bindings, Context).
|
||||
pwp_unite([_=Value|_], _) :-
|
||||
functor(Value, _, _).
|
||||
|
||||
%% pwp_attach(+Tree, ?Ys0: list(xml), ?Ys: list(xml))
|
||||
%
|
||||
% is a combination of "flatten" and "append".
|
||||
% It unifies Ys0\Ys with the result of flattening Tree.
|
||||
|
||||
pwp_attach([], Ys, Ys) :- !.
|
||||
pwp_attach([X|Xs], Ys0, Ys) :- !,
|
||||
pwp_attach(X, Ys0, Ys1),
|
||||
pwp_attach(Xs, Ys1, Ys).
|
||||
pwp_attach(X, [X|Ys], Ys).
|
||||
|
||||
|
||||
|
||||
pwp_element('-', _, Kids, Use, How, _, M, Context, Xml) :- !,
|
||||
pwp_use(Use, How, Kids, M, Context, Xml).
|
||||
pwp_element(Tag, Atts, Kids, Use, How, Magic, M, Context,
|
||||
element(Tag,Atts1,Kids1)) :-
|
||||
( nonvar(Magic)
|
||||
-> pwp_substitute(Atts, Magic, Context, Atts1)
|
||||
; Atts1 = Atts
|
||||
),
|
||||
pwp_use(Use, How, Kids, M, Context, Kids1).
|
||||
|
||||
pwp_use('', _, Kids, M, Context, Kids1) :- !,
|
||||
pwp_list(Kids, Kids1, M, Context).
|
||||
pwp_use(Use, How, _, _, Context, Kids1) :-
|
||||
atom_to_term(Use, Term, Bindings),
|
||||
pwp_unite(Bindings, Context),
|
||||
pwp_how(How, Term, Kids1).
|
||||
|
||||
pwp_how('text', Term, [CData]) :- !,
|
||||
pwp_use_codes(Term, Codes, []),
|
||||
atom_codes(CData, Codes).
|
||||
pwp_how('xml', Term, Kids1) :-
|
||||
( Term == [] -> Kids1 = Term
|
||||
; Term = [_|_] -> Kids1 = Term
|
||||
; Kids1 = [Term]
|
||||
).
|
||||
pwp_how('text-file', Term, [CData]) :-
|
||||
pwp_use_codes(Term, Codes, []),
|
||||
atom_codes(FileName, Codes),
|
||||
read_file_to_codes(FileName, FileCodes, []),
|
||||
atom_codes(CData, FileCodes).
|
||||
pwp_how('xml-file', Term, Kids1) :-
|
||||
pwp_use_codes(Term, Codes, []),
|
||||
atom_codes(FileName, Codes),
|
||||
load_xml_file(FileName, Kids1).
|
||||
|
||||
|
||||
pwp_substitute([], _, _, []).
|
||||
pwp_substitute([AV|AVs], Magic, Context, [AV1|Atts1]) :-
|
||||
AV = (Name = Value),
|
||||
( sub_atom(Value, _, _, _, Magic)
|
||||
-> char_code(Magic, C),
|
||||
atom_codes(Value, Codes),
|
||||
pwp_split(Codes, C, B0, T0, A0), !,
|
||||
pwp_substitute(B0, T0, A0, C, Context, V),
|
||||
atom_codes(New_Value, V),
|
||||
AV1 = (Name = New_Value),
|
||||
pwp_substitute(AVs, Magic, Context, Atts1)
|
||||
).
|
||||
pwp_substitute([AV|AVs], Magic, Context, [AV|Atts1]) :-
|
||||
pwp_substitute(AVs, Magic, Context, Atts1).
|
||||
|
||||
|
||||
pwp_substitute(B0, T0, A0, C, Context, V0) :-
|
||||
append(B0, V1, V0),
|
||||
atom_codes(Atom, T0),
|
||||
atom_to_term(Atom, Term, Bindings),
|
||||
pwp_unite(Bindings, Context, _),
|
||||
pwp_use_codes(Term, V1, V2),
|
||||
( pwp_split(A0, C, B1, T1, A1)
|
||||
-> pwp_substitute(B1, T1, A1, C, Context, V2)
|
||||
; V2 = A0
|
||||
).
|
||||
|
||||
|
||||
pwp_split(Codes, C, Before, Text, After) :-
|
||||
append(Before, [C,0'(|Rest], Codes),
|
||||
append(Text, [0'),C|After], Rest), !.
|
||||
|
||||
|
||||
pwp_use_codes(format(Format), S0, S) :- !,
|
||||
pwp_format(Format, [], S0, S).
|
||||
pwp_use_codes(format(Format,Args), S0, S) :- !,
|
||||
pwp_format(Format, Args, S0, S).
|
||||
pwp_use_codes(write_canonical(Datum), S0, S) :- !,
|
||||
pwp_format('~k', [Datum], S0, S).
|
||||
pwp_use_codes(print(Datum), S0, S) :- !,
|
||||
pwp_format('~p', [Datum], S0, S).
|
||||
pwp_use_codes(writeq(Datum), S0, S) :- !,
|
||||
pwp_format('~q', [Datum], S0, S).
|
||||
pwp_use_codes(write(Datum), S0, S) :- !,
|
||||
pwp_format('~w', [Datum], S0, S).
|
||||
pwp_use_codes(Atomic, S0, S) :-
|
||||
atomic(Atomic), !,
|
||||
( number(Atomic) -> number_codes(Atomic, Codes)
|
||||
; atom(Atomic) -> atom_codes(Atomic, Codes)
|
||||
; string(Atomic) -> string_to_list(Atomic, Codes)
|
||||
; pwp_format('~w', [Atomic], S0, S)
|
||||
),
|
||||
append(Codes, S, S0).
|
||||
pwp_use_codes([X|Xs], S0, S) :-
|
||||
pwp_is_codes([X|Xs]), !,
|
||||
append([X|Xs], S, S0).
|
||||
pwp_use_codes([X|Xs], S0, S) :- !,
|
||||
pwp_use_codes(Xs, X, S0, S).
|
||||
pwp_use_codes(Compound, S0, S) :-
|
||||
Compound =.. [_,X|Xs],
|
||||
pwp_use_codes(Xs, X, S0, S).
|
||||
|
||||
|
||||
|
||||
pwp_use_codes([], X, S0, S) :- !,
|
||||
pwp_use_codes(X, S0, S).
|
||||
pwp_use_codes([Y|Ys], X, S0, S) :-
|
||||
pwp_use_codes(X, S0, S1),
|
||||
pwp_use_codes(Ys, Y, S1, S).
|
||||
|
||||
|
||||
|
||||
%% pwp_is_codes(+String: any)
|
||||
%
|
||||
% is true when String is a list of integers and each of those
|
||||
% integers is a possible Unicode value (in the range U+0000..U+10FFFF).
|
||||
% Back in the days of ISO Latin 1 we would have checked for 0..255,
|
||||
% and way back in the days of ASCII for 0..127. Yes, there are more
|
||||
% than a million possible characters in Unicode; currently about
|
||||
% 100 000 of them are in use.
|
||||
|
||||
pwp_is_codes([]).
|
||||
pwp_is_codes([C|Cs]) :-
|
||||
integer(C), C >= 0, C =< 0x10FFFF,
|
||||
pwp_is_codes(Cs).
|
||||
|
||||
pwp_format(Format, Arguments, S0, S) :-
|
||||
format(codes(S0, S), Format, Arguments).
|
336
packages/sgml/xpath.pl
Normal file
336
packages/sgml/xpath.pl
Normal file
@ -0,0 +1,336 @@
|
||||
/* This file is part of ClioPatria.
|
||||
|
||||
Author: Jan Wielemaker <J.Wielemaker@cs.vu.nl>
|
||||
HTTP: http://e-culture.multimedian.nl/
|
||||
Copyright: 2007, E-Culture/MultimediaN
|
||||
|
||||
ClioPatria is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
ClioPatria is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with ClioPatria. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
:- module(xpath,
|
||||
[ xpath/3, % +DOM, +Spec, -Value
|
||||
xpath_chk/3, % +DOM, +Spec, -Value
|
||||
|
||||
op(400, fx, //),
|
||||
op(400, fx, /),
|
||||
op(200, fy, @)
|
||||
]).
|
||||
:- use_module(library(record)).
|
||||
:- use_module(library(lists)).
|
||||
:- use_module(library(occurs)).
|
||||
:- use_module(library(debug)).
|
||||
|
||||
/** <module> Select nodes in an XML DOM
|
||||
|
||||
The library xpath.pl provides predicates to select nodes from an XML DOM
|
||||
tree as produced by library(sgml) based on descriptions inspired by the
|
||||
XPATH language.
|
||||
|
||||
The predicate xpath/3 selects a sub-structure of the DOM
|
||||
non-deterministically based on an xpath-like specification. Not all
|
||||
selectors of XPATH are implemented, but the ability to mix xpath/3 calls
|
||||
with arbitrary Prolog code provides a powerful tool for extracting
|
||||
information from XML parse-trees.
|
||||
|
||||
@see http://www.w3.org/TR/xpath
|
||||
*/
|
||||
|
||||
:- record
|
||||
element(name, attributes, content).
|
||||
|
||||
%% xpath_chk(+DOM, +Spec, ?Content) is semidet.
|
||||
%
|
||||
% Semi-deterministic version of xpath/3.
|
||||
|
||||
xpath_chk(DOM, Spec, Content) :-
|
||||
xpath(DOM, Spec, Content), !.
|
||||
|
||||
%% xpath(+DOM, +Spec, ?Content) is nondet.
|
||||
%
|
||||
% Match an element in a DOM structure. The syntax is inspired by
|
||||
% XPath, using () rather than [] to select inside an element.
|
||||
% First we can construct paths using / and //:
|
||||
%
|
||||
% $ =|//|=Term :
|
||||
% Select any node in the DOM matching term.
|
||||
% $ =|/|=Term :
|
||||
% Match the root against Term.
|
||||
% $ Term :
|
||||
% Select the immediate children of the root matching Term.
|
||||
%
|
||||
% The Terms above are of type _callable_. The functor specifies
|
||||
% the element name. The element name '*' refers to any element.
|
||||
% The name =self= refers to the top-element itself and is often
|
||||
% used for processing matches of an earlier xpath/3 query. A term
|
||||
% NS:Term refers to an XML name in the namespace NS. Optional
|
||||
% arguments specify additional constraints and functions. The
|
||||
% arguments are processed from left to right. Defined conditional
|
||||
% argument values are:
|
||||
%
|
||||
% $ Integer :
|
||||
% The N-th element with the given name
|
||||
% $ =last= :
|
||||
% The last element with the given name.
|
||||
% $ =last= - IntExpr :
|
||||
% The IntExpr-th element counting from the last (0-based)
|
||||
%
|
||||
% Defined function argument values are:
|
||||
%
|
||||
% $ =self= :
|
||||
% Evaluate to the entire element
|
||||
% $ =text= :
|
||||
% Evaluates to all text from the sub-tree as an atom
|
||||
% $ =normalize_space= :
|
||||
% As =text=, but uses normalize_space/2 to normalise
|
||||
% white-space in the output
|
||||
% $ =number= :
|
||||
% Extract an integer or float from the value. Ignores
|
||||
% leading and trailing white-space
|
||||
% $ =|@|=Attribute :
|
||||
% Evaluates to the value of the given attribute
|
||||
%
|
||||
% In addition, the argument-list can be _conditions_:
|
||||
%
|
||||
% $ Left = Right :
|
||||
% Succeeds if the left-hand unifies with the right-hand.
|
||||
% E.g. normalize_space = 'euro'
|
||||
% $ contains(Haystack, Needle) :
|
||||
% Succeeds if Needle is a sub-string of Haystack.
|
||||
%
|
||||
% Examples:
|
||||
%
|
||||
% Match each table-row in DOM:
|
||||
%
|
||||
% ==
|
||||
% xpath(DOM, //tr, TR)
|
||||
% ==
|
||||
%
|
||||
% Match the last cell of each tablerow in DOM. This example
|
||||
% illustrates that a result can be the input of subsequent xpath/3
|
||||
% queries. Using multiple queries on the intermediate TR term
|
||||
% guarantee that all results come from the same table-row:
|
||||
%
|
||||
% ==
|
||||
% xpath(DOM, //tr, TR),
|
||||
% xpath(TR, /td(last), TD)
|
||||
% ==
|
||||
%
|
||||
% Match each =href= attribute in an <a> element
|
||||
%
|
||||
% ==
|
||||
% xpath(DOM, //a(@href), HREF)
|
||||
% ==
|
||||
%
|
||||
% Suppose we have a table containing rows where each first column
|
||||
% is the name of a product with a link to details and the second
|
||||
% is the price (a number). The following predicate matches the
|
||||
% name, URL and price:
|
||||
%
|
||||
% ==
|
||||
% product(DOM, Name, URL, Price) :-
|
||||
% xpath(DOM, //tr, TR),
|
||||
% xpath(TR, td(1), C1),
|
||||
% xpath(C1, /self(normalize_space), Name),
|
||||
% xpath(C1, a(@href), URL),
|
||||
% xpath(TR, td(2, number), Price).
|
||||
% ==
|
||||
|
||||
xpath(DOM, Spec, Content) :-
|
||||
in_dom(Spec, DOM, Content).
|
||||
|
||||
in_dom(//Spec, DOM, Value) :- !,
|
||||
element_spec(Spec, Name, Modifiers),
|
||||
sub_dom(I, Len, Name, E, DOM),
|
||||
modifiers(Modifiers, I, Len, E, Value).
|
||||
in_dom(/Spec, E, Value) :- !,
|
||||
element_spec(Spec, Name, Modifiers),
|
||||
( Name == self
|
||||
-> true
|
||||
; element_name(E, Name)
|
||||
),
|
||||
modifiers(Modifiers, 1, 1, E, Value).
|
||||
in_dom(A/B, DOM, Value) :- !,
|
||||
in_dom(A, DOM, Value0),
|
||||
in_dom(B, Value0, Value).
|
||||
in_dom(A//B, DOM, Value) :- !,
|
||||
in_dom(A, DOM, Value0),
|
||||
in_dom(//B, Value0, Value).
|
||||
in_dom(Spec, element(_, _, Content), Value) :-
|
||||
element_spec(Spec, Name, Modifiers),
|
||||
count_named_elements(Content, Name, CLen),
|
||||
CLen > 0,
|
||||
nth_element(N, Name, E, Content),
|
||||
modifiers(Modifiers, N, CLen, E, Value).
|
||||
|
||||
element_spec(Var, _, _) :-
|
||||
var(Var), !,
|
||||
instantiation_error(Var).
|
||||
element_spec(NS:Term, NS:Name, Modifiers) :- !,
|
||||
Term =.. [Name0|Modifiers],
|
||||
star(Name0, Name).
|
||||
element_spec(Term, Name, Modifiers) :- !,
|
||||
Term =.. [Name0|Modifiers],
|
||||
star(Name0, Name).
|
||||
|
||||
star(*, _) :- !.
|
||||
star(Name, Name).
|
||||
|
||||
|
||||
%% sub_dom(-Index, -Count, +Name, -Sub, +DOM) is nondet.
|
||||
%
|
||||
% Sub is a node in DOM with Name.
|
||||
%
|
||||
% @param Count is the total number of nodes in the content
|
||||
% list Sub appears that have the same name.
|
||||
% @param Index is the 1-based index of Sub of nodes with
|
||||
% Name.
|
||||
|
||||
sub_dom(1, 1, Name, DOM, DOM) :-
|
||||
element_name(DOM, Name).
|
||||
sub_dom(N, Len, Name, E, element(_,_,Content)) :- !,
|
||||
sub_dom_2(N, Len, Name, E, Content).
|
||||
sub_dom(N, Len, Name, E, Content) :-
|
||||
is_list(Content),
|
||||
sub_dom_2(N, Len, Name, E, Content).
|
||||
|
||||
sub_dom_2(N, Len, Name, Element, Content) :-
|
||||
( count_named_elements(Content, Name, Len),
|
||||
nth_element(N, Name, Element, Content)
|
||||
; member(element(_,_,C2), Content),
|
||||
sub_dom_2(N, Len, Name, Element, C2)
|
||||
).
|
||||
|
||||
|
||||
%% count_named_elements(+Content, +Name, -Count) is det.
|
||||
%
|
||||
% Count is the number of nodes with Name in Content.
|
||||
|
||||
count_named_elements(Content, Name, Count) :-
|
||||
count_named_elements(Content, Name, 0, Count).
|
||||
|
||||
count_named_elements([], _, Count, Count).
|
||||
count_named_elements([element(Name,_,_)|T], Name, C0, C) :- !,
|
||||
C1 is C0+1,
|
||||
count_named_elements(T, Name, C1, C).
|
||||
count_named_elements([_|T], Name, C0, C) :-
|
||||
count_named_elements(T, Name, C0, C).
|
||||
|
||||
|
||||
%% nth_element(?N, +Name, -Element, +Content:list) is nondet.
|
||||
%
|
||||
% True if Element is the N-th element with name in Content.
|
||||
|
||||
nth_element(N, Name, Element, Content) :-
|
||||
nth_element_(1, N, Name, Element, Content).
|
||||
|
||||
nth_element_(I, N, Name, E, [H|T]) :-
|
||||
element_name(H, Name), !,
|
||||
( N = I,
|
||||
E = H
|
||||
; I2 is I + 1,
|
||||
( nonvar(N), I2 > N
|
||||
-> !, fail
|
||||
; true
|
||||
),
|
||||
nth_element_(I2, N, Name, E, T)
|
||||
).
|
||||
nth_element_(I, N, Name, E, [_|T]) :-
|
||||
nth_element_(I, N, Name, E, T).
|
||||
|
||||
|
||||
%% modifiers(+Modifiers, +I, +Clen, +DOM, -Value)
|
||||
%
|
||||
%
|
||||
|
||||
modifiers([], _, _, Value, Value).
|
||||
modifiers([H|T], I, L, Value0, Value) :-
|
||||
modifier(H, I, L, Value0, Value1),
|
||||
modifiers(T, I, L, Value1, Value).
|
||||
|
||||
modifier(N, I, _, Value, Value) :- % Integer
|
||||
integer(N), !,
|
||||
N =:= I.
|
||||
modifier(last, I, L, Value, Value) :- !, % last
|
||||
I =:= L.
|
||||
modifier(last-Expr, I, L, Value, Value) :- !, % last-Expr
|
||||
I =:= L-Expr.
|
||||
modifier(Function, _, _, In, Out) :-
|
||||
xpath_function(Function, In, Out).
|
||||
|
||||
xpath_function(self, DOM, Value) :- !, % self
|
||||
Value = DOM.
|
||||
xpath_function(text, DOM, Text) :- !, % text
|
||||
text_of_dom(DOM, Text).
|
||||
xpath_function(normalize_space, DOM, Text) :- !, % normalize_space
|
||||
text_of_dom(DOM, Text0),
|
||||
normalize_space(atom(Text), Text0).
|
||||
xpath_function(number, DOM, Number) :- !, % number
|
||||
text_of_dom(DOM, Text0),
|
||||
normalize_space(string(Text), Text0),
|
||||
catch(atom_number(Text, Number), _, fail).
|
||||
xpath_function(@Name, element(_, Attrs, _), Value) :- !, % @Name
|
||||
memberchk(Name=Value, Attrs).
|
||||
xpath_function(Left = Right, Value, Value) :- !, % =
|
||||
var_or_function(Left, Value, LeftValue),
|
||||
var_or_function(Right, Value, RightValue),
|
||||
LeftValue = RightValue.
|
||||
xpath_function(contains(Haystack, Needle), Value, Value) :- !, % contains(Haystack, Needle)
|
||||
val_or_function(Haystack, Value, HaystackValue),
|
||||
val_or_function(Needle, Value, NeedleValue),
|
||||
atom(HaystackValue), atom(NeedleValue),
|
||||
( sub_atom(HaystackValue, _, _, _, NeedleValue)
|
||||
-> true
|
||||
).
|
||||
|
||||
var_or_function(Arg, _, Arg) :-
|
||||
var(Arg), !.
|
||||
var_or_function(Func, Value0, Value) :-
|
||||
xpath_function(Func, Value0, Value).
|
||||
|
||||
val_or_function(Arg, _, Arg) :-
|
||||
var(Arg), !,
|
||||
instantiation_error(Arg).
|
||||
val_or_function(Func, Value0, Value) :- % TBD
|
||||
xpath_function(Func, Value0, Value).
|
||||
|
||||
|
||||
%% text_of_dom(+DOM, -Text:atom) is det.
|
||||
%
|
||||
% Text is the joined textual content of DOM.
|
||||
|
||||
text_of_dom(DOM, Text) :-
|
||||
phrase(text_of(DOM), Tokens),
|
||||
concat_atom(Tokens, Text).
|
||||
|
||||
text_of(element(_,_,Content)) -->
|
||||
text_of_list(Content).
|
||||
text_of([]) -->
|
||||
[].
|
||||
text_of([H|T]) -->
|
||||
text_of(H),
|
||||
text_of(T).
|
||||
|
||||
|
||||
text_of_list([]) -->
|
||||
[].
|
||||
text_of_list([H|T]) -->
|
||||
text_of_1(H),
|
||||
text_of_list(T).
|
||||
|
||||
|
||||
text_of_1(element(_,_,Content)) --> !,
|
||||
text_of_list(Content).
|
||||
text_of_1(Data) -->
|
||||
{ assertion(atom(Data)) },
|
||||
[Data].
|
Reference in New Issue
Block a user