388 lines
12 KiB
Prolog
388 lines
12 KiB
Prolog
/* Using xml.pl to solve XML Query Cases - An Example
|
|
*
|
|
* The following is a complete example to illustrate how the module can be used;
|
|
* it exercises both the input and output parsing modes of xml_parse/[2,3], and
|
|
* illustrates the use of xml_subterm/2 to access the nodes of a "document value
|
|
* model". It's written for Quintus Prolog, but should port to other Prologs
|
|
* easily.
|
|
*
|
|
* The entry-point of the program is the test/1 predicate.
|
|
*
|
|
* test( +QueryId ) executes a Prolog implementation of a Query from Use Case
|
|
* "XMP": Experiences and Exemplars, in the W3C's XML Query Use Cases, which
|
|
* "contains several example queries that illustrate requirements gathered from
|
|
* the database and document communities".
|
|
* <http://www.w3.org/TR/2002/WD-xmlquery-use-cases-20021115/#xmp>
|
|
*
|
|
* QueryId is one of q1…q12 selecting which of the 12 use cases is executed.
|
|
* The XML output is written to the file [QueryId].xml in the current directory.
|
|
*
|
|
* xml_pp/1 is used to display the resulting "document value model"
|
|
* data-structures on the user output (stdout) stream.
|
|
*/
|
|
|
|
:- use_module(library(lists),[append/3]).
|
|
|
|
% test( +Query ) runs the use-case query identified by Query, writes the
% resulting XML text to the file named after Query (with an "xml" extension),
% and pretty-prints the result's document term on the current output.
test( Query ) :-
    xml_query( Query, ResultElement ),
    Document = xml([], [ResultElement]),
    % xml_parse/2 in its output mode: Document is known, Chars is generated.
    xml_parse( Chars, Document ),
    absolute_file_name( Query, [extensions([xml])], OutputFile ),
    % Redirect current output to OutputFile only while the chars are written.
    tell( OutputFile ),
    put_chars( Chars ),
    told,
    % Show the document term itself on the (restored) current output.
    write( 'Output XML' ), nl,
    xml_pp( Document ).
|
|
|
|
/* xml_query( +QueryNo, ?OutputXML ) holds when OutputXML is the XML Document
* Value Model produced by running the example query identified by QueryNo,
* taken from the XML Query "XMP" use case.
|
|
*/
|
|
|
|
% Q1: List books published by Addison-Wesley after 1991, including their year and
|
|
% title.
|
|
|
|
xml_query( q1, element(bib, [], Books) ) :-
    % Patterns for the elements of interest; attributes and content are left
    % open so that xml_subterm/2 can bind them.
    TitleElement = element(title, _, _),
    PublisherElement = element(publisher, _, _),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element(book, [year=YearChars], [TitleElement]),
        (
            xml_subterm( Bibliography, element(book, BookAttributes, BookContent) ),
            % Keep only books whose publisher text is exactly this string.
            xml_subterm( BookContent, PublisherElement ),
            xml_subterm( PublisherElement, PublisherText ),
            text_value( PublisherText, "Addison-Wesley" ),
            % The year attribute is text; convert it before comparing.
            member( year=YearChars, BookAttributes ),
            number_codes( YearNumber, YearChars ),
            YearNumber > 1991,
            xml_subterm( BookContent, TitleElement )
        ),
        Books
    ).
|
|
|
|
% Q2: Create a flat list of all the title-author pairs, with each pair enclosed
|
|
% in a "result" element.
|
|
|
|
xml_query( q2, element(results, [], Results) ) :-
    input_document( 'bib.xml', Bibliography ),
    % Element patterns: only the names are fixed.
    BookElement = element(book, _, _),
    TitleElement = element(title, _, _),
    AuthorElement = element(author, _, _),
    findall(
        element(result, [], [TitleElement,AuthorElement]),
        (
            % One result per (title, author) combination within each book.
            xml_subterm( Bibliography, BookElement ),
            xml_subterm( BookElement, TitleElement ),
            xml_subterm( BookElement, AuthorElement )
        ),
        Results
    ).
|
|
|
|
% Q3: For each book in the bibliography, list the title and authors, grouped
|
|
% inside a "result" element.
|
|
|
|
xml_query( q3, element(results, [], Results) ) :-
    input_document( 'bib.xml', Bibliography ),
    % Element patterns: only the names are fixed.
    BookElement = element(book, _, _),
    TitleElement = element(title, _, _),
    AuthorElement = element(author, _, _),
    findall(
        element(result, [], [TitleElement|BookAuthors]),
        (
            xml_subterm( Bibliography, BookElement ),
            xml_subterm( BookElement, TitleElement ),
            % Collect every author of this book; the list may be empty.
            findall(
                AuthorElement,
                xml_subterm( BookElement, AuthorElement ),
                BookAuthors
            )
        ),
        Results
    ).
|
|
|
|
% Q4: For each author in the bibliography, list the author's name and the titles
|
|
% of all books by that author, grouped inside a "result" element.
|
|
|
|
xml_query( q4, element(results, [], Results) ) :-
    input_document( 'bib.xml', Bibliography ),
    % Element patterns: only the names are fixed.
    BookElement = element(book, _, _),
    TitleElement = element(title, _, _),
    AuthorElement = element(author, _, _),
    % sort/2 removes duplicate author elements, leaving one entry per author.
    findall( AuthorElement, xml_subterm(Bibliography, AuthorElement), AuthorBag ),
    sort( AuthorBag, Authors ),
    findall(
        element(result, [], [AuthorElement|Titles]),
        (
            member( AuthorElement, Authors ),
            % Titles of every book that contains this particular author.
            findall(
                TitleElement,
                (
                    xml_subterm( Bibliography, BookElement ),
                    xml_subterm( BookElement, AuthorElement ),
                    xml_subterm( BookElement, TitleElement )
                ),
                Titles
            )
        ),
        Results
    ).
|
|
|
|
% Q5: For each book found at both bn.com and amazon.com, list the title of the
|
|
% book and its price from each source.
|
|
|
|
xml_query( q5, element('books-with-prices', [], BooksWithPrices) ) :-
    input_document( 'bib.xml', Bibliography ),
    input_document( 'reviews.xml', Reviews ),
    % Element patterns: only the names are fixed.
    BookElement = element(book, _, _),
    EntryElement = element(entry, _, _),
    TitleElement = element(title, _, _),
    findall(
        element('book-with-prices', [], [
            TitleElement,
            element('price-bn', [], BNPrice),
            element('price-amazon', [], AmazonPrice)
        ]),
        (
            % Join the two documents on the title element: the same
            % TitleElement must occur in a book and in a review entry.
            xml_subterm( Bibliography, BookElement ),
            xml_subterm( BookElement, TitleElement ),
            xml_subterm( Reviews, EntryElement ),
            xml_subterm( EntryElement, TitleElement ),
            % Copy the content of each source's price element.
            xml_subterm( BookElement, element(price, _, BNPrice) ),
            xml_subterm( EntryElement, element(price, _, AmazonPrice) )
        ),
        BooksWithPrices
    ).
|
|
|
|
% Q6: For each book that has at least one author, list the title and first two
|
|
% authors, and an empty "et-al" element if the book has additional authors.
|
|
|
|
xml_query( q6, element(bib, [], Results) ) :-
    input_document( 'bib.xml', Bibliography ),
    % Element patterns: only the names are fixed.
    BookElement = element(book, _, _),
    TitleElement = element(title, _, _),
    AuthorElement = element(author, _, _),
    findall(
        element(book, [], [TitleElement,FirstAuthor|FurtherAuthors]),
        (
            xml_subterm( Bibliography, BookElement ),
            xml_subterm( BookElement, TitleElement ),
            % Matching against [FirstAuthor|Remaining] rejects books that
            % have no author at all.
            findall(
                AuthorElement,
                xml_subterm( BookElement, AuthorElement ),
                [FirstAuthor|Remaining]
            ),
            % Second author, plus an et-al marker if there are still more.
            other_authors( Remaining, FurtherAuthors )
        ),
        Results
    ).
|
|
|
|
% Q7: List the titles and years of all books published by Addison-Wesley after
|
|
% 1991, in alphabetic order.
|
|
|
|
xml_query( q7, element(bib, [], Books) ) :-
    % Element patterns: only the names are fixed.
    TitleElement = element(title, _, _),
    PublisherElement = element(publisher, _, _),
    input_document( 'bib.xml', Bibliography ),
    findall(
        % Key each output book by its title element so keysort/2 can order it.
        TitleElement-element(book, [year=YearChars], [TitleElement]),
        (
            xml_subterm( Bibliography, element(book, BookAttributes, BookContent) ),
            % Keep only books whose publisher text is exactly this string.
            xml_subterm( BookContent, PublisherElement ),
            xml_subterm( PublisherElement, PublisherText ),
            text_value( PublisherText, "Addison-Wesley" ),
            % The year attribute is text; convert it before comparing.
            member( year=YearChars, BookAttributes ),
            number_codes( YearNumber, YearChars ),
            YearNumber > 1991,
            xml_subterm( BookContent, TitleElement )
        ),
        KeyedBooks
    ),
    keysort( KeyedBooks, OrderedKeyedBooks ),
    % Drop the sort keys, keeping the book elements in title order.
    range( OrderedKeyedBooks, Books ).
|
|
|
|
% Q8: Find books in which the name of some element ends with the string "or" and
|
|
% the same element contains the string "Suciu" somewhere in its content. For each
|
|
% such book, return the title and the qualifying element.
|
|
|
|
xml_query( q8, element(bib, [], Books) ) :-
    element_name( Title, title ),
    element_name( Book, book ),
    % QualifyingName is left unbound: any element may qualify.
    element_name( QualifyingElement, QualifyingName ),
    % Suffix is the code list "Suciu" followed by an open tail, so that
    % append( _, Suffix, Text ) holds when Text contains "Suciu".
    append( "Suciu", _Back, Suffix ),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element(book, [], [Title,QualifyingElement]),
        (
            xml_subterm( Bibliography, Book ),
            xml_subterm( Book, QualifyingElement ),
            % The element's name must end in "or".
            atom_codes( QualifyingName, QNChars ),
            append( _QNPrefix, "or", QNChars ),
            % The content check is a pure test (double negation): without
            % it, each matching text item, and each occurrence of "Suciu"
            % within one, would yield a duplicate result for the same
            % element.
            \+ \+ (
                xml_subterm( QualifyingElement, TextItem ),
                text_value( TextItem, TextValue ),
                append( _Prefix, Suffix, TextValue )
            ),
            xml_subterm( Book, Title )
        ),
        Books
    ).
|
|
|
|
% Q9: In the document "books.xml", find all section or chapter titles that
|
|
% contain the word "XML", regardless of the level of nesting.
|
|
|
|
xml_query( q9, element(results, [], Titles) ) :-
    % NOTE(review): the pattern matches title elements by name only; it does
    % not check that the parent is a section or chapter - confirm this is
    % acceptable for the input document.
    element_name( Title, title ),
    % Suffix is the code list "XML" followed by an open tail, so that
    % append( _, Suffix, Text ) holds when Text contains "XML".
    append( "XML", _Back, Suffix ),
    input_document( 'books.xml', Books ),
    findall(
        Title,
        (
            xml_subterm( Books, Title ),
            % The content check is a pure test (double negation): without
            % it, a title mentioning "XML" more than once would be returned
            % once per occurrence.
            \+ \+ (
                xml_subterm( Title, TextItem ),
                text_value( TextItem, TextValue ),
                append( _Prefix, Suffix, TextValue )
            )
        ),
        Titles
    ).
|
|
|
|
% Q10: In the document "prices.xml", find the minimum price for each book, in the
|
|
% form of a "minprice" element with the book title as its title attribute.
|
|
|
|
xml_query( q10, element(results, [], MinPrices) ) :-
    % Element patterns: only the names are fixed.
    TitleElement = element(title, _, _),
    PriceElement = element(price, _, _),
    BookElement = element(book, _, _),
    input_document( 'prices.xml', Prices ),
    % sort/2 removes duplicate title elements, leaving one entry per title.
    findall( TitleElement, xml_subterm(Prices, TitleElement), TitleBag ),
    sort( TitleBag, TitleSet ),
    findall(
        element(minprice, [title=TitleString], [MinPrice]),
        (
            member( TitleElement, TitleSet ),
            % The title's text becomes the value of the title attribute.
            xml_subterm( TitleElement, TitleText ),
            text_value( TitleText, TitleString ),
            % Pair every price element offered for this title with its
            % numeric value, so the pairs can be compared by value.
            findall(
                Value-PriceElement,
                (
                    xml_subterm( Prices, BookElement ),
                    xml_subterm( BookElement, TitleElement ),
                    xml_subterm( BookElement, PriceElement ),
                    xml_subterm( PriceElement, PriceText ),
                    text_value( PriceText, PriceChars ),
                    number_codes( Value, PriceChars )
                ),
                ValuedPrices
            ),
            % Keep the price element belonging to the least pair.
            minimum( ValuedPrices, _LeastValue-MinPrice )
        ),
        MinPrices
    ).
|
|
|
|
% Q11: For each book with an author, return the book with its title and authors.
|
|
% For each book with an editor, return a reference with the book title and the
|
|
% editor's affiliation.
|
|
|
|
xml_query( q11, element(bib, [], Results) ) :-
    input_document( 'bib.xml', Bibliography ),
    % Element patterns: only the names are fixed.
    BookElement = element(book, _, _),
    TitleElement = element(title, _, _),
    AuthorElement = element(author, _, _),
    EditorElement = element(editor, _, _),
    AffiliationElement = element(affiliation, _, _),
    % Part 1: books that have at least one author, with all their authors.
    findall(
        element(book, [], [TitleElement,FirstAuthor|MoreAuthors]),
        (
            xml_subterm( Bibliography, BookElement ),
            xml_subterm( BookElement, TitleElement ),
            % Matching against a non-empty list rejects author-less books.
            findall(
                AuthorElement,
                xml_subterm( BookElement, AuthorElement ),
                [FirstAuthor|MoreAuthors]
            )
        ),
        Books
    ),
    % Part 2: a reference for each editor affiliation found in a book.
    findall(
        element(reference, [], [TitleElement,AffiliationElement]),
        (
            xml_subterm( Bibliography, BookElement ),
            xml_subterm( BookElement, TitleElement ),
            xml_subterm( BookElement, EditorElement ),
            xml_subterm( EditorElement, AffiliationElement )
        ),
        References
    ),
    % Books first, then references.
    append( Books, References, Results ).
|
|
|
|
% Q12: Find pairs of books that have different titles but the same set of authors
|
|
% (possibly in a different order).
|
|
|
|
xml_query( q12, element(bib, [], Pairs) ) :-
    element_name( Author, author ),
    element_name( Book1, book ),
    element_name( Book2, book ),
    element_name( Title1, title ),
    element_name( Title2, title ),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element('book-pair', [], [Title1,Title2]),
        (
            xml_subterm( Bibliography, Book1 ),
            % sort/2 turns the bag of authors into an ordered set, so the
            % comparison below ignores author order and repetition.
            findall( Author, xml_subterm(Book1, Author), AuthorBag1 ),
            sort( AuthorBag1, AuthorSet ),
            xml_subterm( Bibliography, Book2 ),
            % Standard-order comparison keeps one orientation of each pair
            % and excludes pairing a book with itself.
            Book2 @< Book1,
            findall( Author, xml_subterm(Book2, Author), AuthorBag2 ),
            % Succeeds only if Book2's author set equals Book1's.
            sort( AuthorBag2, AuthorSet ),
            xml_subterm( Book1, Title1 ),
            xml_subterm( Book2, Title2 ),
            % The query asks for books with different titles: two distinct
            % book elements sharing a title must not be reported.
            Title1 \== Title2
        ),
        Pairs
    ).
|
|
|
|
% Auxiliary Predicates
|
|
|
|
% other_authors( +Remaining, ?Listed ) when Remaining holds a book's authors
% after the first one and Listed is what is shown for them: nothing if there
% are none, otherwise the next author followed by whatever et_al/2 yields
% for the rest.
other_authors( [], [] ).
other_authors( [Second|Rest], [Second|Marker] ) :-
    et_al( Rest, Marker ).

% et_al( +Rest, ?Marker ) when Marker is [] for an empty Rest, and a list
% holding a single empty 'et-al' element for a non-empty Rest.
et_al( [], [] ).
et_al( [_|_], [EtAl] ) :-
    EtAl = element('et-al', [], []).
|
|
|
|
% text_value( ?Content, ?Text ) when Content is a list holding exactly one
% pcdata/1 or cdata/1 item whose argument is Text.
text_value( [Item], Text ) :-
    (   Item = pcdata(Text)
    ;   Item = cdata(Text)
    ).
|
|
|
|
% element_name( ?Element, ?Name ) when Element is an element/3 term whose
% name is Name; its attributes and content are unconstrained.
element_name( Element, Name ) :-
    Element = element(Name, _Attributes, _Content).
|
|
|
|
|
|
/* range( +Pairs, ?Range ) when Pairs is a list of Key-Datum pairs and Range
 * is the list of their data, in the same order, with the keys discarded.
 */
range( [], [] ).
range( [Pair|Pairs], [Datum|Data] ) :-
    Pair = _Key-Datum,
    range( Pairs, Data ).
|
|
|
|
/* minimum( +List, ?Min ) is true if Min is the least member of List in the
 * standard order of terms. It fails if List is empty.
 */
minimum( [First|Rest], Min ) :-
    minimum1( Rest, First, Min ).

% minimum1( +List, +Best0, ?Min ) when Min is the least of Best0 and the
% members of List; Best0 is the least term seen so far.
minimum1( [], Min, Min ).
minimum1( [Candidate|Rest], Best0, Min ) :-
    % Only a strictly smaller candidate replaces the running minimum.
    (   Candidate @< Best0
    ->  Best = Candidate
    ;   Best = Best0
    ),
    minimum1( Rest, Best, Min ).
|
|
|
|
/* input_document( +File, ?XML ) when XML is the "Document Value Model"
 * obtained by reading File and parsing its contents.
 */
input_document( File, XML ) :-
    % Make File the current input only while its chars are being read.
    see( File ),
    get_chars( Chars ),
    seen,
    % xml_parse/2 in its input mode: Chars is known, XML is constructed.
    xml_parse( Chars, XML ).
|
|
|
|
% Load the XML module.
|
|
|
|
:- use_module( library(xml) ).
|
|
|
|
|
|
% Load a small library of utilities.
|
|
|
|
:- ensure_loaded( misc ).
|
|
|
|
|