/*  $Id$

    Part of SWI-Prolog SGML/XML parser

    Author:  Jan Wielemaker
    E-mail:  jan@swi.psy.uva.nl
    WWW:     http://www.swi.psy.uva.nl/projects/SWI-Prolog/
    Copying: LGPL-2.  See the file COPYING or http://www.gnu.org

    Copyright (C) 1990-2002 SWI, University of Amsterdam. All rights reserved.
*/

:- module(rdf_ntriples,
          [ load_rdf_ntriples/2,        % +File, -Triples
            rdf_ntriple_part/4          % +Field, -Value, <DCG>
          ]).

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This module parses n-triple files as  defined by the W3C RDF working group
at http://www.w3.org/TR/rdf-testcases/#ntriples. This format is a simplified
version of the RDF N3 notation used in the *.nt files that are used to
describe the normative outcome of the RDF test-cases.

The returned list terms are of the form

        rdf(Subject, Predicate, Object)

where

        # Subject
        is an atom or node(Id) for anonymous nodes

        # Predicate
        is an atom

        # Object
        is an atom, node(Id), literal(Atom), literal(lang(Lang, Atom)),
        literal(type(Type, Atom)) or xml(Atom)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

%       load_rdf_ntriples(+Source, -Triples)
%
%       Load a file or stream to a list of rdf(S,P,O) triples.  Source
%       is anything accepted by open_nt_file/3.  The cleanup goal runs
%       whether parsing succeeds, fails or raises, so a stream opened
%       here is always closed again.

load_rdf_ntriples(File, Triples) :-
    open_nt_file(File, In, Close),
    call_cleanup(stream_to_triples(In, Triples), Close).

%       open_nt_file(+Input, -Stream, -Close)
%
%       Open Input, returning Stream and a goal to cleanup Stream if it
%       was opened.  Input is stream(S), a stream handle, or a file
%       specification.  For streams supplied by the caller Close is
%       `true', so they are left open.  File specifications are resolved
%       for read access, trying the extension `nt' before no extension.

open_nt_file(stream(Stream), Stream, true) :- !.
open_nt_file(Stream, Stream, true) :-
    is_stream(Stream),
    !.
open_nt_file(Spec, Stream, close(Stream)) :-
    absolute_file_name(Spec,
                       [ access(read),
                         extensions([nt,''])
                       ], Path),
    open(Path, read, Stream).

%       rdf_ntriple_part(+Type, -Value, <DCG>)
%
%       Parse one of the fields of an ntriple.  Type is one of
%       `subject', `predicate' or `object' and selects the grammar rule
%       used to parse Value.  This is used for the SWI-Prolog Sesame
%       (www.openrdf.org) implementation to realise
%       /servlets/removeStatements.  I do not think public use of this
%       predicate should be stimulated.

rdf_ntriple_part(subject, Subject) -->
    subject(Subject).
rdf_ntriple_part(predicate, Predicate) -->
    predicate(Predicate).
rdf_ntriple_part(object, Object) -->
    object(Object).             % object//1, so nodes and literals parse too

%       stream_to_triples(+Stream, -ListOfTriples)
%
%       Read Stream line by line, returning all its triples.  Each line
%       contributes zero or one triple through the open tail that is
%       handed to the recursive call.  Fails if a line cannot be parsed
%       by line//2.

stream_to_triples(In, Triples) :-
    read_line_to_codes(In, Line),
    (   Line == end_of_file
    ->  Triples = []
    ;   phrase(line(Triples, Tail), Line),
        stream_to_triples(In, Tail)
    ).

%       line(-Triples, ?Tail, <DCG>)
%
%       Parse a single input line.  Comment and blank lines unify
%       Triples with Tail; a triple line makes Triples = [Triple|Tail].

line(Triples, Tail) -->
    wss,
    (   comment
    ->  {Triples = Tail}
    ;   triple(Triple)
    ->  {Triples = [Triple|Tail]}
    ).

%       comment(<DCG>)
%
%       Matches the remainder of a line that starts with `#', or the
%       end of the line (so a blank line counts as a comment).

comment -->
    "#",
    !,
    skip_rest.
comment -->
    end_of_input.

%       triple(-Triple, <DCG>)
%
%       Parse `subject predicate object .'.  The fields must be
%       separated by at least one white-space character (ws followed by
%       wss); white space around the terminating dot is optional.

triple(rdf(Subject, Predicate, Object)) -->
    subject(Subject),
    ws, wss,
    predicate(Predicate),
    ws, wss,
    object(Object),
    wss,
    ".",
    wss.

%       subject(-Subject, <DCG>)
%       predicate(-Predicate, <DCG>)
%       object(-Object, <DCG>)
%
%       Parse the individual fields.  A subject is a URI reference or
%       an anonymous node, a predicate is always a URI reference and an
%       object is a URI reference, an anonymous node or a literal.

subject(Subject) -->
    uniref(Subject),
    !.
subject(Subject) -->
    node_id(Subject).

predicate(Predicate) -->
    uniref(Predicate).

object(Object) -->
    uniref(Object),
    !.
object(Object) -->
    node_id(Object).
object(Object) -->
    literal(Object).

%       uniref(-URI, <DCG>)
%
%       Parse `<...>' into an atom, decoding escapes in the URI text.

uniref(URI) -->
    "<",
    escaped_uri_codes(Codes),
    ">",
    !,
    { atom_codes(URI, Codes)
    }.

%       node_id(-Node, <DCG>)
%
%       Parse `_:name' into node(name).

node_id(node(Id)) -->                   % anonymous nodes
    "_:",
    name_start(C0),
    name_codes(Codes),
    { atom_codes(Id, [C0|Codes])
    }.

%       literal(-Literal, <DCG>)
%
%       Parse a (possibly language- or type-qualified) quoted string or
%       an xml"..." string.

literal(Literal) -->
    lang_string(Literal),
    !.
literal(Literal) -->
    xml_string(Literal).


%       name_start(-Code)
%       name_codes(-ListOfCodes)
%
%       Parse identifier names: one alphabetic code followed by the
%       longest possible run of alphanumeric codes.

name_start(C) -->
    [C],
    { code_type(C, alpha)
    }.

name_codes([C|T]) -->
    [C],
    { code_type(C, alnum)
    },
    !,
    name_codes(T).
name_codes([]) -->
    [].


%       escaped_uri_codes(-CodeList)
%
%       Decode string holding %xx escaped characters.  \uXXXX escapes
%       are decoded as well; any other code is copied unchanged.  The
%       empty match is tried first, so the literal that follows in the
%       caller (the closing `>' in uniref//1) determines where the
%       sequence ends.

escaped_uri_codes([]) -->
    [].
escaped_uri_codes([C|T]) -->
    "%",
    [D0,D1],
    !,
    { code_type(D0, xdigit(V0)),
      code_type(D1, xdigit(V1)),
      C is V0<<4 + V1
    },
    escaped_uri_codes(T).
escaped_uri_codes([C|T]) -->
    "\\u",
    [D0,D1,D2,D3],
    !,
    { code_type(D0, xdigit(V0)),
      code_type(D1, xdigit(V1)),
      code_type(D2, xdigit(V2)),
      code_type(D3, xdigit(V3)),
      C is V0<<12 + V1<<8 + V2<<4 + V3
    },
    escaped_uri_codes(T).
escaped_uri_codes([C|T]) -->
    [C],
    escaped_uri_codes(T).

%       lang_string(-Literal, <DCG>)
%
%       Process a language string: a double-quoted string optionally
%       followed by a language tag or a `^^<type>' suffix.  Literal is
%       one of
%
%           literal(Atom)               plain string
%           literal(lang(Lang, Atom))   string followed by `-' or `@' Lang
%           literal(type(Type, Atom))   string followed by ^^<Type>

lang_string(String) -->
    "\"",
    string(Codes),
    "\"",
    !,
    { atom_codes(Atom, Codes)
    },
    (   langsep
    ->  language(Lang),
        { String = literal(lang(Lang, Atom))
        }
    ;   "^^"
    ->  uniref(Type),
        { String = literal(type(Type, Atom))
        }
    ;   { String = literal(Atom)
        }
    ).

%       langsep(<DCG>)
%
%       Separator between a string and its language tag.  Both `-' and
%       `@' are accepted.

langsep -->
    "-".
langsep -->
    "@".

%       xml_string(String)
%
%       Handle xml"...", yielding xml(Atom).

xml_string(xml(String)) -->
    "xml\"",                    % really no whitespace?
    string(Codes),
    "\"",
    { atom_codes(String, Codes)
    }.

%       string(-Codes, <DCG>)
%
%       Parse string content with escapes decoded.  The empty match is
%       tried first and longer matches are produced on backtracking, so
%       the closing quote demanded by the caller ends the string.

string([]) -->
    [].
string([C0|T]) -->
    string_char(C0),
    string(T).

%       string_char(-Code, <DCG>)
%
%       Parse one (possibly escaped) character.  Recognised escapes are
%       \\, \", \n, \r, \t, \uXXXX (4 hex digits) and \UXXXXXXXX (8 hex
%       digits).  Any other code is taken literally.

string_char(0'\\) -->
    "\\\\".
string_char(0'") -->
    "\\\"".
string_char(10) -->
    "\\n".
string_char(13) -->
    "\\r".
string_char(9) -->
    "\\t".
string_char(C) -->
    "\\u",
    '4xdigits'(C).
string_char(C) -->
    "\\U",                      % upper case U introduces the 8-digit form
    '4xdigits'(C0),
    '4xdigits'(C1),
    { C is C0<<16 + C1
    }.
string_char(C) -->
    [C].

%       '4xdigits'(-Value, <DCG>)
%
%       Parse exactly four hexadecimal digits into their integer value.

'4xdigits'(C) -->
    [C0,C1,C2,C3],
    { code_type(C0, xdigit(V0)),
      code_type(C1, xdigit(V1)),
      code_type(C2, xdigit(V2)),
      code_type(C3, xdigit(V3)),

      C is V0<<12 + V1<<8 + V2<<4 + V3
    }.

%       language(-Lang)
%
%       Return xml:lang language identifier.  The identifier is the
%       longest non-empty run of codes that are neither white space
%       nor `.'.

language(Lang) -->
    lang_code(C0),
    lang_codes(Codes),
    { atom_codes(Lang, [C0|Codes])
    }.

lang_code(C) -->
    [C],
    { C \== 0'.,
      \+ code_type(C, white)
    }.

lang_codes([C|T]) -->
    lang_code(C),
    !,
    lang_codes(T).
lang_codes([]) -->
    [].


                 /*******************************
                 *             BASICS           *
                 *******************************/

%       skip_rest(<DCG>)
%
%       Consume all remaining input, whatever it is.

skip_rest(_,[]).

%       ws(<DCG>)
%
%       Match exactly one white-space code.

ws -->
    [C],
    { code_type(C, white)
    }.

%       end_of_input(<DCG>)
%
%       Succeed only if no input remains.

end_of_input([], []).

%       wss(<DCG>)
%
%       Skip zero or more white-space codes (greedy, no backtracking).

wss -->
    ws,
    !,
    wss.
wss -->
    [].