This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/packages/sgml/RDF/rdf_ntriples.pl
2009-03-13 19:39:06 +00:00

312 lines
5.6 KiB
Prolog

/* $Id$
Part of SWI-Prolog SGML/XML parser
Author: Jan Wielemaker
E-mail: jan@swi.psy.uva.nl
WWW: http://www.swi.psy.uva.nl/projects/SWI-Prolog/
Copying: LGPL-2. See the file COPYING or http://www.gnu.org
Copyright (C) 1990-2002 SWI, University of Amsterdam. All rights reserved.
*/
:- module(rdf_ntriples,
[ load_rdf_ntriples/2, % +File, -Triples
rdf_ntriple_part/4 % +Field, -Value, <DCG>
]).
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This module parses n-triple files as defined by the W3C RDF working in
http://www.w3.org/TR/rdf-testcases/#ntriples. This format is a
simplified version of the RDF N3 notation used in the *.nt files that
are used to describe the normative outcome of the RDF test-cases.
The returned list terms are of the form
rdf(Subject, Predicate, Object)
where
# Subject
is an atom or node(Id) for anonymous nodes
# Predicate
is an atom
# Object
is an atom, node(Id), literal(Atom) or xml(Atom)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
% load_rdf_ntriples(+Source, -Triples)
%
% Load a file or stream to a list of rdf(S,P,O) triples.
load_rdf_ntriples(File, Triples) :-
open_nt_file(File, In, Close),
call_cleanup(stream_to_triples(In, Triples), Close).
% open_nt_file(+Input, -Stream, -Close)
%
% Open Input, returning Stream and a goal to cleanup Stream if it
% was opened.
open_nt_file(stream(Stream), Stream, true) :- !.
open_nt_file(Stream, Stream, true) :-
is_stream(Stream), !.
open_nt_file(Spec, Stream, close(Stream)) :-
absolute_file_name(Spec,
[ access(read),
extensions([nt,''])
], Path),
open(Path, read, Stream).
% rdf_ntriple_part(+Type, -Value, <DCG>)
%
% Parse one of the fields of an ntriple. This is used for the
% SWI-Prolog Sesame (www.openrdf.org) implementation to realise
% /servlets/removeStatements. I do not think public use of this
% predicate should be stimulated.
rdf_ntriple_part(subject, Subject) -->
subject(Subject).
rdf_ntriple_part(predicate, Predicate) -->
predicate(Predicate).
rdf_ntriple_part(object, Object) -->
predicate(Object).
% stream_to_triples(+Stream, -ListOfTriples)
%
% Read Stream, returning all its triples
stream_to_triples(In, Triples) :-
read_line_to_codes(In, Line),
( Line == end_of_file
-> Triples = []
; phrase(line(Triples, Tail), Line),
stream_to_triples(In, Tail)
).
line(Triples, Tail) -->
wss,
( comment
-> {Triples = Tail}
; triple(Triple)
-> {Triples = [Triple|Tail]}
).
comment -->
"#", !,
skip_rest.
comment -->
end_of_input.
triple(rdf(Subject, Predicate, Object)) -->
subject(Subject), ws, wss,
predicate(Predicate), ws, wss,
object(Object), wss, ".", wss.
subject(Subject) -->
uniref(Subject), !.
subject(Subject) -->
node_id(Subject).
predicate(Predicate) -->
uniref(Predicate).
object(Object) -->
uniref(Object), !.
object(Object) -->
node_id(Object).
object(Object) -->
literal(Object).
uniref(URI) -->
"<",
escaped_uri_codes(Codes),
">", !,
{ atom_codes(URI, Codes)
}.
node_id(node(Id)) --> % anonymous nodes
"_:",
name_start(C0),
name_codes(Codes),
{ atom_codes(Id, [C0|Codes])
}.
literal(Literal) -->
lang_string(Literal), !.
literal(Literal) -->
xml_string(Literal).
% name_start(-Code)
% name_codes(-ListfCodes)
%
% Parse identifier names
name_start(C) -->
[C],
{ code_type(C, alpha)
}.
name_codes([C|T]) -->
[C],
{ code_type(C, alnum)
}, !,
name_codes(T).
name_codes([]) -->
[].
% escaped_uri_codes(-CodeList)
%
% Decode string holding %xx escaped characters.
escaped_uri_codes([]) -->
[].
escaped_uri_codes([C|T]) -->
"%", [D0,D1], !,
{ code_type(D0, xdigit(V0)),
code_type(D1, xdigit(V1)),
C is V0<<4 + V1
},
escaped_uri_codes(T).
escaped_uri_codes([C|T]) -->
"\\u", [D0,D1,D2,D3], !,
{ code_type(D0, xdigit(V0)),
code_type(D1, xdigit(V1)),
code_type(D2, xdigit(V2)),
code_type(D3, xdigit(V3)),
C is V0<<12 + V1<<8 + V2<<4 + V3
},
escaped_uri_codes(T).
escaped_uri_codes([C|T]) -->
[C],
escaped_uri_codes(T).
% lang_string()
%
% Process a language string
lang_string(String) -->
"\"",
string(Codes),
"\"", !,
{ atom_codes(Atom, Codes)
},
( langsep
-> language(Lang),
{ String = literal(lang(Lang, Atom))
}
; "^^"
-> uniref(Type),
{ String = literal(type(Type, Atom))
}
; { String = literal(Atom)
}
).
langsep -->
"-".
langsep -->
"@".
% xml_string(String)
%
% Handle xml"..."
xml_string(xml(String)) -->
"xml\"", % really no whitespace?
string(Codes),
"\"",
{ atom_codes(String, Codes)
}.
string([]) -->
[].
string([C0|T]) -->
string_char(C0),
string(T).
string_char(0'\\) -->
"\\\\".
string_char(0'") -->
"\\\"".
string_char(10) -->
"\\n".
string_char(13) -->
"\\r".
string_char(9) -->
"\\t".
string_char(C) -->
"\\u",
'4xdigits'(C).
string_char(C) -->
"\\u",
'4xdigits'(C0),
'4xdigits'(C1),
{ C is C0<<16 + C1
}.
string_char(C) -->
[C].
'4xdigits'(C) -->
[C0,C1,C2,C3],
{ code_type(C0, xdigit(V0)),
code_type(C1, xdigit(V1)),
code_type(C2, xdigit(V2)),
code_type(C3, xdigit(V3)),
C is V0<<12 + V1<<8 + V2<<4 + V3
}.
% language(-Lang)
%
% Return xml:lang language identifier.
language(Lang) -->
lang_code(C0),
lang_codes(Codes),
{ atom_codes(Lang, [C0|Codes])
}.
lang_code(C) -->
[C],
{ C \== 0'.,
\+ code_type(C, white)
}.
lang_codes([C|T]) -->
lang_code(C), !,
lang_codes(T).
lang_codes([]) -->
[].
/*******************************
* BASICS *
*******************************/
skip_rest(_,[]).
ws -->
[C],
{ code_type(C, white)
}.
end_of_input([], []).
wss -->
ws, !,
wss.
wss -->
[].