semweb and http compile now (but they don't work properly yet).
packages/semweb/rdf_turtle.pl (new file, 910 lines)

@@ -0,0 +1,910 @@
/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@cs.vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 2004-2009, University of Amsterdam

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

    As a special exception, if you link this library with other files,
    compiled with a Free Software compiler, to produce an executable, this
    library does not by itself cause the resulting executable to be covered
    by the GNU General Public License. This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

:- module(rdf_turtle,
          [ rdf_load_turtle/3,          % +Input, -Triples, +Options
            rdf_read_turtle/3,          % +Input, -Triples, +Options
            rdf_process_turtle/3        % +Input, :OnObject, +Options
          ]).
:- use_module(library(assoc)).
:- use_module(library(option)).
:- use_module(library('semweb/rdf_db')).
:- use_module(library(debug)).
:- use_module(library(uri)).
:- use_module(library(record)).
:- use_module(library(http/http_open)).
:- use_module(turtle_base).

:- meta_predicate
    rdf_process_turtle(+,2,+).

/** <module> Turtle: Terse RDF Triple Language

This module implements the Turtle language for representing the RDF
triple model as defined by Dave Beckett from the Institute for Learning
and Research Technology, University of Bristol, in the documents:

    * http://www.w3.org/TeamSubmission/turtle/
    * http://www.w3.org/TeamSubmission/2008/SUBM-turtle-20080114/#sec-conformance

This parser passes all tests, except for test-28.ttl (decimal number
serialization) and test-29.ttl (uri containing ...%&...).  It is unclear
to me whether these tests are correct.  Notably, it is unclear whether we
must do %-decoding.  Certainly, this is expected by various real-life
datasets that we came across.

This module acts as a plugin to rdf_load/2, for processing files with
one of the extensions =|.ttl|=, =|.n3|= or =|.nt|=.

@tbd Better error handling
*/

:- record ttl_state(base_uri,
                    resources:oneof([uri,iri])=uri,
                    prefix_map,
                    nodeid_map,
                    anon_prefix,
                    anon_count=0,
                    graph,
                    input,
                    line_no=0,
                    on_error:oneof([warning,error])=warning,
                    error_count=0).

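% Note on the record above: library(record) expands the ttl_state
% declaration into the access predicates used throughout this file,
% e.g. make_ttl_state/2 (see init_state/4 below), ttl_state_base_uri/2,
% set_prefix_map_of_ttl_state/2 and nb_set_line_no_of_ttl_state/2.
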
%%  rdf_read_turtle(+Input, -Triples, +Options)
%
%   Read a stream or file into a set of triples of the format
%
%       rdf(Subject, Predicate, Object)
%
%   The representation is consistent with the SWI-Prolog RDF/XML
%   and ntriples parsers.  Provided options are:
%
%       * base_uri(+BaseURI)
%       Initial base URI.  Defaults to file://<file> for loading
%       files.
%
%       * anon_prefix(+Prefix)
%       Blank nodes are generated as <Prefix>1, <Prefix>2, etc.
%       If Prefix is not an atom blank nodes are generated as
%       node(1), node(2), ...
%
%       * resources(URIorIRI)
%       Officially, Turtle resources are IRIs.  Quite a
%       few applications however send URIs.  By default we
%       do URI->IRI mapping because this rarely causes errors.
%       To force strictly conforming mode, pass =iri=.
%
%       * prefixes(-Pairs)
%       Return encountered prefix declarations as a
%       list of Alias-URI
%
%       * namespaces(-Pairs)
%       Same as prefixes(Pairs).  Compatibility to rdf_load/2.
%
%       * base_used(-Base)
%       Base URI used for processing the data.  Unified to
%       [] if there is no base-uri.
%
%       * on_error(+ErrorMode)
%       In =warning= (default), print the error and continue
%       parsing the remainder of the file.  If =error=, abort
%       with an exception on the first error encountered.
%
%       * error_count(-Count)
%       If on_error(warning) is active, this option can be
%       used to retrieve the number of generated errors.

rdf_read_turtle(In, Triples, Options) :-
    open_input(In, Stream, Close),
    init_state(In, Stream, Options, State),
    call_cleanup(phrase(turtle_file(State, Stream), Triples),
                 Close),
    post_options(State, Options).

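% For example, reading a Turtle file into a list of rdf/3 terms while
% collecting the prefix declarations and the error count might look as
% follows (the file name is illustrative only):
%
%   ==
%   ?- rdf_read_turtle('mydata.ttl', Triples,
%                      [ prefixes(Prefixes),
%                        error_count(Errors)
%                      ]).
%   ==
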
%%  rdf_load_turtle(+Input, -Triples, +Options)
%
%   @deprecated Use rdf_read_turtle/3

rdf_load_turtle(Input, Triples, Options) :-
    rdf_read_turtle(Input, Triples, Options).


%%  rdf_process_turtle(+Input, :OnObject, +Options) is det.
%
%   Process Turtle input from Input, calling OnObject with a list of
%   triples.  Options is the same as for rdf_load_turtle/3.
%
%   Errors encountered are sent to print_message/2, after which the
%   parser tries to recover and parse the remainder of the data.

rdf_process_turtle(In, OnObject, Options) :-
    open_input(In, Stream, Close),
    init_state(In, Stream, Options, State),
    call_cleanup(process_stream(State, Stream, OnObject),
                 Close),
    post_options(State, Options).

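% A sketch of a callback for rdf_process_turtle/3: OnObject is called as
% call(OnObject, Triples, Graph:LineNo) for every parsed statement (see
% process_stream/3 below).  The predicate name is illustrative only:
%
%   ==
%   count_triples(Triples, _Graph:_Line) :-
%       length(Triples, N),
%       format('Got ~D triples~n', [N]).
%
%   ?- rdf_process_turtle('mydata.ttl', count_triples, []).
%   ==
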
post_options(State, Options) :-
    prefix_option(State, Options),
    namespace_option(State, Options),
    base_option(State, Options),
    error_option(State, Options).

prefix_option(State, Options) :-
    (   option(prefixes(Pairs), Options)
    ->  ttl_state_prefix_map(State, Map),
        assoc_to_list(Map, Pairs)
    ;   true
    ).
namespace_option(State, Options) :-
    (   option(namespaces(Pairs), Options)
    ->  ttl_state_prefix_map(State, Map),
        assoc_to_list(Map, Pairs)
    ;   true
    ).
base_option(State, Options) :-
    (   option(base_used(Base), Options)
    ->  ttl_state_base_uri(State, Base)
    ;   true
    ).
error_option(State, Options) :-
    (   option(error_count(Count), Options)
    ->  ttl_state_error_count(State, Count)
    ;   true
    ).


process_stream(State, In, OnObject) :-
    read_turtle_tokens(In, Tokens, State),
    debug(turtle, 'Tokens: ~w~n', [Tokens]),
    ttl_state_line_no(State, LineNo),
    (   Tokens == end_of_file
    ->  true
    ;   catch(phrase(triples(State, Triples), Tokens), E, true)
    ->  (   var(E)
        ->  (   Triples == []
            ->  true
            ;   ttl_state_graph(State, DB),
                call(OnObject, Triples, DB:LineNo)
            )
        ;   print_message(error, E)
        ),
        process_stream(State, In, OnObject)
    ;   syntax_error_term(In, LineNo, cannot_parse, Error),
        step_error(State, Error),
        process_stream(State, In, OnObject)
    ).

%%  step_error(+State, +Error) is det.
%
%   Throw Error if =on_error= is =error=.  Otherwise print the error
%   and increment =error_count=.
%
%   @error syntax_error(Culprit).

step_error(State, Error) :-
    ttl_state_on_error(State, error), !,
    throw(Error).
step_error(State, Error) :-
    ttl_state_error_count(State, E0),
    succ(E0, E),
    nb_set_error_count_of_ttl_state(E, State),
    print_message(error, Error).


%%  open_input(+Input, -Stream, -Close) is det.
%
%   Open the given input.  Input is either a term stream(Stream), an
%   already open stream, an =|http://|= URL or a file name.  The
%   resulting stream is switched to UTF-8 encoding.
%
%   @param Close goal to undo the open action
%   @tbd Synchronize with input handling of rdf_db.pl.
%   @error existence_error, permission_error

open_input(stream(Stream), Stream, Close) :- !,
    stream_property(Stream, encoding(Old)),
    (   Old == utf8
    ->  Close = true
    ;   set_stream(Stream, encoding(utf8)),
        Close = set_stream(Stream, encoding(Old))
    ).
open_input(Stream, Stream, Close) :-
    is_stream(Stream), !,
    open_input(stream(Stream), Stream, Close).
open_input(URL, Stream, close(Stream)) :-
    sub_atom(URL, 0, _, _, 'http://'), !,
    http_open(URL, Stream, []),
    set_stream(Stream, encoding(utf8)).
open_input(File, Stream, close(Stream)) :-
    absolute_file_name(File, Path,
                       [ access(read),
                         extensions([ttl, ''])
                       ]),
    open(Path, read, Stream, [encoding(utf8)]).

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
The parser is a two-stage processor.  The first reads the raw file input
and generates a list of tokens, stripping comments and white space.  It
is defined to read a single statement up to its terminating '.'.  The
second stage is a traditional DCG parser generating the triples for the
statement.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

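% To illustrate the two stages (an informal sketch; the URI is just an
% example): the statement
%
%   ==
%   @prefix foaf: <http://xmlns.com/foaf/0.1/> .
%   ==
%
% is first tokenised by read_turtle_tokens/3 into
%
%   ==
%   ['@', name(prefix), name(foaf), :,
%    relative_uri('http://xmlns.com/foaf/0.1/')]
%   ==
%
% (the terminating '.' closes the statement and is not part of the list),
% after which the triples//2 rules below interpret the token list.
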
init_state(In, Stream, Options, State) :-
    (   option(base_uri(BaseURI), Options)
    ->  true
    ;   In = stream(_)
    ->  BaseURI = []
    ;   uri_is_global(In),
        \+ is_absolute_file_name(In)    % Avoid C:Path in Windows
    ->  uri_normalized(In, BaseURI)
    ;   uri_file_name(BaseURI, In)
    ),
    (   option(anon_prefix(Prefix), Options)
    ->  true
    ;   BaseURI == []
    ->  Prefix = '__bnode'
    ;   atom_concat('__', BaseURI, Prefix)
    ),
    option(db(DB), Options, BaseURI),
    option(on_error(OnError), Options, warning),
    option(resources(URIIRI), Options, uri),
    empty_assoc(Map),
    empty_assoc(NodeMap),
    make_ttl_state([ base_uri(BaseURI),
                     resources(URIIRI),
                     prefix_map(Map),
                     nodeid_map(NodeMap),
                     anon_prefix(Prefix),
                     graph(DB),
                     input(Stream),
                     on_error(OnError)
                   ], State).

turtle_file(State, In) -->
    { read_turtle_tokens(In, Tokens, State),
      debug(turtle, 'Tokens: ~w~n', [Tokens])
    },
    (   { Tokens == end_of_file }
    ->  []
    ;   { catch(phrase(triples(State, Triples), Tokens), E, true) }
    ->  (   { var(E) }
        ->  list(Triples),
            turtle_file(State, In)
        ;   { step_error(State, E) },
            turtle_file(State, In)
        )
    ;   { ttl_state_line_no(State, LineNo),
          syntax_error_term(In, LineNo, cannot_parse, Error),
          step_error(State, Error)
        },
        turtle_file(State, In)
    ).

list([]) --> [].
list([H|T]) --> [H], list(T).

triples(State, []) -->
    [ '@', name(prefix), name(Prefix), : ], !,
    iri(State, URI),
    { ttl_state_prefix_map(State, Map0),
      put_assoc(Prefix, Map0, URI, Map),
      set_prefix_map_of_ttl_state(Map, State)
    }.
triples(State, []) -->
    [ '@', name(prefix), ':' ], !,
    iri(State, URI),
    { set_base_uri_of_ttl_state(URI, State)
    }.
triples(State, []) -->
    [ '@', name(base) ], !,
    iri(State, URI),
    { set_base_uri_of_ttl_state(URI, State)
    }.
triples(State, Triples) -->
    subject(State, Subject, Triples, T),
    (   predicate_object_list(State, Subject, T, [])
    ->  (   eos
        ->  []
        ;   syntax_rule(State, expected(predicate_object_list))
        )
    ;   { Triples \== T }               % [ p o ; ... ] .
    ->  { T = [] }
    ).

eos([], []).

subject(State, Subject, T, T) -->
    resource(State, Subject), !.
subject(State, Subject, T0, T) -->
    blank(State, Subject, T0, T), !.
subject(State, _, _, _) -->
    syntax_rule(State, subject_expected).

predicate_object_list(State, Subject, Triples, Tail) -->
    verb(State, Predicate),
    object_list(State, Subject, Predicate, Triples, Tail0),
    (   [';']
    ->  opt_predicate_object_list(State, Subject, Tail0, Tail)
    ;   { Tail0 = Tail }
    ).

opt_predicate_object_list(State, Subject, Triples, Tail) -->
    predicate_object_list(State, Subject, Triples, Tail), !.
opt_predicate_object_list(_, _, Tail, Tail) -->
    [].

object_list(State, Subject, Predicate,
            [rdf(Subject, Predicate, Object)|T0], T) -->
    object(State, Object, T0, T1),
    (   [',']
    ->  object_list(State, Subject, Predicate, T1, T)
    ;   { T1 = T }
    ).

verb(_, P) -->
    [ name(a) ], !,
    { rdf_equal(rdf:type, P)
    }.
verb(State, P) -->
    resource(State, P).

object(State, Object, T, T) -->
    [ literal(Value) ], !,
    { mk_object(Value, State, Object)
    }.
object(_, literal(type(Type, N)), T, T) -->
    [ numeric(Tp, Codes) ], !,
    { numeric_url(Tp, Type),
      normalise_number(Tp, Codes, N)
    }.
object(State, Object, T, T) -->
    resource(State, Object), !.
object(State, Object, T0, T) -->
    blank(State, Object, T0, T), !.
object(_, Object, T, T) -->
    [ name(Bool) ],
    { boolean(Bool),
      Object = literal(type(BoolType, Bool)),
      rdf_equal(BoolType, xsd:boolean)
    }.
object(State, _, _, _) -->
    syntax_rule(State, expected_object).

%%  normalise_number(+Type, +Codes:list, -Literal:atom) is det.
%
%   Turtle normalisation of numbers.  Currently only implemented for
%   integers.  This ensures that 0001 is parsed as "1"^^xsd:integer.
%
%   Hmmm.  According to test-10.ttl, this must *not* be done, so for
%   now we disable all normalization.

%normalise_number(integer, Codes, N) :-
%   number_codes(I, Codes),
%   atom_number(N, I).
normalise_number(_, Codes, N) :-
    atom_codes(N, Codes).

term_expansion(numeric_url(I, Local),
               numeric_url(I, URI)) :-
    rdf_global_id(Local, URI).

numeric_url(integer, xsd:integer).
numeric_url(decimal, xsd:decimal).
numeric_url(double,  xsd:double).

boolean(true).
boolean(false).

resource(State, IRI) -->
    iri(State, IRI), !.
resource(State, IRI) -->
    [ :(Name) ], !,
    { ttl_state_base_uri(State, Base),
      atom_concat(Base, Name, URI),
      uri_iri(State, URI, IRI)
    }.
resource(State, IRI) -->
    [ name(Prefix), : ], !,
    { ttl_state_prefix_map(State, Map),
      get_assoc(Prefix, Map, IRI)
    }.
resource(State, IRI) -->
    [ Prefix:Name ], !,
    { ttl_state_prefix_map(State, Map),
      (   get_assoc(Prefix, Map, Base)
      ->  atom_concat(Base, Name, URI),
          uri_iri(State, URI, IRI)
      ;   throw(error(existence_error(prefix, Prefix), _))
      )
    }.
resource(State, BaseIRI) -->
    [ : ], !,
    { ttl_state_base_uri(State, BaseIRI)
    }.

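% For example, after "@prefix foaf: <http://xmlns.com/foaf/0.1/> ." the
% token foaf:name is resolved by the Prefix:Name clause above to the IRI
% <http://xmlns.com/foaf/0.1/name>, while an unknown prefix raises an
% existence_error.  (The foaf prefix is used for illustration only.)
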
uri_iri(State, URI, IRI) :-
    (   ttl_state_resources(State, uri)
    ->  uri_iri(URI, IRI)
    ;   IRI = URI
    ).

iri(State, IRI) -->
    [ relative_uri(Rel)
    ],
    { ttl_state_base_uri(State, Base),
      (   Rel == ''                     % must be in global_url?
      ->  IRI = Base
      ;   uri_normalized_iri(Rel, Base, IRI)
      )
    }.

blank(State, Resource, T, T) -->
    [ nodeId(NodeId) ], !,
    { ttl_state_nodeid_map(State, IdMap),
      (   get_assoc(NodeId, IdMap, Resource)
      ->  true
      ;   anonid(State, NodeId, Resource),
          put_assoc(NodeId, IdMap, Resource, NewIdMap),
          set_nodeid_map_of_ttl_state(NewIdMap, State)
      )
    }.
blank(State, Resource, T, T) -->
    [ '[', ']' ], !,
    { anonid(State, Resource)
    }.
blank(State, Resource, T0, T) -->
    [ '[' ], !,
    { anonid(State, Resource)
    },
    predicate_object_list(State, Resource, T0, T),
    [ ']' ].
blank(State, Resource, T0, T) -->
    [ '(' ],
    item_list(State, Resource, T0, T).

item_list(_State, Resource, T, T) -->
    [ ')' ], !,
    { rdf_equal(rdf:nil, Resource)
    }.
item_list(State, Resource, T0, T) -->
    { anonid(State, Resource) },
    object(State, Object, T0, T1),
    { rdf_equal(rdf:first, First),
      rdf_equal(rdf:rest, Rest),
      T1 = [ rdf(Resource, First, Object),
             rdf(Resource, Rest, Tail)
           | T2
           ]
    },
    item_list(State, Tail, T2, T).

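% So a collection such as ( "a" "b" ) in the object position is expanded
% into the usual rdf:first/rdf:rest list structure, roughly as below
% (blank node names depend on the anon_prefix option and are shown here
% for illustration only):
%
%   ==
%   rdf(__bnode1, rdf:first, literal(a))
%   rdf(__bnode1, rdf:rest,  __bnode2)
%   rdf(__bnode2, rdf:first, literal(b))
%   rdf(__bnode2, rdf:rest,  rdf:nil)
%   ==
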
anonid(State, Node) :-
    ttl_state_anon_prefix(State, AnonPrefix),
    ttl_state_anon_count(State, C0),
    Count is C0 + 1,
    set_anon_count_of_ttl_state(Count, State),
    (   atom(AnonPrefix)
    ->  atom_concat(AnonPrefix, Count, Node)
    ;   Node = node(Count)
    ).

anonid(State, _NodeId, Node) :-
    ttl_state_anon_prefix(State, AnonPrefix),
    atom(AnonPrefix), !,
    anonid(State, Node).
anonid(_State, NodeId, node(NodeId)).

mk_object(type(Prefix:Name, Value), State, literal(type(Type, Value))) :- !,
    ttl_state_prefix_map(State, Map),
    get_assoc(Prefix, Map, Base),
    atom_concat(Base, Name, Type).
mk_object(type(relative_uri(Rel), Value), State, literal(type(Type, Value))) :- !,
    ttl_state_base_uri(State, Base),
    (   Rel == ''                       % must be in global_url?
    ->  Type = Base
    ;   uri_normalized_iri(Rel, Base, Type)
    ).
mk_object(type(:(Name), Value), State, literal(type(Type, Value))) :- !,
    ttl_state_base_uri(State, Base),
    atom_concat(Base, Name, Type).
mk_object(Value, _State, literal(Value)).

syntax_rule(State, Error) -->
    error_tokens(7, Tokens),
    { ttl_state_input(State, Stream),
      stream_property(Stream, file_name(File)),
      ttl_state_line_no(State, LineNo),
      atomic_list_concat(Tokens, ' ', Before),
      format(string(Msg), '~w:~d (before "~w ...")',
             [File, LineNo, Before]),
      throw(error(syntax_error(Error),
                  context(_, Msg)))
    }.

%%  error_tokens(+Count, -Tokens) is det.
%
%   Return maximum Count tokens, converted back to turtle input
%   syntax.

error_tokens(N, [H|T]) -->
    { succ(N2, N) },
    error_token(H), !,
    error_tokens(N2, T).
error_tokens(_, []) --> [].

error_token(Name) -->
    [ name(Name) ], !.
error_token(Text) -->
    [ numeric(_, Codes) ], !,
    { atom_codes(Text, Codes) }.
error_token(Text) -->
    [ literal(Literal) ], !,
    { literal_text(Literal, Text) }.
error_token(Text) -->
    [ URIToken ],
    { uri_text(URIToken, Text) }, !.
error_token(Punct) -->
    [ Punct ],
    { atom(Punct) }, !.
error_token(Rest) -->
    [ H ],
    { term_to_atom(H, Rest) }.

literal_text(type(Type, Value), Text) :- !,
    uri_text(Type, TypeText),
    format(atom(Text), '"~w"^^~w', [Value, TypeText]).

uri_text(relative_uri(URI), Text) :-
    format(atom(Text), '<~w>', [URI]).
uri_text(:(Name), Text) :-
    format(atom(Text), ':~w', [Name]).


                 /*******************************
                 *           TOKENISER          *
                 *******************************/

%%  read_turtle_tokens(+In, -List, +State) is det.
%
%   Read the next Turtle statement as a list of tokens.  If
%   on_error(warning) is active, failure prints a message and
%   continues reading the next statements.
%
%   The line_no property of the state is set to the line at which
%   the statement starts.
%
%   @error syntax_error(Culprit)

read_turtle_tokens(In, List, State) :-
    ttl_state_on_error(State, error), !,
    line_count(In, LineNo),
    nb_set_line_no_of_ttl_state(LineNo, State),
    (   turtle_tokens(In, List)
    ->  true
    ;   syntax_error_term(In, LineNo, illegal_token, Error),
        throw(Error)
    ).
read_turtle_tokens(In, List, State) :-
    line_count(In, LineNo),
    nb_set_line_no_of_ttl_state(LineNo, State),
    (   catch(turtle_tokens(In, List), Error, true)
    ->  (   var(Error)
        ->  true
        ;   print_message(error, Error),
            skip_statement(In),
            read_turtle_tokens(In, List, State)
        )
    ;   syntax_error_term(In, LineNo, illegal_token, Error),
        print_message(error, Error),
        skip_statement(In),
        read_turtle_tokens(In, List, State)
    ).

%%  skip_statement(+In)
%
%   Skip to the end of the statement.

skip_statement(In) :-
    get_code(In, C0),
    skip_statement(C0, In).

skip_statement(-1, _) :- !.
skip_statement(0'., In) :-
    get_code(In, C),
    (   turtle_ws(C)
    ->  !
    ;   skip_statement(C, In)
    ).
skip_statement(_, In) :-
    get_code(In, C),
    skip_statement(C, In).

%%  turtle_tokens(+In, -List)
%
%   Read a statement from a turtle file, returning the contents as a
%   list of tokens.

turtle_tokens(In, List) :-
    get_code(In, C0),
    turtle_token(C0, In, C1, Tok1),
    (   Tok1 == end_of_file
    ->  List = end_of_file
    ;   List = [Tok1|Tokens],
        turtle_tokens(C1, In, Tokens)
    ).

turtle_tokens(C0, In, List) :-
    (   turtle_token(C0, In, C1, H)
    ->  debug(turtle(token), 'Token: ~q', [H])
    ;   syntax_error(In, -1, illegal_token)
    ),
    (   H == '.'
    ->  List = []
    ;   H == end_of_file
    ->  syntax_error(In, -1, unexpected_end_of_input)
    ;   List = [H|T],
        turtle_tokens(C1, In, T)
    ).

turtle_token(-1, _, -1, end_of_file) :- !.
turtle_token(0'., _, end, '.') :- !.    % Turtle does not demand a space here!
turtle_token(0'#, In, C, Token) :- !,
    get_code(In, C1),
    skip_line(C1, In, C2),
    turtle_token(C2, In, C, Token).
turtle_token(WS, In, C, Token) :-
    turtle_ws(WS), !,
    get_code(In, C1),
    turtle_token(C1, In, C, Token).
turtle_token(C0, In, C, Number) :-
    between(0'0, 0'9, C0), !,
    turtle_number(C0, In, C, Number).
turtle_token(0'-, In, C, Number) :- !,
    turtle_number(0'-, In, C, Number).
turtle_token(0'+, In, C, Number) :- !,
    turtle_number(0'+, In, C, Number).
turtle_token(0'", In, C, Literal) :- !,
    turtle_read_string(0'", In, C1, Atom),
    (   C1 == 0'@
    ->  get_code(In, C2),
        language(C2, In, C, LangCodes),
        atom_codes(LangId, LangCodes),
        Literal = literal(lang(LangId, Atom))
    ;   C1 == 0'^,
        peek_code(In, 0'^)
    ->  get_code(In, 0'^),
        get_code(In, C2),
        resource_token(C2, In, C, Type),
        Literal = literal(type(Type, Atom))
    ;   C = C1,
        Literal = literal(Atom)
    ).
turtle_token(0'_, In, C, nodeId(NodeID)) :-
    peek_code(In, 0':), !,
    get_code(In, _),
    get_code(In, C1),
    turtle_read_name(C1, In, C, NodeID).
turtle_token(0'<, In, C, URI) :- !,
    resource_token(0'<, In, C, URI).
turtle_token(0':, In, C, URI) :- !,
    resource_token(0':, In, C, URI).
turtle_token(C0, In, C, Token) :-
    turtle_read_name(C0, In, C1, Name), !,
    (   C1 == 0':,
        \+ sub_atom(Name, 0, _, _, '_'),
        peek_code(In, C2),
        turtle_name_start_char(C2)
    ->  get_code(In, C2),
        turtle_read_name(C2, In, C, Name2),
        Token = (Name:Name2)
    ;   Token = name(Name),
        C = C1
    ).
turtle_token(Punct, In, C, P) :-
    punctuation(Punct, P), !,
    get_code(In, C).

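% As an informal illustration of the string clause above (values are
% examples only; the exact atom comes from turtle_read_string/4 in
% turtle_base): "hello"@en becomes literal(lang(en, hello)), while
% "1"^^xsd:integer becomes literal(type(xsd:integer, '1')), where the
% type is the token produced by resource_token/4.
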
%%  turtle_number(+Char0, +In, -CharNext, -Value)
%
%   Value is a term numeric(Type, Codes), where Type is one of
%   integer, decimal or double and Codes is the list of character
%   codes read.

turtle_number(0'-, In, CN, numeric(T, [0'-|Codes])) :- !,
    get_code(In, C0),
    turtle_number_nn(C0, In, CN, numeric(T, Codes)).
turtle_number(0'+, In, CN, numeric(T, [0'+|Codes])) :- !,
    get_code(In, C0),
    turtle_number_nn(C0, In, CN, numeric(T, Codes)).
turtle_number(C0, In, CN, Value) :-
    turtle_number_nn(C0, In, CN, Value).

turtle_number_nn(C, In, CN, numeric(Type, Codes)) :-
    turtle_integer_codes(C, In, CN0, Codes, T0),        % [0-9]+
    (   CN0 == 0'.
    ->  T0 = [CN0|T1],
        get_code(In, C1),
        turtle_integer_codes(C1, In, CN1, T1, T2),      % [0-9]+.[0-9]+
        (   exponent(CN1, In, CN, T2)
        ->  Type = double
        ;   CN = CN1,
            T2 = [],
            Type = decimal
        )
    ;   exponent(CN0, In, CN, T0)
    ->  Type = double
    ;   T0 = [],
        CN = CN0,
        Type = integer
    ).

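% For example (illustrative input): the text 42 tokenises as
% numeric(integer, "42"), 3.14 as numeric(decimal, "3.14") and 1.0e6 as
% numeric(double, "1.0e6"); the object//4 rule above then maps these to
% literal(type(xsd:integer, ...)) etc. via numeric_url/2.
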
turtle_integer_codes(C0, In, CN, [C0|T0], T) :-
    between(0'0, 0'9, C0), !,
    get_code(In, C1),
    turtle_integer_codes(C1, In, CN, T0, T).
turtle_integer_codes(CN, _, CN, T, T).

exponent(C0, In, CN, [C0|T0]) :-
    e(C0), !,
    get_code(In, C1),
    optional_sign(C1, In, CN0, T0, T1),
    turtle_integer_codes(CN0, In, CN, T1, []).

optional_sign(C0, In, CN, [C0|T], T) :-
    sign(C0), !,
    get_code(In, CN).
optional_sign(CN, _, CN, T, T).

e(0'e).
e(0'E).

sign(0'-).
sign(0'+). %'


% language: [a-z]+ ('-' [a-z0-9]+ )*
language(C0, In, C, [C0|Codes]) :-
    code_type(C0, lower),
    get_code(In, C1),
    lwr_word(C1, In, C2, Codes, Tail),
    sub_langs(C2, In, C, Tail, []), !.
language(_, In, _, _) :-
    line_count(In, LineNo),
    syntax_error(In, LineNo, language_specifier).

lwr_word(C0, In, C, [C0|T0], T) :-
    code_type(C0, lower), !,
    get_code(In, C1),
    lwr_word(C1, In, C, T0, T).
lwr_word(C, _, C, T, T).

sub_langs(0'-, In, C, [0'-, C1|Codes], T) :- !,
    get_code(In, C1),
    lwrdig(C1), !,
    get_code(In, C2),
    lwrdigs(C2, In, C3, Codes, Tail),
    sub_langs(C3, In, C, Tail, T).
sub_langs(C, _, C, T, T).

lwrdig(C) :-
    code_type(C, lower), !.
lwrdig(C) :-
    code_type(C, digit).

lwrdigs(C0, In, C, [C0|T0], T) :-
    lwrdig(C0), !,
    get_code(In, C1),
    lwr_word(C1, In, C, T0, T).
lwrdigs(C, _, C, T, T).

% resource_token
resource_token(0'<, In, C, relative_uri(URI)) :- !,
    turtle_read_relative_uri(0'<, In, C, URI).
resource_token(0':, In, C, Token) :- !,
    get_code(In, C0),
    (   turtle_read_name(C0, In, C, Name)
    ->  Token = :(Name)
    ;   Token = :,
        C = C0
    ).
resource_token(C0, In, C, Prefix:Name) :-
    turtle_read_name(C0, In, C1, Prefix),
    \+ sub_atom(Prefix, 0, _, _, '_'), !,
    C1 == 0':,
    get_code(In, C2),
    turtle_read_name(C2, In, C, Name).


punctuation(0'(, '(').
punctuation(0'), ')').
punctuation(0'[, '[').
punctuation(0'], ']').
punctuation(0',, ',').
punctuation(0'@, '@').
punctuation(0':, ':').
punctuation(0';, ';').

% comment
skip_line(0xA, In, C) :- !,
    get_code(In, C).
skip_line(0xD, In, C) :- !,
    get_code(In, C).
skip_line(-1, _, -1) :- !.
skip_line(_, In, C) :-
    get_code(In, C1),
    skip_line(C1, In, C).

% ws
turtle_ws(0x9).
turtle_ws(0xA).
turtle_ws(0xD).
turtle_ws(0x20).

syntax_error(Stream, Line, Which) :-
    syntax_error_term(Stream, Line, Which, Error),
    throw(Error).

syntax_error_term(Stream, -1, Which, Error) :- !,
    stream_property(Stream, file_name(File)),
    line_count(Stream, LineNo),
    line_position(Stream, LinePos),
    character_count(Stream, CharIndex),
    Error = error(syntax_error(Which),
                  file(File, LineNo, LinePos, CharIndex)).
syntax_error_term(Stream, LineNo, Which, Error) :-
    stream_property(Stream, file_name(File)),
    Error = error(syntax_error(Which),
                  file(File, LineNo, -1, -1)).


                 /*******************************
                 *          RDF-DB HOOK         *
                 *******************************/

:- multifile
    rdf_db:rdf_load_stream/3,
    rdf_db:rdf_file_type/2.

rdf_db:rdf_load_stream(turtle, Stream, _Module:Options) :-
    rdf_db:graph(Options, Id),
    rdf_transaction(rdf_process_turtle(Stream, assert_triples, Options),
                    parse(Id)).

assert_triples([], _).
assert_triples([rdf(S,P,O)|T], Location) :-
    rdf_assert(S, P, O, Location),
    assert_triples(T, Location).

rdf_db:rdf_file_type(ttl, turtle).
rdf_db:rdf_file_type(n3,  turtle).      % not really, but good enough
rdf_db:rdf_file_type(nt,  turtle).      % not really, but good enough
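
% Through the hooks above, rdf_load/2 dispatches files with a .ttl, .n3
% or .nt extension to this parser, so (file name illustrative only)
%
%   ==
%   ?- rdf_load('mydata.ttl', []).
%   ==
%
% ends up in rdf_db:rdf_load_stream/3 and hence rdf_process_turtle/3.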