911 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
			
		
		
	
	
			911 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
/*  $Id$
 | 
						|
 | 
						|
    Part of SWI-Prolog
 | 
						|
 | 
						|
    Author:        Jan Wielemaker
 | 
						|
    E-mail:        J.Wielemaker@cs.vu.nl
 | 
						|
    WWW:           http://www.swi-prolog.org
 | 
						|
    Copyright (C): 2004-2009, University of Amsterdam
 | 
						|
 | 
						|
    This program is free software; you can redistribute it and/or
 | 
						|
    modify it under the terms of the GNU General Public License
 | 
						|
    as published by the Free Software Foundation; either version 2
 | 
						|
    of the License, or (at your option) any later version.
 | 
						|
 | 
						|
    This program is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
    GNU General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU Lesser General Public
 | 
						|
    License along with this library; if not, write to the Free Software
 | 
						|
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
 | 
						|
    As a special exception, if you link this library with other files,
 | 
						|
    compiled with a Free Software compiler, to produce an executable, this
 | 
						|
    library does not by itself cause the resulting executable to be covered
 | 
						|
    by the GNU General Public License. This exception does not however
 | 
						|
    invalidate any other reasons why the executable file might be covered by
 | 
						|
    the GNU General Public License.
 | 
						|
*/
 | 
						|
 | 
						|
:- module(rdf_turtle,
 | 
						|
	  [ rdf_load_turtle/3,		% +Input, -Triples, +Options
 | 
						|
	    rdf_read_turtle/3,		% +Input, -Triples, +Options
 | 
						|
	    rdf_process_turtle/3	% +Input, :OnObject, +Options
 | 
						|
	  ]).
 | 
						|
:- use_module(library(assoc)).
 | 
						|
:- use_module(library(option)).
 | 
						|
:- use_module(library('semweb/rdf_db')).
 | 
						|
:- use_module(library(debug)).
 | 
						|
:- use_module(library(uri)).
 | 
						|
:- use_module(library(record)).
 | 
						|
:- use_module(library(http/http_open)).
 | 
						|
:- use_module(turtle_base).
 | 
						|
 | 
						|
:- meta_predicate
 | 
						|
	rdf_process_turtle(+,2,+).
 | 
						|
 | 
						|
/** <module> Turtle: Terse RDF Triple Language
 | 
						|
 | 
						|
This module implements the Turtle  language   for  representing  the RDF
 | 
						|
triple model as defined by Dave Beckett  from the Institute for Learning
 | 
						|
and Research Technology University of Bristol in the document:
 | 
						|
 | 
						|
  * http://www.w3.org/TeamSubmission/turtle/
 | 
						|
  * http://www.w3.org/TeamSubmission/2008/SUBM-turtle-20080114/#sec-conformance
 | 
						|
 | 
						|
This parser passes all tests,  except   for  test-28.ttl  (decial number
 | 
						|
serialization) and test-29.ttl (uri containing  ...%&...). It is unclear
 | 
						|
to me whether these tests are correct. Notably, it is unclear whether we
 | 
						|
must do %-decoding. Certainly, this  is   expected  by various real-life
 | 
						|
datasets that we came accross with.
 | 
						|
 | 
						|
This module acts as a plugin to   rdf_load/2,  for processing files with
 | 
						|
one of the extensions =|.ttl|=, =|.n3|= or =|.nt|=.
 | 
						|
 | 
						|
@tbd Better error handling
 | 
						|
*/
 | 
						|
 | 
						|
:- record ttl_state(base_uri,
 | 
						|
		    resources:oneof([uri,iri])=uri,
 | 
						|
		    prefix_map,
 | 
						|
		    nodeid_map,
 | 
						|
		    anon_prefix,
 | 
						|
		    anon_count=0,
 | 
						|
		    graph,
 | 
						|
		    input,
 | 
						|
		    line_no=0,
 | 
						|
		    on_error:oneof([warning,error])=warning,
 | 
						|
		    error_count=0).
 | 
						|
 | 
						|
%%	rdf_read_turtle(+Input, -Triples, +Options)
 | 
						|
%
 | 
						|
%	Read a stream or file into a set of triples of the format
 | 
						|
%
 | 
						|
%		rdf(Subject, Predicate, Object)
 | 
						|
%
 | 
						|
%	The representation is consistent with the SWI-Prolog RDF/XML
 | 
						|
%	and ntriples parsers.  Provided options are:
 | 
						|
%
 | 
						|
%		* base_uri(+BaseURI)
 | 
						|
%		Initial base URI.  Defaults to file://<file> for loading
 | 
						|
%		files.
 | 
						|
%
 | 
						|
%		* anon_prefix(+Prefix)
 | 
						|
%		Blank nodes are generated as <Prefix>1, <Prefix>2, etc.
 | 
						|
%		If Prefix is not an atom blank nodes are generated as
 | 
						|
%		node(1), node(2), ...
 | 
						|
%
 | 
						|
%		* resources(URIorIRI)
 | 
						|
%		Officially, Turtle resources are IRIs.  Quite a
 | 
						|
%		few applications however send URIs.  By default we
 | 
						|
%		do URI->IRI mapping because this rarely causes errors.
 | 
						|
%		To force strictly conforming mode, pass =iri=.
 | 
						|
%
 | 
						|
%		* prefixes(-Pairs)
 | 
						|
%		Return encountered prefix declarations as a
 | 
						|
%		list of Alias-URI
 | 
						|
%
 | 
						|
%		* namespaces(-Pairs)
 | 
						|
%		Same as prefixes(Pairs).  Compatibility to rdf_load/2.
 | 
						|
%
 | 
						|
%		* base_used(-Base)
 | 
						|
%		Base URI used for processing the data.  Unified to
 | 
						|
%		[] if there is no base-uri.
 | 
						|
%
 | 
						|
%		* on_error(+ErrorMode)
 | 
						|
%		In =warning= (default), print the error and continue
 | 
						|
%		parsing the remainder of the file.  If =error=, abort
 | 
						|
%		with an exception on the first error encountered.
 | 
						|
%
 | 
						|
%		* error_count(-Count)
 | 
						|
%		If on_error(warning) is active, this option cane be
 | 
						|
%		used to retrieve the number of generated errors.
 | 
						|
 | 
						|
rdf_read_turtle(In, Triples, Options) :-
 | 
						|
	open_input(In, Stream, Close),
 | 
						|
	init_state(In, Stream, Options, State),
 | 
						|
	call_cleanup(phrase(turtle_file(State, Stream), Triples),
 | 
						|
		     Close),
 | 
						|
	post_options(State, Options).
 | 
						|
 | 
						|
 | 
						|
%%	rdf_load_turtle(+Input, -Triples, +Options)
 | 
						|
%
 | 
						|
%	@deprecated Use rdf_read_turtle/3
 | 
						|
 | 
						|
rdf_load_turtle(Input, Triples, Options) :-
 | 
						|
	rdf_read_turtle(Input, Triples, Options).
 | 
						|
 | 
						|
 | 
						|
%%	rdf_process_turtle(+Input, :OnObject, +Options) is det.
 | 
						|
%
 | 
						|
%	Process Turtle input from Input, calling OnObject with a list of
 | 
						|
%	triples. Options is the same as for rdf_load_turtle/3.
 | 
						|
%
 | 
						|
%	Errors encountered are sent to  print_message/2, after which the
 | 
						|
%	parser tries to recover and parse the remainder of the data.
 | 
						|
 | 
						|
rdf_process_turtle(In, OnObject, Options) :-
 | 
						|
	open_input(In, Stream, Close),
 | 
						|
	init_state(In, Stream, Options, State),
 | 
						|
	call_cleanup(process_stream(State, Stream, OnObject),
 | 
						|
		     Close),
 | 
						|
	post_options(State, Options).
 | 
						|
 | 
						|
post_options(State, Options) :-
 | 
						|
	prefix_option(State, Options),
 | 
						|
	namespace_option(State, Options),
 | 
						|
	base_option(State, Options),
 | 
						|
	error_option(State, Options).
 | 
						|
 | 
						|
prefix_option(State, Options) :-
 | 
						|
	(   option(prefixes(Pairs), Options)
 | 
						|
	->  ttl_state_prefix_map(State, Map),
 | 
						|
	    assoc_to_list(Map, Pairs)
 | 
						|
	;   true
 | 
						|
	).
 | 
						|
namespace_option(State, Options) :-
 | 
						|
	(   option(namespaces(Pairs), Options)
 | 
						|
	->  ttl_state_prefix_map(State, Map),
 | 
						|
	    assoc_to_list(Map, Pairs)
 | 
						|
	;   true
 | 
						|
	).
 | 
						|
base_option(State, Options) :-
 | 
						|
	(   option(base_used(Base), Options)
 | 
						|
	->  ttl_state_base_uri(State, Base)
 | 
						|
	;   true
 | 
						|
	).
 | 
						|
error_option(State, Options) :-
 | 
						|
	(   option(error_count(Count), Options)
 | 
						|
	->  ttl_state_error_count(State, Count)
 | 
						|
	;   true
 | 
						|
	).
 | 
						|
 | 
						|
 | 
						|
process_stream(State, In, OnObject) :-
 | 
						|
	read_turtle_tokens(In, Tokens, State),
 | 
						|
	debug(turtle, 'Tokens: ~w~n', [Tokens]),
 | 
						|
	ttl_state_line_no(State, LineNo),
 | 
						|
	(   Tokens == end_of_file
 | 
						|
	->  true
 | 
						|
	;   catch(phrase(triples(State, Triples), Tokens), E, true)
 | 
						|
	->  (   var(E)
 | 
						|
	    ->  (   Triples == []
 | 
						|
		->  true
 | 
						|
		;   ttl_state_graph(State, DB),
 | 
						|
		    call(OnObject, Triples, DB:LineNo)
 | 
						|
		)
 | 
						|
	    ;   print_message(error, E)
 | 
						|
	    ),
 | 
						|
	    process_stream(State, In, OnObject)
 | 
						|
	;   syntax_error_term(In, LineNo, cannot_parse, Error),
 | 
						|
	    step_error(State, Error),
 | 
						|
	    process_stream(State, In, OnObject)
 | 
						|
	).
 | 
						|
 | 
						|
 | 
						|
%%	step_error(+State, +Error) is det.
 | 
						|
%
 | 
						|
%	Throw Error of =on_error= is =error=.  Otherwise print the error
 | 
						|
%	and increment =error_count=.
 | 
						|
%
 | 
						|
%	@error syntax_error(Culprit).
 | 
						|
 | 
						|
step_error(State, Error) :-
 | 
						|
	ttl_state_on_error(State, error), !,
 | 
						|
	throw(Error).
 | 
						|
step_error(State, Error) :-
 | 
						|
	ttl_state_error_count(State, E0),
 | 
						|
	succ(E0, E),
 | 
						|
	nb_set_error_count_of_ttl_state(E, State),
 | 
						|
	print_message(error, Error).
 | 
						|
 | 
						|
 | 
						|
%%	open_input(+Input, -Stream, -Close) is det.
 | 
						|
%
 | 
						|
%	Open given input.
 | 
						|
%
 | 
						|
%	@param  Close goal to undo the open action
 | 
						|
%	@tbd	Synchronize with input handling of rdf_db.pl.
 | 
						|
%	@error	existence_error, permission_error
 | 
						|
 | 
						|
open_input(stream(Stream), Stream, true) :- !,
 | 
						|
	stream_property(Stream, encoding(Old)),
 | 
						|
	(   Old == utf8
 | 
						|
	->  Close = true
 | 
						|
	;   set_stream(Stream, encoding(utf8)),
 | 
						|
	    Close = set_stream(Stream, encoding(Old))
 | 
						|
	).
 | 
						|
open_input(Stream, Stream, Close) :-
 | 
						|
	is_stream(Stream), !,
 | 
						|
	open_input(stream(Stream), Stream, Close).
 | 
						|
open_input(URL, Stream, close(Stream)) :-
 | 
						|
	sub_atom(URL, 0, _, _, 'http://'), !,
 | 
						|
	http_open(URL, Stream, []),
 | 
						|
	set_stream(Stream, encoding(utf8)).
 | 
						|
open_input(File, Stream, close(Stream)) :-
 | 
						|
	absolute_file_name(File, Path,
 | 
						|
			   [ access(read),
 | 
						|
			     extensions([ttl, ''])
 | 
						|
			   ]),
 | 
						|
	open(Path, read, Stream, [encoding(utf8)]).
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
The parser is a two-stage processor. The  first reads the raw file input
 | 
						|
and generates a list of tokens, stripping   comments and white space. It
 | 
						|
is defined to read a single  statement   upto  its  terminating '.'. The
 | 
						|
second stage is a traditional DCG parser  generating the triples for the
 | 
						|
statement.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
init_state(In, Stream, Options, State) :-
 | 
						|
	(   option(base_uri(BaseURI), Options)
 | 
						|
	->  true
 | 
						|
	;   In = stream(_)
 | 
						|
	->  BaseURI = []
 | 
						|
	;   uri_is_global(In),
 | 
						|
	    \+ is_absolute_file_name(In) 	% Avoid C:Path in Windows
 | 
						|
	->  uri_normalized(In, BaseURI)
 | 
						|
	;   uri_file_name(BaseURI, In)
 | 
						|
	),
 | 
						|
	(   option(anon_prefix(Prefix), Options)
 | 
						|
	->  true
 | 
						|
	;   BaseURI == []
 | 
						|
	->  Prefix = '__bnode'
 | 
						|
	;   atom_concat('__', BaseURI, Prefix)
 | 
						|
	),
 | 
						|
	option(db(DB), Options, BaseURI),
 | 
						|
	option(on_error(OnError), Options, warning),
 | 
						|
	option(resources(URIIRI), Options, uri),
 | 
						|
	empty_assoc(Map),
 | 
						|
	empty_assoc(NodeMap),
 | 
						|
	make_ttl_state([ base_uri(BaseURI),
 | 
						|
			 resources(URIIRI),
 | 
						|
			 prefix_map(Map),
 | 
						|
			 nodeid_map(NodeMap),
 | 
						|
			 anon_prefix(Prefix),
 | 
						|
			 graph(DB),
 | 
						|
			 input(Stream),
 | 
						|
			 on_error(OnError)
 | 
						|
		       ], State).
 | 
						|
 | 
						|
 | 
						|
turtle_file(State, In) -->
 | 
						|
	{ read_turtle_tokens(In, Tokens, State),
 | 
						|
	  debug(turtle, 'Tokens: ~w~n', [Tokens])
 | 
						|
	},
 | 
						|
	(   { Tokens == end_of_file }
 | 
						|
	->  []
 | 
						|
	;   { catch(phrase(triples(State, Triples), Tokens), E, true) }
 | 
						|
	->  (   { var(E) }
 | 
						|
	    ->	list(Triples),
 | 
						|
		turtle_file(State, In)
 | 
						|
	    ;	{ step_error(State, E) },
 | 
						|
		turtle_file(State, In)
 | 
						|
	    )
 | 
						|
	;   { ttl_state_line_no(State, LineNo),
 | 
						|
	      syntax_error_term(In, LineNo, cannot_parse, Error),
 | 
						|
	      step_error(State, Error)
 | 
						|
	    },
 | 
						|
	    turtle_file(State, In)
 | 
						|
	).
 | 
						|
 | 
						|
list([]) --> [].
 | 
						|
list([H|T]) --> [H], list(T).
 | 
						|
 | 
						|
triples(State, []) -->
 | 
						|
	[ '@', name(prefix), name(Prefix), : ], !,
 | 
						|
	iri(State, URI),
 | 
						|
	{ ttl_state_prefix_map(State, Map0),
 | 
						|
	  put_assoc(Prefix, Map0, URI, Map),
 | 
						|
	  set_prefix_map_of_ttl_state(Map, State)
 | 
						|
	}.
 | 
						|
triples(State, []) -->
 | 
						|
	[ '@', name(prefix), ':' ], !,
 | 
						|
	iri(State, URI),
 | 
						|
	{ set_base_uri_of_ttl_state(URI, State)
 | 
						|
	}.
 | 
						|
triples(State, []) -->
 | 
						|
	[ '@', name(base) ], !,
 | 
						|
	iri(State,URI),
 | 
						|
	{ set_base_uri_of_ttl_state(URI, State)
 | 
						|
	}.
 | 
						|
triples(State, Triples) -->
 | 
						|
	subject(State, Subject, Triples, T),
 | 
						|
	(   predicate_object_list(State, Subject, T, [])
 | 
						|
	->  (   eos
 | 
						|
	    ->	[]
 | 
						|
	    ;	syntax_rule(State, expected(predicate_object_list))
 | 
						|
	    )
 | 
						|
	;   { Triples \== T }		% [ p o ; ... ] .
 | 
						|
	->  { T = [] }
 | 
						|
	).
 | 
						|
 | 
						|
eos([], []).
 | 
						|
 | 
						|
subject(State, Subject, T, T) -->
 | 
						|
	resource(State, Subject), !.
 | 
						|
subject(State, Subject, T0, T) -->
 | 
						|
	blank(State, Subject, T0, T), !.
 | 
						|
subject(State, _, _, _) -->
 | 
						|
	syntax_rule(State, subject_expected).
 | 
						|
 | 
						|
predicate_object_list(State, Subject, Triples, Tail) -->
 | 
						|
	verb(State, Predicate),
 | 
						|
	object_list(State, Subject, Predicate, Triples, Tail0),
 | 
						|
	(   [';']
 | 
						|
	->  opt_predicate_object_list(State, Subject, Tail0, Tail)
 | 
						|
	;   {Tail0 = Tail}
 | 
						|
	).
 | 
						|
 | 
						|
opt_predicate_object_list(State, Subject, Triples, Tail) -->
 | 
						|
	predicate_object_list(State, Subject, Triples, Tail), !.
 | 
						|
opt_predicate_object_list(_, _, Tail, Tail) -->
 | 
						|
	[].
 | 
						|
 | 
						|
object_list(State, Subject, Predicate,
 | 
						|
	    [rdf(Subject, Predicate, Object)|T0], T) -->
 | 
						|
	object(State, Object, T0, T1),
 | 
						|
	(   [',']
 | 
						|
	->  object_list(State, Subject, Predicate, T1, T)
 | 
						|
	;   {T1 = T}
 | 
						|
	).
 | 
						|
 | 
						|
verb(_, P) -->
 | 
						|
	[name(a)], !,
 | 
						|
	{ rdf_equal(rdf:type, P)
 | 
						|
	}.
 | 
						|
verb(State, P) -->
 | 
						|
	resource(State, P).
 | 
						|
 | 
						|
object(State, Object, T, T) -->
 | 
						|
	[ literal(Value) ], !,
 | 
						|
	{ mk_object(Value, State, Object)
 | 
						|
	}.
 | 
						|
object(_, literal(type(Type, N)), T, T) -->
 | 
						|
	[ numeric(Tp, Codes) ], !,
 | 
						|
	{ numeric_url(Tp, Type),
 | 
						|
	  normalise_number(Tp, Codes, N)
 | 
						|
	}.
 | 
						|
object(State, Object, T, T) -->
 | 
						|
	resource(State, Object), !.
 | 
						|
object(State, Object, T0, T) -->
 | 
						|
	blank(State, Object, T0, T), !.
 | 
						|
object(_, Object, T, T) -->
 | 
						|
	[ name(Bool) ],
 | 
						|
	{ boolean(Bool),
 | 
						|
	  Object = literal(type(BoolType, Bool)),
 | 
						|
	  rdf_equal(BoolType, xsd:boolean)
 | 
						|
	}.
 | 
						|
object(State, _, _, _) -->
 | 
						|
	syntax_rule(State, expected_object).
 | 
						|
 | 
						|
%%	normalise_number(+Type, +Codes:list, -Literal:atom) is det.
 | 
						|
%
 | 
						|
%	Turtle normalisation of numbers. Currently  only implemented for
 | 
						|
%	integers. This ensures that 0001 is parsed as "1"^^xsd:integer.
 | 
						|
%
 | 
						|
%	Hmmm.  Acording to test-10.ttl, this must *not* be done, so for
 | 
						|
%	now we disable all normalization.
 | 
						|
 | 
						|
%normalise_number(integer, Codes, N) :-
 | 
						|
%	number_codes(I, Codes),
 | 
						|
%	atom_number(N, I).
 | 
						|
normalise_number(_, Codes, N) :-
 | 
						|
	atom_codes(N, Codes).
 | 
						|
 | 
						|
term_expansion(numeric_url(I, Local),
 | 
						|
	       numeric_url(I, URI)) :-
 | 
						|
	rdf_global_id(Local, URI).
 | 
						|
 | 
						|
numeric_url(integer, xsd:integer).
 | 
						|
numeric_url(decimal, xsd:decimal).
 | 
						|
numeric_url(double,  xsd:double).
 | 
						|
 | 
						|
boolean(true).
 | 
						|
boolean(false).
 | 
						|
 | 
						|
resource(State, IRI) -->
 | 
						|
	iri(State, IRI), !.
 | 
						|
resource(State, IRI) -->
 | 
						|
	[ :(Name) ], !,
 | 
						|
	{ ttl_state_base_uri(State, Base),
 | 
						|
	  atom_concat(Base, Name, URI),
 | 
						|
	  uri_iri(State, URI, IRI)
 | 
						|
	}.
 | 
						|
resource(State, IRI) -->
 | 
						|
	[ name(Prefix), : ], !,
 | 
						|
	{ ttl_state_prefix_map(State, Map),
 | 
						|
	  get_assoc(Prefix, Map, IRI)
 | 
						|
	}.
 | 
						|
resource(State, IRI) -->
 | 
						|
	[ Prefix:Name ], !,
 | 
						|
	{ ttl_state_prefix_map(State, Map),
 | 
						|
	  (   get_assoc(Prefix, Map, Base)
 | 
						|
	  ->  atom_concat(Base, Name, URI),
 | 
						|
	      uri_iri(State, URI, IRI)
 | 
						|
	  ;   throw(error(existence_error(prefix, Prefix), _))
 | 
						|
	  )
 | 
						|
	}.
 | 
						|
resource(State, BaseIRI) -->
 | 
						|
	[ : ], !,
 | 
						|
	{ ttl_state_base_uri(State, BaseIRI)
 | 
						|
	}.
 | 
						|
 | 
						|
uri_iri(State, URI, IRI) :-
 | 
						|
	(   ttl_state_resources(State, uri)
 | 
						|
	->  uri_iri(URI, IRI)
 | 
						|
	;   IRI = URI
 | 
						|
	).
 | 
						|
 | 
						|
iri(State, IRI) -->
 | 
						|
	[ relative_uri(Rel)
 | 
						|
	],
 | 
						|
	{ ttl_state_base_uri(State, Base),
 | 
						|
	  (   Rel == ''			% must be in global_url?
 | 
						|
	  ->  IRI = Base
 | 
						|
	  ;   uri_normalized_iri(Rel, Base, IRI)
 | 
						|
	  )
 | 
						|
	}.
 | 
						|
 | 
						|
blank(State, Resource, T, T) -->
 | 
						|
	[ nodeId(NodeId) ], !,
 | 
						|
	{ ttl_state_nodeid_map(State, IdMap),
 | 
						|
	  (   get_assoc(NodeId, IdMap, Resource)
 | 
						|
	  ->  true
 | 
						|
	  ;   anonid(State, NodeId, Resource),
 | 
						|
	      put_assoc(NodeId, IdMap, Resource, NewIdMap),
 | 
						|
	      set_nodeid_map_of_ttl_state(NewIdMap, State)
 | 
						|
	  )
 | 
						|
	}.
 | 
						|
blank(State, Resource, T, T) -->
 | 
						|
	[ '[', ']' ], !,
 | 
						|
	{ anonid(State, Resource)
 | 
						|
	}.
 | 
						|
blank(State, Resource, T0, T) -->
 | 
						|
	[ '[' ], !,
 | 
						|
	{ anonid(State, Resource)
 | 
						|
	},
 | 
						|
	predicate_object_list(State, Resource, T0, T),
 | 
						|
	[ ']' ].
 | 
						|
blank(State, Resource, T0, T) -->
 | 
						|
	[ '(' ],
 | 
						|
	item_list(State, Resource, T0, T).
 | 
						|
 | 
						|
item_list(_State, Resource, T, T) -->
 | 
						|
	[ ')' ], !,
 | 
						|
	{ rdf_equal(rdf:nil, Resource)
 | 
						|
	}.
 | 
						|
item_list(State, Resource, T0, T) -->
 | 
						|
	{ anonid(State, Resource) },
 | 
						|
	object(State, Object, T0, T1),
 | 
						|
	{ rdf_equal(rdf:first, First),
 | 
						|
	  rdf_equal(rdf:rest, Rest),
 | 
						|
	  T1 = [ rdf(Resource, First, Object),
 | 
						|
		 rdf(Resource, Rest, Tail)
 | 
						|
	       | T2
 | 
						|
	       ]
 | 
						|
	},
 | 
						|
	item_list(State, Tail, T2, T).
 | 
						|
 | 
						|
 | 
						|
anonid(State, Node) :-
 | 
						|
	ttl_state_anon_prefix(State, AnonPrefix),
 | 
						|
	ttl_state_anon_count(State, C0),
 | 
						|
	Count is C0 + 1,
 | 
						|
	set_anon_count_of_ttl_state(Count, State),
 | 
						|
	(   atom(AnonPrefix)
 | 
						|
	->  atom_concat(AnonPrefix, Count, Node)
 | 
						|
	;   Node = node(Count)
 | 
						|
	).
 | 
						|
 | 
						|
anonid(State, _NodeId, Node) :-
 | 
						|
	ttl_state_anon_prefix(State, AnonPrefix),
 | 
						|
	atom(AnonPrefix), !,
 | 
						|
	anonid(State, Node).
 | 
						|
anonid(_State, NodeId, node(NodeId)).
 | 
						|
 | 
						|
mk_object(type(Prefix:Name, Value), State, literal(type(Type, Value))) :- !,
 | 
						|
	  ttl_state_prefix_map(State, Map),
 | 
						|
	  get_assoc(Prefix, Map, Base),
 | 
						|
	  atom_concat(Base, Name, Type).
 | 
						|
mk_object(type(relative_uri(Rel), Value), State, literal(type(Type, Value))) :- !,
 | 
						|
	  ttl_state_base_uri(State, Base),
 | 
						|
	  (   Rel == ''			% must be in global_url?
 | 
						|
	  ->  Type = Base
 | 
						|
	  ;   uri_normalized_iri(Rel, Base, Type)
 | 
						|
	  ).
 | 
						|
mk_object(type(:(Name), Value), State, literal(type(Type, Value))) :- !,
 | 
						|
	  ttl_state_base_uri(State, Base),
 | 
						|
	  atom_concat(Base, Name, Type).
 | 
						|
mk_object(Value, _State, literal(Value)).
 | 
						|
 | 
						|
syntax_rule(State, Error) -->
 | 
						|
	error_tokens(7, Tokens),
 | 
						|
	{ ttl_state_input(State, Stream),
 | 
						|
	  stream_property(Stream, file_name(File)),
 | 
						|
	  ttl_state_line_no(State, LineNo),
 | 
						|
	  atomic_list_concat(Tokens, ' ', Before),
 | 
						|
	  format(string(Msg), '~w:~d (before "~w ...")',
 | 
						|
		 [File, LineNo, Before]),
 | 
						|
	  throw(error(syntax_error(Error),
 | 
						|
		      context(_, Msg)))
 | 
						|
	}.
 | 
						|
 | 
						|
%%	error_tokens(+Count, -Tokens) is det.
 | 
						|
%
 | 
						|
%	Return maximum Count tokens,  converted   back  to  turtle input
 | 
						|
%	syntax.
 | 
						|
 | 
						|
error_tokens(N, [H|T]) -->
 | 
						|
	{ succ(N2, N) },
 | 
						|
	error_token(H), !,
 | 
						|
	error_tokens(N2, T).
 | 
						|
error_tokens(_, []) --> [].
 | 
						|
 | 
						|
error_token(Name) -->
 | 
						|
	[ name(Name) ], !.
 | 
						|
error_token(Text) -->
 | 
						|
	[ numeric(_, Codes) ], !,
 | 
						|
	{ atom_codes(Text, Codes) }.
 | 
						|
error_token(Text) -->
 | 
						|
	[ literal(Literal) ], !,
 | 
						|
	{ literal_text(Literal, Text) }.
 | 
						|
error_token(Text) -->
 | 
						|
	[ URIToken ],
 | 
						|
	{ uri_text(URIToken, Text) }, !.
 | 
						|
error_token(Punct) -->
 | 
						|
	[ Punct ],
 | 
						|
	{ atom(Punct) }, !.
 | 
						|
error_token(Rest) -->
 | 
						|
	[ H ],
 | 
						|
	{ term_to_atom(H, Rest) }.
 | 
						|
 | 
						|
literal_text(type(Type, Value), Text) :- !,
 | 
						|
	uri_text(Type, TypeText),
 | 
						|
	format(atom(Text), '"~w"^^~w', [Value, TypeText]).
 | 
						|
 | 
						|
uri_text(relative_uri(URI), Text) :-
 | 
						|
	format(atom(Text), '<~w>', [URI]).
 | 
						|
uri_text(:(Name), Text) :-
 | 
						|
	format(atom(Text), ':~w', [Name]).
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	     TOKENISER		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
%%	read_turtle_tokens(+In, -List, +State) is det.
 | 
						|
%
 | 
						|
%	Read  the  next  Turtle  statement  as  a  list  of  tokens.  If
 | 
						|
%	on_error(warning)  is  active,  failure  prints  a  message  and
 | 
						|
%	continues reading the next statements.
 | 
						|
%
 | 
						|
%	The line_no property of the state is set to the start-line
 | 
						|
%
 | 
						|
%	@error syntax_error(Culprit)
 | 
						|
 | 
						|
read_turtle_tokens(In, List, State) :-
 | 
						|
	ttl_state_on_error(State, error), !,
 | 
						|
	line_count(In, LineNo),
 | 
						|
	nb_set_line_no_of_ttl_state(LineNo, State),
 | 
						|
	(   turtle_tokens(In, List)
 | 
						|
	->  true
 | 
						|
	;   syntax_error_term(In, LineNo, illegal_token, Error),
 | 
						|
	    throw(Error)
 | 
						|
	).
 | 
						|
read_turtle_tokens(In, List, State) :-
 | 
						|
	line_count(In, LineNo),
 | 
						|
	nb_set_line_no_of_ttl_state(LineNo, State),
 | 
						|
	(   catch(turtle_tokens(In, List), Error, true)
 | 
						|
	->  (   var(Error)
 | 
						|
	    ->  true
 | 
						|
	    ;   print_message(error, Error),
 | 
						|
		skip_statement(In),
 | 
						|
		read_turtle_tokens(In, List, State)
 | 
						|
	    )
 | 
						|
	;   syntax_error_term(In, LineNo, illegal_token, Error),
 | 
						|
	    print_message(error, Error),
 | 
						|
	    skip_statement(In),
 | 
						|
	    read_turtle_tokens(In, List, State)
 | 
						|
	).
 | 
						|
 | 
						|
%%	skip_statement(+In)
 | 
						|
%
 | 
						|
%	Skip to the end of the statement
 | 
						|
 | 
						|
skip_statement(In) :-
 | 
						|
	get_code(In, C0),
 | 
						|
	skip_statement(C0, In).
 | 
						|
 | 
						|
skip_statement(-1, _) :- !.
 | 
						|
skip_statement(0'., In) :-
 | 
						|
	get_code(In, C),
 | 
						|
	(   turtle_ws(C)
 | 
						|
	->  !
 | 
						|
	;   skip_statement(C, In)
 | 
						|
	).
 | 
						|
skip_statement(_, In) :-
 | 
						|
	get_code(In, C),
 | 
						|
	skip_statement(C, In).
 | 
						|
 | 
						|
%%	turtle_tokens(+In, -List)
 | 
						|
%
 | 
						|
%	Read a statement from a turtle file, returning the contents as a
 | 
						|
%	list of tokens.
 | 
						|
 | 
						|
turtle_tokens(In, List) :-
 | 
						|
	get_code(In, C0),
 | 
						|
	turtle_token(C0, In, C1, Tok1),
 | 
						|
	(   Tok1 == end_of_file
 | 
						|
	->  List = end_of_file
 | 
						|
	;   List = [Tok1|Tokens],
 | 
						|
	    turtle_tokens(C1, In, Tokens)
 | 
						|
	).
 | 
						|
 | 
						|
turtle_tokens(C0, In, List) :-
 | 
						|
	(   turtle_token(C0, In, C1, H)
 | 
						|
	->  debug(turtle(token), 'Token: ~q', [H])
 | 
						|
	;   syntax_error(In, -1, illegal_token)
 | 
						|
	),
 | 
						|
	(   H == '.'
 | 
						|
	->  List = []
 | 
						|
	;   H == end_of_file
 | 
						|
	->  syntax_error(In, -1, unexpected_end_of_input)
 | 
						|
	;   List = [H|T],
 | 
						|
	    turtle_tokens(C1, In, T)
 | 
						|
	).
 | 
						|
 | 
						|
turtle_token(-1, _, -1, end_of_file) :- !.
 | 
						|
turtle_token(0'., _, end, '.') :- !.	% Turtle does not demand a space here!
 | 
						|
turtle_token(0'#, In, C, Token) :- !,
 | 
						|
	get_code(In, C1),
 | 
						|
	skip_line(C1, In, C2),
 | 
						|
	turtle_token(C2, In, C, Token).
 | 
						|
turtle_token(WS, In, C, Token) :-
 | 
						|
	turtle_ws(WS), !,
 | 
						|
	get_code(In, C1),
 | 
						|
	turtle_token(C1, In, C, Token).
 | 
						|
turtle_token(C0, In, C, Number) :-
 | 
						|
	between(0'0, 0'9, C0), !,
 | 
						|
	turtle_number(C0, In, C, Number).
 | 
						|
turtle_token(0'-, In, C, Number) :- !,
 | 
						|
	turtle_number(0'-, In, C, Number).
 | 
						|
turtle_token(0'+, In, C, Number) :- !,
 | 
						|
	turtle_number(0'+, In, C, Number).
 | 
						|
turtle_token(0'", In, C, Literal) :- !,
 | 
						|
	turtle_read_string(0'", In, C1, Atom),
 | 
						|
	(   C1 == 0'@
 | 
						|
	->  get_code(In, C2),
 | 
						|
	    language(C2, In, C, LangCodes),
 | 
						|
	    atom_codes(LangId, LangCodes),
 | 
						|
	    Literal = literal(lang(LangId, Atom))
 | 
						|
	;   C1 == 0'^,
 | 
						|
	    peek_code(In, 0'^)
 | 
						|
	->  get_code(In, 0'^),
 | 
						|
	    get_code(In, C2),
 | 
						|
	    resource_token(C2, In, C, Type),
 | 
						|
	    Literal = literal(type(Type, Atom))
 | 
						|
	;   C = C1,
 | 
						|
	    Literal = literal(Atom)
 | 
						|
	).
 | 
						|
turtle_token(0'_, In, C, nodeId(NodeID)) :-
 | 
						|
	peek_code(In, 0':), !,
 | 
						|
	get_code(In, _),
 | 
						|
	get_code(In, C1),
 | 
						|
	turtle_read_name(C1, In, C, NodeID).
 | 
						|
turtle_token(0'<, In, C, URI) :- !,
 | 
						|
	resource_token(0'<, In, C, URI).
 | 
						|
turtle_token(0':, In, C, URI) :- !,
 | 
						|
	resource_token(0':, In, C, URI).
 | 
						|
turtle_token(C0, In, C, Token) :-
 | 
						|
	turtle_read_name(C0, In, C1, Name), !,
 | 
						|
	(   C1 == 0':,
 | 
						|
	    \+ sub_atom(Name, 0, _, _, '_'),
 | 
						|
	    peek_code(In, C2),
 | 
						|
	    turtle_name_start_char(C2)
 | 
						|
	->  get_code(In, C2),
 | 
						|
	    turtle_read_name(C2, In, C, Name2),
 | 
						|
	    Token = (Name:Name2)
 | 
						|
	;   Token = name(Name),
 | 
						|
	    C = C1
 | 
						|
	).
 | 
						|
turtle_token(Punct, In, C, P) :-
 | 
						|
	punctuation(Punct, P), !,
 | 
						|
	get_code(In, C).
 | 
						|
 | 
						|
%%	turtle_number(+Char0, +In, -CharNext, -Value)
 | 
						|
%
 | 
						|
%	Value is Type:CodeList
 | 
						|
 | 
						|
turtle_number(0'-, In, CN, numeric(T, [0'-|Codes])) :- !,
 | 
						|
	get_code(In, C0),
 | 
						|
	turtle_number_nn(C0, In, CN, numeric(T, Codes)).
 | 
						|
turtle_number(0'+, In, CN, numeric(T, [0'+|Codes])) :- !,
 | 
						|
	get_code(In, C0),
 | 
						|
	turtle_number_nn(C0, In, CN, numeric(T, Codes)).
 | 
						|
turtle_number(C0, In, CN, Value) :-
 | 
						|
	turtle_number_nn(C0, In, CN, Value).
 | 
						|
 | 
						|
turtle_number_nn(C, In, CN, numeric(Type, Codes)) :-
 | 
						|
	turtle_integer_codes(C, In, CN0, Codes, T0), 	% [0-9]+
 | 
						|
	(   CN0 == 0'.
 | 
						|
	->  T0 = [CN0|T1],
 | 
						|
	    get_code(In, C1),
 | 
						|
	    turtle_integer_codes(C1, In, CN1, T1, T2), % [0-9]+.[0-9]+
 | 
						|
	    (	exponent(CN1, In, CN, T2)
 | 
						|
	    ->	Type = double
 | 
						|
	    ;	CN = CN1,
 | 
						|
		T2 = [],
 | 
						|
		Type = decimal
 | 
						|
	    )
 | 
						|
	;   exponent(CN0, In, CN, T0)
 | 
						|
	->  Type = double
 | 
						|
	;   T0 = [],
 | 
						|
	    CN = CN0,
 | 
						|
	    Type = integer
 | 
						|
	).
 | 
						|
 | 
						|
turtle_integer_codes(C0, In, CN, [C0|T0], T) :-
 | 
						|
	between(0'0, 0'9, C0), !,
 | 
						|
	get_code(In, C1),
 | 
						|
	turtle_integer_codes(C1, In, CN, T0, T).
 | 
						|
turtle_integer_codes(CN, _, CN, T, T).
 | 
						|
 | 
						|
exponent(C0, In, CN, [C0|T0]) :-
 | 
						|
	e(C0), !,
 | 
						|
	get_code(In, C1),
 | 
						|
	optional_sign(C1, In, CN0, T0, T1),
 | 
						|
	turtle_integer_codes(CN0, In, CN, T1, []).
 | 
						|
 | 
						|
optional_sign(C0, In, CN, [C0|T], T) :-
 | 
						|
	sign(C0), !,
 | 
						|
	get_code(In, CN).
 | 
						|
optional_sign(CN, _, CN, T, T).
 | 
						|
 | 
						|
e(0'e).
 | 
						|
e(0'E).
 | 
						|
 | 
						|
sign(0'-).
 | 
						|
sign(0'+).				%'
 | 
						|
 | 
						|
 | 
						|
					% language: [a-z]+ ('-' [a-z0-9]+ )*
 | 
						|
language(C0, In, C, [C0|Codes]) :-
 | 
						|
	code_type(C0, lower),
 | 
						|
	get_code(In, C1),
 | 
						|
	lwr_word(C1, In, C2, Codes, Tail),
 | 
						|
	sub_langs(C2, In, C, Tail, []), !.
 | 
						|
language(_, In, _, _) :-
 | 
						|
	line_count(In, LineNo),
 | 
						|
	syntax_error(In, LineNo, language_specifier).
 | 
						|
 | 
						|
lwr_word(C0, In, C, [C0|T0], T) :-
 | 
						|
	code_type(C0, lower), !,
 | 
						|
	get_code(In, C1),
 | 
						|
	lwr_word(C1, In, C, T0, T).
 | 
						|
lwr_word(C, _, C, T, T).
 | 
						|
 | 
						|
sub_langs(0'-, In, C, [0'-, C1|Codes], T) :- !,
 | 
						|
	get_code(In, C1),
 | 
						|
	lwrdig(C1), !,
 | 
						|
	get_code(In, C2),
 | 
						|
	lwrdigs(C2, In, C3, Codes, Tail),
 | 
						|
	sub_langs(C3, In, C, Tail, T).
 | 
						|
sub_langs(C, _, C, T, T).
 | 
						|
 | 
						|
lwrdig(C) :-
 | 
						|
	code_type(C, lower), !.
 | 
						|
lwrdig(C) :-
 | 
						|
	code_type(C, digit).
 | 
						|
 | 
						|
lwrdigs(C0, In, C, [C0|T0], T) :-
 | 
						|
	lwrdig(C0), !,
 | 
						|
	get_code(In, C1),
 | 
						|
	lwr_word(C1, In, C, T0, T).
 | 
						|
lwrdigs(C, _, C, T, T).
 | 
						|
 | 
						|
					% resource_token
 | 
						|
resource_token(0'<, In, C, relative_uri(URI)) :- !,
 | 
						|
	turtle_read_relative_uri(0'<, In, C, URI).
 | 
						|
resource_token(0':, In, C, Token) :- !,
 | 
						|
	get_code(In, C0),
 | 
						|
	(   turtle_read_name(C0, In, C, Name)
 | 
						|
	->  Token = :(Name)
 | 
						|
	;   Token = :,
 | 
						|
	    C = C0
 | 
						|
	).
 | 
						|
resource_token(C0, In, C, Prefix:Name) :-
 | 
						|
	turtle_read_name(C0, In, C1, Prefix),
 | 
						|
	\+ sub_atom(Prefix, 0, _, _, '_'), !,
 | 
						|
	C1 == 0':,
 | 
						|
	get_code(In, C2),
 | 
						|
	turtle_read_name(C2, In, C, Name).
 | 
						|
 | 
						|
 | 
						|
punctuation(0'(, '(').
 | 
						|
punctuation(0'), ')').
 | 
						|
punctuation(0'[, '[').
 | 
						|
punctuation(0'], ']').
 | 
						|
punctuation(0',, ',').
 | 
						|
punctuation(0'@, '@').
 | 
						|
punctuation(0':, ':').
 | 
						|
punctuation(0';, ';').
 | 
						|
 | 
						|
					% comment
 | 
						|
skip_line(0xA, In, C) :- !,
 | 
						|
	get_code(In, C).
 | 
						|
skip_line(0xD, In, C) :- !,
 | 
						|
	get_code(In, C).
 | 
						|
skip_line(-1, _, -1) :- !.
 | 
						|
skip_line(_, In, C) :-
 | 
						|
	get_code(In, C1),
 | 
						|
	skip_line(C1, In, C).
 | 
						|
 | 
						|
					% ws
 | 
						|
turtle_ws(0x9).
 | 
						|
turtle_ws(0xA).
 | 
						|
turtle_ws(0xD).
 | 
						|
turtle_ws(0x20).
 | 
						|
 | 
						|
syntax_error(Stream, Line, Which) :-
 | 
						|
	syntax_error_term(Stream, Line, Which, Error),
 | 
						|
	throw(Error).
 | 
						|
 | 
						|
syntax_error_term(Stream, -1, Which, Error) :- !,
 | 
						|
	stream_property(Stream, file_name(File)),
 | 
						|
	line_count(Stream, LineNo),
 | 
						|
	line_position(Stream, LinePos),
 | 
						|
	character_count(Stream, CharIndex),
 | 
						|
	Error = error(syntax_error(Which),
 | 
						|
		      file(File, LineNo, LinePos, CharIndex)).
 | 
						|
syntax_error_term(Stream, LineNo, Which, Error) :-
 | 
						|
	stream_property(Stream, file_name(File)),
 | 
						|
	Error = error(syntax_error(Which),
 | 
						|
		      file(File, LineNo, -1, -1)).
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    RDF-DB HOOK		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
:- multifile
 | 
						|
	rdf_db:rdf_load_stream/3,
 | 
						|
	rdf_db:rdf_file_type/2.
 | 
						|
 | 
						|
rdf_db:rdf_load_stream(turtle, Stream, _Module:Options) :-
 | 
						|
	rdf_db:graph(Options, Id),
 | 
						|
	rdf_transaction(rdf_process_turtle(Stream, assert_triples, Options),
 | 
						|
			parse(Id)).
 | 
						|
 | 
						|
assert_triples([], _).
 | 
						|
assert_triples([rdf(S,P,O)|T], Location) :-
 | 
						|
	rdf_assert(S,P,O,Location),
 | 
						|
	assert_triples(T, Location).
 | 
						|
 | 
						|
rdf_db:rdf_file_type(ttl, turtle).
 | 
						|
rdf_db:rdf_file_type(n3,  turtle).	% not really, but good enough
 | 
						|
rdf_db:rdf_file_type(nt,  turtle).	% not really, but good enough
 |