yap-6.3/packages/sgml/RDF/rdf_ntriples.pl

/*  $Id$

    Part of SWI-Prolog SGML/XML parser

    Author:  Jan Wielemaker
    E-mail:  jan@swi.psy.uva.nl
    WWW:     http://www.swi.psy.uva.nl/projects/SWI-Prolog/
    Copying: LGPL-2.  See the file COPYING or http://www.gnu.org

    Copyright (C) 1990-2002 SWI, University of Amsterdam. All rights reserved.
*/

:- module(rdf_ntriples,
	  [ load_rdf_ntriples/2,	% +File, -Triples
	    rdf_ntriple_part/4		% +Field, -Value, <DCG>
	  ]).


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This module parses n-triple files as defined   by the W3C RDF working in
http://www.w3.org/TR/rdf-testcases/#ntriples.   This   format     is   a
simplified version of the RDF N3 notation   used  in the *.nt files that
are used to describe the normative outcome of the RDF test-cases.

The returned list terms are of the form

	rdf(Subject, Predicate, Object)

where

	# Subject
	is an atom or node(Id) for anonymous nodes

	# Predicate
	is an atom

	# Object
	is an atom, node(Id), literal(Atom) or xml(Atom)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */


%	load_rdf_ntriples(+Source, -Triples)
%	
%	Load a file or stream to a list of rdf(S,P,O) triples.

load_rdf_ntriples(File, Triples) :-
	open_nt_file(File, In, Close),
	call_cleanup(stream_to_triples(In, Triples), Close).

%	open_nt_file(+Input, -Stream, -Close)
%	
%	Open Input, returning Stream and a goal to cleanup Stream if it
%	was opened.

open_nt_file(stream(Stream), Stream, true) :- !.
open_nt_file(Stream, Stream, true) :-
	is_stream(Stream), !.
open_nt_file(Spec, Stream, close(Stream)) :-
	absolute_file_name(Spec,
			   [ access(read),
			     extensions([nt,''])
			   ], Path),
	open(Path, read, Stream).


%	rdf_ntriple_part(+Type, -Value, <DCG>)
%	
%	Parse one of the fields of  an   ntriple.  This  is used for the
%	SWI-Prolog Sesame (www.openrdf.org) implementation   to  realise
%	/servlets/removeStatements. I do not think   public  use of this
%	predicate should be stimulated.

rdf_ntriple_part(subject, Subject) -->
	subject(Subject).
rdf_ntriple_part(predicate, Predicate) -->
	predicate(Predicate).
rdf_ntriple_part(object, Object) -->
	predicate(Object).


%	stream_to_triples(+Stream, -ListOfTriples)
%	
%	Read Stream, returning all its triples

stream_to_triples(In, Triples) :-
	read_line_to_codes(In, Line),
	(   Line == end_of_file
	->  Triples = []
	;   phrase(line(Triples, Tail), Line),
	    stream_to_triples(In, Tail)
	).

line(Triples, Tail) -->
	wss,
	(   comment
	->  {Triples = Tail}
	;   triple(Triple)
	->  {Triples = [Triple|Tail]}
	).

comment -->
	"#", !,
	skip_rest.
comment -->
	end_of_input.

triple(rdf(Subject, Predicate, Object)) -->
	subject(Subject), ws, wss,
	predicate(Predicate), ws, wss,
	object(Object), wss, ".", wss.

subject(Subject) -->
	uniref(Subject), !.
subject(Subject) -->
	node_id(Subject).

predicate(Predicate) -->
	uniref(Predicate).

object(Object) -->
	uniref(Object), !.
object(Object) -->
	node_id(Object).
object(Object) -->
	literal(Object).


uniref(URI) -->
	"<",
	escaped_uri_codes(Codes),
	">", !,
	{ atom_codes(URI, Codes)
	}.

node_id(node(Id)) -->			% anonymous nodes
	"_:",
	name_start(C0),
	name_codes(Codes),
	{ atom_codes(Id, [C0|Codes])
	}.

literal(Literal) -->
	lang_string(Literal), !.
literal(Literal) -->
	xml_string(Literal).
	

%	name_start(-Code)
%	name_codes(-ListfCodes)
%	
%	Parse identifier names

name_start(C) -->
	[C],
	{ code_type(C, alpha)
	}.

name_codes([C|T]) -->
	[C],
	{ code_type(C, alnum)
	}, !,
	name_codes(T).
name_codes([]) -->
	[].


%	escaped_uri_codes(-CodeList)
%	
%	Decode string holding %xx escaped characters.

escaped_uri_codes([]) -->
	[].
escaped_uri_codes([C|T]) -->
	"%", [D0,D1], !,
	{ code_type(D0, xdigit(V0)),
	  code_type(D1, xdigit(V1)),
	  C is V0<<4 + V1
	},
	escaped_uri_codes(T).
escaped_uri_codes([C|T]) -->
	"\\u", [D0,D1,D2,D3], !,
	{ code_type(D0, xdigit(V0)),
	  code_type(D1, xdigit(V1)),
	  code_type(D2, xdigit(V2)),
	  code_type(D3, xdigit(V3)),
	  C is V0<<12 + V1<<8 + V2<<4 + V3
	},
	escaped_uri_codes(T).
escaped_uri_codes([C|T]) -->
	[C],
	escaped_uri_codes(T).


%	lang_string()
%	
%	Process a language string

lang_string(String) -->
	"\"",
	string(Codes),
	"\"", !,
	{ atom_codes(Atom, Codes)
	},
	(   langsep
	->  language(Lang),
	    { String = literal(lang(Lang, Atom))
	    }
	;   "^^"
	->  uniref(Type),
	    { String = literal(type(Type, Atom))
	    }	
	;   { String = literal(Atom)
	    }
	).

langsep -->
	"-".
langsep -->
	"@".

%	xml_string(String)
%	
%	Handle xml"..."

xml_string(xml(String)) -->
	"xml\"",			% really no whitespace?
	string(Codes),
	"\"",
	{ atom_codes(String, Codes)
	}.

string([]) -->
	[].
string([C0|T]) -->
	string_char(C0),
	string(T).

string_char(0'\\) -->
	"\\\\".
string_char(0'") -->
	"\\\"".
string_char(10) -->
	"\\n".
string_char(13) -->
	"\\r".
string_char(9) -->
	"\\t".
string_char(C) -->
	"\\u",
	'4xdigits'(C).
string_char(C) -->
	"\\u",
	'4xdigits'(C0),
	'4xdigits'(C1),
	{ C is C0<<16 + C1
	}.
string_char(C) -->
	[C].
	
'4xdigits'(C) -->
	[C0,C1,C2,C3],
	{ code_type(C0, xdigit(V0)),
	  code_type(C1, xdigit(V1)),
	  code_type(C2, xdigit(V2)),
	  code_type(C3, xdigit(V3)),
	  
	  C is V0<<12 + V1<<8 + V2<<4 + V3
	}.

%	language(-Lang)
%	
%	Return xml:lang language identifier.

language(Lang) -->
	lang_code(C0),
	lang_codes(Codes),
	{ atom_codes(Lang, [C0|Codes])
	}.

lang_code(C) -->
	[C],
	{ C \== 0'.,
	  \+ code_type(C, white)
	}.

lang_codes([C|T]) -->
	lang_code(C), !,
	lang_codes(T).
lang_codes([]) -->
	[].


		 /*******************************
		 *	       BASICS		*
		 *******************************/

skip_rest(_,[]).

ws -->
	[C],
	{ code_type(C, white)
	}.

end_of_input([], []).
	

wss -->
	ws, !,
	wss.
wss -->
	[].
Experiment with porting SGML to YAP, and trying to preserve SWI code as much as possible. 2009-03-13 19:39:06 +00:00			`/* $Id$`

			`Part of SWI-Prolog SGML/XML parser`

			`Author: Jan Wielemaker`
			`E-mail: jan@swi.psy.uva.nl`
			`WWW: http://www.swi.psy.uva.nl/projects/SWI-Prolog/`
			`Copying: LGPL-2. See the file COPYING or http://www.gnu.org`

			`Copyright (C) 1990-2002 SWI, University of Amsterdam. All rights reserved.`
			`*/`

			`:- module(rdf_ntriples,`
			`[ load_rdf_ntriples/2, % +File, -Triples`
			`rdf_ntriple_part/4 % +Field, -Value, <DCG>`
			`]).`


			`/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`This module parses n-triple files as defined by the W3C RDF working in`
			`http://www.w3.org/TR/rdf-testcases/#ntriples. This format is a`
			`simplified version of the RDF N3 notation used in the *.nt files that`
			`are used to describe the normative outcome of the RDF test-cases.`

			`The returned list terms are of the form`

			`rdf(Subject, Predicate, Object)`

			`where`

			`# Subject`
			`is an atom or node(Id) for anonymous nodes`

			`# Predicate`
			`is an atom`

			`# Object`
			`is an atom, node(Id), literal(Atom) or xml(Atom)`
			`- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */`


			`% load_rdf_ntriples(+Source, -Triples)`
			`%`
			`% Load a file or stream to a list of rdf(S,P,O) triples.`

			`load_rdf_ntriples(File, Triples) :-`
			`open_nt_file(File, In, Close),`
			`call_cleanup(stream_to_triples(In, Triples), Close).`

			`% open_nt_file(+Input, -Stream, -Close)`
			`%`
			`% Open Input, returning Stream and a goal to cleanup Stream if it`
			`% was opened.`

			`open_nt_file(stream(Stream), Stream, true) :- !.`
			`open_nt_file(Stream, Stream, true) :-`
			`is_stream(Stream), !.`
			`open_nt_file(Spec, Stream, close(Stream)) :-`
			`absolute_file_name(Spec,`
			`[ access(read),`
			`extensions([nt,''])`
			`], Path),`
			`open(Path, read, Stream).`


			`% rdf_ntriple_part(+Type, -Value, <DCG>)`
			`%`
			`% Parse one of the fields of an ntriple. This is used for the`
			`% SWI-Prolog Sesame (www.openrdf.org) implementation to realise`
			`% /servlets/removeStatements. I do not think public use of this`
			`% predicate should be stimulated.`

			`rdf_ntriple_part(subject, Subject) -->`
			`subject(Subject).`
			`rdf_ntriple_part(predicate, Predicate) -->`
			`predicate(Predicate).`
			`rdf_ntriple_part(object, Object) -->`
			`predicate(Object).`


			`% stream_to_triples(+Stream, -ListOfTriples)`
			`%`
			`% Read Stream, returning all its triples`

			`stream_to_triples(In, Triples) :-`
			`read_line_to_codes(In, Line),`
			`( Line == end_of_file`
			`-> Triples = []`
			`; phrase(line(Triples, Tail), Line),`
			`stream_to_triples(In, Tail)`
			`).`

			`line(Triples, Tail) -->`
			`wss,`
			`( comment`
			`-> {Triples = Tail}`
			`; triple(Triple)`
			`-> {Triples = [Triple\|Tail]}`
			`).`

			`comment -->`
			`"#", !,`
			`skip_rest.`
			`comment -->`
			`end_of_input.`

			`triple(rdf(Subject, Predicate, Object)) -->`
			`subject(Subject), ws, wss,`
			`predicate(Predicate), ws, wss,`
			`object(Object), wss, ".", wss.`

			`subject(Subject) -->`
			`uniref(Subject), !.`
			`subject(Subject) -->`
			`node_id(Subject).`

			`predicate(Predicate) -->`
			`uniref(Predicate).`

			`object(Object) -->`
			`uniref(Object), !.`
			`object(Object) -->`
			`node_id(Object).`
			`object(Object) -->`
			`literal(Object).`


			`uniref(URI) -->`
			`"<",`
			`escaped_uri_codes(Codes),`
			`">", !,`
			`{ atom_codes(URI, Codes)`
			`}.`

			`node_id(node(Id)) --> % anonymous nodes`
			`"_:",`
			`name_start(C0),`
			`name_codes(Codes),`
			`{ atom_codes(Id, [C0\|Codes])`
			`}.`

			`literal(Literal) -->`
			`lang_string(Literal), !.`
			`literal(Literal) -->`
			`xml_string(Literal).`


			`% name_start(-Code)`
			`% name_codes(-ListfCodes)`
			`%`
			`% Parse identifier names`

			`name_start(C) -->`
			`[C],`
			`{ code_type(C, alpha)`
			`}.`

			`name_codes([C\|T]) -->`
			`[C],`
			`{ code_type(C, alnum)`
			`}, !,`
			`name_codes(T).`
			`name_codes([]) -->`
			`[].`


			`% escaped_uri_codes(-CodeList)`
			`%`
			`% Decode string holding %xx escaped characters.`

			`escaped_uri_codes([]) -->`
			`[].`
			`escaped_uri_codes([C\|T]) -->`
			`"%", [D0,D1], !,`
			`{ code_type(D0, xdigit(V0)),`
			`code_type(D1, xdigit(V1)),`
			`C is V0<<4 + V1`
			`},`
			`escaped_uri_codes(T).`
			`escaped_uri_codes([C\|T]) -->`
			`"\\u", [D0,D1,D2,D3], !,`
			`{ code_type(D0, xdigit(V0)),`
			`code_type(D1, xdigit(V1)),`
			`code_type(D2, xdigit(V2)),`
			`code_type(D3, xdigit(V3)),`
			`C is V0<<12 + V1<<8 + V2<<4 + V3`
			`},`
			`escaped_uri_codes(T).`
			`escaped_uri_codes([C\|T]) -->`
			`[C],`
			`escaped_uri_codes(T).`


			`% lang_string()`
			`%`
			`% Process a language string`

			`lang_string(String) -->`
			`"\"",`
			`string(Codes),`
			`"\"", !,`
			`{ atom_codes(Atom, Codes)`
			`},`
			`( langsep`
			`-> language(Lang),`
			`{ String = literal(lang(Lang, Atom))`
			`}`
			`; "^^"`
			`-> uniref(Type),`
			`{ String = literal(type(Type, Atom))`
			`}`
			`; { String = literal(Atom)`
			`}`
			`).`

			`langsep -->`
			`"-".`
			`langsep -->`
			`"@".`

			`% xml_string(String)`
			`%`
			`% Handle xml"..."`

			`xml_string(xml(String)) -->`
			`"xml\"", % really no whitespace?`
			`string(Codes),`
			`"\"",`
			`{ atom_codes(String, Codes)`
			`}.`

			`string([]) -->`
			`[].`
			`string([C0\|T]) -->`
			`string_char(C0),`
			`string(T).`

			`string_char(0'\\) -->`
			`"\\\\".`
			`string_char(0'") -->`
			`"\\\"".`
			`string_char(10) -->`
			`"\\n".`
			`string_char(13) -->`
			`"\\r".`
			`string_char(9) -->`
			`"\\t".`
			`string_char(C) -->`
			`"\\u",`
			`'4xdigits'(C).`
			`string_char(C) -->`
			`"\\u",`
			`'4xdigits'(C0),`
			`'4xdigits'(C1),`
			`{ C is C0<<16 + C1`
			`}.`
			`string_char(C) -->`
			`[C].`

			`'4xdigits'(C) -->`
			`[C0,C1,C2,C3],`
			`{ code_type(C0, xdigit(V0)),`
			`code_type(C1, xdigit(V1)),`
			`code_type(C2, xdigit(V2)),`
			`code_type(C3, xdigit(V3)),`

			`C is V0<<12 + V1<<8 + V2<<4 + V3`
			`}.`

			`% language(-Lang)`
			`%`
			`% Return xml:lang language identifier.`

			`language(Lang) -->`
			`lang_code(C0),`
			`lang_codes(Codes),`
			`{ atom_codes(Lang, [C0\|Codes])`
			`}.`

			`lang_code(C) -->`
			`[C],`
			`{ C \== 0'.,`
			`\+ code_type(C, white)`
			`}.`

			`lang_codes([C\|T]) -->`
			`lang_code(C), !,`
			`lang_codes(T).`
			`lang_codes([]) -->`
			`[].`


			`/*******************************`
			`* BASICS *`
			`*******************************/`

			`skip_rest(_,[]).`

			`ws -->`
			`[C],`
			`{ code_type(C, white)`
			`}.`

			`end_of_input([], []).`


			`wss -->`
			`ws, !,`
			`wss.`
			`wss -->`
			`[].`