462 lines
11 KiB
Perl
462 lines
11 KiB
Perl
|
/* $Id$
|
||
|
|
||
|
Part of SWI-Prolog RDF parser
|
||
|
|
||
|
Author: Jan Wielemaker
|
||
|
E-mail: jan@swi.psy.uva.nl
|
||
|
WWW: http://www.swi.psy.uva.nl/projects/SWI-Prolog/
|
||
|
Copying: LGPL-2. See the file COPYING or http://www.gnu.org
|
||
|
|
||
|
Copyright (C) 1990-2000 SWI, University of Amsterdam. All rights reserved.
|
||
|
*/
|
||
|
|
||
|
:- module(rdf_triple,
|
||
|
[ rdf_triples/2, % +Parsed, -Tripples
|
||
|
rdf_triples/3, % +Parsed, -Tripples, +Tail
|
||
|
rdf_reset_ids/0, % Reset gensym id's
|
||
|
rdf_start_file/2, % +Options, -Cleanup
|
||
|
rdf_end_file/1, % +Cleanup
|
||
|
anon_prefix/1 % Prefix for anonynmous resources
|
||
|
]).
|
||
|
:- use_module(library(gensym)).
|
||
|
:- use_module(rdf_parser).
|
||
|
|
||
|
/** <module> Create triples from intermediate representation
|
||
|
|
||
|
Convert the output of xml_to_rdf/3 from library(rdf) into a list of
|
||
|
triples of the format described below. The intermediate representation
|
||
|
should be regarded a proprietary representation.
|
||
|
|
||
|
rdf(Subject, Predicate, Object).
|
||
|
|
||
|
Where `Subject' is
|
||
|
|
||
|
* Atom
|
||
|
The subject is a resource
|
||
|
|
||
|
* each(URI)
|
||
|
URI is the URI of an RDF Bag
|
||
|
|
||
|
* prefix(Pattern)
|
||
|
Pattern is the prefix of a fully qualified Subject URI
|
||
|
|
||
|
And `Predicate' is
|
||
|
|
||
|
* Atom
|
||
|
The predicate is always a resource
|
||
|
|
||
|
And `Object' is
|
||
|
|
||
|
* Atom
|
||
|
URI of Object resource
|
||
|
|
||
|
* literal(Value)
|
||
|
Literal value (Either a single atom or parsed XML data)
|
||
|
*/
|
||
|
|
||
|
%% rdf_triples(+Term, -Triples) is det.
|
||
|
%% rdf_triples(+Term, -Tridpples, +Tail) is det.
|
||
|
%
|
||
|
% Convert an object as parsed by rdf.pl into a list of rdf/3
|
||
|
% triples. The identifier of the main object created is returned
|
||
|
% by rdf_triples/3.
|
||
|
%
|
||
|
% Input is the `content' of the RDF element in the format as
|
||
|
% generated by load_structure(File, Term, [dialect(xmlns)]).
|
||
|
% rdf_triples/3 can process both individual descriptions as
|
||
|
% well as the entire content-list of an RDF element. The first
|
||
|
% mode is suitable when using library(sgml) in `call-back' mode.
|
||
|
|
||
|
rdf_triples(RDF, Tripples) :-
|
||
|
rdf_triples(RDF, Tripples, []).
|
||
|
|
||
|
rdf_triples([]) --> !,
|
||
|
[].
|
||
|
rdf_triples([H|T]) --> !,
|
||
|
rdf_triples(H),
|
||
|
rdf_triples(T).
|
||
|
rdf_triples(Term) -->
|
||
|
triples(Term, _).
|
||
|
|
||
|
%% triples(-Triples, -Id, +In, -Tail)
|
||
|
%
|
||
|
% DGC set processing the output of xml_to_rdf/3. In Id, the identifier
|
||
|
% of the main description or container is returned.
|
||
|
|
||
|
triples(container(Type, Id, Elements), Id) --> !,
|
||
|
{ container_id(Type, Id)
|
||
|
},
|
||
|
rdf(Id, rdf:type, rdf:Type),
|
||
|
container(Elements, 1, Id).
|
||
|
triples(description(Type, About, BagId, Props), Subject) -->
|
||
|
{ var(About),
|
||
|
var(BagId),
|
||
|
share_blank_nodes(true)
|
||
|
}, !,
|
||
|
( { shared_description(description(Type, Props), Subject)
|
||
|
}
|
||
|
-> []
|
||
|
; { make_id('__Description', Id)
|
||
|
},
|
||
|
triples(description(Type, about(Id), BagId, Props), Subject),
|
||
|
{ assert_shared_description(description(Type, Props), Subject)
|
||
|
}
|
||
|
).
|
||
|
triples(description(description, IdAbout, BagId, Props), Subject) --> !,
|
||
|
{ description_id(IdAbout, Subject)
|
||
|
},
|
||
|
properties(Props, BagId, Subject).
|
||
|
triples(description(Type, IdAbout, BagId, Props), Subject) -->
|
||
|
{ description_id(IdAbout, Subject),
|
||
|
name_to_type_uri(Type, TypeURI)
|
||
|
},
|
||
|
properties([ rdf:type = TypeURI
|
||
|
| Props
|
||
|
], BagId, Subject).
|
||
|
triples(unparsed(Data), Id) -->
|
||
|
{ make_id('__Error', Id),
|
||
|
print_message(error, rdf(unparsed(Data)))
|
||
|
},
|
||
|
[].
|
||
|
|
||
|
|
||
|
name_to_type_uri(NS:Local, URI) :- !,
|
||
|
atom_concat(NS, Local, URI).
|
||
|
name_to_type_uri(URI, URI).
|
||
|
|
||
|
/*******************************
|
||
|
* CONTAINERS *
|
||
|
*******************************/
|
||
|
|
||
|
container([], _, _) -->
|
||
|
[].
|
||
|
container([H0|T0], N, Id) -->
|
||
|
li(H0, N, Id),
|
||
|
{ NN is N + 1
|
||
|
},
|
||
|
container(T0, NN, Id).
|
||
|
|
||
|
li(li(Nid, V), _, Id) --> !,
|
||
|
rdf(Id, rdf:Nid, V).
|
||
|
li(V, N, Id) -->
|
||
|
triples(V, VId), !,
|
||
|
{ atom_concat('_', N, Nid)
|
||
|
},
|
||
|
rdf(Id, rdf:Nid, VId).
|
||
|
li(V, N, Id) -->
|
||
|
{ atom_concat('_', N, Nid)
|
||
|
},
|
||
|
rdf(Id, rdf:Nid, V).
|
||
|
|
||
|
container_id(_, Id) :-
|
||
|
nonvar(Id), !.
|
||
|
container_id(Type, Id) :-
|
||
|
container_base(Type, Base),
|
||
|
make_id(Base, Id).
|
||
|
|
||
|
container_base('Bag', '__Bag').
|
||
|
container_base('Seq', '__Seq').
|
||
|
container_base('Alt', '__Alt').
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* DESCRIPTIONS *
|
||
|
*******************************/
|
||
|
|
||
|
:- thread_local
|
||
|
node_id/2, % nodeID --> ID
|
||
|
unique_id/1. % known rdf:ID
|
||
|
|
||
|
rdf_reset_node_ids :-
|
||
|
retractall(node_id(_,_)),
|
||
|
retractall(unique_id(_)).
|
||
|
|
||
|
description_id(Id, Id) :-
|
||
|
var(Id), !,
|
||
|
make_id('__Description', Id).
|
||
|
description_id(about(Id), Id).
|
||
|
description_id(id(Id), Id) :-
|
||
|
( unique_id(Id)
|
||
|
-> print_message(error, rdf(redefined_id(Id)))
|
||
|
; assert(unique_id(Id))
|
||
|
).
|
||
|
description_id(each(Id), each(Id)).
|
||
|
description_id(prefix(Id), prefix(Id)).
|
||
|
description_id(node(NodeID), Id) :-
|
||
|
( node_id(NodeID, Id)
|
||
|
-> true
|
||
|
; make_id('__Node', Id),
|
||
|
assert(node_id(NodeID, Id))
|
||
|
).
|
||
|
|
||
|
properties(PlRDF, BagId, Subject) -->
|
||
|
{ nonvar(BagId)
|
||
|
}, !,
|
||
|
rdf(BagId, rdf:type, rdf:'Bag'),
|
||
|
properties(PlRDF, 1, Statements, [], Subject),
|
||
|
fill_bag(Statements, 1, BagId).
|
||
|
properties(PlRDF, _BagId, Subject) -->
|
||
|
properties(PlRDF, 1, [], [], Subject).
|
||
|
|
||
|
|
||
|
fill_bag([], _, _) -->
|
||
|
[].
|
||
|
fill_bag([H|T], N, BagId) -->
|
||
|
{ NN is N + 1,
|
||
|
atom_concat('_', N, ElemId)
|
||
|
},
|
||
|
rdf(BagId, rdf:ElemId, H),
|
||
|
fill_bag(T, NN, BagId).
|
||
|
|
||
|
|
||
|
properties([], _, Bag, Bag, _) -->
|
||
|
[].
|
||
|
properties([H0|T0], N, Bag0, Bag, Subject) -->
|
||
|
property(H0, N, NN, Bag0, Bag1, Subject),
|
||
|
properties(T0, NN, Bag1, Bag, Subject).
|
||
|
|
||
|
%% property(Property, N, NN, Subject)// is det.
|
||
|
%
|
||
|
% Generate triples for {Subject, Pred, Object}. Also generates
|
||
|
% triples for Object if necessary.
|
||
|
%
|
||
|
% @param Property One of
|
||
|
%
|
||
|
% * Pred = Object
|
||
|
% Used for normal statements
|
||
|
% * id(Id, Pred = Object)
|
||
|
% Used for reified statements
|
||
|
|
||
|
property(Pred0 = Object, N, NN, BagH, BagT, Subject) --> % inlined object
|
||
|
triples(Object, Id), !,
|
||
|
{ li_pred(Pred0, Pred, N, NN)
|
||
|
},
|
||
|
statement(Subject, Pred, Id, _, BagH, BagT).
|
||
|
property(Pred0 = collection(Elems), N, NN, BagH, BagT, Subject) --> !,
|
||
|
{ li_pred(Pred0, Pred, N, NN)
|
||
|
},
|
||
|
statement(Subject, Pred, Object, _Id, BagH, BagT),
|
||
|
collection(Elems, Object).
|
||
|
property(Pred0 = Object, N, NN, BagH, BagT, Subject) --> !,
|
||
|
{ li_pred(Pred0, Pred, N, NN)
|
||
|
},
|
||
|
statement(Subject, Pred, Object, _Id, BagH, BagT).
|
||
|
property(id(Id, Pred0 = Object), N, NN, BagH, BagT, Subject) -->
|
||
|
triples(Object, ObjectId), !,
|
||
|
{ li_pred(Pred0, Pred, N, NN)
|
||
|
},
|
||
|
statement(Subject, Pred, ObjectId, Id, BagH, BagT).
|
||
|
property(id(Id, Pred0 = collection(Elems)), N, NN, BagH, BagT, Subject) --> !,
|
||
|
{ li_pred(Pred0, Pred, N, NN)
|
||
|
},
|
||
|
statement(Subject, Pred, Object, Id, BagH, BagT),
|
||
|
collection(Elems, Object).
|
||
|
property(id(Id, Pred0 = Object), N, NN, BagH, BagT, Subject) -->
|
||
|
{ li_pred(Pred0, Pred, N, NN)
|
||
|
},
|
||
|
statement(Subject, Pred, Object, Id, BagH, BagT).
|
||
|
|
||
|
%% statement(+Subject, +Pred, +Object, +Id, +BagH, -BagT)
|
||
|
%
|
||
|
% Add a statement to the model. If nonvar(Id), we reinify the
|
||
|
% statement using the given Id.
|
||
|
|
||
|
statement(Subject, Pred, Object, Id, BagH, BagT) -->
|
||
|
rdf(Subject, Pred, Object),
|
||
|
{ BagH = [Id|BagT]
|
||
|
-> statement_id(Id)
|
||
|
; BagT = BagH
|
||
|
},
|
||
|
( { nonvar(Id)
|
||
|
}
|
||
|
-> rdf(Id, rdf:type, rdf:'Statement'),
|
||
|
rdf(Id, rdf:subject, Subject),
|
||
|
rdf(Id, rdf:predicate, Pred),
|
||
|
rdf(Id, rdf:object, Object)
|
||
|
; []
|
||
|
).
|
||
|
|
||
|
|
||
|
statement_id(Id) :-
|
||
|
nonvar(Id), !.
|
||
|
statement_id(Id) :-
|
||
|
make_id('__Statement', Id).
|
||
|
|
||
|
%% li_pred(+Pred, -Pred, +Nth, -NextNth)
|
||
|
%
|
||
|
% Transform rdf:li predicates into _1, _2, etc.
|
||
|
|
||
|
li_pred(rdf:li, rdf:Pred, N, NN) :- !,
|
||
|
NN is N + 1,
|
||
|
atom_concat('_', N, Pred).
|
||
|
li_pred(Pred, Pred, N, N).
|
||
|
|
||
|
%% collection(+Elems, -Id)
|
||
|
%
|
||
|
% Handle the elements of a collection and return the identifier
|
||
|
% for the whole collection in Id.
|
||
|
|
||
|
collection([], Nil) -->
|
||
|
{ global_ref(rdf:nil, Nil)
|
||
|
}.
|
||
|
collection([H|T], Id) -->
|
||
|
triples(H, HId),
|
||
|
{ make_id('__List', Id)
|
||
|
},
|
||
|
rdf(Id, rdf:type, rdf:'List'),
|
||
|
rdf(Id, rdf:first, HId),
|
||
|
rdf(Id, rdf:rest, TId),
|
||
|
collection(T, TId).
|
||
|
|
||
|
|
||
|
rdf(S0, P0, O0) -->
|
||
|
{ global_ref(S0, S),
|
||
|
global_ref(P0, P),
|
||
|
global_obj(O0, O)
|
||
|
},
|
||
|
[ rdf(S, P, O) ].
|
||
|
|
||
|
|
||
|
global_ref(URI, URI) :-
|
||
|
var(URI), !.
|
||
|
global_ref(rdf:Local, Global) :-
|
||
|
rdf_name_space(NS), !,
|
||
|
atom_concat(NS, Local, Global).
|
||
|
global_ref(NS:Local, Global) :- !,
|
||
|
atom_concat(NS, Local, Global).
|
||
|
global_ref(URI, URI).
|
||
|
|
||
|
global_obj(V, V) :-
|
||
|
var(V), !.
|
||
|
global_obj(literal(type(Local, X)), literal(type(Global, X))) :- !,
|
||
|
global_ref(Local, Global).
|
||
|
global_obj(literal(X), literal(X)) :- !.
|
||
|
global_obj(Local, Global) :-
|
||
|
global_ref(Local, Global).
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* SHARING *
|
||
|
*******************************/
|
||
|
|
||
|
:- thread_local
|
||
|
shared_description/3, % +Hash, +Term, -Subject
|
||
|
share_blank_nodes/1, % Boolean
|
||
|
shared_nodes/1. % counter
|
||
|
|
||
|
reset_shared_descriptions :-
|
||
|
retractall(shared_description(_,_,_)),
|
||
|
retractall(shared_nodes(_)).
|
||
|
|
||
|
shared_description(Term, Subject) :-
|
||
|
term_hash(Term, Hash),
|
||
|
shared_description(Hash, Term, Subject),
|
||
|
( retract(shared_nodes(N))
|
||
|
-> N1 is N + 1
|
||
|
; N1 = 1
|
||
|
),
|
||
|
assert(shared_nodes(N1)).
|
||
|
|
||
|
|
||
|
assert_shared_description(Term, Subject) :-
|
||
|
term_hash(Term, Hash),
|
||
|
assert(shared_description(Hash, Term, Subject)).
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* START/END *
|
||
|
*******************************/
|
||
|
|
||
|
%% rdf_start_file(+Options, -Cleanup) is det.
|
||
|
%
|
||
|
% Initialise for the translation of a file.
|
||
|
|
||
|
rdf_start_file(Options, Cleanup) :-
|
||
|
rdf_reset_node_ids, % play safe
|
||
|
reset_shared_descriptions,
|
||
|
set_bnode_sharing(Options, C1),
|
||
|
set_anon_prefix(Options, C2),
|
||
|
add_cleanup(C1, C2, Cleanup).
|
||
|
|
||
|
%% rdf_end_file(:Cleanup) is det.
|
||
|
%
|
||
|
% Cleanup reaching the end of an RDF file.
|
||
|
|
||
|
rdf_end_file(Cleanup) :-
|
||
|
rdf_reset_node_ids,
|
||
|
( shared_nodes(N)
|
||
|
-> print_message(informational, rdf(shared_blank_nodes(N)))
|
||
|
; true
|
||
|
),
|
||
|
reset_shared_descriptions,
|
||
|
Cleanup.
|
||
|
|
||
|
set_bnode_sharing(Options, erase(Ref)) :-
|
||
|
option(blank_nodes(Share), Options, noshare),
|
||
|
( Share == share
|
||
|
-> assert(share_blank_nodes(true), Ref), !
|
||
|
; Share == noshare
|
||
|
-> fail % next clause
|
||
|
; throw(error(domain_error(share, Share), _))
|
||
|
).
|
||
|
set_bnode_sharing(_, true).
|
||
|
|
||
|
set_anon_prefix(Options, erase(Ref)) :-
|
||
|
option(base_uri(BaseURI), Options, []),
|
||
|
BaseURI \== [], !,
|
||
|
concat_atom(['__', BaseURI, '#'], AnonBase),
|
||
|
asserta(anon_prefix(AnonBase), Ref).
|
||
|
set_anon_prefix(_, true).
|
||
|
|
||
|
add_cleanup(true, X, X) :- !.
|
||
|
add_cleanup(X, true, X) :- !.
|
||
|
add_cleanup(X, Y, (X, Y)).
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* UTIL *
|
||
|
*******************************/
|
||
|
|
||
|
%% anon_prefix(-Prefix) is semidet.
|
||
|
%
|
||
|
% If defined, it is the prefix used to generate a blank node.
|
||
|
|
||
|
:- thread_local
|
||
|
anon_prefix/1.
|
||
|
|
||
|
make_id(For, ID) :-
|
||
|
anon_prefix(Prefix), !,
|
||
|
atom_concat(Prefix, For, Base),
|
||
|
gensym(Base, ID).
|
||
|
make_id(For, ID) :-
|
||
|
gensym(For, ID).
|
||
|
|
||
|
anon_base('__Bag').
|
||
|
anon_base('__Seq').
|
||
|
anon_base('__Alt').
|
||
|
anon_base('__Description').
|
||
|
anon_base('__Statement').
|
||
|
anon_base('__List').
|
||
|
anon_base('__Node').
|
||
|
|
||
|
%% rdf_reset_ids is det.
|
||
|
%
|
||
|
% Utility predicate to reset the gensym counters for the various
|
||
|
% generated identifiers. This simplifies debugging and matching
|
||
|
% output with the stored desired output (see rdf_test.pl).
|
||
|
|
||
|
rdf_reset_ids :-
|
||
|
anon_prefix(Prefix), !,
|
||
|
( anon_base(Base),
|
||
|
atom_concat(Prefix, Base, X),
|
||
|
reset_gensym(X),
|
||
|
fail
|
||
|
; true
|
||
|
).
|
||
|
rdf_reset_ids :-
|
||
|
( anon_base(Base),
|
||
|
reset_gensym(Base),
|
||
|
fail
|
||
|
; true
|
||
|
).
|