This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/packages/semweb/rdf_db.c

6709 lines
151 KiB
C

/* $Id$
Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: J.Wielemaker@uva.nl
WWW: http://www.swi-prolog.org
Copyright (C): 1985-2009, University of Amsterdam
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#define WITH_MD5 1
#define WITH_PL_MUTEX 1
#define _GNU_SOURCE 1 /* get rwlocks from glibc */
#ifdef _REENTRANT
#ifdef __WINDOWS__
#include <malloc.h> /* alloca() */
#define inline __inline
#ifndef SIZEOF_LONG
#define SIZEOF_LONG 4
#endif
#else
#if (!defined(__GNUC__) || defined(__hpux)) && defined(HAVE_ALLOCA_H)
#include <alloca.h>
#endif
#include <errno.h>
#endif
#endif
#include <SWI-Stream.h>
#include <SWI-Prolog.h>
#include "rdf_db.h"
#include <assert.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include <ctype.h>
#include "avl.h"
#ifdef WITH_MD5
#include "md5.h"
#include "atom.h"
#include "debug.h"
#include "hash.h"
#include "murmur.h"
#undef UNLOCK
static void md5_triple(triple *t, md5_byte_t *digest);
static void sum_digest(md5_byte_t *digest, md5_byte_t *add);
static void dec_digest(md5_byte_t *digest, md5_byte_t *add);
#endif
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
The ids form a mask. This must be kept consistent with monitor_mask/2 in
rdf_db.pl!
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
typedef enum
{ EV_ASSERT = 0x0001, /* triple */
EV_ASSERT_LOAD = 0x0002, /* triple */
EV_RETRACT = 0x0004, /* triple */
EV_UPDATE = 0x0008, /* old, new */
EV_NEW_LITERAL = 0x0010, /* literal */
EV_OLD_LITERAL = 0x0020, /* literal */
EV_TRANSACTION = 0x0040, /* id, begin/end */
EV_LOAD = 0x0080, /* id, begin/end */
EV_REHASH = 0x0100 /* begin/end */
} broadcast_id;
static int broadcast(broadcast_id id, void *a1, void *a2);
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
We now use malloc/free/realloc calls with explicit sizes to allow
maintaining statistics as well as to prepare for dealing with special
memory pools associated with databases. Using -DDIRECT_MALLOC the
library uses plain malloc to facilitate malloc debuggers.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#ifdef DIRECT_MALLOC
#define rdf_malloc(db, size) malloc(size)
#define rdf_free(db, ptr, size) free(ptr)
#define rdf_realloc(db, ptr, old, new) realloc(ptr, new)
#else /*DIRECT_MALLOC*/
#if CHECK_MALLOC_SIZES
static void *
rdf_malloc(rdf_db *db, size_t size)
{ size_t bytes = size + sizeof(size_t);
size_t *ptr = PL_malloc(bytes);
*ptr++ = size;
if ( db )
db->core += size;
return ptr;
}
static void
rdf_free(rdf_db *db, void *ptr, size_t size)
{ size_t *p = ptr;
assert(p[-1] == size);
db->core -= size;
PL_free(&p[-1]);
}
static void *
rdf_realloc(rdf_db *db, void *ptr, size_t old, size_t new)
{ size_t *p = ptr;
size_t bytes = new + sizeof(size_t);
assert(p[-1] == old);
p = PL_realloc(&p[-1], bytes);
*p++ = new;
db->core< += new-old;
return p;
}
#else /*CHECK_MALLOC_SIZES*/
static void *
rdf_malloc(rdf_db *db, size_t size)
{ if ( db )
db->core += size;
return PL_malloc(size);
}
static void
rdf_free(rdf_db *db, void *ptr, size_t size)
{ db->core -= size;
PL_free(ptr);
}
static void *
rdf_realloc(rdf_db *db, void *ptr, size_t old, size_t new)
{ db->core += new-old;
return PL_realloc(ptr, new);
}
#endif /*CHECK_MALLOC_SIZES*/
#endif /*DIRECT_MALLOC*/
static functor_t FUNCTOR_literal1;
static functor_t FUNCTOR_literal2;
static functor_t FUNCTOR_error2;
static functor_t FUNCTOR_type_error2;
static functor_t FUNCTOR_domain_error2;
static functor_t FUNCTOR_colon2;
static functor_t FUNCTOR_triples1;
static functor_t FUNCTOR_triples2;
static functor_t FUNCTOR_subjects1;
static functor_t FUNCTOR_predicates1;
static functor_t FUNCTOR_duplicates1;
static functor_t FUNCTOR_literals1;
static functor_t FUNCTOR_subject1;
static functor_t FUNCTOR_predicate1;
static functor_t FUNCTOR_object1;
static functor_t FUNCTOR_graph1;
static functor_t FUNCTOR_indexed8;
static functor_t FUNCTOR_exact1;
static functor_t FUNCTOR_plain1;
static functor_t FUNCTOR_substring1;
static functor_t FUNCTOR_word1;
static functor_t FUNCTOR_prefix1;
static functor_t FUNCTOR_like1;
static functor_t FUNCTOR_symmetric1;
static functor_t FUNCTOR_inverse_of1;
static functor_t FUNCTOR_transitive1;
static functor_t FUNCTOR_rdf_subject_branch_factor1; /* S --> BF*O */
static functor_t FUNCTOR_rdf_object_branch_factor1; /* O --> BF*S */
static functor_t FUNCTOR_rdfs_subject_branch_factor1; /* S --> BF*O */
static functor_t FUNCTOR_rdfs_object_branch_factor1; /* O --> BF*S */
static functor_t FUNCTOR_searched_nodes1;
static functor_t FUNCTOR_lang2;
static functor_t FUNCTOR_type2;
static functor_t FUNCTOR_gc2;
static functor_t FUNCTOR_rehash2;
static functor_t FUNCTOR_core1;
static functor_t FUNCTOR_assert4;
static functor_t FUNCTOR_retract4;
static functor_t FUNCTOR_update5;
static functor_t FUNCTOR_new_literal1;
static functor_t FUNCTOR_old_literal1;
static functor_t FUNCTOR_transaction2;
static functor_t FUNCTOR_load2;
static functor_t FUNCTOR_rehash1;
static functor_t FUNCTOR_begin1;
static functor_t FUNCTOR_end1;
static atom_t ATOM_user;
static atom_t ATOM_exact;
static atom_t ATOM_plain;
static atom_t ATOM_prefix;
static atom_t ATOM_substring;
static atom_t ATOM_word;
static atom_t ATOM_like;
static atom_t ATOM_error;
static atom_t ATOM_begin;
static atom_t ATOM_end;
static atom_t ATOM_infinite;
static atom_t ATOM_subPropertyOf;
static predicate_t PRED_call1;
#define MATCH_EXACT 0x01 /* exact triple match */
#define MATCH_SUBPROPERTY 0x02 /* Use subPropertyOf relations */
#define MATCH_SRC 0x04 /* Match graph location */
#define MATCH_INVERSE 0x08 /* use symmetric match too */
#define MATCH_QUAL 0x10 /* Match qualifiers too */
#define MATCH_DUPLICATE (MATCH_EXACT|MATCH_QUAL)
static int WANT_GC(rdf_db *db);
static int match_triples(triple *t, triple *p, unsigned flags);
static int update_duplicates_add(rdf_db *db, triple *t);
static void update_duplicates_del(rdf_db *db, triple *t);
static void unlock_atoms(triple *t);
static void lock_atoms(triple *t);
static void unlock_atoms_literal(literal *lit);
static int update_hash(rdf_db *db);
static int triple_hash(rdf_db *db, triple *t, int which);
static unsigned long object_hash(triple *t);
static void reset_db(rdf_db *db);
static void record_transaction(rdf_db *db,
tr_type type, triple *t);
static void record_md5_transaction(rdf_db *db,
graph *src, md5_byte_t *digest);
static void create_reachability_matrix(rdf_db *db, predicate_cloud *cloud);
static int get_predicate(rdf_db *db, term_t t, predicate **p);
static predicate_cloud *new_predicate_cloud(rdf_db *db, predicate **p, size_t count);
static int unify_literal(term_t lit, literal *l);
static int check_predicate_cloud(predicate_cloud *c);
/*******************************
* LOCKING *
*******************************/
#define RDLOCK(db) rdlock(&db->lock)
#define WRLOCK(db, allowreaders) wrlock(&db->lock, allowreaders)
#define LOCKOUT_READERS(db) lockout_readers(&db->lock)
#define REALLOW_READERS(db) reallow_readers(&db->lock)
#define WRUNLOCK(db) unlock(&db->lock, FALSE)
#define RDUNLOCK(db) unlock(&db->lock, TRUE)
#define LOCK_MISC(db) lock_misc(&db->lock)
#define UNLOCK_MISC(db) unlock_misc(&db->lock)
#define INIT_LOCK(db) init_lock(&db->lock)
/*******************************
* ERRORS *
*******************************/
static int
instantiation_error(term_t actual)
{ term_t ex;
if ( (ex = PL_new_term_ref()) &&
PL_unify_term(ex,
PL_FUNCTOR, FUNCTOR_error2,
PL_CHARS, "instantiation_error",
PL_VARIABLE) )
return PL_raise_exception(ex);
return FALSE;
}
static int
type_error(term_t actual, const char *expected)
{ term_t ex;
if ( (ex = PL_new_term_ref()) &&
PL_unify_term(ex,
PL_FUNCTOR, FUNCTOR_error2,
PL_FUNCTOR, FUNCTOR_type_error2,
PL_CHARS, expected,
PL_TERM, actual,
PL_VARIABLE) )
return PL_raise_exception(ex);
return FALSE;
}
static int
domain_error(term_t actual, const char *expected)
{ term_t ex;
if ( (ex = PL_new_term_ref()) &&
PL_unify_term(ex,
PL_FUNCTOR, FUNCTOR_error2,
PL_FUNCTOR, FUNCTOR_domain_error2,
PL_CHARS, expected,
PL_TERM, actual,
PL_VARIABLE) )
return PL_raise_exception(ex);
return FALSE;
}
static int
permission_error(const char *op, const char *type, const char *obj,
const char *msg)
{ term_t ex, ctx;
if ( !(ex = PL_new_term_ref()) ||
!(ctx = PL_new_term_ref()) )
return FALSE;
if ( msg )
{ if ( !PL_unify_term(ctx, PL_FUNCTOR_CHARS, "context", 2,
PL_VARIABLE,
PL_CHARS, msg) )
return FALSE;
}
if ( !PL_unify_term(ex, PL_FUNCTOR_CHARS, "error", 2,
PL_FUNCTOR_CHARS, "permission_error", 3,
PL_CHARS, op,
PL_CHARS, type,
PL_CHARS, obj,
PL_TERM, ctx) )
return FALSE;
return PL_raise_exception(ex);
}
static int
get_atom_ex(term_t t, atom_t *a)
{ if ( PL_get_atom(t, a) )
return TRUE;
return type_error(t, "atom");
}
static int
get_long_ex(term_t t, long *v)
{ if ( PL_get_long(t, v) )
return TRUE;
return type_error(t, "integer");
}
static int
get_double_ex(term_t t, double *v)
{ if ( PL_get_float(t, v) )
return TRUE;
return type_error(t, "float");
}
static int
get_atom_or_var_ex(term_t t, atom_t *a)
{ if ( PL_get_atom(t, a) )
return TRUE;
if ( PL_is_variable(t) )
{ *a = 0L;
return TRUE;
}
return type_error(t, "atom");
}
static int
get_resource_or_var_ex(term_t t, atom_t *a)
{ if ( PL_get_atom(t, a) )
return TRUE;
if ( PL_is_variable(t) )
{ *a = 0L;
return TRUE;
}
if ( PL_is_functor(t, FUNCTOR_literal1) )
return FALSE; /* fail on rdf(literal(_), ...) */
return type_error(t, "atom");
}
static int
get_bool_arg_ex(int a, term_t t, int *val)
{ term_t arg = PL_new_term_ref();
if ( !PL_get_arg(a, t, arg) )
return type_error(t, "compound");
if ( !PL_get_bool(arg, val) )
return type_error(arg, "bool");
return TRUE;
}
/*******************************
* DEBUG SUPPORT *
*******************************/
#ifdef O_DEBUG
#define PRT_SRC 0x1
static void
print_literal(literal *lit)
{ switch(lit->objtype)
{ case OBJ_STRING:
switch(lit->qualifier)
{ case Q_TYPE:
Sdprintf("%s^^\"%s\"",
PL_atom_chars(lit->value.string),
PL_atom_chars(lit->type_or_lang));
break;
case Q_LANG:
Sdprintf("%s@\"%s\"",
PL_atom_chars(lit->value.string),
PL_atom_chars(lit->type_or_lang));
break;
default:
{ size_t len;
const char *s;
const wchar_t *w;
if ( (s = PL_atom_nchars(lit->value.string, &len)) )
{ if ( strlen(s) == len )
Sdprintf("\"%s\"", s);
else
Sdprintf("\"%s\" (len=%d)", s, len);
} else if ( (w = PL_atom_wchars(lit->value.string, &len)) )
{ unsigned int i;
Sputc('L', Serror);
Sputc('"', Serror);
for(i=0; i<len; i++)
{ if ( w[i] < 0x7f )
Sputc(w[i], Serror);
else
Sfprintf(Serror, "\\\\u%04x", w[i]);
}
Sputc('"', Serror);
}
break;
}
}
break;
case OBJ_INTEGER:
Sdprintf("%ld", lit->value.integer);
break;
case OBJ_DOUBLE:
Sdprintf("%f", lit->value.real);
break;
case OBJ_TERM:
{ fid_t fid = PL_open_foreign_frame();
term_t term = PL_new_term_ref();
PL_recorded_external(lit->value.term.record, term);
PL_write_term(Serror, term, 1200,
PL_WRT_QUOTED|PL_WRT_NUMBERVARS|PL_WRT_PORTRAY);
PL_discard_foreign_frame(fid);
break;
}
default:
assert(0);
}
}
static void
print_object(triple *t)
{ if ( t->object_is_literal )
{ print_literal(t->object.literal);
} else
{ Sdprintf("%s", PL_atom_chars(t->object.resource));
}
}
static void
print_src(triple *t)
{ if ( t->line == NO_LINE )
Sdprintf(" [%s]", PL_atom_chars(t->graph));
else
Sdprintf(" [%s:%ld]", PL_atom_chars(t->graph), t->line);
}
static void
print_triple(triple *t, int flags)
{ Sdprintf("<%s %s ",
PL_atom_chars(t->subject),
PL_atom_chars(t->predicate.r->name));
print_object(t);
if ( (flags & PRT_SRC) )
print_src(t);
Sdprintf(">");
}
#endif
/*******************************
* STORAGE *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Our one and only database (for the time being).
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static rdf_db *DB;
/*******************************
* LISTS *
*******************************/
static int
add_list(rdf_db *db, list *list, void *value)
{ cell *c;
for(c=list->head; c; c=c->next)
{ if ( c->value == value )
return FALSE; /* already a member */
}
c = rdf_malloc(db, sizeof(*c));
c->value = value;
c->next = NULL;
if ( list->tail )
list->tail->next = c;
else
list->head = c;
list->tail = c;
return TRUE;
}
static int
del_list(rdf_db *db, list *list, void *value)
{ cell *c, *p = NULL;
for(c=list->head; c; p=c, c=c->next)
{ if ( c->value == value )
{ if ( p )
p->next = c->next;
else
list->head = c->next;
if ( !c->next )
list->tail = p;
rdf_free(db, c, sizeof(*c));
return TRUE;
}
}
return FALSE; /* not a member */
}
static void
free_list(rdf_db *db, list *list)
{ cell *c, *n;
for(c=list->head; c; c=n)
{ n = c->next;
rdf_free(db, c, sizeof(*c));
}
list->head = list->tail = NULL;
}
/*******************************
* ATOM SETS *
*******************************/
#define CHUNKSIZE 1024
typedef struct mchunk
{ struct mchunk *next;
size_t used;
char buf[CHUNKSIZE];
} mchunk;
typedef struct
{ avl_tree tree;
mchunk *node_store;
mchunk store0;
} atomset;
static void *
alloc_node_atomset(void *ptr, size_t size)
{ void *p;
atomset *as = ptr;
assert(size < CHUNKSIZE);
if ( as->node_store->used + size > CHUNKSIZE )
{ mchunk *ch = malloc(sizeof(mchunk));
ch->used = 0;
ch->next = as->node_store;
as->node_store = ch;
}
p = &as->node_store->buf[as->node_store->used];
as->node_store->used += size;
return p;
}
static void
free_node_atomset(void *ptr, void *data, size_t size)
{ assert(0);
}
static int
cmp_long_ptr(void *p1, void *p2, NODE type)
{ long *l1 = p1;
long *l2 = p2;
return *l1 < *l2 ? -1 : *l1 > *l2 ? 1 : 0;
}
static void
init_atomset(atomset *as)
{ avlinit(&as->tree, as, sizeof(atom_t),
cmp_long_ptr,
NULL,
alloc_node_atomset,
free_node_atomset);
as->node_store = &as->store0;
as->node_store->next = NULL;
as->node_store->used = 0;
}
static void
destroy_atomset(atomset *as)
{ mchunk *ch, *next;
for(ch=as->node_store; ch != &as->store0; ch = next)
{ next = ch->next;
free(ch);
}
}
static int
add_atomset(atomset *as, atom_t atom)
{ return avlins(&as->tree, &atom) ? FALSE : TRUE;
}
/*******************************
* PREDICATES *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Predicates are represented as first class citizens for three reasons:
quickly answer on the transitive rdfs:subPropertyOf relation for
rdf_hash/3, keep track of statistics that are useful for query
optimization (#triples, branching factor) and keep properties
(inverse/transitive).
To answer the rdfs:subPropertyOf quickly, predicates are organised in
`clouds', where a cloud defines a set of predicates connected through
rdfs:subPropertyOf triples. The cloud numbers its members and maintains
a bit-matrix that contains the closure of the reachability. Initially a
predicate has a simple cloud of size 1. merge_clouds() and split_cloud()
deals with adding and deleting rdfs:subPropertyOf relations. These
operations try to modify the clouds that have no triples, so it can be
done without a rehash. If this fails, the predicates keep their own hash
to make search without rdfs:subPropertyOf still possible (so we can
avoid frequent updates while loading triples), sets the cloud `dirty'
flag and the DB's need_update flag. Queries that need rdfs:subPropertyOf
find the need_update flag, which calls organise_predicates(), which
cause a rehash if some predicates have changed hash-code to the new
cloud they have become part of.
TBD: We can do a partial re-hash in that case!
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void
init_pred_table(rdf_db *db)
{ int bytes = sizeof(predicate*)*INITIAL_PREDICATE_TABLE_SIZE;
db->pred_table = rdf_malloc(db, bytes);
memset(db->pred_table, 0, bytes);
db->pred_table_size = INITIAL_PREDICATE_TABLE_SIZE;
}
static predicate *
existing_predicate(rdf_db *db, atom_t name)
{ int hash = atom_hash(name) % db->pred_table_size;
predicate *p;
LOCK_MISC(db);
for(p=db->pred_table[hash]; p; p = p->next)
{ if ( p->name == name )
{ UNLOCK_MISC(db);
return p;
}
}
UNLOCK_MISC(db);
return NULL;
}
static predicate *
lookup_predicate(rdf_db *db, atom_t name)
{ int hash = atom_hash(name) % db->pred_table_size;
predicate *p;
predicate_cloud *cp;
LOCK_MISC(db);
for(p=db->pred_table[hash]; p; p = p->next)
{ if ( p->name == name )
{ UNLOCK_MISC(db);
return p;
}
}
p = rdf_malloc(db, sizeof(*p));
memset(p, 0, sizeof(*p));
p->name = name;
cp = new_predicate_cloud(db, &p, 1);
p->hash = cp->hash;
PL_register_atom(name);
p->next = db->pred_table[hash];
db->pred_table[hash] = p;
db->pred_count++;
DEBUG(5, Sdprintf("Pred %s (count = %d)\n",
PL_atom_chars(name), db->pred_count));
UNLOCK_MISC(db);
return p;
}
static const char *
pname(predicate *p)
{ if ( p->name )
return PL_atom_chars(p->name);
else
{ static char *ring[10];
static int ri = 0;
char buf[25];
char *r;
Ssprintf(buf, "__D%p", p);
ring[ri++] = r = strdup(buf);
if ( ri == 10 )
{ ri = 0;
free(ring[ri]);
}
return (const char*)r;
}
}
static int
organise_predicates(rdf_db *db) /* TBD: rename&move */
{ predicate **ht;
int i;
int changed = 0;
DEBUG(2, Sdprintf("rdf_db: fixing predicate clouds\n"));
for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
{ predicate *p;
for( p = *ht; p; p = p->next )
{ predicate_cloud *cloud = p->cloud;
if ( cloud->dirty )
{ predicate **cp;
int i2;
for(i2=0, cp = cloud->members; i2 < cloud->size; i2++, cp++)
{ if ( (*cp)->hash != cloud->hash )
{ (*cp)->hash = cloud->hash;
if ( (*cp)->triple_count > 0 )
changed++;
}
}
cloud->dirty = FALSE;
}
}
}
return changed;
}
/*******************************
* PREDICATE CLOUDS *
*******************************/
static predicate_cloud *
new_predicate_cloud(rdf_db *db, predicate **p, size_t count)
{ predicate_cloud *cloud = rdf_malloc(db, sizeof(*cloud));
memset(cloud, 0, sizeof(*cloud));
cloud->hash = db->next_hash++;
if ( count )
{ int i;
predicate **p2;
cloud->size = count;
cloud->members = rdf_malloc(db, sizeof(predicate*)*count);
memcpy(cloud->members, p, sizeof(predicate*)*count);
for(i=0, p2=cloud->members; i<cloud->size; i++, p2++)
(*p2)->cloud = cloud;
}
create_reachability_matrix(db, cloud);
return cloud;
}
static void
free_predicate_cloud(rdf_db *db, predicate_cloud *cloud)
{ if ( cloud->members )
{ rdf_free(db, cloud->members, sizeof(predicate*)*cloud->size);
}
rdf_free(db, cloud, sizeof(*cloud));
}
static long
triples_in_predicate_cloud(predicate_cloud *cloud)
{ long triples = 0;
predicate **p;
int i;
for(i=0, p=cloud->members; i<cloud->size; i++, p++)
triples += (*p)->triple_count;
return triples;
}
/* Add the predicates of c2 to c1 and destroy c2. Returns c1 */
static predicate_cloud *
append_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2, int update_hash)
{ predicate **p;
int i;
for(i=0, p=c2->members; i<c2->size; i++, p++)
{ (*p)->cloud = c1;
if ( update_hash )
(*p)->hash = c1->hash;
}
if ( c1->size > 0 && c2->size > 0 )
{ c1->members = rdf_realloc(db, c1->members,
c1->size*sizeof(predicate*),
(c1->size+c2->size)*sizeof(predicate*));
memcpy(&c1->members[c1->size], c2->members, c2->size*sizeof(predicate*));
c1->size += c2->size;
free_predicate_cloud(db, c2);
} else if ( c2->size > 0 )
{ c1->members = c2->members;
c1->size = c2->size;
c2->members = NULL;
free_predicate_cloud(db, c2);
} else
{ free_predicate_cloud(db, c2);
}
return c1;
}
/* merge two predicate clouds. If either of them has no triples we
can do the merge without rehashing the database. Note that this
code is only called from addSubPropertyOf(). If c1==c2, we added
an rdfs:subPropertyOf between two predicates in the same cloud.
we must still update the matrix, though we could do it a bit more
efficient. I doubt this is worth the trouble though.
*/
static predicate_cloud *
merge_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2)
{ predicate_cloud *cloud;
if ( c1 != c2 )
{ if ( triples_in_predicate_cloud(c1) == 0 )
{ cloud = append_clouds(db, c2, c1, TRUE);
} else if ( triples_in_predicate_cloud(c2) == 0 )
{ cloud = append_clouds(db, c1, c2, TRUE);
} else
{ cloud = append_clouds(db, c1, c2, FALSE);
cloud->dirty = TRUE;
db->need_update++;
}
} else
{ cloud = c1;
}
DEBUG(1, if ( !db->need_update )
{ check_predicate_cloud(cloud);
});
create_reachability_matrix(db, cloud);
return cloud;
}
/* split a cloud into multiple disjoint clouds. The first cloud is
given the hash of the original, so we only need to update if new
clouds are created. Ideally we should se whether it is possible
to give the orginal hash to the one and only non-empty cloud to
avoid re-hashing alltogether.
*/
static void
pred_reachable(predicate *start, char *visited, predicate **nodes, int *size)
{ if ( !visited[start->label] )
{ cell *c;
visited[start->label] = TRUE;
nodes[(*size)++] = start;
for(c=start->subPropertyOf.head; c; c=c->next)
pred_reachable(c->value, visited, nodes, size);
for(c=start->siblings.head; c; c=c->next)
pred_reachable(c->value, visited, nodes, size);
}
}
static int
split_cloud(rdf_db *db, predicate_cloud *cloud,
predicate_cloud **parts, int size)
{ char *done = alloca(cloud->size*sizeof(char));
predicate **graph = alloca(cloud->size*sizeof(predicate*));
int found = 0;
int i;
memset(done, 0, cloud->size*sizeof(char));
for(i=0; i<cloud->size; i++)
{ if ( !done[i] )
{ predicate *start = cloud->members[i];
predicate_cloud *new_cloud;
int gsize = 0;
pred_reachable(start, done, graph, &gsize);
new_cloud = new_predicate_cloud(db, graph, gsize);
if ( found == 0 )
{ new_cloud->hash = cloud->hash;
} else
{ new_cloud->dirty = TRUE; /* preds come from another cloud */
db->need_update++;
}
parts[found++] = new_cloud;
}
}
free_predicate_cloud(db, cloud);
return found;
}
static unsigned long
predicate_hash(predicate *p)
{ return p->hash;
}
static void
addSubPropertyOf(rdf_db *db, predicate *sub, predicate *super)
{ /*DEBUG(2, Sdprintf("addSubPropertyOf(%s, %s)\n", pname(sub), pname(super)));*/
if ( add_list(db, &sub->subPropertyOf, super) )
{ add_list(db, &super->siblings, sub);
merge_clouds(db, sub->cloud, super->cloud);
}
}
/* deleting an rdfs:subPropertyOf. This is a bit naughty. If the
cloud is still connected we only need to refresh the reachability
matrix. Otherwise the cloud breaks in maximum two clusters. We
can decide to leave it as is, which saves a re-hash of the triples
but harms indexing. Alternative we can create a new cloud for one
of the clusters and re-hash.
*/
static void
delSubPropertyOf(rdf_db *db, predicate *sub, predicate *super)
{ if ( del_list(db, &sub->subPropertyOf, super) )
{ del_list(db, &super->siblings, sub);
/* if ( not worth the trouble )
create_reachability_matrix(db, sub->cloud);
else */
{ predicate_cloud *parts[2];
split_cloud(db, sub->cloud, parts, 2);
}
}
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Reachability matrix.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#define WBITSIZE (sizeof(int)*8)
static size_t
byte_size_bitmatrix(size_t w, size_t h)
{ size_t wsize = ((w*h)+WBITSIZE-1)/WBITSIZE;
return (size_t)(intptr_t)&((bitmatrix*)NULL)->bits[wsize];
}
static bitmatrix *
alloc_bitmatrix(rdf_db *db, size_t w, size_t h)
{ size_t size = byte_size_bitmatrix(w, h);
bitmatrix *m = rdf_malloc(db, size);
memset(m, 0, size);
m->width = w;
m->heigth = h;
return m;
}
static void
free_bitmatrix(rdf_db *db, bitmatrix *bm)
{ size_t size = byte_size_bitmatrix(bm->width, bm->heigth);
rdf_free(db, bm, size);
}
#undef setbit /* conflict in HPUX 11.23 */
static void
setbit(bitmatrix *m, int i, int j)
{ size_t ij = m->width*i+j;
size_t word = ij/WBITSIZE;
int bit = ij%WBITSIZE;
m->bits[word] |= 1<<bit;
}
static int
testbit(bitmatrix *m, int i, int j)
{ size_t ij = m->width*i+j;
size_t word = ij/WBITSIZE;
int bit = ij%WBITSIZE;
return ((m->bits[word] & (1<<bit)) != 0);
}
static int
label_predicate_cloud(predicate_cloud *cloud)
{ predicate **p;
int i;
for(i=0, p=cloud->members; i<cloud->size; i++, p++)
(*p)->label = i;
return i;
}
static void
fill_reachable(bitmatrix *bm, predicate *p0, predicate *p)
{ if ( !testbit(bm, p0->label, p->label) )
{ cell *c;
DEBUG(1, Sdprintf(" Reachable [%s (%d)]\n", pname(p), p->label));
setbit(bm, p0->label, p->label);
for(c = p->subPropertyOf.head; c; c=c->next)
fill_reachable(bm, p0, c->value);
}
}
static void
create_reachability_matrix(rdf_db *db, predicate_cloud *cloud)
{ bitmatrix *m = alloc_bitmatrix(db, cloud->size, cloud->size);
predicate **p;
int i;
label_predicate_cloud(cloud);
for(i=0, p=cloud->members; i<cloud->size; i++, p++)
{ DEBUG(1, Sdprintf("Reachability for %s (%d)\n", pname(*p), (*p)->label));
fill_reachable(m, *p, *p);
}
if ( cloud->reachable )
free_bitmatrix(db, cloud->reachable);
cloud->reachable = m;
}
static int
isSubPropertyOf(predicate *sub, predicate *p)
{ if ( sub->cloud == p->cloud )
return testbit(sub->cloud->reachable, sub->label, p->label);
return FALSE;
}
/*******************************
* PRINT PREDICATE HIERARCHY *
*******************************/
static int
check_predicate_cloud(predicate_cloud *c)
{ predicate **p;
int errors = 0;
int i;
DEBUG(1, if ( c->dirty ) Sdprintf("Cloud is dirty\n"));
for(i=0, p=c->members; i<c->size; i++, p++)
{ if ( !c->dirty )
{ if ( (*p)->hash != c->hash )
{ Sdprintf("Hash of %s doesn't match cloud hash\n", pname(*p));
errors++;
}
}
if ( (*p)->cloud != c )
{ Sdprintf("Wrong cloud of %s\n", pname(*p));
errors++;
}
}
return errors;
}
static void
print_reachability_cloud(predicate *p)
{ int x, y;
predicate_cloud *cloud = p->cloud;
check_predicate_cloud(cloud);
Sdprintf("Reachability matrix:\n");
for(x=0; x<cloud->reachable->width; x++)
Sdprintf("%d", x%10);
Sdprintf("\n");
for(y=0; y<cloud->reachable->heigth; y++)
{ for(x=0; x<cloud->reachable->width; x++)
{ if ( testbit(cloud->reachable, x, y) )
Sdprintf("X");
else
Sdprintf(".");
}
Sdprintf(" %2d %s\n", y, PL_atom_chars(cloud->members[y]->name));
assert(cloud->members[y]->label == y);
}
}
static foreign_t
rdf_print_predicate_cloud(term_t t)
{ predicate *p;
rdf_db *db = DB;
if ( !get_predicate(db, t, &p) )
return FALSE;
print_reachability_cloud(p);
return TRUE;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Branching factors are crucial in ordering the statements of a
conjunction. These functions compute the average branching factor in
both directions ("subject --> P --> object" and "object --> P -->
subject") by determining the number of unique values at either side of
the predicate. This number is only recomputed if it is considered
`dirty'.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
update_predicate_counts(rdf_db *db, predicate *p, int which)
{ long total = 0;
if ( which == DISTINCT_DIRECT )
{ long changed = abs(p->triple_count - p->distinct_updated[DISTINCT_DIRECT]);
if ( changed < p->distinct_updated[DISTINCT_DIRECT] )
return TRUE;
if ( p->triple_count == 0 )
{ p->distinct_count[which] = 0;
p->distinct_subjects[which] = 0;
p->distinct_objects[which] = 0;
return TRUE;
}
} else
{ long changed = db->generation - p->distinct_updated[DISTINCT_SUB];
if ( changed < p->distinct_count[DISTINCT_SUB] )
return TRUE;
}
if ( !update_hash(db) )
return FALSE;
{ atomset subject_set;
atomset object_set;
triple t;
triple *byp;
memset(&t, 0, sizeof(t));
t.predicate.r = p;
t.indexed |= BY_P;
init_atomset(&subject_set);
init_atomset(&object_set);
for(byp = db->table[t.indexed][triple_hash(db, &t, t.indexed)];
byp;
byp = byp->next[t.indexed])
{ if ( !byp->erased && !byp->is_duplicate )
{ if ( (which == DISTINCT_DIRECT && byp->predicate.r == p) ||
(which != DISTINCT_DIRECT && isSubPropertyOf(byp->predicate.r, p)) )
{ total++;
add_atomset(&subject_set, byp->subject);
add_atomset(&object_set, object_hash(byp)); /* NOTE: not exact! */
}
}
}
p->distinct_count[which] = total;
p->distinct_subjects[which] = subject_set.tree.count;
p->distinct_objects[which] = object_set.tree.count;
destroy_atomset(&subject_set);
destroy_atomset(&object_set);
if ( which == DISTINCT_DIRECT )
p->distinct_updated[DISTINCT_DIRECT] = total;
else
p->distinct_updated[DISTINCT_SUB] = db->generation;
DEBUG(1, Sdprintf("%s: distinct subjects (%s): %ld, objects: %ld\n",
PL_atom_chars(p->name),
(which == DISTINCT_DIRECT ? "rdf" : "rdfs"),
p->distinct_subjects[which],
p->distinct_objects[which]));
}
return TRUE;
}
static void
invalidate_distinct_counts(rdf_db *db)
{ predicate **ht;
int i;
for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
{ predicate *p;
for( p = *ht; p; p = p->next )
{ p->distinct_updated[DISTINCT_SUB] = 0;
p->distinct_count[DISTINCT_SUB] = 0;
p->distinct_subjects[DISTINCT_SUB] = 0;
p->distinct_objects[DISTINCT_SUB] = 0;
}
}
}
static double
subject_branch_factor(rdf_db *db, predicate *p, int which)
{ if ( !update_predicate_counts(db, p, which) )
return FALSE;
if ( p->distinct_subjects[which] == 0 )
return 0.0; /* 0 --> 0 */
return (double)p->distinct_count[which] /
(double)p->distinct_subjects[which];
}
static double
object_branch_factor(rdf_db *db, predicate *p, int which)
{ if ( !update_predicate_counts(db, p, which) )
return FALSE;
if ( p->distinct_objects[which] == 0 )
return 0.0; /* 0 --> 0 */
return (double)p->distinct_count[which] /
(double)p->distinct_objects[which];
}
/*******************************
* NAMED GRAPHS *
*******************************/
/* MT: all calls must be locked
*/
static void
init_graph_table(rdf_db *db)
{ int bytes = sizeof(predicate*)*INITIAL_GRAPH_TABLE_SIZE;
db->graph_table = rdf_malloc(db, bytes);
memset(db->graph_table, 0, bytes);
db->graph_table_size = INITIAL_GRAPH_TABLE_SIZE;
}
static graph *
lookup_graph(rdf_db *db, atom_t name, int create)
{ int hash = atom_hash(name) % db->graph_table_size;
graph *src;
LOCK_MISC(db);
for(src=db->graph_table[hash]; src; src = src->next)
{ if ( src->name == name )
{ UNLOCK_MISC(db);
return src;
}
}
if ( !create )
{ UNLOCK_MISC(db);
return NULL;
}
src = rdf_malloc(db, sizeof(*src));
memset(src, 0, sizeof(*src));
src->name = name;
src->md5 = TRUE;
PL_register_atom(name);
src->next = db->graph_table[hash];
db->graph_table[hash] = src;
UNLOCK_MISC(db);
return src;
}
static void
erase_graphs(rdf_db *db)
{ graph **ht;
int i;
for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
{ graph *src, *n;
for( src = *ht; src; src = n )
{ n = src->next;
PL_unregister_atom(src->name);
if ( src->source )
PL_unregister_atom(src->source);
rdf_free(db, src, sizeof(*src));
}
*ht = NULL;
}
db->last_graph = NULL;
}
static void
register_graph(rdf_db *db, triple *t)
{ graph *src;
if ( !t->graph )
return;
if ( db->last_graph && db->last_graph->name == t->graph )
{ src = db->last_graph;
} else
{ src = lookup_graph(db, t->graph, TRUE);
db->last_graph = src;
}
src->triple_count++;
#ifdef WITH_MD5
if ( src->md5 )
{ md5_byte_t digest[16];
md5_triple(t, digest);
sum_digest(src->digest, digest);
}
#endif
}
static void
unregister_graph(rdf_db *db, triple *t)
{ graph *src;
if ( !t->graph )
return;
if ( db->last_graph && db->last_graph->name == t->graph )
{ src = db->last_graph;
} else
{ src = lookup_graph(db, t->graph, TRUE);
db->last_graph = src;
}
src->triple_count--;
#ifdef WITH_MD5
if ( src->md5 )
{ md5_byte_t digest[16];
md5_triple(t, digest);
dec_digest(src->digest, digest);
}
#endif
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rdf_graphs_(-ListOfGraphs)
Return a list holding the names of all currently defined graphs. We
return a list to avoid the need for complicated long locks.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static foreign_t
rdf_graphs(term_t list)
{ int i;
term_t tail = PL_copy_term_ref(list);
term_t head = PL_new_term_ref();
rdf_db *db = DB;
if ( !RDLOCK(db) )
return FALSE;
for(i=0; i<db->graph_table_size; i++)
{ graph *src;
for(src=db->graph_table[i]; src; src = src->next)
{ if ( !PL_unify_list(tail, head, tail) ||
!PL_unify_atom(head, src->name) )
{ RDUNLOCK(db);
return FALSE;
}
}
}
RDUNLOCK(db);
return PL_unify_nil(tail);
}
static foreign_t
rdf_graph_source(term_t graph_name, term_t source, term_t modified)
{ atom_t gn;
int rc = FALSE;
rdf_db *db = DB;
if ( !get_atom_or_var_ex(graph_name, &gn) )
return FALSE;
if ( gn )
{ graph *s;
if ( !RDLOCK(db) )
return FALSE;
if ( (s = lookup_graph(db, gn, FALSE)) && s->source)
{ rc = ( PL_unify_atom(source, s->source) &&
PL_unify_float(modified, s->modified) );
}
RDUNLOCK(db);
} else
{ atom_t src;
if ( get_atom_ex(source, &src) )
{ int i;
graph **ht;
if ( !RDLOCK(db) )
return FALSE;
for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
{ graph *s;
for( s = *ht; s; s = s->next )
{ if ( s->source == src )
{ rc = ( PL_unify_atom(graph_name, s->name) &&
PL_unify_float(modified, s->modified) );
}
}
}
RDUNLOCK(db);
}
}
return rc;
}
static foreign_t
rdf_set_graph_source(term_t graph_name, term_t source, term_t modified)
{ atom_t gn, src;
int rc = FALSE;
rdf_db *db = DB;
graph *s;
double mtime;
if ( !get_atom_ex(graph_name, &gn) ||
!get_atom_ex(source, &src) ||
!get_double_ex(modified, &mtime) )
return FALSE;
if ( !RDLOCK(db) )
return FALSE;
if ( (s = lookup_graph(db, gn, TRUE)) )
{ if ( s->source != src )
{ if ( s->source )
PL_unregister_atom(s->source);
s->source = src;
PL_register_atom(s->source);
}
s->modified = mtime;
rc = TRUE;
}
RDUNLOCK(db);
return rc;
}
static foreign_t
rdf_unset_graph_source(term_t graph_name)
{ atom_t gn;
rdf_db *db = DB;
graph *s;
if ( !get_atom_ex(graph_name, &gn) )
return FALSE;
if ( (s = lookup_graph(db, gn, TRUE)) )
{ if ( s->source )
{ PL_unregister_atom(s->source);
s->source = 0;
}
s->modified = 0.0;
}
if ( !RDLOCK(db) )
return FALSE;
RDUNLOCK(db);
return TRUE;
}
/*******************************
* LITERALS *
*******************************/
#define LITERAL_EX_MAGIC 0x2b97e881
typedef struct literal_ex
{ literal *literal;
atom_info atom;
#ifdef O_SECURE
long magic;
#endif
} literal_ex;
static inline void
prepare_literal_ex(literal_ex *lex)
{ SECURE(lex->magic = 0x2b97e881);
if ( lex->literal->objtype == OBJ_STRING )
{ lex->atom.handle = lex->literal->value.string;
lex->atom.resolved = FALSE;
}
}
static literal *
new_literal(rdf_db *db)
{ literal *lit = rdf_malloc(db, sizeof(*lit));
memset(lit, 0, sizeof(*lit));
lit->references = 1;
return lit;
}
static void
free_literal(rdf_db *db, literal *lit)
{ if ( --lit->references == 0 )
{ unlock_atoms_literal(lit);
if ( lit->shared && !db->resetting )
{ literal_ex lex;
lit->shared = FALSE;
broadcast(EV_OLD_LITERAL, lit, NULL);
DEBUG(2,
Sdprintf("Delete %p from literal table: ", lit);
print_literal(lit);
Sdprintf("\n"));
lex.literal = lit;
prepare_literal_ex(&lex);
if ( !avldel(&db->literals, &lex) )
{ Sdprintf("Failed to delete %p (size=%ld): ", lit, db->literals.count);
print_literal(lit);
Sdprintf("\n");
assert(0);
}
}
if ( lit->objtype == OBJ_TERM &&
lit->value.term.record )
{ if ( lit->term_loaded )
rdf_free(db, lit->value.term.record, lit->value.term.len);
else
PL_erase_external(lit->value.term.record);
}
rdf_free(db, lit, sizeof(*lit));
}
}
static literal *
copy_literal(rdf_db *db, literal *lit)
{ lit->references++;
return lit;
}
static void
alloc_literal_triple(rdf_db *db, triple *t)
{ if ( !t->object_is_literal )
{ t->object.literal = new_literal(db);
t->object_is_literal = TRUE;
}
}
static void
lock_atoms_literal(literal *lit)
{ if ( !lit->atoms_locked )
{ lit->atoms_locked = TRUE;
switch(lit->objtype)
{ case OBJ_STRING:
PL_register_atom(lit->value.string);
if ( lit->qualifier )
PL_register_atom(lit->type_or_lang);
break;
}
}
}
static void
unlock_atoms_literal(literal *lit)
{ if ( lit->atoms_locked )
{ lit->atoms_locked = FALSE;
switch(lit->objtype)
{ case OBJ_STRING:
PL_unregister_atom(lit->value.string);
if ( lit->qualifier )
PL_unregister_atom(lit->type_or_lang);
break;
}
}
}
/*******************************
* LITERAL DB *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
compare_literals() sorts literals. Ordering is defined as:
* Numeric literals < string literals < term literals
* Numeric literals (int and float) are sorted by value
* String literals are sorted alhabetically
- case independent, but uppercase before lowercase
- locale (strcoll) sorting?
- delete dyadrics
- first on string, then on type, then on language
* Terms are sorted on Prolog standard order of terms
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
compare_literals(void *p1, void *p2, NODE type)
{ literal_ex *lex = p1;
literal *l1 = lex->literal;
literal *l2 = *(literal**)p2;
SECURE(assert(lex->magic == LITERAL_EX_MAGIC));
if ( l1->objtype == l2->objtype )
{ switch(l1->objtype)
{ case OBJ_INTEGER:
{ int64_t v1 = l1->value.integer;
int64_t v2 = l2->value.integer;
return v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
}
case OBJ_DOUBLE:
{ double v1 = l1->value.real;
double v2 = l2->value.real;
return v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
}
case OBJ_STRING:
{ int rc = cmp_atom_info(&lex->atom, l2->value.string);
if ( rc == 0 )
{ if ( l1->qualifier == l2->qualifier )
return cmp_atoms(l1->type_or_lang, l2->type_or_lang);
return l1->qualifier - l2->qualifier;
}
return rc;
}
case OBJ_TERM:
{ fid_t fid = PL_open_foreign_frame();
term_t t1 = PL_new_term_ref();
term_t t2 = PL_new_term_ref();
int rc;
PL_recorded_external(l1->value.term.record, t1); /* can also be handled in literal_ex */
PL_recorded_external(l2->value.term.record, t2);
rc = PL_compare(t1, t2);
PL_discard_foreign_frame(fid);
return rc;
}
default:
assert(0);
return 0;
}
} else if ( l1->objtype == OBJ_INTEGER && l2->objtype == OBJ_DOUBLE )
{ double v1 = (double)l1->value.integer;
double v2 = l2->value.real;
return v1 < v2 ? -1 : v1 > v2 ? 1 : -1;
} else if ( l1->objtype == OBJ_DOUBLE && l2->objtype == OBJ_INTEGER )
{ double v1 = l1->value.real;
double v2 = (double)l2->value.integer;
return v1 < v2 ? -1 : v1 > v2 ? 1 : 1;
} else
{ return l1->objtype - l2->objtype;
}
}
static void*
avl_malloc(void *ptr, size_t size)
{ return rdf_malloc(ptr, size);
}
static void
avl_free(void *ptr, void *data, size_t size)
{ rdf_free(ptr, data, size);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Create the sorted literal tree. Note that we do not register a free
handler for the tree as nodes are either already destroyed by
free_literal() or by rdf_reset_db().
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void
init_literal_table(rdf_db *db)
{ avlinit(&db->literals,
db, sizeof(literal*),
compare_literals,
NULL,
avl_malloc,
avl_free);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
share_literal() takes a literal and replaces it with one from the
literal database if there is a match. On a match, the argument literal
is destroyed. Without a match it adds the literal to the database and
returns it.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static literal *
share_literal(rdf_db *db, literal *from)
{ literal **data;
literal_ex lex;
lex.literal = from;
prepare_literal_ex(&lex);
if ( (data = avlins(&db->literals, &lex)) )
{ literal *l2 = *data;
DEBUG(2,
Sdprintf("Replace %p by %p:\n", from, l2);
Sdprintf("\tfrom: "); print_literal(from);
Sdprintf("\n\tto: "); print_literal(l2);
Sdprintf("\n"));
l2->references++;
free_literal(db, from);
return l2;
} else
{ DEBUG(2,
Sdprintf("Insert %p into literal table: ", from);
print_literal(from);
Sdprintf("\n"));
from->shared = TRUE;
broadcast(EV_NEW_LITERAL, from, NULL);
return from;
}
}
#ifdef O_SECURE
static literal **
add_literals(AVLtree node, literal **p)
{ literal **litp;
if ( node->subtree[LEFT] )
p = add_literals(node->subtree[LEFT], p);
litp = (literal**)node->data;
*p++ = *litp;
if ( node->subtree[RIGHT] )
p = add_literals(node->subtree[RIGHT], p);
return p;
}
static foreign_t
check_transitivity()
{ rdf_db *db = DB;
literal **array = malloc(sizeof(literal*)*db->literals.count);
literal **p = array;
int i,j;
add_literals(db->literals.root, p);
Sdprintf("Checking %ld literals ...\n", db->literals.count);
for(i=0; i<db->literals.count; i++)
{ int end;
Sdprintf("\r%6ld", i);
end = i+100;
if ( end > db->literals.count )
end = db->literals.count;
for(j=i+1; j<end; j++)
{ literal_ex lex;
lex.literal = &array[i];
prepare_literal_ex(&lex);
if ( compare_literals(&lex, &array[j], IS_NULL) >= 0 )
{ Sdprintf("\nERROR: i,j=%d,%d: ", i, j);
print_literal(array[i]);
Sdprintf(" >= ");
print_literal(array[j]);
Sdprintf("\n");
}
}
}
free(array);
return TRUE;
}
static void
dump_lnode(AVLtree node)
{ literal **litp;
if ( node->subtree[LEFT] )
dump_lnode(node->subtree[LEFT]);
litp = (literal**)node->data;
print_literal(*litp);
Sdprintf("\n");
if ( node->subtree[RIGHT] )
dump_lnode(node->subtree[RIGHT]);
}
static foreign_t
dump_literals()
{ rdf_db *db = DB;
dump_lnode(db->literals.root);
return TRUE;
}
#endif
/*******************************
* TRIPLES *
*******************************/
static void
init_tables(rdf_db *db)
{ int i;
int bytes = sizeof(triple*)*INITIAL_TABLE_SIZE;
int cbytes = sizeof(int)*INITIAL_TABLE_SIZE;
db->table[0] = &db->by_none;
db->tail[0] = &db->by_none_tail;
for(i=BY_S; i<=BY_OP; i++)
{ if ( i == BY_SO )
continue;
db->table[i] = rdf_malloc(db, bytes);
memset(db->table[i], 0, bytes);
db->tail[i] = rdf_malloc(db, bytes);
memset(db->tail[i], 0, bytes);
db->counts[i] = rdf_malloc(db, cbytes);
memset(db->counts[i], 0, cbytes);
db->table_size[i] = INITIAL_TABLE_SIZE;
}
init_pred_table(db);
init_graph_table(db);
init_literal_table(db);
}
static rdf_db *
new_db()
{ rdf_db *db = rdf_malloc(NULL, sizeof(*db));
memset(db, 0, sizeof(*db));
INIT_LOCK(db);
init_tables(db);
return db;
}
static triple *
new_triple(rdf_db *db)
{ triple *t = rdf_malloc(db, sizeof(*t));
memset(t, 0, sizeof(*t));
t->allocated = TRUE;
return t;
}
static void
free_triple(rdf_db *db, triple *t)
{ unlock_atoms(t);
if ( t->object_is_literal && t->object.literal )
free_literal(db, t->object.literal);
if ( t->allocated )
rdf_free(db, t, sizeof(*t));
}
#define HASHED 0x80000000
static unsigned int
literal_hash(literal *lit)
{ if ( lit->hash & HASHED )
{ return lit->hash;
} else
{ unsigned int hash;
switch(lit->objtype)
{ case OBJ_STRING:
hash = atom_hash_case(lit->value.string);
break;
case OBJ_INTEGER:
case OBJ_DOUBLE:
hash = rdf_murmer_hash(&lit->value.integer,
sizeof(lit->value.integer),
MURMUR_SEED);
break;
case OBJ_TERM:
hash = rdf_murmer_hash(lit->value.term.record,
(int)lit->value.term.len,
MURMUR_SEED);
break;
default:
assert(0);
return 0;
}
lit->hash = (hash | HASHED);
return lit->hash;
}
}
static unsigned long
object_hash(triple *t)
{ if ( t->object_is_literal )
{ return literal_hash(t->object.literal);
} else
{ return atom_hash(t->object.resource);
}
}
static int
triple_hash(rdf_db *db, triple *t, int which)
{ unsigned long v;
switch(which)
{ case BY_NONE:
return 0;
case BY_S:
v = atom_hash(t->subject);
break;
case BY_P:
v = predicate_hash(t->predicate.r);
break;
case BY_O:
v = object_hash(t);
break;
case BY_SP:
v = atom_hash(t->subject) ^ predicate_hash(t->predicate.r);
break;
case BY_OP:
v = predicate_hash(t->predicate.r) ^ object_hash(t);
break;
default:
v = 0; /* make compiler silent */
assert(0);
}
return (int)(v % (long)db->table_size[which]);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
by_inverse[] returns the index key to use for inverse search as needed
to realise symmetric and inverse predicates.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int by_inverse[8] =
{ BY_NONE, /* BY_NONE = 0 */
BY_O, /* BY_S = 1 */
BY_P, /* BY_P = 2 */
BY_OP, /* BY_SP = 3 */
BY_S, /* BY_O = 4 */
BY_SO, /* BY_SO = 5 */
BY_SP, /* BY_OP = 6 */
BY_SPO, /* BY_SPO = 7 */
};
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
triple *first(atom_t subject)
Find the first triple on subject. The first is marked to generate a
unique subjects quickly;
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static triple *
first(rdf_db *db, atom_t subject)
{ triple *t, tmp;
int hash;
tmp.subject = subject;
hash = triple_hash(db, &tmp, BY_S);
for(t=db->table[BY_S][hash]; t; t = t->next[BY_S])
{ if ( t->subject == subject && !t->erased )
return t;
}
return NULL;
}
static void
link_triple_hash(rdf_db *db, triple *t)
{ int i;
for(i=1; i<=BY_OP; i++)
{ if ( db->table[i] )
{ int hash = triple_hash(db, t, i);
if ( db->tail[i][hash] )
{ db->tail[i][hash]->next[i] = t;
} else
{ db->table[i][hash] = t;
}
db->tail[i][hash] = t;
db->counts[i][hash]++;
}
}
}
typedef enum
{ DUP_NONE,
DUP_DUPLICATE,
DUP_DISCARDED
} dub_state;
static dub_state
discard_duplicate(rdf_db *db, triple *t)
{ triple *d;
const int indexed = BY_SP;
dub_state rc = DUP_NONE;
assert(t->is_duplicate == FALSE);
assert(t->duplicates == 0);
if ( WANT_GC(db) ) /* (*) See above */
update_hash(db);
d = db->table[indexed][triple_hash(db, t, indexed)];
for( ; d && d != t; d = d->next[indexed] )
{ if ( match_triples(d, t, MATCH_DUPLICATE) )
{ if ( d->graph == t->graph &&
(d->line == NO_LINE || d->line == t->line) )
{ free_triple(db, t);
return DUP_DISCARDED;
}
rc = DUP_DUPLICATE;
}
}
return rc;
}
/* MT: must be locked by caller */
static int
link_triple_silent(rdf_db *db, triple *t)
{ triple *one;
dub_state dup;
if ( t->resolve_pred )
{ t->predicate.r = lookup_predicate(db, t->predicate.u);
t->resolve_pred = FALSE;
}
if ( (dup=discard_duplicate(db, t)) == DUP_DISCARDED )
return FALSE;
if ( db->by_none_tail )
db->by_none_tail->next[BY_NONE] = t;
else
db->by_none = t;
db->by_none_tail = t;
link_triple_hash(db, t);
if ( t->object_is_literal )
t->object.literal = share_literal(db, t->object.literal);
if ( dup == DUP_DUPLICATE && update_duplicates_add(db, t) )
goto ok; /* is a duplicate */
/* keep track of subjects */
one = first(db, t->subject);
if ( !one->first )
{ one->first = TRUE;
db->subjects++;
}
/* keep track of subPropertyOf */
if ( t->predicate.r->name == ATOM_subPropertyOf &&
t->object_is_literal == FALSE )
{ predicate *me = lookup_predicate(db, t->subject);
predicate *super = lookup_predicate(db, t->object.resource);
addSubPropertyOf(db, me, super);
}
ok:
db->created++;
t->predicate.r->triple_count++;
register_graph(db, t);
return TRUE;
}
static inline void
link_triple(rdf_db *db, triple *t)
{ if ( link_triple_silent(db, t) )
broadcast(EV_ASSERT, t, NULL);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rehash_triples()
Relink the triples in the hash-chains after the hash-keys for properties
have changed or the tables have been resized. The caller must ensure
there are no active queries and the tables are of the proper size.
At the same time, this predicate actually removes erased triples.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static long
tbl_size(long triples)
{ long s0 = 1024;
triples /= MIN_HASH_FACTOR;
while(s0 < triples)
s0 *= 2;
return s0;
}
static void
rehash_triples(rdf_db *db)
{ int i;
triple *t, *t2;
long count = db->created - db->freed;
long tsize = tbl_size(count);
DEBUG(1, Sdprintf("(%ld triples; %ld entries) ...", count, tsize));
broadcast(EV_REHASH, (void*)ATOM_begin, NULL);
for(i=1; i<INDEX_TABLES; i++)
{ if ( db->table[i] )
{ long bytes = sizeof(triple*) * tsize;
long cbytes = sizeof(int) * tsize;
long obytes = sizeof(triple*) * db->table_size[i];
long ocbytes = sizeof(int) * db->table_size[i];
db->table[i] = rdf_realloc(db, db->table[i], obytes, bytes);
db->tail[i] = rdf_realloc(db, db->tail[i], obytes, bytes);
db->counts[i] = rdf_realloc(db, db->counts[i], ocbytes, cbytes);
db->table_size[i] = tsize;
memset(db->table[i], 0, bytes);
memset(db->tail[i], 0, bytes);
memset(db->counts[i], 0, cbytes);
}
}
/* delete leading erased triples */
for(t=db->by_none; t && t->erased; t=t2)
{ t2 = t->next[BY_NONE];
free_triple(db, t);
db->freed++;
db->by_none = t2;
}
for(t=db->by_none; t; t = t2)
{ triple *t3;
t2 = t->next[BY_NONE];
for(i=1; i<INDEX_TABLES; i++)
t->next[i] = NULL;
assert(t->erased == FALSE);
link_triple_hash(db, t);
for( ; t2 && t2->erased; t2=t3 )
{ t3 = t2->next[BY_NONE];
free_triple(db, t2);
db->freed++;
}
t->next[BY_NONE] = t2;
if ( !t2 )
db->by_none_tail = t;
}
if ( db->by_none == NULL )
db->by_none_tail = NULL;
broadcast(EV_REHASH, (void*)ATOM_end, NULL);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
update_hash(). Note this may be called by readers and writers, but must
be done only onces and certainly not concurrently by multiple readers.
Hence we need a seperate lock.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
WANT_GC(rdf_db *db)
{ if ( db->gc_blocked )
{ return FALSE;
} else
{ long dirty = db->erased - db->freed;
long count = db->created - db->erased;
if ( dirty > 1000 && dirty > count )
return TRUE;
if ( count > db->table_size[1]*MAX_HASH_FACTOR )
return TRUE;
return FALSE;
}
}
static int
update_hash(rdf_db *db)
{ int want_gc = WANT_GC(db);
if ( want_gc )
DEBUG(1, Sdprintf("rdf_db: want GC\n"));
if ( db->need_update || want_gc )
{ LOCK_MISC(db);
if ( db->need_update ) /* check again */
{ if ( organise_predicates(db) )
{ long t0 = (long)PL_query(PL_QUERY_USER_CPU);
DEBUG(1, Sdprintf("Re-hash ..."));
invalidate_distinct_counts(db);
rehash_triples(db);
db->generation += (db->created-db->erased);
db->rehash_count++;
db->rehash_time += ((double)(PL_query(PL_QUERY_USER_CPU)-t0))/1000.0;
DEBUG(1, Sdprintf("ok\n"));
}
db->need_update = 0;
} else if ( WANT_GC(db) )
{ long t0 = (long)PL_query(PL_QUERY_USER_CPU);
DEBUG(1, Sdprintf("rdf_db: GC ..."));
rehash_triples(db);
db->gc_count++;
db->gc_time += ((double)(PL_query(PL_QUERY_USER_CPU)-t0))/1000.0;
DEBUG(1, Sdprintf("ok\n"));
}
UNLOCK_MISC(db);
}
return TRUE;
}
/* MT: Must be locked */
static void
erase_triple_silent(rdf_db *db, triple *t)
{ if ( !t->erased )
{ t->erased = TRUE;
update_duplicates_del(db, t);
if ( t->predicate.r->name == ATOM_subPropertyOf &&
t->object_is_literal == FALSE )
{ predicate *me = lookup_predicate(db, t->subject);
predicate *super = lookup_predicate(db, t->object.resource);
delSubPropertyOf(db, me, super);
}
if ( t->first )
{ triple *one = first(db, t->subject);
if ( one )
one->first = TRUE;
else
db->subjects--;
}
db->erased++;
t->predicate.r->triple_count--;
unregister_graph(db, t);
if ( t->object_is_literal )
{ literal *lit = t->object.literal;
t->object.literal = NULL;
free_literal(db, lit); /* TBD: thread-safe? */
}
}
}
static inline void
erase_triple(rdf_db *db, triple *t)
{ broadcast(EV_RETRACT, t, NULL);
erase_triple_silent(db, t);
}
static int
match_object(triple *t, triple *p, unsigned flags)
{ if ( p->object_is_literal )
{ if ( t->object_is_literal )
{ literal *plit = p->object.literal;
literal *tlit = t->object.literal;
if ( !plit->objtype && !plit->qualifier )
return TRUE;
if ( plit->objtype && plit->objtype != tlit->objtype )
return FALSE;
switch( plit->objtype )
{ case 0:
if ( plit->qualifier &&
tlit->qualifier != plit->qualifier )
return FALSE;
return TRUE;
case OBJ_STRING:
if ( (flags & MATCH_QUAL) ||
p->match == STR_MATCH_PLAIN )
{ if ( tlit->qualifier != plit->qualifier )
return FALSE;
} else
{ if ( plit->qualifier && tlit->qualifier &&
tlit->qualifier != plit->qualifier )
return FALSE;
}
if ( plit->type_or_lang &&
tlit->type_or_lang != plit->type_or_lang )
return FALSE;
if ( plit->value.string )
{ if ( tlit->value.string != plit->value.string )
{ if ( p->match >= STR_MATCH_EXACT )
{ return match_atoms(p->match,
plit->value.string, tlit->value.string);
} else
{ return FALSE;
}
}
}
return TRUE;
case OBJ_INTEGER:
return tlit->value.integer == plit->value.integer;
case OBJ_DOUBLE:
return tlit->value.real == plit->value.real;
case OBJ_TERM:
if ( plit->value.term.record &&
plit->value.term.len != tlit->value.term.len )
return FALSE;
return memcmp(tlit->value.term.record, plit->value.term.record,
plit->value.term.len) == 0;
default:
assert(0);
}
}
return FALSE;
} else
{ if ( p->object.resource )
{ if ( t->object_is_literal ||
(p->object.resource != t->object.resource) )
return FALSE;
}
}
return TRUE;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Match triple t to pattern p. Erased triples are always skipped.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
match_triples(triple *t, triple *p, unsigned flags)
{ /* DEBUG(3, Sdprintf("match_triple(");
print_triple(t, 0);
Sdprintf(")\n"));
*/
if ( t->erased )
return FALSE;
if ( p->subject && t->subject != p->subject )
return FALSE;
if ( !match_object(t, p, flags) )
return FALSE;
if ( flags & MATCH_SRC )
{ if ( p->graph && t->graph != p->graph )
return FALSE;
if ( p->line && t->line != p->line )
return FALSE;
}
/* last; may be expensive */
if ( p->predicate.r && t->predicate.r != p->predicate.r )
{ if ( (flags & MATCH_SUBPROPERTY) )
return isSubPropertyOf(t->predicate.r, p->predicate.r);
else
return FALSE;
}
return TRUE;
}
/*******************************
* SAVE/LOAD *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
The RDF triple format. This format is intended for quick save and load
and not for readability or exchange. Parts are based on the SWI-Prolog
Quick Load Format (implemented in pl-wic.c).
<file> ::= <magic>
<version>
['S' <graph-name>]
['F' <graph-source>]
['t' <modified>]
['M' <md5>]
{<triple>}
'E'
<magic> ::= "RDF-dump\n"
<version> ::= <integer>
<md5> ::= <byte>* (16 bytes digest)
<triple> ::= 'T'
<subject>
<predicate>
<object>
<graph>
<subject> ::= <resource>
<predicate> ::= <resource>
<object> ::= "R" <resource>
| "L" <atom>
<resource> ::= <atom>
<atom> ::= "X" <integer>
"A" <string>
"W" <utf-8 string>
<string> ::= <integer><bytes>
<graph-name> ::= <atom>
<graph-source> ::= <atom>
<graph> ::= <graph-file>
<line>
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#define SAVE_MAGIC "RDF-dump\n"
#define SAVE_VERSION 2
typedef struct saved
{ atom_t name;
long as;
struct saved *next;
} saved;
typedef struct save_context
{ saved ** saved_table;
long saved_size;
long saved_id;
} save_context;
long
next_table_size(long s0)
{ long size = 2;
while(size < s0)
size *= 2;
return size;
}
static void
init_saved(rdf_db *db, save_context *ctx)
{ long size = next_table_size((db->created - db->erased)/8);
long bytes = size * sizeof(*ctx->saved_table);
ctx->saved_table = rdf_malloc(db, bytes);
memset(ctx->saved_table, 0, bytes);
ctx->saved_size = size;
ctx->saved_id = 0;
}
static void
destroy_saved(rdf_db *db, save_context *ctx)
{ if ( ctx->saved_table )
{ saved **s = ctx->saved_table;
int i;
for(i=0; i<ctx->saved_size; i++, s++)
{ saved *c, *n;
for(c=*s; c; c = n)
{ n = c->next;
free(c);
}
}
rdf_free(db, ctx->saved_table, ctx->saved_size*sizeof(*ctx->saved_table));
}
}
#define INT64BITSIZE (sizeof(int64_t)*8)
#define PLMINLONG ((int64_t)((uint64_t)1<<(INT64BITSIZE-1)))
static void
save_int(IOSTREAM *fd, int64_t n)
{ int m;
int64_t absn = (n >= 0 ? n : -n);
if ( n != PLMINLONG )
{ if ( absn < ((intptr_t)1 << 5) )
{ Sputc((int)(n & 0x3f), fd);
return;
} else if ( absn < ((intptr_t)1 << 13) )
{ Sputc((int)(((n >> 8) & 0x3f) | (1 << 6)), fd);
Sputc((int)(n & 0xff), fd);
return;
} else if ( absn < ((intptr_t)1 << 21) )
{ Sputc((int)(((n >> 16) & 0x3f) | (2 << 6)), fd);
Sputc((int)((n >> 8) & 0xff), fd);
Sputc((int)(n & 0xff), fd);
return;
}
}
for(m = sizeof(n); ; m--)
{ int b = (int)((absn >> (((m-1)*8)-1)) & 0x1ff);
if ( b == 0 )
continue;
break;
}
Sputc(m | (3 << 6), fd);
for( ; m > 0; m--)
{ int b = (int)((n >> ((m-1)*8)) & 0xff);
Sputc(b, fd);
}
}
#define BYTES_PER_DOUBLE sizeof(double)
#ifdef WORDS_BIGENDIAN
static const int double_byte_order[] = { 7,6,5,4,3,2,1,0 };
#else
static const int double_byte_order[] = { 0,1,2,3,4,5,6,7 };
#endif
static int
save_double(IOSTREAM *fd, double f)
{ unsigned char *cl = (unsigned char *)&f;
unsigned int i;
for(i=0; i<BYTES_PER_DOUBLE; i++)
Sputc(cl[double_byte_order[i]], fd);
return TRUE;
}
static int
save_atom(rdf_db *db, IOSTREAM *out, atom_t a, save_context *ctx)
{ int hash = atom_hash(a) % ctx->saved_size;
saved *s;
size_t len;
const char *chars;
unsigned int i;
const wchar_t *wchars;
for(s=ctx->saved_table[hash]; s; s= s->next)
{ if ( s->name == a )
{ Sputc('X', out);
save_int(out, s->as);
return TRUE;
}
}
s = rdf_malloc(db, sizeof(*s));
s->name = a;
s->as = ctx->saved_id++;
s->next = ctx->saved_table[hash];
ctx->saved_table[hash] = s;
if ( (chars = PL_atom_nchars(a, &len)) )
{ Sputc('A', out);
save_int(out, len);
for(i=0; i<len; i++, chars++)
Sputc(*chars&0xff, out);
} else if ( (wchars = PL_atom_wchars(a, &len)) )
{ IOENC enc = out->encoding;
Sputc('W', out);
save_int(out, len);
out->encoding = ENC_UTF8;
for(i=0; i<len; i++, wchars++)
{ wint_t c = *wchars;
SECURE(assert(c>=0 && c <= 0x10ffff));
Sputcode(c, out);
}
out->encoding = enc;
} else
return FALSE;
return TRUE;
}
static void
write_triple(rdf_db *db, IOSTREAM *out, triple *t, save_context *ctx)
{ Sputc('T', out);
save_atom(db, out, t->subject, ctx);
save_atom(db, out, t->predicate.r->name, ctx);
if ( t->object_is_literal )
{ literal *lit = t->object.literal;
if ( lit->qualifier )
{ assert(lit->type_or_lang);
Sputc(lit->qualifier == Q_LANG ? 'l' : 't', out);
save_atom(db, out, lit->type_or_lang, ctx);
}
switch(lit->objtype)
{ case OBJ_STRING:
Sputc('L', out);
save_atom(db, out, lit->value.string, ctx);
break;
case OBJ_INTEGER:
Sputc('I', out);
save_int(out, lit->value.integer);
break;
case OBJ_DOUBLE:
{ Sputc('F', out);
save_double(out, lit->value.real);
break;
}
case OBJ_TERM:
{ const char *s = lit->value.term.record;
size_t len = lit->value.term.len;
Sputc('T', out);
save_int(out, len);
while(len-- > 0)
Sputc(*s++, out);
break;
}
default:
assert(0);
}
} else
{ Sputc('R', out);
save_atom(db, out, t->object.resource, ctx);
}
save_atom(db, out, t->graph, ctx);
save_int(out, t->line);
}
static void
write_source(rdf_db *db, IOSTREAM *out, atom_t src, save_context *ctx)
{ graph *s = lookup_graph(db, src, FALSE);
if ( s && s->source )
{ Sputc('F', out);
save_atom(db, out, s->source, ctx);
Sputc('t', out);
save_double(out, s->modified);
}
}
static void
write_md5(rdf_db *db, IOSTREAM *out, atom_t src)
{ graph *s = lookup_graph(db, src, FALSE);
if ( s )
{ md5_byte_t *p = s->digest;
int i;
Sputc('M', out);
for(i=0; i<16; i++)
Sputc(*p++, out);
}
}
static int
save_db(rdf_db *db, IOSTREAM *out, atom_t src)
{ triple *t;
save_context ctx;
if ( !RDLOCK(db) )
return FALSE;
init_saved(db, &ctx);
Sfprintf(out, "%s", SAVE_MAGIC);
save_int(out, SAVE_VERSION);
if ( src )
{ Sputc('S', out); /* start of graph header */
save_atom(db, out, src, &ctx);
write_source(db, out, src, &ctx);
write_md5(db, out, src);
}
if ( Sferror(out) )
{ RDUNLOCK(db);
return FALSE;
}
for(t = db->by_none; t; t = t->next[BY_NONE])
{ if ( !t->erased &&
(!src || t->graph == src) )
{ write_triple(db, out, t, &ctx);
if ( Sferror(out) )
return FALSE;
}
}
Sputc('E', out);
if ( Sferror(out) )
{ RDUNLOCK(db);
return FALSE;
}
destroy_saved(db, &ctx);
RDUNLOCK(db);
return TRUE;
}
static foreign_t
rdf_save_db(term_t stream, term_t graph)
{ IOSTREAM *out;
atom_t src;
if ( !PL_get_stream_handle(stream, &out) )
return type_error(stream, "stream");
if ( !get_atom_or_var_ex(graph, &src) )
return FALSE;
return save_db(DB, out, src);
}
static int64_t
load_int(IOSTREAM *fd)
{ int64_t first = Sgetc(fd);
int bytes, shift, b;
if ( !(first & 0xc0) ) /* 99% of them: speed up a bit */
{ first <<= (INT64BITSIZE-6);
first >>= (INT64BITSIZE-6);
return first;
}
bytes = (int) ((first >> 6) & 0x3);
first &= 0x3f;
if ( bytes <= 2 )
{ for( b = 0; b < bytes; b++ )
{ first <<= 8;
first |= Sgetc(fd) & 0xff;
}
shift = (sizeof(first)-1-bytes)*8 + 2;
} else
{ int m;
bytes = (int)first;
first = 0L;
for(m=0; m<bytes; m++)
{ first <<= 8;
first |= Sgetc(fd) & 0xff;
}
shift = (sizeof(first)-bytes)*8;
}
first <<= shift;
first >>= shift;
return first;
}
static int
load_double(IOSTREAM *fd, double *fp)
{ double f;
unsigned char *cl = (unsigned char *)&f;
unsigned int i;
for(i=0; i<BYTES_PER_DOUBLE; i++)
{ int c = Sgetc(fd);
if ( c == -1 )
{ *fp = 0.0;
return FALSE;
}
cl[double_byte_order[i]] = c;
}
*fp = f;
return TRUE;
}
typedef struct ld_context
{ long loaded_id; /* keep track of atoms */
atom_t *loaded_atoms;
long atoms_size;
atom_t graph; /* for single-graph files */
atom_t graph_source;
double modified;
int has_digest;
md5_byte_t digest[16];
atom_hash *graph_table; /* multi-graph file */
} ld_context;
static void
add_atom(rdf_db *db, atom_t a, ld_context *ctx)
{ if ( ctx->loaded_id >= ctx->atoms_size )
{ if ( ctx->atoms_size == 0 )
{ ctx->atoms_size = 1024;
ctx->loaded_atoms = rdf_malloc(db, sizeof(atom_t)*ctx->atoms_size);
} else
{ long obytes = sizeof(atom_t)*ctx->atoms_size;
long bytes;
ctx->atoms_size *= 2;
bytes = sizeof(atom_t)*ctx->atoms_size;
ctx->loaded_atoms = rdf_realloc(db, ctx->loaded_atoms, obytes, bytes);
}
}
ctx->loaded_atoms[ctx->loaded_id++] = a;
}
static atom_t
load_atom(rdf_db *db, IOSTREAM *in, ld_context *ctx)
{ switch(Sgetc(in))
{ case 'X':
{ intptr_t idx = (intptr_t)load_int(in);
return ctx->loaded_atoms[idx];
}
case 'A':
{ size_t len = (size_t)load_int(in);
atom_t a;
if ( len < 1024 )
{ char buf[1024];
Sfread(buf, 1, len, in);
a = PL_new_atom_nchars(len, buf);
} else
{ char *buf = rdf_malloc(db, len);
Sfread(buf, 1, len, in);
a = PL_new_atom_nchars(len, buf);
rdf_free(db, buf, len);
}
add_atom(db, a, ctx);
return a;
}
case 'W':
{ int len = (int)load_int(in);
atom_t a;
wchar_t buf[1024];
wchar_t *w;
IOENC enc = in->encoding;
int i;
if ( len < 1024 )
w = buf;
else
w = rdf_malloc(db, len*sizeof(wchar_t));
in->encoding = ENC_UTF8;
for(i=0; i<len; i++)
{ w[i] = Sgetcode(in);
SECURE(assert(w[i]>=0 && w[i] <= 0x10ffff));
}
in->encoding = enc;
a = PL_new_atom_wchars(len, w);
if ( w != buf )
rdf_free(db, w, len*sizeof(wchar_t));
add_atom(db, a, ctx);
return a;
}
default:
{ assert(0);
return 0;
}
}
}
static triple *
load_triple(rdf_db *db, IOSTREAM *in, ld_context *ctx)
{ triple *t = new_triple(db);
int c;
t->subject = load_atom(db, in, ctx);
t->predicate.u = load_atom(db, in, ctx);
t->resolve_pred = TRUE;
if ( (c=Sgetc(in)) == 'R' )
{ t->object.resource = load_atom(db, in, ctx);
} else
{ literal *lit = new_literal(db);
t->object_is_literal = TRUE;
t->object.literal = lit;
value:
switch(c)
{ case 'L':
lit->objtype = OBJ_STRING;
lit->value.string = load_atom(db, in, ctx);
break;
case 'I':
lit->objtype = OBJ_INTEGER;
lit->value.integer = load_int(in);
break;
case 'F':
lit->objtype = OBJ_DOUBLE;
load_double(in, &lit->value.real);
break;
case 'T':
{ unsigned int i;
char *s;
lit->objtype = OBJ_TERM;
lit->value.term.len = (size_t)load_int(in);
lit->value.term.record = rdf_malloc(db, lit->value.term.len);
lit->term_loaded = TRUE; /* see free_literal() */
s = (char *)lit->value.term.record;
for(i=0; i<lit->value.term.len; i++)
s[i] = Sgetc(in);
break;
}
case 'l':
lit->qualifier = Q_LANG;
lit->type_or_lang = load_atom(db, in, ctx);
c = Sgetc(in);
goto value;
case 't':
lit->qualifier = Q_TYPE;
lit->type_or_lang = load_atom(db, in, ctx);
c = Sgetc(in);
goto value;
default:
assert(0);
return NULL;
}
}
t->graph = load_atom(db, in, ctx);
t->line = (unsigned long)load_int(in);
if ( !ctx->graph )
{ if ( !ctx->graph_table )
ctx->graph_table = new_atom_hash(64);
add_atom_hash(ctx->graph_table, t->graph);
}
return t;
}
static int
load_magic(IOSTREAM *in)
{ char *s = SAVE_MAGIC;
for( ; *s; s++)
{ if ( Sgetc(in) != *s )
return FALSE;
}
return TRUE;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Note that we have two types of saved states. One holding many named
graphs and one holding the content of exactly one named graph.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#define LOAD_ERROR ((triple*)(intptr_t)-1)
static triple *
load_db(rdf_db *db, IOSTREAM *in, ld_context *ctx)
{ int version;
int c;
triple *list = NULL, *tail = NULL;
if ( !load_magic(in) )
return LOAD_ERROR;
version = (int)load_int(in);
while((c=Sgetc(in)) != EOF)
{ switch(c)
{ case 'T':
{ triple *t;
if ( !(t=load_triple(db, in, ctx)) )
return FALSE;
if ( tail )
{ tail->next[BY_NONE] = t;
tail = t;
} else
{ list = tail = t;
}
break;
}
/* file holding exactly one graph */
case 'S': /* name of the graph */
{ ctx->graph = load_atom(db, in, ctx);
break;
}
case 'M': /* MD5 of the graph */
{ int i;
for(i=0; i<16; i++)
ctx->digest[i] = Sgetc(in);
ctx->has_digest = TRUE;
break;
}
case 'F': /* file of the graph */
ctx->graph_source = load_atom(db, in, ctx);
break; /* end of one-graph handling */
case 't':
load_double(in, &ctx->modified);
break;
case 'E': /* end of file */
return list;
default:
break;
}
}
PL_warning("Illegal RDF triple file");
return LOAD_ERROR;
}
static int
link_loaded_triples(rdf_db *db, triple *t, ld_context *ctx)
{ long created0 = db->created;
graph *graph;
if ( ctx->graph ) /* lookup named graph */
{ graph = lookup_graph(db, ctx->graph, TRUE);
if ( ctx->graph_source && graph->source != ctx->graph_source )
{ if ( graph->source )
PL_unregister_atom(graph->source);
graph->source = ctx->graph_source;
PL_register_atom(graph->source);
graph->modified = ctx->modified;
}
if ( ctx->has_digest )
{ if ( graph->md5 )
{ if ( db->tr_first )
{ record_md5_transaction(db, graph, NULL);
} else
{ graph->md5 = FALSE; /* kill repetitive MD5 update */
}
} else
{ ctx->has_digest = FALSE;
}
}
} else
{ graph = NULL;
}
if ( db->tr_first ) /* loading in a transaction */
{ triple *next;
for( ; t; t = next )
{ next = t->next[BY_NONE];
t->next[BY_NONE] = NULL;
lock_atoms(t);
record_transaction(db, TR_ASSERT, t);
}
} else
{ triple *next;
for( ; t; t = next )
{ next = t->next[BY_NONE];
t->next[BY_NONE] = NULL;
lock_atoms(t);
if ( link_triple_silent(db, t) )
broadcast(EV_ASSERT_LOAD, t, NULL);
}
}
/* update the graph info */
if ( ctx->has_digest )
{ if ( db->tr_first )
{ md5_byte_t *d = rdf_malloc(db, sizeof(ctx->digest));
memcpy(d, ctx->digest, sizeof(ctx->digest));
record_md5_transaction(db, graph, d);
} else
{ sum_digest(graph->digest, ctx->digest);
}
graph->md5 = TRUE;
}
db->generation += (db->created-created0);
return TRUE;
}
static int
append_graph_to_list(ptr_hash_node *node, void *closure)
{ atom_t graph = (atom_t)node->value;
term_t tail = (term_t)closure;
term_t head = PL_new_term_ref();
int rc;
rc = (PL_unify_list(tail, head, tail) &&
PL_unify_atom(head, graph));
PL_reset_term_refs(head);
return rc;
}
static foreign_t
rdf_load_db(term_t stream, term_t id, term_t graphs)
{ ld_context ctx;
rdf_db *db = DB;
IOSTREAM *in;
triple *list;
int rc;
if ( !PL_get_stream_handle(stream, &in) )
return type_error(stream, "stream");
memset(&ctx, 0, sizeof(ctx));
if ( (list=load_db(db, in, &ctx)) == LOAD_ERROR )
return FALSE;
if ( !WRLOCK(db, FALSE) )
return FALSE;
broadcast(EV_LOAD, (void*)id, (void*)ATOM_begin);
if ( (rc=link_loaded_triples(db, list, &ctx)) )
{ if ( ctx.graph_table )
{ term_t tail = PL_copy_term_ref(graphs);
rc = ( for_atom_hash(ctx.graph_table, append_graph_to_list, (void*)tail) &&
PL_unify_nil(tail) );
destroy_atom_hash(ctx.graph_table);
} else
{ rc = PL_unify_atom(graphs, ctx.graph);
}
}
broadcast(EV_LOAD, (void*)id, (void*)ATOM_end);
WRUNLOCK(db);
PL_release_stream(in);
if ( ctx.loaded_atoms )
{ atom_t *ap, *ep;
for(ap=ctx.loaded_atoms, ep=ap+ctx.loaded_id; ap<ep; ap++)
PL_unregister_atom(*ap);
rdf_free(db, ctx.loaded_atoms, sizeof(atom_t)*ctx.atoms_size);
}
return rc;
}
#ifdef WITH_MD5
/*******************************
* MD5 SUPPORT *
*******************************/
/* md5_type is used to keep the MD5 independent from the internal
numbers
*/
static const char md5_type[] =
{ 0x0, /* OBJ_UNKNOWN */
0x3, /* OBJ_INTEGER */
0x4, /* OBJ_DOUBLE */
0x2, /* OBJ_STRING */
0x5 /* OBJ_TERM */
};
static void
md5_triple(triple *t, md5_byte_t *digest)
{ md5_state_t state;
size_t len;
md5_byte_t tmp[2];
const char *s;
literal *lit;
md5_init(&state);
s = PL_blob_data(t->subject, &len, NULL);
md5_append(&state, (const md5_byte_t *)s, (int)len);
md5_append(&state, (const md5_byte_t *)"P", 1);
s = PL_blob_data(t->predicate.r->name, &len, NULL);
md5_append(&state, (const md5_byte_t *)s, (int)len);
tmp[0] = 'O';
if ( t->object_is_literal )
{ lit = t->object.literal;
tmp[1] = md5_type[lit->objtype];
switch(lit->objtype)
{ case OBJ_STRING:
s = PL_blob_data(lit->value.string, &len, NULL);
break;
case OBJ_INTEGER: /* TBD: byte order issues */
s = (const char *)&lit->value.integer;
len = sizeof(lit->value.integer);
break;
case OBJ_DOUBLE:
s = (const char *)&lit->value.real;
len = sizeof(lit->value.real);
break;
case OBJ_TERM:
s = (const char *)lit->value.term.record;
len = lit->value.term.len;
break;
default:
assert(0);
}
} else
{ s = PL_blob_data(t->object.resource, &len, NULL);
tmp[1] = 0x1; /* old OBJ_RESOURCE */
lit = NULL;
}
md5_append(&state, tmp, 2);
md5_append(&state, (const md5_byte_t *)s, (int)len);
if ( lit && lit->qualifier )
{ assert(lit->type_or_lang);
md5_append(&state,
(const md5_byte_t *)(lit->qualifier == Q_LANG ? "l" : "t"),
1);
s = PL_blob_data(lit->type_or_lang, &len, NULL);
md5_append(&state, (const md5_byte_t *)s, (int)len);
}
if ( t->graph )
{ md5_append(&state, (const md5_byte_t *)"S", 1);
s = PL_blob_data(t->graph, &len, NULL);
md5_append(&state, (const md5_byte_t *)s, (int)len);
}
md5_finish(&state, digest);
}
static void
sum_digest(md5_byte_t *digest, md5_byte_t *add)
{ md5_byte_t *p, *q;
int n;
for(p=digest, q=add, n=16; --n>=0; )
*p++ += *q++;
}
static void
dec_digest(md5_byte_t *digest, md5_byte_t *add)
{ md5_byte_t *p, *q;
int n;
for(p=digest, q=add, n=16; --n>=0; )
*p++ -= *q++;
}
static int
md5_unify_digest(term_t t, md5_byte_t digest[16])
{ char hex_output[16*2];
int di;
char *pi;
static char hexd[] = "0123456789abcdef";
for(pi=hex_output, di = 0; di < 16; ++di)
{ *pi++ = hexd[(digest[di] >> 4) & 0x0f];
*pi++ = hexd[digest[di] & 0x0f];
}
return PL_unify_atom_nchars(t, 16*2, hex_output);
}
static foreign_t
rdf_md5(term_t graph_name, term_t md5)
{ atom_t src;
int rc;
rdf_db *db = DB;
if ( !get_atom_or_var_ex(graph_name, &src) )
return FALSE;
if ( src )
{ graph *s;
if ( !RDLOCK(db) )
return FALSE;
if ( (s = lookup_graph(db, src, FALSE)) )
{ rc = md5_unify_digest(md5, s->digest);
} else
{ md5_byte_t digest[16];
memset(digest, 0, sizeof(digest));
rc = md5_unify_digest(md5, digest);
}
RDUNLOCK(db);
} else
{ md5_byte_t digest[16];
graph **ht;
int i;
memset(&digest, 0, sizeof(digest));
if ( !RDLOCK(db) )
return FALSE;
for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
{ graph *s;
for( s = *ht; s; s = s->next )
sum_digest(digest, s->digest);
}
rc = md5_unify_digest(md5, digest);
RDUNLOCK(db);
}
return rc;
}
static foreign_t
rdf_atom_md5(term_t text, term_t times, term_t md5)
{ char *s;
int n, i;
size_t len;
md5_byte_t digest[16];
if ( !PL_get_nchars(text, &len, &s, CVT_ALL) )
return type_error(text, "text");
if ( !PL_get_integer(times, &n) )
return type_error(times, "integer");
if ( n < 1 )
return domain_error(times, "positive_integer");
for(i=0; i<n; i++)
{ md5_state_t state;
md5_init(&state);
md5_append(&state, (const md5_byte_t *)s, (int)len);
md5_finish(&state, digest);
s = (char *)digest;
len = sizeof(digest);
}
return md5_unify_digest(md5, digest);
}
#endif /*WITH_MD5*/
/*******************************
* ATOMS *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Lock atoms in triple against AGC. Note that the predicate name is locked
in the predicate structure.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void
lock_atoms(triple *t)
{ if ( !t->atoms_locked )
{ t->atoms_locked = TRUE;
PL_register_atom(t->subject);
if ( t->object_is_literal )
{ lock_atoms_literal(t->object.literal);
} else
{ PL_register_atom(t->object.resource);
}
}
}
static void
unlock_atoms(triple *t)
{ if ( t->atoms_locked )
{ t->atoms_locked = FALSE;
PL_unregister_atom(t->subject);
if ( !t->object_is_literal )
{ PL_unregister_atom(t->object.resource);
}
}
}
/*******************************
* PROLOG CONVERSION *
*******************************/
#define LIT_TYPED 0x1
#define LIT_NOERROR 0x2
#define LIT_PARTIAL 0x4
static int
get_lit_atom_ex(term_t t, atom_t *a, int flags)
{ if ( PL_get_atom(t, a) )
return TRUE;
if ( (flags & LIT_PARTIAL) && PL_is_variable(t) )
{ *a = 0L;
return TRUE;
}
return type_error(t, "atom");
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
get_literal() processes the argument of a literal/1 term passes as
object.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
get_literal(rdf_db *db, term_t litt, triple *t, int flags)
{ literal *lit;
alloc_literal_triple(db, t);
lit = t->object.literal;
if ( PL_get_atom(litt, &lit->value.string) )
{ lit->objtype = OBJ_STRING;
} else if ( PL_is_integer(litt) && PL_get_int64(litt, &lit->value.integer) )
{ lit->objtype = OBJ_INTEGER;
} else if ( PL_get_float(litt, &lit->value.real) )
{ lit->objtype = OBJ_DOUBLE;
} else if ( PL_is_functor(litt, FUNCTOR_lang2) )
{ term_t a = PL_new_term_ref();
_PL_get_arg(1, litt, a);
if ( !get_lit_atom_ex(a, &lit->type_or_lang, flags) )
return FALSE;
_PL_get_arg(2, litt, a);
if ( !get_lit_atom_ex(a, &lit->value.string, flags) )
return FALSE;
lit->qualifier = Q_LANG;
lit->objtype = OBJ_STRING;
} else if ( PL_is_functor(litt, FUNCTOR_type2) &&
!(flags & LIT_TYPED) ) /* avoid recursion */
{ term_t a = PL_new_term_ref();
_PL_get_arg(1, litt, a);
if ( !get_lit_atom_ex(a, &lit->type_or_lang, flags) )
return FALSE;
lit->qualifier = Q_TYPE;
_PL_get_arg(2, litt, a);
return get_literal(db, a, t, LIT_TYPED|flags);
} else if ( !PL_is_ground(litt) )
{ if ( !(flags & LIT_PARTIAL) )
return type_error(litt, "rdf_object");
if ( !PL_is_variable(litt) )
lit->objtype = OBJ_TERM;
} else
{ lit->value.term.record = PL_record_external(litt, &lit->value.term.len);
lit->objtype = OBJ_TERM;
}
return TRUE;
}
static int
get_object(rdf_db *db, term_t object, triple *t)
{ if ( PL_get_atom(object, &t->object.resource) )
{ assert(!t->object_is_literal);
} else if ( PL_is_functor(object, FUNCTOR_literal1) )
{ term_t a = PL_new_term_ref();
_PL_get_arg(1, object, a);
return get_literal(db, a, t, 0);
} else
return type_error(object, "rdf_object");
return TRUE;
}
static int
get_src(term_t src, triple *t)
{ if ( src && !PL_is_variable(src) )
{ if ( PL_get_atom(src, &t->graph) )
{ t->line = NO_LINE;
} else if ( PL_is_functor(src, FUNCTOR_colon2) )
{ term_t a = PL_new_term_ref();
long line;
_PL_get_arg(1, src, a);
if ( !get_atom_or_var_ex(a, &t->graph) )
return FALSE;
_PL_get_arg(2, src, a);
if ( PL_get_long(a, &line) )
t->line = line;
else if ( !PL_is_variable(a) )
return type_error(a, "integer");
} else
return type_error(src, "rdf_graph");
}
return TRUE;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Return values:
-1: exception
0: no predicate
1: the predicate
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
get_existing_predicate(rdf_db *db, term_t t, predicate **p)
{ atom_t name;
if ( !PL_get_atom(t, &name ) )
{ if ( PL_is_functor(t, FUNCTOR_literal1) )
return 0; /* rdf(_, literal(_), _) */
return type_error(t, "atom");
}
if ( (*p = existing_predicate(db, name)) )
return 1;
DEBUG(5, Sdprintf("No predicate %s\n", PL_atom_chars(name)));
return 0; /* no predicate */
}
static int
get_predicate(rdf_db *db, term_t t, predicate **p)
{ atom_t name;
if ( !get_atom_ex(t, &name ) )
return FALSE;
*p = lookup_predicate(db, name);
return TRUE;
}
static int
get_triple(rdf_db *db,
term_t subject, term_t predicate, term_t object,
triple *t)
{ if ( !get_atom_ex(subject, &t->subject) ||
!get_predicate(db, predicate, &t->predicate.r) ||
!get_object(db, object, t) )
return FALSE;
return TRUE;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
get_partial_triple() creates a triple for matching purposes. It can
return FALSE for two reasons. Mostly (type) errors, but also if
resources are accessed that do not exist and therefore the subsequent
matching will always fail. This is notably the case for predicates,
which are first class citizens to this library.
Return values:
1: ok
0: no predicate
-1: error
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
get_partial_triple(rdf_db *db,
term_t subject, term_t predicate, term_t object,
term_t src, triple *t)
{ int rc;
if ( subject && !get_resource_or_var_ex(subject, &t->subject) )
return FALSE;
if ( !PL_is_variable(predicate) &&
(rc=get_existing_predicate(db, predicate, &t->predicate.r)) != 1 )
return rc;
/* the object */
if ( object && !PL_is_variable(object) )
{ if ( PL_get_atom(object, &t->object.resource) )
{ assert(!t->object_is_literal);
} else if ( PL_is_functor(object, FUNCTOR_literal1) )
{ term_t a = PL_new_term_ref();
_PL_get_arg(1, object, a);
if ( !get_literal(db, a, t, LIT_PARTIAL) )
return FALSE;
} else if ( PL_is_functor(object, FUNCTOR_literal2) )
{ term_t a = PL_new_term_ref();
literal *lit;
alloc_literal_triple(db, t);
lit = t->object.literal;
_PL_get_arg(1, object, a);
if ( PL_is_functor(a, FUNCTOR_exact1) )
t->match = STR_MATCH_EXACT;
else if ( PL_is_functor(a, FUNCTOR_plain1) )
t->match = STR_MATCH_PLAIN;
else if ( PL_is_functor(a, FUNCTOR_substring1) )
t->match = STR_MATCH_SUBSTRING;
else if ( PL_is_functor(a, FUNCTOR_word1) )
t->match = STR_MATCH_WORD;
else if ( PL_is_functor(a, FUNCTOR_prefix1) )
t->match = STR_MATCH_PREFIX;
else if ( PL_is_functor(a, FUNCTOR_like1) )
t->match = STR_MATCH_LIKE;
else
return domain_error(a, "match_type");
_PL_get_arg(1, a, a);
if ( !get_atom_or_var_ex(a, &lit->value.string) )
return FALSE;
lit->objtype = OBJ_STRING;
} else
return type_error(object, "rdf_object");
}
/* the graph */
if ( !get_src(src, t) )
return FALSE;
if ( t->subject )
t->indexed |= BY_S;
if ( t->predicate.r )
t->indexed |= BY_P;
if ( t->object_is_literal )
{ literal *lit = t->object.literal;
if ( lit->objtype == OBJ_STRING &&
lit->value.string &&
t->match <= STR_MATCH_EXACT )
t->indexed |= BY_O;
} else if ( t->object.resource )
t->indexed |= BY_O;
db->indexed[t->indexed]++; /* statistics */
switch(t->indexed)
{ case BY_SPO:
t->indexed = BY_SP;
break;
case BY_SO:
t->indexed = BY_S;
break;
}
return TRUE;
}
static int
inverse_partial_triple(triple *t)
{ predicate *i = 0;
if ( !t->inversed &&
(!t->predicate.r || (i=t->predicate.r->inverse_of)) &&
!t->object_is_literal )
{ atom_t o = t->object.resource;
t->object.resource = t->subject;
t->subject = o;
if ( t->predicate.r )
t->predicate.r = i;
t->indexed = by_inverse[t->indexed];
t->inversed = TRUE;
return TRUE;
}
return FALSE;
}
static int
get_graph(term_t src, triple *t)
{ if ( PL_get_atom(src, &t->graph) )
{ t->line = NO_LINE;
return TRUE;
}
if ( PL_is_functor(src, FUNCTOR_colon2) )
{ term_t a = PL_new_term_ref();
long line;
_PL_get_arg(1, src, a);
if ( !get_atom_ex(a, &t->graph) )
return FALSE;
_PL_get_arg(2, src, a);
if ( !get_long_ex(a, &line) )
return FALSE;
t->line = line;
return TRUE;
}
return type_error(src, "rdf_graph");
}
static int
unify_graph(term_t src, triple *t)
{ switch( PL_term_type(src) )
{ case PL_VARIABLE:
{ if ( t->line == NO_LINE )
return PL_unify_atom(src, t->graph);
else
goto full_term;
}
case PL_ATOM:
{ atom_t a;
return (PL_get_atom(src, &a) &&
a == t->graph);
}
case PL_TERM:
{ if ( t->line == NO_LINE )
{ return PL_unify_term(src,
PL_FUNCTOR, FUNCTOR_colon2,
PL_ATOM, t->graph,
PL_VARIABLE);
} else
{ full_term:
return PL_unify_term(src,
PL_FUNCTOR, FUNCTOR_colon2,
PL_ATOM, t->graph,
PL_LONG, t->line);
}
}
default:
return type_error(src, "rdf_graph");
}
}
static int
same_graph(triple *t1, triple *t2)
{ return t1->line == t2->line &&
t1->graph == t2->graph;
}
static int
put_literal_value(term_t v, literal *lit)
{ switch(lit->objtype)
{ case OBJ_STRING:
PL_put_atom(v, lit->value.string);
break;
case OBJ_INTEGER:
PL_put_variable(v);
return PL_unify_int64(v, lit->value.integer);
case OBJ_DOUBLE:
return PL_put_float(v, lit->value.real);
case OBJ_TERM:
return PL_recorded_external(lit->value.term.record, v);
default:
assert(0);
return FALSE;
}
return TRUE;
}
static int
unify_literal(term_t lit, literal *l)
{ term_t v = PL_new_term_ref();
if ( !put_literal_value(v, l) )
return FALSE;
if ( l->qualifier )
{ functor_t qf;
assert(l->type_or_lang);
if ( l->qualifier == Q_LANG )
qf = FUNCTOR_lang2;
else
qf = FUNCTOR_type2;
if ( PL_unify_term(lit, PL_FUNCTOR, qf,
PL_ATOM, l->type_or_lang,
PL_TERM, v) )
return TRUE;
return PL_unify(lit, v); /* allow rdf(X, Y, literal(foo)) */
} else if ( PL_unify(lit, v) )
{ return TRUE;
} else if ( PL_is_functor(lit, FUNCTOR_lang2) &&
l->objtype == OBJ_STRING )
{ term_t a = PL_new_term_ref();
_PL_get_arg(2, lit, a);
return PL_unify(a, v);
} else if ( PL_is_functor(lit, FUNCTOR_type2) )
{ term_t a = PL_new_term_ref();
_PL_get_arg(2, lit, a);
return PL_unify(a, v);
} else
return FALSE;
}
static int
unify_object(term_t object, triple *t)
{ if ( t->object_is_literal )
{ term_t lit = PL_new_term_ref();
if ( PL_unify_functor(object, FUNCTOR_literal1) )
_PL_get_arg(1, object, lit);
else if ( PL_is_functor(object, FUNCTOR_literal2) )
_PL_get_arg(2, object, lit);
else
return FALSE;
return unify_literal(lit, t->object.literal);
} else
{ return PL_unify_atom(object, t->object.resource);
}
}
static int
unify_triple(term_t subject, term_t pred, term_t object,
term_t src, triple *t, int inversed)
{ predicate *p = t->predicate.r;
fid_t fid;
if ( inversed )
{ term_t tmp = object;
object = subject;
subject = tmp;
if ( !(p = p->inverse_of) )
return FALSE;
}
fid = PL_open_foreign_frame();
if ( !PL_unify_atom(subject, t->subject) ||
!PL_unify_atom(pred, p->name) ||
!unify_object(object, t) ||
(src && !unify_graph(src, t)) )
{ PL_discard_foreign_frame(fid);
return FALSE;
} else
{ PL_close_foreign_frame(fid);
return TRUE;
}
}
/*******************************
* DUBLICATE HANDLING *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
According to the RDF specs, duplicate triples have no meaning, but they
slow down search and often produce duplicate results in search. Worse,
some coding styles proposed in the OWL documents introduce huge amounts
of duplicate triples. We cannot simply ignore a triple if it is a
duplicate as a subsequent retract would delete the final triple. For
example, after loading two files that contain the same triple and
unloading one of these files the database would be left without triples.
In our solution, if a triple is added as a duplicate, it is flagged such
using the flag is_duplicate. The `principal' triple has a count
`duplicates', indicating the number of duplicate triples in the
database.
It might make sense to introduce the BY_SPO table as fully indexed
lookups are frequent with the introduction of duplicate detection.
(*) Iff too many triples are added, it may be time to enlarge the
hashtable. Note that we do not call update_hash() blindly as this would
cause each triple that modifies the predicate hierarchy to force a
rehash. As we are not searching using subPropertyOf semantics during the
duplicate update, there is no point updating. If it is incorrect it will
be updated on the first real query.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
update_duplicates_add(rdf_db *db, triple *t)
{ triple *d;
const int indexed = BY_SP;
assert(t->is_duplicate == FALSE);
assert(t->duplicates == 0);
if ( WANT_GC(db) ) /* (*) See above */
update_hash(db);
d = db->table[indexed][triple_hash(db, t, indexed)];
for( ; d && d != t; d = d->next[indexed] )
{ if ( match_triples(d, t, MATCH_DUPLICATE) )
{ t->is_duplicate = TRUE;
assert( !d->is_duplicate );
d->duplicates++;
DEBUG(2,
print_triple(t, PRT_SRC);
Sdprintf(" %p: %d-th duplicate: ", t, d->duplicates);
Sdprintf("Principal: %p at", d);
print_src(d);
Sdprintf("\n"));
assert(d->duplicates); /* check overflow */
db->duplicates++;
return TRUE;
}
}
return FALSE;
}
static void /* t is about to be deleted */
update_duplicates_del(rdf_db *db, triple *t)
{ const int indexed = BY_SP;
if ( t->duplicates ) /* I am the principal one */
{ triple *d;
DEBUG(2,
print_triple(t, PRT_SRC);
Sdprintf(": DEL principal %p, %d duplicates: ", t, t->duplicates));
db->duplicates--;
d = db->table[indexed][triple_hash(db, t, indexed)];
for( ; d; d = d->next[indexed] )
{ if ( d != t && match_triples(d, t, MATCH_DUPLICATE) )
{ assert(d->is_duplicate);
d->is_duplicate = FALSE;
d->duplicates = t->duplicates-1;
DEBUG(2,
Sdprintf("New principal: %p at", d);
print_src(d);
Sdprintf("\n"));
return;
}
}
assert(0);
} else if ( t->is_duplicate ) /* I am a duplicate */
{ triple *d;
DEBUG(2,
print_triple(t, PRT_SRC);
Sdprintf(": DEL: is a duplicate: "));
db->duplicates--;
d = db->table[indexed][triple_hash(db, t, indexed)];
for( ; d; d = d->next[indexed] )
{ if ( d != t && match_triples(d, t, MATCH_DUPLICATE) )
{ if ( d->duplicates )
{ d->duplicates--;
DEBUG(2,
Sdprintf("Principal %p at ", d);
print_src(d);
Sdprintf(" has %d duplicates\n", d->duplicates));
return;
}
}
}
Sdprintf("FATAL\n");
PL_halt(1);
assert(0);
}
}
/*******************************
* TRANSACTIONS *
*******************************/
static void
append_transaction(rdf_db *db, transaction_record *tr)
{ if ( db->tr_last )
{ tr->next = NULL;
tr->previous = db->tr_last;
db->tr_last->next = tr;
db->tr_last = tr;
} else
{ tr->next = tr->previous = NULL;
db->tr_first = db->tr_last = tr;
}
}
static void
open_transaction(rdf_db *db)
{ transaction_record *tr = rdf_malloc(db, sizeof(*tr));
memset(tr, 0, sizeof(*tr));
tr->type = TR_MARK;
if ( db->tr_first )
db->tr_nesting++;
else
db->tr_nesting = 0;
append_transaction(db, tr);
}
static void
record_transaction(rdf_db *db, tr_type type, triple *t)
{ transaction_record *tr = rdf_malloc(db, sizeof(*tr));
memset(tr, 0, sizeof(*tr));
tr->type = type;
tr->triple = t;
append_transaction(db, tr);
}
static void
record_md5_transaction(rdf_db *db, graph *src, md5_byte_t *digest)
{ transaction_record *tr = rdf_malloc(db, sizeof(*tr));
memset(tr, 0, sizeof(*tr));
tr->type = TR_UPDATE_MD5,
tr->update.md5.graph = src;
tr->update.md5.digest = digest;
append_transaction(db, tr);
}
static void
record_update_transaction(rdf_db *db, triple *t, triple *new)
{ transaction_record *tr = rdf_malloc(db, sizeof(*tr));
memset(tr, 0, sizeof(*tr));
tr->type = TR_UPDATE,
tr->triple = t;
tr->update.triple = new;
append_transaction(db, tr);
}
static void
record_update_src_transaction(rdf_db *db, triple *t,
atom_t src, unsigned long line)
{ transaction_record *tr = rdf_malloc(db, sizeof(*tr));
memset(tr, 0, sizeof(*tr));
tr->type = TR_UPDATE_SRC,
tr->triple = t;
tr->update.src.atom = src;
tr->update.src.line = line;
append_transaction(db, tr);
}
static void
void_transaction(rdf_db *db, transaction_record *tr)
{ switch(tr->type)
{ case TR_ASSERT:
free_triple(db, tr->triple);
break;
case TR_UPDATE:
free_triple(db, tr->update.triple);
break;
case TR_UPDATE_MD5:
if ( tr->update.md5.digest )
rdf_free(db, tr->update.md5.digest, sizeof(*tr->update.md5.digest));
break;
default:
break;
}
tr->type = TR_VOID;
}
static void
free_transaction(rdf_db *db, transaction_record *tr)
{ void_transaction(db, tr);
rdf_free(db, tr, sizeof(*tr));
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This must deal with multiple operations on the same triple. Most
probably the most important thing is to merge update records. We must
also make-up our mind with regard to updated records that are erased or
records that are erased after updating, etc.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void
clean_transaction(rdf_db *db, transaction_record *tr0)
{
#if 0
transaction_record *tr;
for(tr=tr0; tr; tr=tr->next)
{ if ( TR_RETRACT )
{ transaction_record *tr2;
for(tr2=tr->next; tr2; tr2=tr2->next)
{ if ( tr2->triple == tr->triple )
{ switch(tr2->type)
{ case TR_RETRACT:
case TR_UPDATE:
case TR_UPDATE_SRC:
void_transaction(db, tr2);
default:
;
}
}
}
}
}
#endif
}
static void
truncate_transaction(rdf_db *db, transaction_record *last)
{ db->tr_last = last;
if ( last )
{ db->tr_last->next = NULL;
} else
{ db->tr_first = NULL;
}
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
discard_transaction() simply destroys all actions in the last
transaction.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void
discard_transaction(rdf_db *db)
{ transaction_record *tr, *prev;
for(tr=db->tr_last; tr; tr = prev)
{ prev = tr->previous;
if ( tr->type == TR_SUB_END )
{ if ( tr->update.transaction_id )
PL_erase(tr->update.transaction_id);
}
if ( tr->type == TR_MARK )
{ rdf_free(db, tr, sizeof(*tr));
truncate_transaction(db, prev);
db->tr_nesting--;
return;
}
free_transaction(db, tr);
}
}
int
put_begin_end(term_t t, functor_t be, int level)
{ term_t av;
return ( (av = PL_new_term_ref()) &&
PL_put_integer(av, level) &&
PL_cons_functor_v(t, be, av) );
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Note (*) rdf-monitors can modify the database by opening new
transactions. Therefore we first close the transaction to allow opening
new ones. TBD: get this clear. Monitors have only restricted read
access?
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
commit_transaction_int(rdf_db *db, term_t id)
{ transaction_record *tr, *next;
int tr_level = 0; /* nesting level */
if ( db->tr_nesting > 0 ) /* commit nested transaction */
{ tr=db->tr_last;
if ( tr->type == TR_MARK ) /* empty nested transaction */
{ truncate_transaction(db, tr->previous);
rdf_free(db, tr, sizeof(*tr));
db->tr_nesting--;
return TRUE;
}
for(; tr; tr = tr->previous) /* not the last (tested above) */
{ /* not the first (we are nested) */
if ( tr->type == TR_MARK )
{ transaction_record *end = rdf_malloc(db, sizeof(*end));
memset(end, 0, sizeof(*end));
end->type = TR_SUB_END;
end->update.transaction_id = PL_record(id);
append_transaction(db, end);
tr->type = TR_SUB_START;
tr->update.transaction_id = end->update.transaction_id;
db->tr_nesting--;
return TRUE;
}
}
assert(0);
return FALSE;
}
while( (tr=db->tr_first) ) /* See above (*) */
{ db->tr_first = db->tr_last = NULL;
clean_transaction(db, tr);
/* real commit */
for(; tr; tr = next)
{ next = tr->next;
switch(tr->type)
{ case TR_MARK:
break;
case TR_SUB_START:
{ term_t id = PL_new_term_ref();
term_t be = PL_new_term_ref();
if ( !PL_recorded(tr->update.transaction_id, id) ||
!put_begin_end(be, FUNCTOR_begin1, ++tr_level) ||
!broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
return FALSE;
break;
}
case TR_SUB_END:
{ term_t id = PL_new_term_ref();
term_t be = PL_new_term_ref();
if ( !PL_recorded(tr->update.transaction_id, id) )
return FALSE;
PL_erase(tr->update.transaction_id);
if ( !put_begin_end(be, FUNCTOR_end1, tr_level--) ||
!broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
return FALSE;
break;
}
case TR_ASSERT:
link_triple(db, tr->triple);
db->generation++;
break;
case TR_RETRACT:
if ( !tr->triple->erased ) /* already erased */
{ erase_triple(db, tr->triple);
db->generation++;
}
break;
case TR_UPDATE:
if ( !tr->triple->erased )
{ if ( !broadcast(EV_UPDATE, tr->triple, tr->update.triple) )
return FALSE; /* TBD: how to handle? */
if ( !tr->triple->erased )
{ erase_triple_silent(db, tr->triple);
link_triple_silent(db, tr->update.triple);
db->generation++;
}
}
break;
case TR_UPDATE_SRC:
if ( !tr->triple->erased )
{ if ( tr->triple->graph != tr->update.src.atom )
{ if ( tr->triple->graph )
unregister_graph(db, tr->triple);
tr->triple->graph = tr->update.src.atom;
if ( tr->triple->graph )
register_graph(db, tr->triple);
}
tr->triple->line = tr->update.src.line;
db->generation++;
}
break;
case TR_UPDATE_MD5:
{ graph *src = tr->update.md5.graph;
md5_byte_t *digest = tr->update.md5.digest;
if ( digest )
{ sum_digest(digest, src->digest);
src->md5 = TRUE;
rdf_free(db, digest, sizeof(md5_byte_t)*16);
} else
{ src->md5 = FALSE;
}
break;
}
case TR_RESET:
db->tr_reset = FALSE;
reset_db(db);
break;
case TR_VOID:
break;
default:
assert(0);
}
rdf_free(db, tr, sizeof(*tr));
}
}
return TRUE;
}
static int
commit_transaction(rdf_db *db, term_t id)
{ int rc;
db->gc_blocked++;
rc = commit_transaction_int(db, id);
db->gc_blocked--;
return rc;
}
static foreign_t
rdf_transaction(term_t goal, term_t id)
{ int rc;
rdf_db *db = DB;
active_transaction me;
if ( !WRLOCK(db, TRUE) )
return FALSE;
open_transaction(db);
me.parent = db->tr_active;
me.id = id;
db->tr_active = &me;
rc = PL_call_predicate(NULL, PL_Q_PASS_EXCEPTION, PRED_call1, goal);
if ( rc )
{ int empty = (db->tr_last == NULL || db->tr_last->type == TR_MARK);
if ( empty || db->tr_nesting > 0 )
{ commit_transaction(db, id);
} else
{ term_t be;
if ( !(be=PL_new_term_ref()) ||
!put_begin_end(be, FUNCTOR_begin1, 0) ||
!broadcast(EV_TRANSACTION, (void*)id, (void*)be) ||
!put_begin_end(be, FUNCTOR_end1, 0) )
return FALSE;
if ( !LOCKOUT_READERS(db) ) /* interrupt, timeout */
{ broadcast(EV_TRANSACTION, (void*)id, (void*)be);
rc = FALSE;
goto discard;
}
commit_transaction(db, id);
REALLOW_READERS(db);
if ( !broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
return FALSE;
}
} else
{ discard:
discard_transaction(db);
}
db->tr_active = me.parent;
WRUNLOCK(db);
return rc;
}
static foreign_t
rdf_active_transactions(term_t list)
{ rdf_db *db = DB;
term_t tail = PL_copy_term_ref(list);
term_t head = PL_new_term_ref();
active_transaction *ot;
for(ot = db->tr_active; ot; ot=ot->parent)
{ if ( !PL_unify_list(tail, head, tail) ||
!PL_unify(head, ot->id) )
return FALSE;
}
return PL_unify_nil(tail);
}
/*******************************
* PREDICATES *
*******************************/
static foreign_t
rdf_assert4(term_t subject, term_t predicate, term_t object, term_t src)
{ rdf_db *db = DB;
triple *t = new_triple(db);
if ( !get_triple(db, subject, predicate, object, t) )
{ free_triple(db, t);
return FALSE;
}
if ( src )
{ if ( !get_graph(src, t) )
{ free_triple(db, t);
return FALSE;
}
} else
{ t->graph = ATOM_user;
t->line = NO_LINE;
}
lock_atoms(t);
if ( !WRLOCK(db, FALSE) )
{ free_triple(db, t);
return FALSE;
}
if ( db->tr_first )
{ record_transaction(db, TR_ASSERT, t);
} else
{ link_triple(db, t);
db->generation++;
}
WRUNLOCK(db);
return TRUE;
}
static foreign_t
rdf_assert3(term_t subject, term_t predicate, term_t object)
{ return rdf_assert4(subject, predicate, object, 0);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
inc_active_queries(rdf_db *db);
dec_active_queries(rdf_db *db);
TBD: Either delete this or use atomic inc/dec.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void
inc_active_queries(rdf_db *db)
{ LOCK_MISC(db);
db->active_queries++;
UNLOCK_MISC(db);
}
static void
dec_active_queries(rdf_db *db)
{ LOCK_MISC(db);
db->active_queries--;
assert(db->active_queries>=0);
UNLOCK_MISC(db);
}
typedef struct search_state
{ rdf_db *db; /* our database */
term_t subject; /* Prolog term references */
term_t object;
term_t predicate;
term_t src;
term_t realpred;
unsigned locked : 1; /* State has been locked */
unsigned allocated : 1; /* State has been allocated */
unsigned flags; /* Misc flags controlling search */
atom_t prefix; /* prefix and like search */
avl_enum *literal_state; /* Literal search state */
literal *literal_cursor; /* pointer in current literal */
literal_ex lit_ex; /* extended literal for fast compare */
triple *cursor; /* Pointer in triple DB */
triple pattern; /* Pattern triple */
} search_state;
static void free_search_state(search_state *state);
static void
init_cursor_from_literal(search_state *state, literal *cursor)
{ triple *p = &state->pattern;
unsigned long iv;
int i;
DEBUG(3,
Sdprintf("Trying literal search for ");
print_literal(cursor);
Sdprintf("\n"));
p->indexed |= BY_O;
p->indexed &= ~BY_S; /* we do not have index BY_SO */
switch(p->indexed)
{ case BY_O:
iv = literal_hash(cursor);
break;
case BY_OP:
iv = predicate_hash(p->predicate.r) ^ literal_hash(cursor);
break;
default:
iv = 0; /* make compiler silent */
assert(0);
}
i = (int)(iv % (long)state->db->table_size[p->indexed]);
state->cursor = state->db->table[p->indexed][i];
state->literal_cursor = cursor;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
(*) update_hash() is there to update the hash after a change to the
predicate organization. If we do not have a predicate or we do not
search using rdf_has/3, this is not needed.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
init_search_state(search_state *state)
{ triple *p = &state->pattern;
if ( get_partial_triple(state->db,
state->subject, state->predicate, state->object,
state->src, p) != TRUE )
{ free_triple(state->db, p);
return FALSE;
}
if ( !RDLOCK(state->db) )
{ free_triple(state->db, p);
return FALSE;
}
state->locked = TRUE;
if ( p->predicate.r && (state->flags & MATCH_SUBPROPERTY) ) /* See (*) */
{ if ( !update_hash(state->db) )
{ free_search_state(state);
return FALSE;
}
}
if ( (p->match == STR_MATCH_PREFIX || p->match == STR_MATCH_LIKE) &&
p->indexed != BY_SP &&
(state->prefix = first_atom(p->object.literal->value.string, p->match)))
{ literal lit;
literal **rlitp;
lit = *p->object.literal;
lit.value.string = state->prefix;
state->literal_state = rdf_malloc(state->db,
sizeof(*state->literal_state));
state->lit_ex.literal = &lit;
prepare_literal_ex(&state->lit_ex);
rlitp = avlfindfirst(&state->db->literals, &state->lit_ex, state->literal_state);
if ( rlitp )
{ init_cursor_from_literal(state, *rlitp);
} else
{ free_search_state(state);
return FALSE;
}
} else
{ state->cursor = state->db->table[p->indexed]
[triple_hash(state->db, p, p->indexed)];
}
return TRUE;
}
static void
free_search_state(search_state *state)
{ if ( state->locked )
{ RDUNLOCK(state->db);
}
free_triple(state->db, &state->pattern);
if ( state->prefix )
PL_unregister_atom(state->prefix);
if ( state->literal_state )
rdf_free(state->db, state->literal_state, sizeof(*state->literal_state));
if ( state->allocated ) /* also means redo! */
{ dec_active_queries(state->db);
rdf_free(state->db, state, sizeof(*state));
}
}
static foreign_t
allow_retry_state(search_state *state)
{ if ( !state->allocated )
{ search_state *copy = rdf_malloc(state->db, sizeof(*copy));
*copy = *state;
copy->allocated = TRUE;
inc_active_queries(state->db);
state = copy;
}
PL_retry_address(state);
}
/* TBD: simplify. Maybe split for resource and literal search, as
both involve mutual exclusive complications to this routine,
*/
static int
next_search_state(search_state *state)
{ triple *t = state->cursor;
triple *p = &state->pattern;
retry:
for( ; t; t = t->next[p->indexed])
{ if ( t->is_duplicate && !state->src )
continue;
/* hash-collision, skip */
if ( state->literal_state )
{ if ( !(t->object_is_literal &&
t->object.literal == state->literal_cursor) )
continue;
}
if ( match_triples(t, p, state->flags) )
{ term_t retpred = state->realpred ? state->realpred : state->predicate;
if ( !unify_triple(state->subject, retpred, state->object,
state->src, t, p->inversed) )
continue;
if ( state->realpred && PL_is_variable(state->predicate) )
{ if ( !PL_unify(state->predicate, retpred) )
return FALSE;
}
t=t->next[p->indexed];
inv_alt:
for(; t; t = t->next[p->indexed])
{ if ( state->literal_state )
{ if ( !(t->object_is_literal &&
t->object.literal == state->literal_cursor) )
continue;
}
if ( match_triples(t, p, state->flags) )
{ state->cursor = t;
return TRUE; /* non-deterministic */
}
}
if ( (state->flags & MATCH_INVERSE) && inverse_partial_triple(p) )
{ t = state->db->table[p->indexed][triple_hash(state->db, p, p->indexed)];
goto inv_alt;
}
state->cursor = NULL; /* deterministic */
return TRUE;
}
}
if ( (state->flags & MATCH_INVERSE) && inverse_partial_triple(p) )
{ t = state->db->table[p->indexed][triple_hash(state->db, p, p->indexed)];
goto retry;
}
if ( state->literal_state )
{ literal **litp;
if ( (litp = avlfindnext(state->literal_state)) )
{ if ( state->prefix )
{ literal *lit = *litp;
if ( !match_atoms(STR_MATCH_PREFIX, state->prefix, lit->value.string) )
{ DEBUG(1,
Sdprintf("Terminated literal iteration from ");
print_literal(lit);
Sdprintf("\n"));
return FALSE; /* no longer a prefix */
}
}
init_cursor_from_literal(state, *litp);
t = state->cursor;
goto retry;
}
}
return FALSE;
}
static foreign_t
rdf(term_t subject, term_t predicate, term_t object,
term_t src, term_t realpred, control_t h, unsigned flags)
{ rdf_db *db = DB;
search_state *state;
switch(PL_foreign_control(h))
{ case PL_FIRST_CALL:
{ search_state buf;
state = &buf;
memset(state, 0, sizeof(*state));
state->db = db;
state->subject = subject;
state->object = object;
state->predicate = predicate;
state->src = src;
state->realpred = realpred;
state->flags = flags;
if ( !init_search_state(state) )
return FALSE;
goto search;
}
case PL_REDO:
{ int rc;
state = PL_foreign_context_address(h);
assert(state->subject == subject);
search:
if ( (rc=next_search_state(state)) )
{ if ( state->cursor || state->literal_state )
return allow_retry_state(state);
}
free_search_state(state);
return rc;
}
case PL_CUTTED:
{ search_state *state = PL_foreign_context_address(h);
free_search_state(state);
return TRUE;
}
default:
assert(0);
return FALSE;
}
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rdf(Subject, Predicate, Object)
Search specifications:
Predicate:
subPropertyOf(X) = P
Object:
literal(substring(X), L)
literal(word(X), L)
literal(exact(X), L)
literal(prefix(X), L)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static foreign_t
rdf3(term_t subject, term_t predicate, term_t object, control_t h)
{ return rdf(subject, predicate, object, 0, 0, h,
MATCH_EXACT);
}
static foreign_t
rdf4(term_t subject, term_t predicate, term_t object,
term_t src, control_t h)
{ return rdf(subject, predicate, object, src, 0, h,
MATCH_EXACT|MATCH_SRC);
}
static foreign_t
rdf_has(term_t subject, term_t predicate, term_t object,
term_t realpred, control_t h)
{ return rdf(subject, predicate, object, 0, realpred, h,
MATCH_SUBPROPERTY|MATCH_INVERSE);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rdf_estimate_complexity(+S,+P,+O,-C)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static foreign_t
rdf_estimate_complexity(term_t subject, term_t predicate, term_t object,
term_t complexity)
{ triple t;
long c;
rdf_db *db = DB;
int rc;
memset(&t, 0, sizeof(t));
if ( (rc=get_partial_triple(db, subject, predicate, object, 0, &t)) != TRUE )
{ if ( rc == -1 )
{ return FALSE; /* error */
} else
{ return PL_unify_integer(complexity, 0); /* no predicate */
}
}
if ( !RDLOCK(db) )
return FALSE;
if ( !update_hash(db) ) /* or ignore this problem? */
{ RDUNLOCK(db);
free_triple(db, &t);
return FALSE;
}
if ( t.indexed == BY_NONE )
{ c = db->created - db->erased; /* = totale triple count */
#if 0
} else if ( t.indexed == BY_P )
{ c = t.predicate.r->triple_count; /* must sum over children */
#endif
} else
{ c = db->counts[t.indexed][triple_hash(db, &t, t.indexed)];
}
rc = PL_unify_integer(complexity, c);
RDUNLOCK(db);
free_triple(db, &t);
return rc;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
current_literal(?Literals)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static foreign_t
rdf_current_literal(term_t t, control_t h)
{ rdf_db *db = DB;
literal **data;
avl_enum *state;
int rc;
switch(PL_foreign_control(h))
{ case PL_FIRST_CALL:
if ( PL_is_variable(t) )
{ state = rdf_malloc(db, sizeof(*state));
RDLOCK(db);
inc_active_queries(db);
data = avlfindfirst(&db->literals, NULL, state);
goto next;
} else
{ return FALSE; /* TBD */
}
case PL_REDO:
state = PL_foreign_context_address(h);
data = avlfindnext(state);
next:
for(; data; data=avlfindnext(state))
{ literal *lit = *data;
if ( unify_literal(t, lit) )
{ PL_retry_address(state);
}
}
rc = FALSE;
goto cleanup;
case PL_CUTTED:
rc = TRUE;
cleanup:
state = PL_foreign_context_address(h);
avlfinddestroy(state);
rdf_free(db, state, sizeof(*state));
RDUNLOCK(db);
dec_active_queries(db);
return rc;
default:
assert(0);
return FALSE;
}
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rdf_update(+Subject, +Predicate, +Object, +Action)
Update a triple. Please note this is actually erase+assert as the triple
needs to be updated in the linked lists while erase simply flags a
triple as `erases' without deleting it to support queries which active
choicepoints.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
update_triple(rdf_db *db, term_t action, triple *t)
{ term_t a = PL_new_term_ref();
triple tmp, *new;
int i;
/* Create copy in local memory */
tmp = *t;
tmp.allocated = FALSE;
tmp.atoms_locked = FALSE;
if ( t->object_is_literal )
tmp.object.literal = copy_literal(db, t->object.literal);
if ( !PL_get_arg(1, action, a) )
return type_error(action, "rdf_action");
if ( PL_is_functor(action, FUNCTOR_subject1) )
{ atom_t s;
if ( !get_atom_ex(a, &s) )
return FALSE;
if ( tmp.subject == s )
return TRUE; /* no change */
tmp.subject = s;
} else if ( PL_is_functor(action, FUNCTOR_predicate1) )
{ predicate *p;
if ( !get_predicate(db, a, &p) )
return FALSE;
if ( tmp.predicate.r == p )
return TRUE; /* no change */
tmp.predicate.r = p;
} else if ( PL_is_functor(action, FUNCTOR_object1) )
{ triple t2;
memset(&t2, 0, sizeof(t2));
if ( !get_object(db, a, &t2) )
{ free_triple(db, &t2);
return FALSE;
}
if ( match_object(&t2, &tmp, MATCH_QUAL) )
{ free_triple(db, &t2);
return TRUE;
}
if ( tmp.object_is_literal )
free_literal(db, tmp.object.literal);
if ( (tmp.object_is_literal = t2.object_is_literal) )
{ tmp.object.literal = t2.object.literal;
} else
{ tmp.object.resource = t2.object.resource;
}
} else if ( PL_is_functor(action, FUNCTOR_graph1) )
{ triple t2;
if ( !get_graph(a, &t2) )
return FALSE;
if ( t2.graph == t->graph && t2.line == t->line )
return TRUE;
if ( db->tr_first )
{ record_update_src_transaction(db, t, t2.graph, t2.line);
} else
{ if ( t->graph )
unregister_graph(db, t);
t->graph = t2.graph;
t->line = t2.line;
if ( t->graph )
register_graph(db, t);
}
return TRUE; /* considered no change */
} else
return domain_error(action, "rdf_action");
for(i=0; i<INDEX_TABLES; i++)
tmp.next[i] = NULL;
new = new_triple(db);
new->subject = tmp.subject;
new->predicate.r = tmp.predicate.r;
if ( (new->object_is_literal = tmp.object_is_literal) )
{ new->object.literal = copy_literal(db, tmp.object.literal);
} else
{ new->object.resource = tmp.object.resource;
}
new->graph = tmp.graph;
new->line = tmp.line;
free_triple(db, &tmp);
lock_atoms(new);
if ( db->tr_first )
{ record_update_transaction(db, t, new);
} else
{ broadcast(EV_UPDATE, t, new);
erase_triple_silent(db, t);
link_triple_silent(db, new);
db->generation++;
}
return TRUE;
}
static foreign_t
rdf_update5(term_t subject, term_t predicate, term_t object, term_t src,
term_t action)
{ triple t, *p;
int indexed = BY_SP;
int done = 0;
rdf_db *db = DB;
memset(&t, 0, sizeof(t));
if ( !get_src(src, &t) ||
!get_triple(db, subject, predicate, object, &t) )
return FALSE;
if ( !WRLOCK(db, FALSE) )
{ free_triple(db, &t);
return FALSE;
}
if ( !update_hash(db) )
{ WRUNLOCK(db);
free_triple(db, &t);
return FALSE;
}
p = db->table[indexed][triple_hash(db, &t, indexed)];
for( ; p; p = p->next[indexed])
{ if ( match_triples(p, &t, MATCH_EXACT) )
{ if ( !update_triple(db, action, p) )
{ WRUNLOCK(db);
free_triple(db, &t);
return FALSE; /* type errors */
}
done++;
}
}
free_triple(db, &t);
WRUNLOCK(db);
return done ? TRUE : FALSE;
}
static foreign_t
rdf_update(term_t subject, term_t predicate, term_t object, term_t action)
{ return rdf_update5(subject, predicate, object, 0, action);
}
static foreign_t
rdf_retractall4(term_t subject, term_t predicate, term_t object, term_t src)
{ triple t, *p;
rdf_db *db = DB;
memset(&t, 0, sizeof(t));
switch( get_partial_triple(db, subject, predicate, object, src, &t) )
{ case 0: /* no such predicate */
return TRUE;
case -1: /* error */
return FALSE;
}
if ( t.graph ) /* speedup for rdf_retractall(_,_,_,DB) */
{ graph *gr = lookup_graph(db, t.graph, FALSE);
if ( !gr || gr->triple_count == 0 )
return TRUE;
}
if ( !WRLOCK(db, FALSE) )
return FALSE;
/* No need, as we do not search with subPropertyOf
if ( !update_hash(db) )
{ WRUNLOCK(db);
return FALSE;
}
*/
p = db->table[t.indexed][triple_hash(db, &t, t.indexed)];
for( ; p; p = p->next[t.indexed])
{ if ( match_triples(p, &t, MATCH_EXACT|MATCH_SRC) )
{ if ( t.object_is_literal && t.object.literal->objtype == OBJ_TERM )
{ fid_t fid = PL_open_foreign_frame();
int rc = unify_object(object, p);
PL_discard_foreign_frame(fid);
if ( !rc )
continue;
}
if ( db->tr_first )
{ if ( db->tr_reset )
{ WRUNLOCK(db);
return permission_error("retract", "triple", "",
"rdf_retractall cannot follow "
"rdf_reset_db in one transaction");
}
record_transaction(db, TR_RETRACT, p);
} else
{ erase_triple(db, p);
db->generation++;
}
}
}
WRUNLOCK(db);
free_triple(db, &t);
return TRUE;
}
static foreign_t
rdf_retractall3(term_t subject, term_t predicate, term_t object)
{ return rdf_retractall4(subject, predicate, object, 0);
}
/*******************************
* MONITOR *
*******************************/
typedef struct broadcast_callback
{ struct broadcast_callback *next;
predicate_t pred;
long mask;
} broadcast_callback;
static long joined_mask = 0L;
static broadcast_callback *callback_list;
static broadcast_callback *callback_tail;
static void
do_broadcast(term_t term, long mask)
{ if ( callback_list )
{ broadcast_callback *cb;
for(cb = callback_list; cb; cb = cb->next)
{ qid_t qid;
term_t ex;
if ( !(cb->mask & mask) )
continue;
qid = PL_open_query(NULL, PL_Q_CATCH_EXCEPTION, cb->pred, term);
if ( !PL_next_solution(qid) && (ex = PL_exception(qid)) )
{ term_t av = PL_new_term_refs(2);
PL_cut_query(qid);
PL_put_atom(av+0, ATOM_error);
PL_put_term(av+1, ex);
PL_call_predicate(NULL, PL_Q_NORMAL,
PL_predicate("print_message", 2, "user"),
av);
} else
{ PL_close_query(qid);
}
}
}
}
/* No longer used, but we keep it for if we need it again
static foreign_t
rdf_broadcast(term_t term, term_t mask)
{ long msk;
if ( !get_long_ex(mask, &msk) )
return FALSE;
do_broadcast(term, msk);
return TRUE;
}
*/
static int
broadcast(broadcast_id id, void *a1, void *a2)
{ if ( (joined_mask & id) )
{ fid_t fid;
term_t term;
functor_t funct;
if ( !(fid = PL_open_foreign_frame()) ||
!(term = PL_new_term_ref()) )
return FALSE;
switch(id)
{ case EV_ASSERT:
case EV_ASSERT_LOAD:
funct = FUNCTOR_assert4;
goto assert_retract;
case EV_RETRACT:
funct = FUNCTOR_retract4;
assert_retract:
{ triple *t = a1;
term_t tmp;
if ( !(tmp = PL_new_term_refs(4)) ||
!PL_put_atom(tmp+0, t->subject) ||
!PL_put_atom(tmp+1, t->predicate.r->name) ||
!unify_object(tmp+2, t) ||
!unify_graph(tmp+3, t) ||
!PL_cons_functor_v(term, funct, tmp) )
return FALSE;
break;
}
case EV_UPDATE:
{ triple *t = a1;
triple *new = a2;
term_t tmp, a;
functor_t action;
int rc;
if ( !(tmp = PL_new_term_refs(5)) ||
!(a = PL_new_term_ref()) ||
!PL_put_atom(tmp+0, t->subject) ||
!PL_put_atom(tmp+1, t->predicate.r->name) ||
!unify_object(tmp+2, t) ||
!unify_graph(tmp+3, t) )
return FALSE;
if ( t->subject != new->subject )
{ action = FUNCTOR_subject1;
rc = PL_put_atom(a, new->subject);
} else if ( t->predicate.r != new->predicate.r )
{ action = FUNCTOR_predicate1;
rc = PL_put_atom(a, new->predicate.r->name);
} else if ( !match_object(t, new, MATCH_QUAL) )
{ action = FUNCTOR_object1;
rc = unify_object(a, new);
} else if ( !same_graph(t, new) )
{ action = FUNCTOR_graph1;
rc = unify_graph(a, new);
} else
{ return TRUE; /* no change */
}
if ( !rc ||
!PL_cons_functor_v(tmp+4, action, a) ||
!PL_cons_functor_v(term, FUNCTOR_update5, tmp) )
return FALSE;
break;
}
case EV_NEW_LITERAL:
{ literal *lit = a1;
term_t tmp;
if ( !(tmp = PL_new_term_refs(1)) ||
!unify_literal(tmp, lit) ||
!PL_cons_functor_v(term, FUNCTOR_new_literal1, tmp) )
return FALSE;
break;
}
case EV_OLD_LITERAL:
{ literal *lit = a1;
term_t tmp;
if ( !(tmp = PL_new_term_refs(1)) ||
!unify_literal(tmp, lit) ||
!PL_cons_functor_v(term, FUNCTOR_old_literal1, tmp) )
return FALSE;
break;
}
case EV_LOAD:
{ term_t ctx = (term_t)a1;
atom_t be = (atom_t)a2;
term_t tmp;
if ( !(tmp = PL_new_term_refs(2)) ||
!PL_put_atom(tmp+0, be) || /* begin/end */
!PL_put_term(tmp+1, ctx) ||
!PL_cons_functor_v(term, FUNCTOR_load2, tmp) )
return FALSE;
break;
}
case EV_TRANSACTION:
{ term_t ctx = (term_t)a1;
term_t be = (term_t)a2;
term_t tmp;
if ( !(tmp = PL_new_term_refs(2)) ||
!PL_put_term(tmp+0, be) || /* begin/end */
!PL_put_term(tmp+1, ctx) ||
!PL_cons_functor_v(term, FUNCTOR_transaction2, tmp) )
return FALSE;
break;
}
case EV_REHASH:
{ atom_t be = (atom_t)a1;
term_t tmp = PL_new_term_refs(1);
if ( !(tmp = PL_new_term_refs(1)) ||
!PL_put_atom(tmp+0, be) ||
!PL_cons_functor_v(term, FUNCTOR_rehash1, tmp) )
return FALSE;
break;
}
default:
assert(0);
}
do_broadcast(term, id);
PL_discard_foreign_frame(fid);
}
return TRUE;
}
static foreign_t
rdf_monitor(term_t goal, term_t mask)
{ atom_t name;
broadcast_callback *cb;
predicate_t p;
long msk;
module_t m = NULL;
PL_strip_module(goal, &m, goal);
if ( !get_atom_ex(goal, &name) ||
!get_long_ex(mask, &msk) )
return FALSE;
p = PL_pred(PL_new_functor(name, 1), m);
for(cb=callback_list; cb; cb = cb->next)
{ if ( cb->pred == p )
{ broadcast_callback *cb2;
cb->mask = msk;
joined_mask = 0L;
for(cb2=callback_list; cb2; cb2 = cb2->next)
joined_mask |= cb2->mask;
DEBUG(2, Sdprintf("Set mask to 0x%x\n", joined_mask));
return TRUE;
}
}
cb = PL_malloc(sizeof(*cb));
cb->next = NULL;
cb->mask = msk;
cb->pred = p;
if ( callback_list )
{ callback_tail->next = cb;
callback_tail = cb;
} else
{ callback_list = callback_tail = cb;
}
joined_mask |= msk;
return TRUE;
}
/*******************************
* QUERY *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Enumerate the known subjects. This uses the `first' flag on triples to
avoid returning the same resource multiple times. As the `by_none' is
never re-hashed, we don't mark this query in the `active_queries'.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static foreign_t
rdf_subject(term_t subject, control_t h)
{ triple *t;
rdf_db *db = DB;
switch(PL_foreign_control(h))
{ case PL_FIRST_CALL:
{ if ( PL_is_variable(subject) )
{ t = db->table[BY_NONE][0];
goto next;
} else
{ atom_t a;
if ( get_atom_ex(subject, &a) )
{ if ( first(db, a) )
return TRUE;
return FALSE;
}
return FALSE;
}
}
case PL_REDO:
t = PL_foreign_context_address(h);
next:
for(; t; t = t->next[BY_NONE])
{ if ( t->first && !t->erased )
{ if ( !PL_unify_atom(subject, t->subject) )
return FALSE;
t = t->next[BY_NONE];
if ( t )
PL_retry_address(t);
return TRUE;
}
}
return FALSE;
case PL_CUTTED:
return TRUE;
default:
assert(0);
return FALSE;
}
}
static foreign_t
rdf_set_predicate(term_t pred, term_t option)
{ predicate *p;
rdf_db *db = DB;
if ( !get_predicate(db, pred, &p) )
return FALSE;
if ( PL_is_functor(option, FUNCTOR_symmetric1) )
{ int val;
if ( !get_bool_arg_ex(1, option, &val) )
return FALSE;
p->inverse_of = p;
return TRUE;
} else if ( PL_is_functor(option, FUNCTOR_inverse_of1) )
{ term_t a = PL_new_term_ref();
predicate *i;
_PL_get_arg(1, option, a);
if ( PL_get_nil(a) )
{ if ( p->inverse_of )
{ p->inverse_of->inverse_of = NULL;
p->inverse_of = NULL;
}
} else
{ if ( !get_predicate(db, a, &i) )
return FALSE;
p->inverse_of = i;
i->inverse_of = p;
}
return TRUE;
} else if ( PL_is_functor(option, FUNCTOR_transitive1) )
{ int val;
if ( !get_bool_arg_ex(1, option, &val) )
return FALSE;
p->transitive = val;
return TRUE;
} else
return type_error(option, "predicate_option");
}
#define PRED_PROPERTY_COUNT 9
static functor_t predicate_key[PRED_PROPERTY_COUNT];
static int
unify_predicate_property(rdf_db *db, predicate *p, term_t option, functor_t f)
{ if ( f == FUNCTOR_symmetric1 )
return PL_unify_term(option, PL_FUNCTOR, f,
PL_BOOL, p->inverse_of == p ? TRUE : FALSE);
else if ( f == FUNCTOR_inverse_of1 )
{ if ( p->inverse_of )
return PL_unify_term(option, PL_FUNCTOR, f,
PL_ATOM, p->inverse_of->name);
else
return FALSE;
} else if ( f == FUNCTOR_transitive1 )
{ return PL_unify_term(option, PL_FUNCTOR, f,
PL_BOOL, p->transitive);
} else if ( f == FUNCTOR_triples1 )
{ return PL_unify_term(option, PL_FUNCTOR, f,
PL_LONG, p->triple_count);
} else if ( f == FUNCTOR_rdf_subject_branch_factor1 )
{ return PL_unify_term(option, PL_FUNCTOR, f,
PL_FLOAT, subject_branch_factor(db, p, DISTINCT_DIRECT));
} else if ( f == FUNCTOR_rdf_object_branch_factor1 )
{ return PL_unify_term(option, PL_FUNCTOR, f,
PL_FLOAT, object_branch_factor(db, p, DISTINCT_DIRECT));
} else if ( f == FUNCTOR_rdfs_subject_branch_factor1 )
{ return PL_unify_term(option, PL_FUNCTOR, f,
PL_FLOAT, subject_branch_factor(db, p, DISTINCT_SUB));
} else if ( f == FUNCTOR_rdfs_object_branch_factor1 )
{ return PL_unify_term(option, PL_FUNCTOR, f,
PL_FLOAT, object_branch_factor(db, p, DISTINCT_SUB));
} else
{ assert(0);
return FALSE;
}
}
static foreign_t
rdf_current_predicates(term_t preds)
{ rdf_db *db = DB;
int i;
term_t head = PL_new_term_ref();
term_t tail = PL_copy_term_ref(preds);
LOCK_MISC(db);
for(i=0; i<db->pred_table_size; i++)
{ predicate *p;
for(p=db->pred_table[i]; p; p = p->next)
{ if ( !PL_unify_list(tail, head, tail) ||
!PL_unify_atom(head, p->name) )
{ UNLOCK_MISC(db);
return FALSE;
}
}
}
UNLOCK_MISC(db);
return PL_unify_nil(tail);
}
static foreign_t
rdf_predicate_property(term_t pred, term_t option, control_t h)
{ int n;
predicate *p;
rdf_db *db = DB;
if ( !predicate_key[0] )
{ int i = 0;
predicate_key[i++] = FUNCTOR_symmetric1;
predicate_key[i++] = FUNCTOR_inverse_of1;
predicate_key[i++] = FUNCTOR_transitive1;
predicate_key[i++] = FUNCTOR_triples1;
predicate_key[i++] = FUNCTOR_rdf_subject_branch_factor1;
predicate_key[i++] = FUNCTOR_rdf_object_branch_factor1;
predicate_key[i++] = FUNCTOR_rdfs_subject_branch_factor1;
predicate_key[i++] = FUNCTOR_rdfs_object_branch_factor1;
assert(i < PRED_PROPERTY_COUNT);
}
switch(PL_foreign_control(h))
{ case PL_FIRST_CALL:
{ functor_t f;
if ( PL_is_variable(option) )
{ n = 0;
goto redo;
} else if ( PL_get_functor(option, &f) )
{ for(n=0; predicate_key[n]; n++)
{ if ( predicate_key[n] == f )
{ if ( !get_predicate(db, pred, &p) )
return FALSE;
return unify_predicate_property(db, p, option, f);
}
}
return domain_error(option, "rdf_predicate_property");
} else
return type_error(option, "rdf_predicate_property");
}
case PL_REDO:
n = (int)PL_foreign_context(h);
redo:
if ( !get_predicate(db, pred, &p) )
return FALSE;
for( ; predicate_key[n]; n++ )
{ if ( unify_predicate_property(db, p, option, predicate_key[n]) )
{ n++;
if ( predicate_key[n] )
PL_retry(n);
return TRUE;
}
}
return FALSE;
case PL_CUTTED:
return TRUE;
default:
assert(0);
return TRUE;
}
}
/*******************************
* TRANSITIVE RELATIONS *
*******************************/
typedef struct visited
{ struct visited *next; /* next in list */
struct visited *hash_link; /* next in hashed link */
atom_t resource; /* visited resource */
uintptr_t distance; /* Distance */
} visited;
#define AGENDA_LOCAL_MAGIC 742736360
#define AGENDA_SAVED_MAGIC 742736362
typedef struct agenda
{ visited *head; /* visited list */
visited *tail; /* tail of visited list */
visited *to_expand; /* next to expand */
visited *to_return; /* next to return */
visited **hash; /* hash-table for cycle detection */
int magic; /* AGENDA_*_MAGIC */
int hash_size;
int size; /* size of the agenda */
uintptr_t max_d; /* max distance */
triple pattern; /* partial triple used as pattern */
atom_t target; /* resource we are seaching for */
struct chunk *chunk; /* node-allocation chunks */
} agenda;
#ifndef offsetof
#define offsetof(structure, field) ((size_t) &(((structure *)NULL)->field))
#endif
#define CHUNK_SIZE(n) offsetof(chunk, nodes[n])
typedef struct chunk
{ struct chunk *next;
int used; /* # used elements */
int size; /* size of the chunk */
struct visited nodes[1]; /* nodes in the chunk */
} chunk;
static visited *
alloc_node_agenda(rdf_db *db, agenda *a)
{ chunk *c;
int size;
if ( (c=a->chunk) )
{ if ( c->used < c->size )
{ visited *v = &c->nodes[c->used++];
return v;
}
}
size = (a->size == 0 ? 8 : 1024);
c = rdf_malloc(db, CHUNK_SIZE(size));
c->size = size;
c->used = 1;
c->next = a->chunk;
a->chunk = c;
return &c->nodes[0];
}
static void
empty_agenda(rdf_db *db, agenda *a)
{ chunk *c, *n;
for(c=a->chunk; c; c = n)
{ n = c->next;
rdf_free(db, c, CHUNK_SIZE(c->size));
}
if ( a->hash )
rdf_free(db, a->hash, sizeof(visited*)*a->hash_size);
if ( a->magic == AGENDA_SAVED_MAGIC )
{ a->magic = 0;
rdf_free(db, a, sizeof(*a));
} else
{ a->magic = 0;
}
}
static void
unlock_and_empty_agenda(rdf_db *db, agenda *a)
{ RDUNLOCK(db);
empty_agenda(db, a);
}
static agenda *
save_agenda(rdf_db *db, agenda *a)
{ agenda *r = rdf_malloc(db, sizeof(*r));
assert(a->magic == AGENDA_LOCAL_MAGIC);
*r = *a;
r->magic = AGENDA_SAVED_MAGIC;
return r;
}
static void
hash_agenda(rdf_db *db, agenda *a, int size)
{ if ( a->hash )
rdf_free(db, a->hash, sizeof(*a->hash));
if ( size > 0 )
{ visited *v;
a->hash = rdf_malloc(db, sizeof(visited*)*size);
memset(a->hash, 0, sizeof(visited*)*size);
a->hash_size = size;
for(v=a->head; v; v = v->next)
{ int key = atom_hash(v->resource)&(size-1);
v->hash_link = a->hash[key];
a->hash[key] = v;
}
}
}
static int
in_aganda(agenda *a, atom_t resource)
{ visited *v;
if ( a->hash )
{ int key = atom_hash(resource)&(a->hash_size-1);
v = a->hash[key];
for( ; v; v = v->hash_link )
{ if ( v->resource == resource )
return TRUE;
}
} else
{ v = a->head;
for( ; v; v = v->next )
{ if ( v->resource == resource )
return TRUE;
}
}
return FALSE;
}
static visited *
append_agenda(rdf_db *db, agenda *a, atom_t res, uintptr_t d)
{ visited *v = a->head;
if ( in_aganda(a, res) )
return NULL;
db->agenda_created++; /* statistics */
a->size++;
if ( !a->hash_size && a->size > 32 )
hash_agenda(db, a, 64);
else if ( a->size > a->hash_size * 4 )
hash_agenda(db, a, a->hash_size * 4);
v = alloc_node_agenda(db, a);
v->resource = res;
v->distance = d;
v->next = NULL;
if ( a->tail )
{ a->tail->next = v;
a->tail = v;
} else
{ a->head = a->tail = v;
}
if ( a->hash_size )
{ int key = atom_hash(res)&(a->hash_size-1);
v->hash_link = a->hash[key];
a->hash[key] = v;
}
return v;
}
static int
can_reach_target(rdf_db *db, agenda *a)
{ int indexed = a->pattern.indexed;
int rc = FALSE;
triple *p;
if ( indexed & BY_S ) /* subj ---> */
{ a->pattern.object.resource = a->target;
indexed |= BY_O;
} else
{ a->pattern.subject = a->target;
indexed |= BY_S;
}
p = db->table[indexed][triple_hash(db, &a->pattern, indexed)];
for( ; p; p = p->next[indexed])
{ if ( match_triples(p, &a->pattern, MATCH_SUBPROPERTY) )
{ rc = TRUE;
break;
}
}
if ( a->pattern.indexed & BY_S )
{ a->pattern.object.resource = 0;
} else
{ a->pattern.subject = 0;
}
return rc;
}
static visited *
bf_expand(rdf_db *db, agenda *a, atom_t resource, uintptr_t d)
{ triple *p;
int indexed = a->pattern.indexed;
visited *rc = NULL;
if ( indexed & BY_S ) /* subj ---> */
{ a->pattern.subject = resource;
} else
{ a->pattern.object.resource = resource;
}
if ( a->target && can_reach_target(db, a) )
{ return append_agenda(db, a, a->target, d);
}
p = db->table[indexed][triple_hash(db, &a->pattern, indexed)];
for( ; p; p = p->next[indexed])
{ if ( match_triples(p, &a->pattern, MATCH_SUBPROPERTY) )
{ atom_t found;
visited *v;
if ( indexed & BY_S )
{ if ( p->object_is_literal )
continue;
found = p->object.resource;
} else
{ found = p->subject;
}
v = append_agenda(db, a, found, d);
if ( !rc )
rc = v;
if ( found == a->target )
break;
}
}
/* TBD: handle owl:inverseOf */
/* TBD: handle owl:sameAs */
return rc;
}
static visited *
next_agenda(rdf_db *db, agenda *a)
{ visited *v;
if ( (v=a->to_return) )
{ ok:
a->to_return = a->to_return->next;
return v;
}
while( a->to_expand )
{ uintptr_t next_d = a->to_expand->distance+1;
if ( next_d >= a->max_d )
return NULL;
a->to_return = bf_expand(db, a,
a->to_expand->resource,
next_d);
a->to_expand = a->to_expand->next;
if ( (v=a->to_return) )
goto ok;
}
return NULL;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rdf_reachable(+Subject, +Predicate, -Object)
rdf_reachable(-Subject, +Predicate, ?Object)
Examine transitive relations, reporting all `Object' that can be
reached from `Subject' using Predicate without going into a loop
if the relation is cyclic.
directly_attached() deals with the posibility that the predicate is not
defined and Subject and Object are the same. Should use clean error
handling, but that means a lot of changes. For now this will do.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
directly_attached(term_t pred, term_t from, term_t to)
{ if ( PL_is_atom(pred) && PL_is_atom(from) )
return PL_unify(to, from);
return FALSE;
}
static int
unify_distance(term_t d, uintptr_t dist)
{ if ( d )
return PL_unify_integer(d, dist);
return TRUE;
}
static foreign_t
rdf_reachable(term_t subj, term_t pred, term_t obj,
term_t max_d, term_t d,
control_t h)
{ rdf_db *db = DB;
switch(PL_foreign_control(h))
{ case PL_FIRST_CALL:
{ agenda a;
visited *v;
term_t target_term;
int is_det = FALSE;
if ( PL_is_variable(pred) )
return instantiation_error(pred);
memset(&a, 0, sizeof(a));
a.magic = AGENDA_LOCAL_MAGIC;
if ( max_d )
{ long md;
atom_t inf;
if ( PL_get_atom(max_d, &inf) && inf == ATOM_infinite )
a.max_d = (uintptr_t)-1;
if ( !get_long_ex(max_d, &md) || md < 0 )
return FALSE;
a.max_d = md;
} else
{ a.max_d = (uintptr_t)-1;
}
if ( !PL_is_variable(subj) ) /* subj .... obj */
{ switch(get_partial_triple(db, subj, pred, 0, 0, &a.pattern))
{ case 0:
return directly_attached(pred, subj, obj) &&
unify_distance(d, 0);
case -1:
return FALSE;
}
is_det = PL_is_ground(obj);
target_term = obj;
} else if ( !PL_is_variable(obj) ) /* obj .... subj */
{ switch(get_partial_triple(db, 0, pred, obj, 0, &a.pattern))
{ case 0:
return directly_attached(pred, obj, subj);
case -1:
return FALSE;
}
if ( a.pattern.object_is_literal )
return FALSE; /* rdf_reachable(-,+,literal(...)) */
target_term = subj;
} else
return instantiation_error(subj);
if ( !RDLOCK(db) )
return FALSE;
if ( !update_hash(db) )
return FALSE;
if ( (a.pattern.indexed & BY_S) ) /* subj ... */
append_agenda(db, &a, a.pattern.subject, 0);
else
append_agenda(db, &a, a.pattern.object.resource, 0);
a.to_return = a.head;
a.to_expand = a.head;
while( (v=next_agenda(db, &a)) )
{ if ( PL_unify_atom(target_term, v->resource) )
{ if ( is_det ) /* mode(+, +, +) */
{ int rc = unify_distance(d, v->distance);
unlock_and_empty_agenda(db, &a);
return rc;
} else if ( unify_distance(d, v->distance) )
{ /* mode(+, +, -) or mode(-, +, +) */
agenda *ra = save_agenda(db, &a);
inc_active_queries(db);
DEBUG(9, Sdprintf("Saved agenta to %p\n", ra));
PL_retry_address(ra);
}
}
}
unlock_and_empty_agenda(db, &a);
return FALSE;
}
case PL_REDO:
{ agenda *a = PL_foreign_context_address(h);
term_t target_term;
visited *v;
assert(a->magic == AGENDA_SAVED_MAGIC);
if ( !PL_is_variable(subj) ) /* +, +, - */
target_term = obj;
else
target_term = subj; /* -, +, + */
while( (v=next_agenda(db, a)) )
{ if ( PL_unify_atom(target_term, v->resource) &&
unify_distance(d, v->distance) )
{ assert(a->magic == AGENDA_SAVED_MAGIC);
PL_retry_address(a);
}
}
dec_active_queries(db);
unlock_and_empty_agenda(db, a);
return FALSE;
}
case PL_CUTTED:
{ agenda *a = PL_foreign_context_address(h);
DEBUG(9, Sdprintf("Cutted; agenda = %p\n", a));
assert(a->magic == AGENDA_SAVED_MAGIC);
dec_active_queries(db);
unlock_and_empty_agenda(db, a);
return TRUE;
}
default:
assert(0);
return FALSE;
}
}
static foreign_t
rdf_reachable3(term_t subj, term_t pred, term_t obj, control_t h)
{ return rdf_reachable(subj, pred, obj, 0, 0, h);
}
static foreign_t
rdf_reachable5(term_t subj, term_t pred, term_t obj, term_t max_d, term_t d,
control_t h)
{ return rdf_reachable(subj, pred, obj, max_d, d, h);
}
/*******************************
* STATISTICS *
*******************************/
static functor_t keys[16]; /* initialised in install_rdf_db() */
static int
unify_statistics(rdf_db *db, term_t key, functor_t f)
{ int64_t v;
if ( f == FUNCTOR_triples1 )
{ v = db->created - db->erased;
} else if ( f == FUNCTOR_subjects1 )
{ v = db->subjects;
} else if ( f == FUNCTOR_predicates1 )
{ v = db->pred_count;
} else if ( f == FUNCTOR_core1 )
{ v = db->core;
} else if ( f == FUNCTOR_indexed8 )
{ int i;
term_t a = PL_new_term_ref();
if ( !PL_unify_functor(key, FUNCTOR_indexed8) )
return FALSE;
for(i=0; i<8; i++)
{ if ( !PL_get_arg(i+1, key, a) ||
!PL_unify_integer(a, db->indexed[i]) )
return FALSE;
}
return TRUE;
} else if ( f == FUNCTOR_searched_nodes1 )
{ v = db->agenda_created;
} else if ( f == FUNCTOR_duplicates1 )
{ v = db->duplicates;
} else if ( f == FUNCTOR_literals1 )
{ v = db->literals.count;
} else if ( f == FUNCTOR_triples2 && PL_is_functor(key, f) )
{ graph *src;
term_t a = PL_new_term_ref();
atom_t name;
_PL_get_arg(1, key, a);
if ( !PL_get_atom(a, &name) )
return type_error(a, "atom");
if ( (src = lookup_graph(db, name, FALSE)) )
v = src->triple_count;
else
v = 0;
_PL_get_arg(2, key, a);
return PL_unify_int64(a, v);
} else if ( f == FUNCTOR_gc2 )
{ return PL_unify_term(key,
PL_FUNCTOR, f,
PL_INT, db->gc_count,
PL_FLOAT, db->gc_time); /* time spent */
} else if ( f == FUNCTOR_rehash2 )
{ return PL_unify_term(key,
PL_FUNCTOR, f,
PL_INT, db->rehash_count,
PL_FLOAT, db->rehash_time);
} else
assert(0);
return PL_unify_term(key, PL_FUNCTOR, f, PL_INT64, v);
}
static foreign_t
rdf_statistics(term_t key, control_t h)
{ int n;
rdf_db *db = DB;
switch(PL_foreign_control(h))
{ case PL_FIRST_CALL:
{ functor_t f;
if ( PL_is_variable(key) )
{ n = 0;
goto redo;
} else if ( PL_get_functor(key, &f) )
{ for(n=0; keys[n]; n++)
{ if ( keys[n] == f )
return unify_statistics(db, key, f);
}
return domain_error(key, "rdf_statistics");
} else
return type_error(key, "rdf_statistics");
}
case PL_REDO:
n = (int)PL_foreign_context(h);
redo:
unify_statistics(db, key, keys[n]);
n++;
if ( keys[n] )
PL_retry(n);
case PL_CUTTED:
return TRUE;
default:
assert(0);
return TRUE;
}
}
static foreign_t
rdf_generation(term_t t)
{ rdf_db *db = DB;
return PL_unify_integer(t, db->generation);
}
/*******************************
* RESET *
*******************************/
static void
erase_triples(rdf_db *db)
{ triple *t, *n;
int i;
for(t=db->by_none; t; t=n)
{ n = t->next[BY_NONE];
free_triple(db, t);
db->freed++;
}
db->by_none = db->by_none_tail = NULL;
for(i=BY_S; i<=BY_OP; i++)
{ if ( db->table[i] )
{ int bytes = sizeof(triple*) * db->table_size[i];
memset(db->table[i], 0, bytes);
memset(db->tail[i], 0, bytes);
}
}
db->created = 0;
db->erased = 0;
db->freed = 0;
db->erased = 0;
db->subjects = 0;
db->rehash_count = 0;
memset(db->indexed, 0, sizeof(db->indexed));
db->duplicates = 0;
db->generation = 0;
}
static void
erase_predicates(rdf_db *db)
{ predicate **ht;
int i;
for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
{ predicate *p, *n;
for( p = *ht; p; p = n )
{ n = p->next;
free_list(db, &p->subPropertyOf);
free_list(db, &p->siblings);
if ( ++p->cloud->deleted == p->cloud->size )
free_predicate_cloud(db, p->cloud);
rdf_free(db, p, sizeof(*p));
}
*ht = NULL;
}
db->pred_count = 0;
db->next_hash = 0;
}
static void
reset_db(rdf_db *db)
{ db->resetting = TRUE;
erase_triples(db);
erase_predicates(db);
erase_graphs(db);
db->need_update = FALSE;
db->agenda_created = 0;
avlfree(&db->literals);
init_literal_table(db);
db->resetting = FALSE;
}
static foreign_t
rdf_reset_db()
{ rdf_db *db = DB;
if ( !WRLOCK(db, FALSE) )
return FALSE;
if ( db->tr_first )
{ record_transaction(db, TR_RESET, NULL);
db->tr_reset = TRUE;
} else
reset_db(db);
WRUNLOCK(db);
return TRUE;
}
/*******************************
* MATCH *
*******************************/
static foreign_t
match_label(term_t how, term_t search, term_t label)
{ atom_t h, f, l;
int type;
if ( !get_atom_ex(how, &h) ||
!get_atom_ex(search, &f) ||
!get_atom_ex(label, &l) )
return FALSE;
if ( h == ATOM_exact )
type = STR_MATCH_EXACT;
else if ( h == ATOM_substring )
type = STR_MATCH_SUBSTRING;
else if ( h == ATOM_word )
type = STR_MATCH_WORD;
else if ( h == ATOM_prefix )
type = STR_MATCH_PREFIX;
else if ( h == ATOM_like )
type = STR_MATCH_LIKE;
else
return domain_error(how, "search_method");
return match_atoms(type, f, l);
}
static foreign_t
lang_matches(term_t lang, term_t pattern)
{ atom_t l, p;
if ( !get_atom_ex(lang, &l) ||
!get_atom_ex(pattern, &p) )
return FALSE;
return atom_lang_matches(l, p);
}
/*******************************
* VERSION *
*******************************/
static foreign_t
rdf_version(term_t v)
{ return PL_unify_integer(v, RDF_VERSION);
}
/*******************************
* MORE STUFF *
*******************************/
#include "quote.c"
/*******************************
* REGISTER *
*******************************/
#define MKFUNCTOR(n, a) \
FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
#define NDET PL_FA_NONDETERMINISTIC
#define META PL_FA_TRANSPARENT
install_t
install_rdf_db()
{ int i=0;
extern install_t install_atom_map(void);
MKFUNCTOR(literal, 1);
MKFUNCTOR(error, 2);
MKFUNCTOR(type_error, 2);
MKFUNCTOR(domain_error, 2);
MKFUNCTOR(triples, 1);
MKFUNCTOR(triples, 2);
MKFUNCTOR(subjects, 1);
MKFUNCTOR(predicates, 1);
MKFUNCTOR(subject, 1);
MKFUNCTOR(predicate, 1);
MKFUNCTOR(object, 1);
MKFUNCTOR(graph, 1);
MKFUNCTOR(indexed, 8);
MKFUNCTOR(exact, 1);
MKFUNCTOR(plain, 1);
MKFUNCTOR(substring, 1);
MKFUNCTOR(word, 1);
MKFUNCTOR(prefix, 1);
MKFUNCTOR(like, 1);
MKFUNCTOR(literal, 2);
MKFUNCTOR(searched_nodes, 1);
MKFUNCTOR(duplicates, 1);
MKFUNCTOR(literals, 1);
MKFUNCTOR(symmetric, 1);
MKFUNCTOR(transitive, 1);
MKFUNCTOR(inverse_of, 1);
MKFUNCTOR(lang, 2);
MKFUNCTOR(type, 2);
MKFUNCTOR(rdf_subject_branch_factor, 1);
MKFUNCTOR(rdf_object_branch_factor, 1);
MKFUNCTOR(rdfs_subject_branch_factor, 1);
MKFUNCTOR(rdfs_object_branch_factor, 1);
MKFUNCTOR(gc, 2);
MKFUNCTOR(rehash, 2);
MKFUNCTOR(core, 1);
MKFUNCTOR(assert, 4);
MKFUNCTOR(retract, 4);
MKFUNCTOR(update, 5);
MKFUNCTOR(new_literal, 1);
MKFUNCTOR(old_literal, 1);
MKFUNCTOR(transaction, 2);
MKFUNCTOR(load, 2);
MKFUNCTOR(rehash, 1);
MKFUNCTOR(begin, 1);
MKFUNCTOR(end, 1);
FUNCTOR_colon2 = PL_new_functor(PL_new_atom(":"), 2);
ATOM_user = PL_new_atom("user");
ATOM_exact = PL_new_atom("exact");
ATOM_plain = PL_new_atom("plain");
ATOM_prefix = PL_new_atom("prefix");
ATOM_like = PL_new_atom("like");
ATOM_substring = PL_new_atom("substring");
ATOM_word = PL_new_atom("word");
ATOM_subPropertyOf = PL_new_atom(URL_subPropertyOf);
ATOM_error = PL_new_atom("error");
ATOM_begin = PL_new_atom("begin");
ATOM_end = PL_new_atom("end");
ATOM_infinite = PL_new_atom("infinite");
PRED_call1 = PL_predicate("call", 1, "user");
/* statistics */
keys[i++] = FUNCTOR_triples1;
keys[i++] = FUNCTOR_subjects1;
keys[i++] = FUNCTOR_indexed8;
keys[i++] = FUNCTOR_predicates1;
keys[i++] = FUNCTOR_searched_nodes1;
keys[i++] = FUNCTOR_duplicates1;
keys[i++] = FUNCTOR_literals1;
keys[i++] = FUNCTOR_triples2;
keys[i++] = FUNCTOR_gc2;
keys[i++] = FUNCTOR_rehash2;
keys[i++] = FUNCTOR_core1;
keys[i++] = 0;
/* setup the database */
DB = new_db();
PL_register_foreign("rdf_version", 1, rdf_version, 0);
PL_register_foreign("rdf_assert", 3, rdf_assert3, 0);
PL_register_foreign("rdf_assert", 4, rdf_assert4, 0);
PL_register_foreign("rdf_update", 4, rdf_update, 0);
PL_register_foreign("rdf_update", 5, rdf_update5, 0);
PL_register_foreign("rdf_retractall", 3, rdf_retractall3, 0);
PL_register_foreign("rdf_retractall", 4, rdf_retractall4, 0);
PL_register_foreign("rdf_subject", 1, rdf_subject, NDET);
PL_register_foreign("rdf", 3, rdf3, NDET);
PL_register_foreign("rdf", 4, rdf4, NDET);
PL_register_foreign("rdf_has", 4, rdf_has, NDET);
PL_register_foreign("rdf_statistics_",1, rdf_statistics, NDET);
PL_register_foreign("rdf_generation", 1, rdf_generation, 0);
PL_register_foreign("rdf_match_label",3, match_label, 0);
PL_register_foreign("rdf_save_db_", 2, rdf_save_db, 0);
PL_register_foreign("rdf_load_db_", 3, rdf_load_db, 0);
PL_register_foreign("rdf_reachable", 3, rdf_reachable3, NDET);
PL_register_foreign("rdf_reachable", 5, rdf_reachable5, NDET);
PL_register_foreign("rdf_reset_db_", 0, rdf_reset_db, 0);
PL_register_foreign("rdf_set_predicate",
2, rdf_set_predicate, 0);
PL_register_foreign("rdf_predicate_property_",
2, rdf_predicate_property, NDET);
PL_register_foreign("rdf_current_predicates",
1, rdf_current_predicates, 0);
PL_register_foreign("rdf_current_literal",
1, rdf_current_literal, NDET);
PL_register_foreign("rdf_graphs_", 1, rdf_graphs, 0);
PL_register_foreign("rdf_set_graph_source", 3, rdf_set_graph_source, 0);
PL_register_foreign("rdf_unset_graph_source", 1, rdf_unset_graph_source, 0);
PL_register_foreign("rdf_graph_source_", 3, rdf_graph_source, 0);
PL_register_foreign("rdf_estimate_complexity",
4, rdf_estimate_complexity, 0);
PL_register_foreign("rdf_transaction_",2, rdf_transaction, META);
PL_register_foreign("rdf_active_transactions_",
1, rdf_active_transactions, 0);
PL_register_foreign("rdf_monitor_", 2, rdf_monitor, META);
/*PL_register_foreign("rdf_broadcast_", 2, rdf_broadcast, 0);*/
#ifdef WITH_MD5
PL_register_foreign("rdf_md5", 2, rdf_md5, 0);
PL_register_foreign("rdf_atom_md5", 3, rdf_atom_md5, 0);
#endif
PL_register_foreign("rdf_quote_uri", 2, rdf_quote_uri, 0);
#ifdef O_DEBUG
PL_register_foreign("rdf_debug", 1, rdf_debug, 0);
PL_register_foreign("rdf_print_predicate_cloud", 1, rdf_print_predicate_cloud, 0);
#endif
#ifdef O_SECURE
PL_register_foreign("rdf_dump_literals", 0, dump_literals, 0);
PL_register_foreign("rdf_check_literals", 0, check_transitivity, 0);
#endif
PL_register_foreign("lang_matches", 2, lang_matches, 0);
install_atom_map();
}