/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker E-mail: J.Wielemaker@uva.nl WWW: http://www.swi-prolog.org Copyright (C): 1985-2009, University of Amsterdam This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef HAVE_CONFIG_H #include #endif #define WITH_MD5 1 #define WITH_PL_MUTEX 1 #define _GNU_SOURCE 1 /* get rwlocks from glibc */ #ifdef _REENTRANT #ifdef __WINDOWS__ #include /* alloca() */ #define inline __inline #ifndef SIZEOF_LONG #define SIZEOF_LONG 4 #endif #else #if (!defined(__GNUC__) || defined(__hpux)) && defined(HAVE_ALLOCA_H) #include #endif #include #endif #endif #include #include #include "rdf_db.h" #include #include #include #include #include #include "avl.h" #ifdef WITH_MD5 #include "md5.h" #include "atom.h" #include "debug.h" #include "hash.h" #include "murmur.h" #undef UNLOCK static void md5_triple(triple *t, md5_byte_t *digest); static void sum_digest(md5_byte_t *digest, md5_byte_t *add); static void dec_digest(md5_byte_t *digest, md5_byte_t *add); #endif /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - The ids form a mask. This must be kept consistent with monitor_mask/2 in rdf_db.pl! 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ typedef enum { EV_ASSERT = 0x0001, /* triple */ EV_ASSERT_LOAD = 0x0002, /* triple */ EV_RETRACT = 0x0004, /* triple */ EV_UPDATE = 0x0008, /* old, new */ EV_NEW_LITERAL = 0x0010, /* literal */ EV_OLD_LITERAL = 0x0020, /* literal */ EV_TRANSACTION = 0x0040, /* id, begin/end */ EV_LOAD = 0x0080, /* id, begin/end */ EV_REHASH = 0x0100 /* begin/end */ } broadcast_id; static int broadcast(broadcast_id id, void *a1, void *a2); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - We now use malloc/free/realloc calls with explicit sizes to allow maintaining statistics as well as to prepare for dealing with special memory pools associated with databases. Using -DDIRECT_MALLOC the library uses plain malloc to facilitate malloc debuggers. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #ifdef DIRECT_MALLOC #define rdf_malloc(db, size) malloc(size) #define rdf_free(db, ptr, size) free(ptr) #define rdf_realloc(db, ptr, old, new) realloc(ptr, new) #else /*DIRECT_MALLOC*/ #if CHECK_MALLOC_SIZES static void * rdf_malloc(rdf_db *db, size_t size) { size_t bytes = size + sizeof(size_t); size_t *ptr = PL_malloc(bytes); *ptr++ = size; if ( db ) db->core += size; return ptr; } static void rdf_free(rdf_db *db, void *ptr, size_t size) { size_t *p = ptr; assert(p[-1] == size); db->core -= size; PL_free(&p[-1]); } static void * rdf_realloc(rdf_db *db, void *ptr, size_t old, size_t new) { size_t *p = ptr; size_t bytes = new + sizeof(size_t); assert(p[-1] == old); p = PL_realloc(&p[-1], bytes); *p++ = new; db->core< += new-old; return p; } #else /*CHECK_MALLOC_SIZES*/ static void * rdf_malloc(rdf_db *db, size_t size) { if ( db ) db->core += size; return PL_malloc(size); } static void rdf_free(rdf_db *db, void *ptr, size_t size) { db->core -= size; PL_free(ptr); } static void * rdf_realloc(rdf_db *db, void *ptr, size_t old, size_t new) { db->core += new-old; return 
PL_realloc(ptr, new); } #endif /*CHECK_MALLOC_SIZES*/ #endif /*DIRECT_MALLOC*/ static functor_t FUNCTOR_literal1; static functor_t FUNCTOR_literal2; static functor_t FUNCTOR_error2; static functor_t FUNCTOR_type_error2; static functor_t FUNCTOR_domain_error2; static functor_t FUNCTOR_colon2; static functor_t FUNCTOR_triples1; static functor_t FUNCTOR_triples2; static functor_t FUNCTOR_subjects1; static functor_t FUNCTOR_predicates1; static functor_t FUNCTOR_duplicates1; static functor_t FUNCTOR_literals1; static functor_t FUNCTOR_subject1; static functor_t FUNCTOR_predicate1; static functor_t FUNCTOR_object1; static functor_t FUNCTOR_graph1; static functor_t FUNCTOR_indexed8; static functor_t FUNCTOR_exact1; static functor_t FUNCTOR_plain1; static functor_t FUNCTOR_substring1; static functor_t FUNCTOR_word1; static functor_t FUNCTOR_prefix1; static functor_t FUNCTOR_like1; static functor_t FUNCTOR_symmetric1; static functor_t FUNCTOR_inverse_of1; static functor_t FUNCTOR_transitive1; static functor_t FUNCTOR_rdf_subject_branch_factor1; /* S --> BF*O */ static functor_t FUNCTOR_rdf_object_branch_factor1; /* O --> BF*S */ static functor_t FUNCTOR_rdfs_subject_branch_factor1; /* S --> BF*O */ static functor_t FUNCTOR_rdfs_object_branch_factor1; /* O --> BF*S */ static functor_t FUNCTOR_searched_nodes1; static functor_t FUNCTOR_lang2; static functor_t FUNCTOR_type2; static functor_t FUNCTOR_gc2; static functor_t FUNCTOR_rehash2; static functor_t FUNCTOR_core1; static functor_t FUNCTOR_assert4; static functor_t FUNCTOR_retract4; static functor_t FUNCTOR_update5; static functor_t FUNCTOR_new_literal1; static functor_t FUNCTOR_old_literal1; static functor_t FUNCTOR_transaction2; static functor_t FUNCTOR_load2; static functor_t FUNCTOR_rehash1; static functor_t FUNCTOR_begin1; static functor_t FUNCTOR_end1; static atom_t ATOM_user; static atom_t ATOM_exact; static atom_t ATOM_plain; static atom_t ATOM_prefix; static atom_t ATOM_substring; static atom_t ATOM_word; static 
atom_t ATOM_like; static atom_t ATOM_error; static atom_t ATOM_begin; static atom_t ATOM_end; static atom_t ATOM_infinite; static atom_t ATOM_subPropertyOf; static predicate_t PRED_call1; #define MATCH_EXACT 0x01 /* exact triple match */ #define MATCH_SUBPROPERTY 0x02 /* Use subPropertyOf relations */ #define MATCH_SRC 0x04 /* Match graph location */ #define MATCH_INVERSE 0x08 /* use symmetric match too */ #define MATCH_QUAL 0x10 /* Match qualifiers too */ #define MATCH_DUPLICATE (MATCH_EXACT|MATCH_QUAL) static int WANT_GC(rdf_db *db); static int match_triples(triple *t, triple *p, unsigned flags); static int update_duplicates_add(rdf_db *db, triple *t); static void update_duplicates_del(rdf_db *db, triple *t); static void unlock_atoms(triple *t); static void lock_atoms(triple *t); static void unlock_atoms_literal(literal *lit); static int update_hash(rdf_db *db); static int triple_hash(rdf_db *db, triple *t, int which); static unsigned long object_hash(triple *t); static void reset_db(rdf_db *db); static void record_transaction(rdf_db *db, tr_type type, triple *t); static void record_md5_transaction(rdf_db *db, graph *src, md5_byte_t *digest); static void create_reachability_matrix(rdf_db *db, predicate_cloud *cloud); static int get_predicate(rdf_db *db, term_t t, predicate **p); static predicate_cloud *new_predicate_cloud(rdf_db *db, predicate **p, size_t count); static int unify_literal(term_t lit, literal *l); static int check_predicate_cloud(predicate_cloud *c); /******************************* * LOCKING * *******************************/ #define RDLOCK(db) rdlock(&db->lock) #define WRLOCK(db, allowreaders) wrlock(&db->lock, allowreaders) #define LOCKOUT_READERS(db) lockout_readers(&db->lock) #define REALLOW_READERS(db) reallow_readers(&db->lock) #define WRUNLOCK(db) unlock(&db->lock, FALSE) #define RDUNLOCK(db) unlock(&db->lock, TRUE) #define LOCK_MISC(db) lock_misc(&db->lock) #define UNLOCK_MISC(db) unlock_misc(&db->lock) #define INIT_LOCK(db) 
init_lock(&db->lock) /******************************* * ERRORS * *******************************/ static int instantiation_error(term_t actual) { term_t ex; if ( (ex = PL_new_term_ref()) && PL_unify_term(ex, PL_FUNCTOR, FUNCTOR_error2, PL_CHARS, "instantiation_error", PL_VARIABLE) ) return PL_raise_exception(ex); return FALSE; } static int type_error(term_t actual, const char *expected) { term_t ex; if ( (ex = PL_new_term_ref()) && PL_unify_term(ex, PL_FUNCTOR, FUNCTOR_error2, PL_FUNCTOR, FUNCTOR_type_error2, PL_CHARS, expected, PL_TERM, actual, PL_VARIABLE) ) return PL_raise_exception(ex); return FALSE; } static int domain_error(term_t actual, const char *expected) { term_t ex; if ( (ex = PL_new_term_ref()) && PL_unify_term(ex, PL_FUNCTOR, FUNCTOR_error2, PL_FUNCTOR, FUNCTOR_domain_error2, PL_CHARS, expected, PL_TERM, actual, PL_VARIABLE) ) return PL_raise_exception(ex); return FALSE; } static int permission_error(const char *op, const char *type, const char *obj, const char *msg) { term_t ex, ctx; if ( !(ex = PL_new_term_ref()) || !(ctx = PL_new_term_ref()) ) return FALSE; if ( msg ) { if ( !PL_unify_term(ctx, PL_FUNCTOR_CHARS, "context", 2, PL_VARIABLE, PL_CHARS, msg) ) return FALSE; } if ( !PL_unify_term(ex, PL_FUNCTOR_CHARS, "error", 2, PL_FUNCTOR_CHARS, "permission_error", 3, PL_CHARS, op, PL_CHARS, type, PL_CHARS, obj, PL_TERM, ctx) ) return FALSE; return PL_raise_exception(ex); } static int get_atom_ex(term_t t, atom_t *a) { if ( PL_get_atom(t, a) ) return TRUE; return type_error(t, "atom"); } static int get_long_ex(term_t t, long *v) { if ( PL_get_long(t, v) ) return TRUE; return type_error(t, "integer"); } static int get_double_ex(term_t t, double *v) { if ( PL_get_float(t, v) ) return TRUE; return type_error(t, "float"); } static int get_atom_or_var_ex(term_t t, atom_t *a) { if ( PL_get_atom(t, a) ) return TRUE; if ( PL_is_variable(t) ) { *a = 0L; return TRUE; } return type_error(t, "atom"); } static int get_resource_or_var_ex(term_t t, atom_t *a) { if ( 
PL_get_atom(t, a) ) return TRUE; if ( PL_is_variable(t) ) { *a = 0L; return TRUE; } if ( PL_is_functor(t, FUNCTOR_literal1) ) return FALSE; /* fail on rdf(literal(_), ...) */ return type_error(t, "atom"); } static int get_bool_arg_ex(int a, term_t t, int *val) { term_t arg = PL_new_term_ref(); if ( !PL_get_arg(a, t, arg) ) return type_error(t, "compound"); if ( !PL_get_bool(arg, val) ) return type_error(arg, "bool"); return TRUE; } /******************************* * DEBUG SUPPORT * *******************************/ #ifdef O_DEBUG #define PRT_SRC 0x1 static void print_literal(literal *lit) { switch(lit->objtype) { case OBJ_STRING: switch(lit->qualifier) { case Q_TYPE: Sdprintf("%s^^\"%s\"", PL_atom_chars(lit->value.string), PL_atom_chars(lit->type_or_lang)); break; case Q_LANG: Sdprintf("%s@\"%s\"", PL_atom_chars(lit->value.string), PL_atom_chars(lit->type_or_lang)); break; default: { size_t len; const char *s; const wchar_t *w; if ( (s = PL_atom_nchars(lit->value.string, &len)) ) { if ( strlen(s) == len ) Sdprintf("\"%s\"", s); else Sdprintf("\"%s\" (len=%d)", s, len); } else if ( (w = PL_atom_wchars(lit->value.string, &len)) ) { unsigned int i; Sputc('L', Serror); Sputc('"', Serror); for(i=0; ivalue.integer); break; case OBJ_DOUBLE: Sdprintf("%f", lit->value.real); break; case OBJ_TERM: { fid_t fid = PL_open_foreign_frame(); term_t term = PL_new_term_ref(); PL_recorded_external(lit->value.term.record, term); PL_write_term(Serror, term, 1200, PL_WRT_QUOTED|PL_WRT_NUMBERVARS|PL_WRT_PORTRAY); PL_discard_foreign_frame(fid); break; } default: assert(0); } } static void print_object(triple *t) { if ( t->object_is_literal ) { print_literal(t->object.literal); } else { Sdprintf("%s", PL_atom_chars(t->object.resource)); } } static void print_src(triple *t) { if ( t->line == NO_LINE ) Sdprintf(" [%s]", PL_atom_chars(t->graph)); else Sdprintf(" [%s:%ld]", PL_atom_chars(t->graph), t->line); } static void print_triple(triple *t, int flags) { Sdprintf("<%s %s ", 
PL_atom_chars(t->subject), PL_atom_chars(t->predicate.r->name)); print_object(t); if ( (flags & PRT_SRC) ) print_src(t); Sdprintf(">"); } #endif /******************************* * STORAGE * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Our one and only database (for the time being). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static rdf_db *DB; /******************************* * LISTS * *******************************/ static int add_list(rdf_db *db, list *list, void *value) { cell *c; for(c=list->head; c; c=c->next) { if ( c->value == value ) return FALSE; /* already a member */ } c = rdf_malloc(db, sizeof(*c)); c->value = value; c->next = NULL; if ( list->tail ) list->tail->next = c; else list->head = c; list->tail = c; return TRUE; } static int del_list(rdf_db *db, list *list, void *value) { cell *c, *p = NULL; for(c=list->head; c; p=c, c=c->next) { if ( c->value == value ) { if ( p ) p->next = c->next; else list->head = c->next; if ( !c->next ) list->tail = p; rdf_free(db, c, sizeof(*c)); return TRUE; } } return FALSE; /* not a member */ } static void free_list(rdf_db *db, list *list) { cell *c, *n; for(c=list->head; c; c=n) { n = c->next; rdf_free(db, c, sizeof(*c)); } list->head = list->tail = NULL; } /******************************* * ATOM SETS * *******************************/ #define CHUNKSIZE 1024 typedef struct mchunk { struct mchunk *next; size_t used; char buf[CHUNKSIZE]; } mchunk; typedef struct { avl_tree tree; mchunk *node_store; mchunk store0; } atomset; static void * alloc_node_atomset(void *ptr, size_t size) { void *p; atomset *as = ptr; assert(size < CHUNKSIZE); if ( as->node_store->used + size > CHUNKSIZE ) { mchunk *ch = malloc(sizeof(mchunk)); ch->used = 0; ch->next = as->node_store; as->node_store = ch; } p = &as->node_store->buf[as->node_store->used]; as->node_store->used += size; return p; } static void free_node_atomset(void *ptr, void *data, 
size_t size) { assert(0); } static int cmp_long_ptr(void *p1, void *p2, NODE type) { long *l1 = p1; long *l2 = p2; return *l1 < *l2 ? -1 : *l1 > *l2 ? 1 : 0; } static void init_atomset(atomset *as) { avlinit(&as->tree, as, sizeof(atom_t), cmp_long_ptr, NULL, alloc_node_atomset, free_node_atomset); as->node_store = &as->store0; as->node_store->next = NULL; as->node_store->used = 0; } static void destroy_atomset(atomset *as) { mchunk *ch, *next; for(ch=as->node_store; ch != &as->store0; ch = next) { next = ch->next; free(ch); } } static int add_atomset(atomset *as, atom_t atom) { return avlins(&as->tree, &atom) ? FALSE : TRUE; } /******************************* * PREDICATES * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Predicates are represented as first class citizens for three reasons: quickly answer on the transitive rdfs:subPropertyOf relation for rdf_hash/3, keep track of statistics that are useful for query optimization (#triples, branching factor) and keep properties (inverse/transitive). To answer the rdfs:subPropertyOf quickly, predicates are organised in `clouds', where a cloud defines a set of predicates connected through rdfs:subPropertyOf triples. The cloud numbers its members and maintains a bit-matrix that contains the closure of the reachability. Initially a predicate has a simple cloud of size 1. merge_clouds() and split_cloud() deals with adding and deleting rdfs:subPropertyOf relations. These operations try to modify the clouds that have no triples, so it can be done without a rehash. If this fails, the predicates keep their own hash to make search without rdfs:subPropertyOf still possible (so we can avoid frequent updates while loading triples), sets the cloud `dirty' flag and the DB's need_update flag. 
Queries that need rdfs:subPropertyOf find the need_update flag, which calls organise_predicates(), which cause a rehash if some predicates have changed hash-code to the new cloud they have become part of. TBD: We can do a partial re-hash in that case! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void init_pred_table(rdf_db *db) { int bytes = sizeof(predicate*)*INITIAL_PREDICATE_TABLE_SIZE; db->pred_table = rdf_malloc(db, bytes); memset(db->pred_table, 0, bytes); db->pred_table_size = INITIAL_PREDICATE_TABLE_SIZE; } static predicate * existing_predicate(rdf_db *db, atom_t name) { int hash = atom_hash(name) % db->pred_table_size; predicate *p; LOCK_MISC(db); for(p=db->pred_table[hash]; p; p = p->next) { if ( p->name == name ) { UNLOCK_MISC(db); return p; } } UNLOCK_MISC(db); return NULL; } static predicate * lookup_predicate(rdf_db *db, atom_t name) { int hash = atom_hash(name) % db->pred_table_size; predicate *p; predicate_cloud *cp; LOCK_MISC(db); for(p=db->pred_table[hash]; p; p = p->next) { if ( p->name == name ) { UNLOCK_MISC(db); return p; } } p = rdf_malloc(db, sizeof(*p)); memset(p, 0, sizeof(*p)); p->name = name; cp = new_predicate_cloud(db, &p, 1); p->hash = cp->hash; PL_register_atom(name); p->next = db->pred_table[hash]; db->pred_table[hash] = p; db->pred_count++; DEBUG(5, Sdprintf("Pred %s (count = %d)\n", PL_atom_chars(name), db->pred_count)); UNLOCK_MISC(db); return p; } static const char * pname(predicate *p) { if ( p->name ) return PL_atom_chars(p->name); else { static char *ring[10]; static int ri = 0; char buf[25]; char *r; Ssprintf(buf, "__D%p", p); ring[ri++] = r = strdup(buf); if ( ri == 10 ) { ri = 0; free(ring[ri]); } return (const char*)r; } } static int organise_predicates(rdf_db *db) /* TBD: rename&move */ { predicate **ht; int i; int changed = 0; DEBUG(2, Sdprintf("rdf_db: fixing predicate clouds\n")); for(i=0,ht = db->pred_table; ipred_table_size; i++, ht++) { predicate *p; for( p = *ht; p; p = 
p->next ) { predicate_cloud *cloud = p->cloud; if ( cloud->dirty ) { predicate **cp; int i2; for(i2=0, cp = cloud->members; i2 < cloud->size; i2++, cp++) { if ( (*cp)->hash != cloud->hash ) { (*cp)->hash = cloud->hash; if ( (*cp)->triple_count > 0 ) changed++; } } cloud->dirty = FALSE; } } } return changed; } /******************************* * PREDICATE CLOUDS * *******************************/ static predicate_cloud * new_predicate_cloud(rdf_db *db, predicate **p, size_t count) { predicate_cloud *cloud = rdf_malloc(db, sizeof(*cloud)); memset(cloud, 0, sizeof(*cloud)); cloud->hash = db->next_hash++; if ( count ) { int i; predicate **p2; cloud->size = count; cloud->members = rdf_malloc(db, sizeof(predicate*)*count); memcpy(cloud->members, p, sizeof(predicate*)*count); for(i=0, p2=cloud->members; isize; i++, p2++) (*p2)->cloud = cloud; } create_reachability_matrix(db, cloud); return cloud; } static void free_predicate_cloud(rdf_db *db, predicate_cloud *cloud) { if ( cloud->members ) { rdf_free(db, cloud->members, sizeof(predicate*)*cloud->size); } rdf_free(db, cloud, sizeof(*cloud)); } static long triples_in_predicate_cloud(predicate_cloud *cloud) { long triples = 0; predicate **p; int i; for(i=0, p=cloud->members; isize; i++, p++) triples += (*p)->triple_count; return triples; } /* Add the predicates of c2 to c1 and destroy c2. 
Returns c1 */ static predicate_cloud * append_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2, int update_hash) { predicate **p; int i; for(i=0, p=c2->members; isize; i++, p++) { (*p)->cloud = c1; if ( update_hash ) (*p)->hash = c1->hash; } if ( c1->size > 0 && c2->size > 0 ) { c1->members = rdf_realloc(db, c1->members, c1->size*sizeof(predicate*), (c1->size+c2->size)*sizeof(predicate*)); memcpy(&c1->members[c1->size], c2->members, c2->size*sizeof(predicate*)); c1->size += c2->size; free_predicate_cloud(db, c2); } else if ( c2->size > 0 ) { c1->members = c2->members; c1->size = c2->size; c2->members = NULL; free_predicate_cloud(db, c2); } else { free_predicate_cloud(db, c2); } return c1; } /* merge two predicate clouds. If either of them has no triples we can do the merge without rehashing the database. Note that this code is only called from addSubPropertyOf(). If c1==c2, we added an rdfs:subPropertyOf between two predicates in the same cloud. we must still update the matrix, though we could do it a bit more efficient. I doubt this is worth the trouble though. */ static predicate_cloud * merge_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2) { predicate_cloud *cloud; if ( c1 != c2 ) { if ( triples_in_predicate_cloud(c1) == 0 ) { cloud = append_clouds(db, c2, c1, TRUE); } else if ( triples_in_predicate_cloud(c2) == 0 ) { cloud = append_clouds(db, c1, c2, TRUE); } else { cloud = append_clouds(db, c1, c2, FALSE); cloud->dirty = TRUE; db->need_update++; } } else { cloud = c1; } DEBUG(1, if ( !db->need_update ) { check_predicate_cloud(cloud); }); create_reachability_matrix(db, cloud); return cloud; } /* split a cloud into multiple disjoint clouds. The first cloud is given the hash of the original, so we only need to update if new clouds are created. Ideally we should se whether it is possible to give the orginal hash to the one and only non-empty cloud to avoid re-hashing alltogether. 
*/ static void pred_reachable(predicate *start, char *visited, predicate **nodes, int *size) { if ( !visited[start->label] ) { cell *c; visited[start->label] = TRUE; nodes[(*size)++] = start; for(c=start->subPropertyOf.head; c; c=c->next) pred_reachable(c->value, visited, nodes, size); for(c=start->siblings.head; c; c=c->next) pred_reachable(c->value, visited, nodes, size); } } static int split_cloud(rdf_db *db, predicate_cloud *cloud, predicate_cloud **parts, int size) { char *done = alloca(cloud->size*sizeof(char)); predicate **graph = alloca(cloud->size*sizeof(predicate*)); int found = 0; int i; memset(done, 0, cloud->size*sizeof(char)); for(i=0; isize; i++) { if ( !done[i] ) { predicate *start = cloud->members[i]; predicate_cloud *new_cloud; int gsize = 0; pred_reachable(start, done, graph, &gsize); new_cloud = new_predicate_cloud(db, graph, gsize); if ( found == 0 ) { new_cloud->hash = cloud->hash; } else { new_cloud->dirty = TRUE; /* preds come from another cloud */ db->need_update++; } parts[found++] = new_cloud; } } free_predicate_cloud(db, cloud); return found; } static unsigned long predicate_hash(predicate *p) { return p->hash; } static void addSubPropertyOf(rdf_db *db, predicate *sub, predicate *super) { /*DEBUG(2, Sdprintf("addSubPropertyOf(%s, %s)\n", pname(sub), pname(super)));*/ if ( add_list(db, &sub->subPropertyOf, super) ) { add_list(db, &super->siblings, sub); merge_clouds(db, sub->cloud, super->cloud); } } /* deleting an rdfs:subPropertyOf. This is a bit naughty. If the cloud is still connected we only need to refresh the reachability matrix. Otherwise the cloud breaks in maximum two clusters. We can decide to leave it as is, which saves a re-hash of the triples but harms indexing. Alternative we can create a new cloud for one of the clusters and re-hash. 
*/ static void delSubPropertyOf(rdf_db *db, predicate *sub, predicate *super) { if ( del_list(db, &sub->subPropertyOf, super) ) { del_list(db, &super->siblings, sub); /* if ( not worth the trouble ) create_reachability_matrix(db, sub->cloud); else */ { predicate_cloud *parts[2]; split_cloud(db, sub->cloud, parts, 2); } } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Reachability matrix. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #define WBITSIZE (sizeof(int)*8) static size_t byte_size_bitmatrix(size_t w, size_t h) { size_t wsize = ((w*h)+WBITSIZE-1)/WBITSIZE; return (size_t)(intptr_t)&((bitmatrix*)NULL)->bits[wsize]; } static bitmatrix * alloc_bitmatrix(rdf_db *db, size_t w, size_t h) { size_t size = byte_size_bitmatrix(w, h); bitmatrix *m = rdf_malloc(db, size); memset(m, 0, size); m->width = w; m->heigth = h; return m; } static void free_bitmatrix(rdf_db *db, bitmatrix *bm) { size_t size = byte_size_bitmatrix(bm->width, bm->heigth); rdf_free(db, bm, size); } #undef setbit /* conflict in HPUX 11.23 */ static void setbit(bitmatrix *m, int i, int j) { size_t ij = m->width*i+j; size_t word = ij/WBITSIZE; int bit = ij%WBITSIZE; m->bits[word] |= 1<width*i+j; size_t word = ij/WBITSIZE; int bit = ij%WBITSIZE; return ((m->bits[word] & (1<members; isize; i++, p++) (*p)->label = i; return i; } static void fill_reachable(bitmatrix *bm, predicate *p0, predicate *p) { if ( !testbit(bm, p0->label, p->label) ) { cell *c; DEBUG(1, Sdprintf(" Reachable [%s (%d)]\n", pname(p), p->label)); setbit(bm, p0->label, p->label); for(c = p->subPropertyOf.head; c; c=c->next) fill_reachable(bm, p0, c->value); } } static void create_reachability_matrix(rdf_db *db, predicate_cloud *cloud) { bitmatrix *m = alloc_bitmatrix(db, cloud->size, cloud->size); predicate **p; int i; label_predicate_cloud(cloud); for(i=0, p=cloud->members; isize; i++, p++) { DEBUG(1, Sdprintf("Reachability for %s (%d)\n", pname(*p), (*p)->label)); 
fill_reachable(m, *p, *p); } if ( cloud->reachable ) free_bitmatrix(db, cloud->reachable); cloud->reachable = m; } static int isSubPropertyOf(predicate *sub, predicate *p) { if ( sub->cloud == p->cloud ) return testbit(sub->cloud->reachable, sub->label, p->label); return FALSE; } /******************************* * PRINT PREDICATE HIERARCHY * *******************************/ static int check_predicate_cloud(predicate_cloud *c) { predicate **p; int errors = 0; int i; DEBUG(1, if ( c->dirty ) Sdprintf("Cloud is dirty\n")); for(i=0, p=c->members; isize; i++, p++) { if ( !c->dirty ) { if ( (*p)->hash != c->hash ) { Sdprintf("Hash of %s doesn't match cloud hash\n", pname(*p)); errors++; } } if ( (*p)->cloud != c ) { Sdprintf("Wrong cloud of %s\n", pname(*p)); errors++; } } return errors; } static void print_reachability_cloud(predicate *p) { int x, y; predicate_cloud *cloud = p->cloud; check_predicate_cloud(cloud); Sdprintf("Reachability matrix:\n"); for(x=0; xreachable->width; x++) Sdprintf("%d", x%10); Sdprintf("\n"); for(y=0; yreachable->heigth; y++) { for(x=0; xreachable->width; x++) { if ( testbit(cloud->reachable, x, y) ) Sdprintf("X"); else Sdprintf("."); } Sdprintf(" %2d %s\n", y, PL_atom_chars(cloud->members[y]->name)); assert(cloud->members[y]->label == y); } } static foreign_t rdf_print_predicate_cloud(term_t t) { predicate *p; rdf_db *db = DB; if ( !get_predicate(db, t, &p) ) return FALSE; print_reachability_cloud(p); return TRUE; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Branching factors are crucial in ordering the statements of a conjunction. These functions compute the average branching factor in both directions ("subject --> P --> object" and "object --> P --> subject") by determining the number of unique values at either side of the predicate. This number is only recomputed if it is considered `dirty'. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int update_predicate_counts(rdf_db *db, predicate *p, int which) { long total = 0; if ( which == DISTINCT_DIRECT ) { long changed = abs(p->triple_count - p->distinct_updated[DISTINCT_DIRECT]); if ( changed < p->distinct_updated[DISTINCT_DIRECT] ) return TRUE; if ( p->triple_count == 0 ) { p->distinct_count[which] = 0; p->distinct_subjects[which] = 0; p->distinct_objects[which] = 0; return TRUE; } } else { long changed = db->generation - p->distinct_updated[DISTINCT_SUB]; if ( changed < p->distinct_count[DISTINCT_SUB] ) return TRUE; } if ( !update_hash(db) ) return FALSE; { atomset subject_set; atomset object_set; triple t; triple *byp; memset(&t, 0, sizeof(t)); t.predicate.r = p; t.indexed |= BY_P; init_atomset(&subject_set); init_atomset(&object_set); for(byp = db->table[t.indexed][triple_hash(db, &t, t.indexed)]; byp; byp = byp->next[t.indexed]) { if ( !byp->erased && !byp->is_duplicate ) { if ( (which == DISTINCT_DIRECT && byp->predicate.r == p) || (which != DISTINCT_DIRECT && isSubPropertyOf(byp->predicate.r, p)) ) { total++; add_atomset(&subject_set, byp->subject); add_atomset(&object_set, object_hash(byp)); /* NOTE: not exact! */ } } } p->distinct_count[which] = total; p->distinct_subjects[which] = subject_set.tree.count; p->distinct_objects[which] = object_set.tree.count; destroy_atomset(&subject_set); destroy_atomset(&object_set); if ( which == DISTINCT_DIRECT ) p->distinct_updated[DISTINCT_DIRECT] = total; else p->distinct_updated[DISTINCT_SUB] = db->generation; DEBUG(1, Sdprintf("%s: distinct subjects (%s): %ld, objects: %ld\n", PL_atom_chars(p->name), (which == DISTINCT_DIRECT ? 
"rdf" : "rdfs"), p->distinct_subjects[which], p->distinct_objects[which])); } return TRUE; } static void invalidate_distinct_counts(rdf_db *db) { predicate **ht; int i; for(i=0,ht = db->pred_table; ipred_table_size; i++, ht++) { predicate *p; for( p = *ht; p; p = p->next ) { p->distinct_updated[DISTINCT_SUB] = 0; p->distinct_count[DISTINCT_SUB] = 0; p->distinct_subjects[DISTINCT_SUB] = 0; p->distinct_objects[DISTINCT_SUB] = 0; } } } static double subject_branch_factor(rdf_db *db, predicate *p, int which) { if ( !update_predicate_counts(db, p, which) ) return FALSE; if ( p->distinct_subjects[which] == 0 ) return 0.0; /* 0 --> 0 */ return (double)p->distinct_count[which] / (double)p->distinct_subjects[which]; } static double object_branch_factor(rdf_db *db, predicate *p, int which) { if ( !update_predicate_counts(db, p, which) ) return FALSE; if ( p->distinct_objects[which] == 0 ) return 0.0; /* 0 --> 0 */ return (double)p->distinct_count[which] / (double)p->distinct_objects[which]; } /******************************* * NAMED GRAPHS * *******************************/ /* MT: all calls must be locked */ static void init_graph_table(rdf_db *db) { int bytes = sizeof(predicate*)*INITIAL_GRAPH_TABLE_SIZE; db->graph_table = rdf_malloc(db, bytes); memset(db->graph_table, 0, bytes); db->graph_table_size = INITIAL_GRAPH_TABLE_SIZE; } static graph * lookup_graph(rdf_db *db, atom_t name, int create) { int hash = atom_hash(name) % db->graph_table_size; graph *src; LOCK_MISC(db); for(src=db->graph_table[hash]; src; src = src->next) { if ( src->name == name ) { UNLOCK_MISC(db); return src; } } if ( !create ) { UNLOCK_MISC(db); return NULL; } src = rdf_malloc(db, sizeof(*src)); memset(src, 0, sizeof(*src)); src->name = name; src->md5 = TRUE; PL_register_atom(name); src->next = db->graph_table[hash]; db->graph_table[hash] = src; UNLOCK_MISC(db); return src; } static void erase_graphs(rdf_db *db) { graph **ht; int i; for(i=0,ht = db->graph_table; igraph_table_size; i++, ht++) { graph 
*src, *n; for( src = *ht; src; src = n ) { n = src->next; PL_unregister_atom(src->name); if ( src->source ) PL_unregister_atom(src->source); rdf_free(db, src, sizeof(*src)); } *ht = NULL; } db->last_graph = NULL; } static void register_graph(rdf_db *db, triple *t) { graph *src; if ( !t->graph ) return; if ( db->last_graph && db->last_graph->name == t->graph ) { src = db->last_graph; } else { src = lookup_graph(db, t->graph, TRUE); db->last_graph = src; } src->triple_count++; #ifdef WITH_MD5 if ( src->md5 ) { md5_byte_t digest[16]; md5_triple(t, digest); sum_digest(src->digest, digest); } #endif } static void unregister_graph(rdf_db *db, triple *t) { graph *src; if ( !t->graph ) return; if ( db->last_graph && db->last_graph->name == t->graph ) { src = db->last_graph; } else { src = lookup_graph(db, t->graph, TRUE); db->last_graph = src; } src->triple_count--; #ifdef WITH_MD5 if ( src->md5 ) { md5_byte_t digest[16]; md5_triple(t, digest); dec_digest(src->digest, digest); } #endif } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rdf_graphs_(-ListOfGraphs) Return a list holding the names of all currently defined graphs. We return a list to avoid the need for complicated long locks. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static foreign_t rdf_graphs(term_t list) { int i; term_t tail = PL_copy_term_ref(list); term_t head = PL_new_term_ref(); rdf_db *db = DB; if ( !RDLOCK(db) ) return FALSE; for(i=0; igraph_table_size; i++) { graph *src; for(src=db->graph_table[i]; src; src = src->next) { if ( !PL_unify_list(tail, head, tail) || !PL_unify_atom(head, src->name) ) { RDUNLOCK(db); return FALSE; } } } RDUNLOCK(db); return PL_unify_nil(tail); } static foreign_t rdf_graph_source(term_t graph_name, term_t source, term_t modified) { atom_t gn; int rc = FALSE; rdf_db *db = DB; if ( !get_atom_or_var_ex(graph_name, &gn) ) return FALSE; if ( gn ) { graph *s; if ( !RDLOCK(db) ) return FALSE; if ( (s = lookup_graph(db, gn, FALSE)) && s->source) { rc = ( PL_unify_atom(source, s->source) && PL_unify_float(modified, s->modified) ); } RDUNLOCK(db); } else { atom_t src; if ( get_atom_ex(source, &src) ) { int i; graph **ht; if ( !RDLOCK(db) ) return FALSE; for(i=0,ht = db->graph_table; igraph_table_size; i++, ht++) { graph *s; for( s = *ht; s; s = s->next ) { if ( s->source == src ) { rc = ( PL_unify_atom(graph_name, s->name) && PL_unify_float(modified, s->modified) ); } } } RDUNLOCK(db); } } return rc; } static foreign_t rdf_set_graph_source(term_t graph_name, term_t source, term_t modified) { atom_t gn, src; int rc = FALSE; rdf_db *db = DB; graph *s; double mtime; if ( !get_atom_ex(graph_name, &gn) || !get_atom_ex(source, &src) || !get_double_ex(modified, &mtime) ) return FALSE; if ( !RDLOCK(db) ) return FALSE; if ( (s = lookup_graph(db, gn, TRUE)) ) { if ( s->source != src ) { if ( s->source ) PL_unregister_atom(s->source); s->source = src; PL_register_atom(s->source); } s->modified = mtime; rc = TRUE; } RDUNLOCK(db); return rc; } static foreign_t rdf_unset_graph_source(term_t graph_name) { atom_t gn; rdf_db *db = DB; graph *s; if ( !get_atom_ex(graph_name, &gn) ) return FALSE; if ( (s = lookup_graph(db, gn, TRUE)) ) { if ( 
s->source ) { PL_unregister_atom(s->source); s->source = 0; } s->modified = 0.0; } if ( !RDLOCK(db) ) return FALSE; RDUNLOCK(db); return TRUE; } /******************************* * LITERALS * *******************************/ #define LITERAL_EX_MAGIC 0x2b97e881 typedef struct literal_ex { literal *literal; atom_info atom; #ifdef O_SECURE long magic; #endif } literal_ex; static inline void prepare_literal_ex(literal_ex *lex) { SECURE(lex->magic = 0x2b97e881); if ( lex->literal->objtype == OBJ_STRING ) { lex->atom.handle = lex->literal->value.string; lex->atom.resolved = FALSE; } } static literal * new_literal(rdf_db *db) { literal *lit = rdf_malloc(db, sizeof(*lit)); memset(lit, 0, sizeof(*lit)); lit->references = 1; return lit; } static void free_literal(rdf_db *db, literal *lit) { if ( --lit->references == 0 ) { unlock_atoms_literal(lit); if ( lit->shared && !db->resetting ) { literal_ex lex; lit->shared = FALSE; broadcast(EV_OLD_LITERAL, lit, NULL); DEBUG(2, Sdprintf("Delete %p from literal table: ", lit); print_literal(lit); Sdprintf("\n")); lex.literal = lit; prepare_literal_ex(&lex); if ( !avldel(&db->literals, &lex) ) { Sdprintf("Failed to delete %p (size=%ld): ", lit, db->literals.count); print_literal(lit); Sdprintf("\n"); assert(0); } } if ( lit->objtype == OBJ_TERM && lit->value.term.record ) { if ( lit->term_loaded ) rdf_free(db, lit->value.term.record, lit->value.term.len); else PL_erase_external(lit->value.term.record); } rdf_free(db, lit, sizeof(*lit)); } } static literal * copy_literal(rdf_db *db, literal *lit) { lit->references++; return lit; } static void alloc_literal_triple(rdf_db *db, triple *t) { if ( !t->object_is_literal ) { t->object.literal = new_literal(db); t->object_is_literal = TRUE; } } static void lock_atoms_literal(literal *lit) { if ( !lit->atoms_locked ) { lit->atoms_locked = TRUE; switch(lit->objtype) { case OBJ_STRING: PL_register_atom(lit->value.string); if ( lit->qualifier ) PL_register_atom(lit->type_or_lang); break; } } } 
static void unlock_atoms_literal(literal *lit) { if ( lit->atoms_locked ) { lit->atoms_locked = FALSE; switch(lit->objtype) { case OBJ_STRING: PL_unregister_atom(lit->value.string); if ( lit->qualifier ) PL_unregister_atom(lit->type_or_lang); break; } } } /******************************* * LITERAL DB * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - compare_literals() sorts literals. Ordering is defined as: * Numeric literals < string literals < term literals * Numeric literals (int and float) are sorted by value * String literals are sorted alhabetically - case independent, but uppercase before lowercase - locale (strcoll) sorting? - delete dyadrics - first on string, then on type, then on language * Terms are sorted on Prolog standard order of terms - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int compare_literals(void *p1, void *p2, NODE type) { literal_ex *lex = p1; literal *l1 = lex->literal; literal *l2 = *(literal**)p2; SECURE(assert(lex->magic == LITERAL_EX_MAGIC)); if ( l1->objtype == l2->objtype ) { switch(l1->objtype) { case OBJ_INTEGER: { int64_t v1 = l1->value.integer; int64_t v2 = l2->value.integer; return v1 < v2 ? -1 : v1 > v2 ? 1 : 0; } case OBJ_DOUBLE: { double v1 = l1->value.real; double v2 = l2->value.real; return v1 < v2 ? -1 : v1 > v2 ? 
1 : 0; } case OBJ_STRING: { int rc = cmp_atom_info(&lex->atom, l2->value.string); if ( rc == 0 ) { if ( l1->qualifier == l2->qualifier ) return cmp_atoms(l1->type_or_lang, l2->type_or_lang); return l1->qualifier - l2->qualifier; } return rc; } case OBJ_TERM: { fid_t fid = PL_open_foreign_frame(); term_t t1 = PL_new_term_ref(); term_t t2 = PL_new_term_ref(); int rc; PL_recorded_external(l1->value.term.record, t1); /* can also be handled in literal_ex */ PL_recorded_external(l2->value.term.record, t2); rc = PL_compare(t1, t2); PL_discard_foreign_frame(fid); return rc; } default: assert(0); return 0; } } else if ( l1->objtype == OBJ_INTEGER && l2->objtype == OBJ_DOUBLE ) { double v1 = (double)l1->value.integer; double v2 = l2->value.real; return v1 < v2 ? -1 : v1 > v2 ? 1 : -1; } else if ( l1->objtype == OBJ_DOUBLE && l2->objtype == OBJ_INTEGER ) { double v1 = l1->value.real; double v2 = (double)l2->value.integer; return v1 < v2 ? -1 : v1 > v2 ? 1 : 1; } else { return l1->objtype - l2->objtype; } } static void* avl_malloc(void *ptr, size_t size) { return rdf_malloc(ptr, size); } static void avl_free(void *ptr, void *data, size_t size) { rdf_free(ptr, data, size); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create the sorted literal tree. Note that we do not register a free handler for the tree as nodes are either already destroyed by free_literal() or by rdf_reset_db(). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void init_literal_table(rdf_db *db) { avlinit(&db->literals, db, sizeof(literal*), compare_literals, NULL, avl_malloc, avl_free); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - share_literal() takes a literal and replaces it with one from the literal database if there is a match. On a match, the argument literal is destroyed. Without a match it adds the literal to the database and returns it. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static literal * share_literal(rdf_db *db, literal *from) { literal **data; literal_ex lex; lex.literal = from; prepare_literal_ex(&lex); if ( (data = avlins(&db->literals, &lex)) ) { literal *l2 = *data; DEBUG(2, Sdprintf("Replace %p by %p:\n", from, l2); Sdprintf("\tfrom: "); print_literal(from); Sdprintf("\n\tto: "); print_literal(l2); Sdprintf("\n")); l2->references++; free_literal(db, from); return l2; } else { DEBUG(2, Sdprintf("Insert %p into literal table: ", from); print_literal(from); Sdprintf("\n")); from->shared = TRUE; broadcast(EV_NEW_LITERAL, from, NULL); return from; } } #ifdef O_SECURE static literal ** add_literals(AVLtree node, literal **p) { literal **litp; if ( node->subtree[LEFT] ) p = add_literals(node->subtree[LEFT], p); litp = (literal**)node->data; *p++ = *litp; if ( node->subtree[RIGHT] ) p = add_literals(node->subtree[RIGHT], p); return p; } static foreign_t check_transitivity() { rdf_db *db = DB; literal **array = malloc(sizeof(literal*)*db->literals.count); literal **p = array; int i,j; add_literals(db->literals.root, p); Sdprintf("Checking %ld literals ...\n", db->literals.count); for(i=0; iliterals.count; i++) { int end; Sdprintf("\r%6ld", i); end = i+100; if ( end > db->literals.count ) end = db->literals.count; for(j=i+1; j= 0 ) { Sdprintf("\nERROR: i,j=%d,%d: ", i, j); print_literal(array[i]); Sdprintf(" >= "); print_literal(array[j]); Sdprintf("\n"); } } } free(array); return TRUE; } static void dump_lnode(AVLtree node) { literal **litp; if ( node->subtree[LEFT] ) dump_lnode(node->subtree[LEFT]); litp = (literal**)node->data; print_literal(*litp); Sdprintf("\n"); if ( node->subtree[RIGHT] ) dump_lnode(node->subtree[RIGHT]); } static foreign_t dump_literals() { rdf_db *db = DB; dump_lnode(db->literals.root); return TRUE; } #endif /******************************* * TRIPLES * *******************************/ static void init_tables(rdf_db *db) { int i; int 
bytes = sizeof(triple*)*INITIAL_TABLE_SIZE; int cbytes = sizeof(int)*INITIAL_TABLE_SIZE; db->table[0] = &db->by_none; db->tail[0] = &db->by_none_tail; for(i=BY_S; i<=BY_OP; i++) { if ( i == BY_SO ) continue; db->table[i] = rdf_malloc(db, bytes); memset(db->table[i], 0, bytes); db->tail[i] = rdf_malloc(db, bytes); memset(db->tail[i], 0, bytes); db->counts[i] = rdf_malloc(db, cbytes); memset(db->counts[i], 0, cbytes); db->table_size[i] = INITIAL_TABLE_SIZE; } init_pred_table(db); init_graph_table(db); init_literal_table(db); } static rdf_db * new_db() { rdf_db *db = rdf_malloc(NULL, sizeof(*db)); memset(db, 0, sizeof(*db)); INIT_LOCK(db); init_tables(db); return db; } static triple * new_triple(rdf_db *db) { triple *t = rdf_malloc(db, sizeof(*t)); memset(t, 0, sizeof(*t)); t->allocated = TRUE; return t; } static void free_triple(rdf_db *db, triple *t) { unlock_atoms(t); if ( t->object_is_literal && t->object.literal ) free_literal(db, t->object.literal); if ( t->allocated ) rdf_free(db, t, sizeof(*t)); } #define HASHED 0x80000000 static unsigned int literal_hash(literal *lit) { if ( lit->hash & HASHED ) { return lit->hash; } else { unsigned int hash; switch(lit->objtype) { case OBJ_STRING: hash = atom_hash_case(lit->value.string); break; case OBJ_INTEGER: case OBJ_DOUBLE: hash = rdf_murmer_hash(&lit->value.integer, sizeof(lit->value.integer), MURMUR_SEED); break; case OBJ_TERM: hash = rdf_murmer_hash(lit->value.term.record, (int)lit->value.term.len, MURMUR_SEED); break; default: assert(0); return 0; } lit->hash = (hash | HASHED); return lit->hash; } } static unsigned long object_hash(triple *t) { if ( t->object_is_literal ) { return literal_hash(t->object.literal); } else { return atom_hash(t->object.resource); } } static int triple_hash(rdf_db *db, triple *t, int which) { unsigned long v; switch(which) { case BY_NONE: return 0; case BY_S: v = atom_hash(t->subject); break; case BY_P: v = predicate_hash(t->predicate.r); break; case BY_O: v = object_hash(t); break; 
case BY_SP: v = atom_hash(t->subject) ^ predicate_hash(t->predicate.r); break; case BY_OP: v = predicate_hash(t->predicate.r) ^ object_hash(t); break; default: v = 0; /* make compiler silent */ assert(0); } return (int)(v % (long)db->table_size[which]); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - by_inverse[] returns the index key to use for inverse search as needed to realise symmetric and inverse predicates. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int by_inverse[8] = { BY_NONE, /* BY_NONE = 0 */ BY_O, /* BY_S = 1 */ BY_P, /* BY_P = 2 */ BY_OP, /* BY_SP = 3 */ BY_S, /* BY_O = 4 */ BY_SO, /* BY_SO = 5 */ BY_SP, /* BY_OP = 6 */ BY_SPO, /* BY_SPO = 7 */ }; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - triple *first(atom_t subject) Find the first triple on subject. The first is marked to generate a unique subjects quickly; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static triple * first(rdf_db *db, atom_t subject) { triple *t, tmp; int hash; tmp.subject = subject; hash = triple_hash(db, &tmp, BY_S); for(t=db->table[BY_S][hash]; t; t = t->next[BY_S]) { if ( t->subject == subject && !t->erased ) return t; } return NULL; } static void link_triple_hash(rdf_db *db, triple *t) { int i; for(i=1; i<=BY_OP; i++) { if ( db->table[i] ) { int hash = triple_hash(db, t, i); if ( db->tail[i][hash] ) { db->tail[i][hash]->next[i] = t; } else { db->table[i][hash] = t; } db->tail[i][hash] = t; db->counts[i][hash]++; } } } typedef enum { DUP_NONE, DUP_DUPLICATE, DUP_DISCARDED } dub_state; static dub_state discard_duplicate(rdf_db *db, triple *t) { triple *d; const int indexed = BY_SP; dub_state rc = DUP_NONE; assert(t->is_duplicate == FALSE); assert(t->duplicates == 0); if ( WANT_GC(db) ) /* (*) See above */ update_hash(db); d = db->table[indexed][triple_hash(db, t, indexed)]; for( ; d && d != t; d = d->next[indexed] ) { if ( match_triples(d, t, 
MATCH_DUPLICATE) ) { if ( d->graph == t->graph && (d->line == NO_LINE || d->line == t->line) ) { free_triple(db, t); return DUP_DISCARDED; } rc = DUP_DUPLICATE; } } return rc; } /* MT: must be locked by caller */ static int link_triple_silent(rdf_db *db, triple *t) { triple *one; dub_state dup; if ( t->resolve_pred ) { t->predicate.r = lookup_predicate(db, t->predicate.u); t->resolve_pred = FALSE; } if ( (dup=discard_duplicate(db, t)) == DUP_DISCARDED ) return FALSE; if ( db->by_none_tail ) db->by_none_tail->next[BY_NONE] = t; else db->by_none = t; db->by_none_tail = t; link_triple_hash(db, t); if ( t->object_is_literal ) t->object.literal = share_literal(db, t->object.literal); if ( dup == DUP_DUPLICATE && update_duplicates_add(db, t) ) goto ok; /* is a duplicate */ /* keep track of subjects */ one = first(db, t->subject); if ( !one->first ) { one->first = TRUE; db->subjects++; } /* keep track of subPropertyOf */ if ( t->predicate.r->name == ATOM_subPropertyOf && t->object_is_literal == FALSE ) { predicate *me = lookup_predicate(db, t->subject); predicate *super = lookup_predicate(db, t->object.resource); addSubPropertyOf(db, me, super); } ok: db->created++; t->predicate.r->triple_count++; register_graph(db, t); return TRUE; } static inline void link_triple(rdf_db *db, triple *t) { if ( link_triple_silent(db, t) ) broadcast(EV_ASSERT, t, NULL); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rehash_triples() Relink the triples in the hash-chains after the hash-keys for properties have changed or the tables have been resized. The caller must ensure there are no active queries and the tables are of the proper size. At the same time, this predicate actually removes erased triples. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static long tbl_size(long triples) { long s0 = 1024; triples /= MIN_HASH_FACTOR; while(s0 < triples) s0 *= 2; return s0; } static void rehash_triples(rdf_db *db) { int i; triple *t, *t2; long count = db->created - db->freed; long tsize = tbl_size(count); DEBUG(1, Sdprintf("(%ld triples; %ld entries) ...", count, tsize)); broadcast(EV_REHASH, (void*)ATOM_begin, NULL); for(i=1; itable[i] ) { long bytes = sizeof(triple*) * tsize; long cbytes = sizeof(int) * tsize; long obytes = sizeof(triple*) * db->table_size[i]; long ocbytes = sizeof(int) * db->table_size[i]; db->table[i] = rdf_realloc(db, db->table[i], obytes, bytes); db->tail[i] = rdf_realloc(db, db->tail[i], obytes, bytes); db->counts[i] = rdf_realloc(db, db->counts[i], ocbytes, cbytes); db->table_size[i] = tsize; memset(db->table[i], 0, bytes); memset(db->tail[i], 0, bytes); memset(db->counts[i], 0, cbytes); } } /* delete leading erased triples */ for(t=db->by_none; t && t->erased; t=t2) { t2 = t->next[BY_NONE]; free_triple(db, t); db->freed++; db->by_none = t2; } for(t=db->by_none; t; t = t2) { triple *t3; t2 = t->next[BY_NONE]; for(i=1; inext[i] = NULL; assert(t->erased == FALSE); link_triple_hash(db, t); for( ; t2 && t2->erased; t2=t3 ) { t3 = t2->next[BY_NONE]; free_triple(db, t2); db->freed++; } t->next[BY_NONE] = t2; if ( !t2 ) db->by_none_tail = t; } if ( db->by_none == NULL ) db->by_none_tail = NULL; broadcast(EV_REHASH, (void*)ATOM_end, NULL); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - update_hash(). Note this may be called by readers and writers, but must be done only onces and certainly not concurrently by multiple readers. Hence we need a seperate lock. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* TRUE if a garbage-collecting rehash is wanted: either more than 1000
   triples are erased-but-not-freed and they outnumber the live ones, or
   the number of live triples exceeds table_size[1]*MAX_HASH_FACTOR.
   Always FALSE while db->gc_blocked is set.
*/

static int
WANT_GC(rdf_db *db)
{ if ( db->gc_blocked )
  { return FALSE;
  } else
  { long dirty = db->erased - db->freed;
    long count = db->created - db->erased;

    if ( dirty > 1000 && dirty > count )
      return TRUE;
    if ( count > db->table_size[1]*MAX_HASH_FACTOR )
      return TRUE;

    return FALSE;
  }
}


/* Bring the hash tables up to date if needed.  The cheap tests are done
   first without the lock and repeated after LOCK_MISC() has been
   obtained.  Always returns TRUE.
*/

static int
update_hash(rdf_db *db)
{ int want_gc = WANT_GC(db);

  if ( want_gc )
    DEBUG(1, Sdprintf("rdf_db: want GC\n"));

  if ( db->need_update || want_gc )
  { LOCK_MISC(db);

    if ( db->need_update )              /* check again */
    { if ( organise_predicates(db) )
      { long t0 = (long)PL_query(PL_QUERY_USER_CPU);

        DEBUG(1, Sdprintf("Re-hash ..."));
        invalidate_distinct_counts(db);
        rehash_triples(db);
        db->generation += (db->created-db->erased);
        db->rehash_count++;
        db->rehash_time += ((double)(PL_query(PL_QUERY_USER_CPU)-t0))/1000.0;
        DEBUG(1, Sdprintf("ok\n"));
      }
      db->need_update = 0;
    } else if ( WANT_GC(db) )
    { long t0 = (long)PL_query(PL_QUERY_USER_CPU);

      DEBUG(1, Sdprintf("rdf_db: GC ..."));
      rehash_triples(db);
      db->gc_count++;
      db->gc_time += ((double)(PL_query(PL_QUERY_USER_CPU)-t0))/1000.0;
      DEBUG(1, Sdprintf("ok\n"));
    }

    UNLOCK_MISC(db);
  }

  return TRUE;
}


/* MT: Must be locked

   Mark t as erased and update the bookkeeping that depends on it
   (duplicates, subPropertyOf hierarchy, subject count, predicate and
   graph statistics).  The literal object, if any, is released at once;
   the triple itself stays linked (rehash_triples() frees erased
   triples).
*/

static void
erase_triple_silent(rdf_db *db, triple *t)
{ if ( !t->erased )
  { t->erased = TRUE;

    update_duplicates_del(db, t);

    if ( t->predicate.r->name == ATOM_subPropertyOf &&
         t->object_is_literal == FALSE )
    { predicate *me    = lookup_predicate(db, t->subject);
      predicate *super = lookup_predicate(db, t->object.resource);

      delSubPropertyOf(db, me, super);
    }

    if ( t->first )                     /* pass the `first' mark on */
    { triple *one = first(db, t->subject);

      if ( one )
        one->first = TRUE;
      else
        db->subjects--;
    }
    db->erased++;
    t->predicate.r->triple_count--;
    unregister_graph(db, t);

    if ( t->object_is_literal )
    { literal *lit = t->object.literal;

      t->object.literal = NULL;
      free_literal(db, lit);            /* TBD: thread-safe? */
    }
  }
}


/* Broadcast EV_RETRACT for t and then erase it. */

static inline void
erase_triple(rdf_db *db, triple *t)
{ broadcast(EV_RETRACT, t, NULL);
  erase_triple_silent(db, t);
}


/* Match the object of triple t against that of pattern p.  Unset
   (zero) fields of the pattern literal act as wildcards; a pattern
   without object resource matches any object.
*/

static int
match_object(triple *t, triple *p, unsigned flags)
{ if ( p->object_is_literal )
  { if ( t->object_is_literal )
    { literal *plit = p->object.literal;
      literal *tlit = t->object.literal;

      if ( !plit->objtype && !plit->qualifier )
        return TRUE;                    /* pattern matches any literal */

      if ( plit->objtype && plit->objtype != tlit->objtype )
        return FALSE;

      switch( plit->objtype )
      { case 0:                         /* only the qualifier is given */
          if ( plit->qualifier &&
               tlit->qualifier != plit->qualifier )
            return FALSE;
          return TRUE;
        case OBJ_STRING:
          if ( (flags & MATCH_QUAL) ||
               p->match == STR_MATCH_PLAIN )
          { if ( tlit->qualifier != plit->qualifier )
              return FALSE;
          } else
          { if ( plit->qualifier && tlit->qualifier &&
                 tlit->qualifier != plit->qualifier )
              return FALSE;
          }
          if ( plit->type_or_lang &&
               tlit->type_or_lang != plit->type_or_lang )
            return FALSE;
          if ( plit->value.string )
          { if ( tlit->value.string != plit->value.string )
            { if ( p->match >= STR_MATCH_EXACT )
              { return match_atoms(p->match, plit->value.string, tlit->value.string);
              } else
              { return FALSE;
              }
            }
          }
          return TRUE;
        case OBJ_INTEGER:
          return tlit->value.integer == plit->value.integer;
        case OBJ_DOUBLE:
          return tlit->value.real == plit->value.real;
        case OBJ_TERM:
          if ( plit->value.term.record &&
               plit->value.term.len != tlit->value.term.len )
            return FALSE;
                                        /* compare the external records */
          return memcmp(tlit->value.term.record, plit->value.term.record,
                        plit->value.term.len) == 0;
        default:
          assert(0);
      }
    }
    return FALSE;                       /* t has a resource object */
  } else
  { if ( p->object.resource )
    { if ( t->object_is_literal ||
           (p->object.resource != t->object.resource) )
        return FALSE;
    }
  }

  return TRUE;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Match triple t to pattern p.  Erased triples are always skipped.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
match_triples(triple *t, triple *p, unsigned flags)
{ /* DEBUG(3, Sdprintf("match_triple(");
           print_triple(t, 0);
           Sdprintf(")\n"));
  */

  /* cheap tests first: erased, subject, object */
  if ( t->erased ||
       (p->subject && t->subject != p->subject) ||
       !match_object(t, p, flags) )
    return FALSE;

  /* graph and line are only compared on request */
  if ( flags & MATCH_SRC )
  { if ( (p->graph && t->graph != p->graph) ||
         (p->line && t->line != p->line) )
      return FALSE;
  }

  /* predicate last; may be expensive */
  if ( !p->predicate.r || t->predicate.r == p->predicate.r )
    return TRUE;
  if ( !(flags & MATCH_SUBPROPERTY) )
    return FALSE;

  return isSubPropertyOf(t->predicate.r, p->predicate.r);
}


                 /*******************************
                 *           SAVE/LOAD          *
                 *******************************/

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
The RDF triple format.  This format is intended for quick save and load
and not for readability or exchange.  Parts are based on the SWI-Prolog
Quick Load Format (implemented in pl-wic.c).
::= ['S' ] ['F' ] ['t' ] ['M' ] {} 'E' ::= "RDF-dump\n" ::= ::= * (16 bytes digest) ::= 'T' ::= ::= ::= "R" | "L" ::= ::= "X" "A" "W" ::= ::= ::= ::= - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #define SAVE_MAGIC "RDF-dump\n" #define SAVE_VERSION 2 typedef struct saved { atom_t name; long as; struct saved *next; } saved; typedef struct save_context { saved ** saved_table; long saved_size; long saved_id; } save_context; long next_table_size(long s0) { long size = 2; while(size < s0) size *= 2; return size; } static void init_saved(rdf_db *db, save_context *ctx) { long size = next_table_size((db->created - db->erased)/8); long bytes = size * sizeof(*ctx->saved_table); ctx->saved_table = rdf_malloc(db, bytes); memset(ctx->saved_table, 0, bytes); ctx->saved_size = size; ctx->saved_id = 0; } static void destroy_saved(rdf_db *db, save_context *ctx) { if ( ctx->saved_table ) { saved **s = ctx->saved_table; int i; for(i=0; isaved_size; i++, s++) { saved *c, *n; for(c=*s; c; c = n) { n = c->next; free(c); } } rdf_free(db, ctx->saved_table, ctx->saved_size*sizeof(*ctx->saved_table)); } } #define INT64BITSIZE (sizeof(int64_t)*8) #define PLMINLONG ((int64_t)((uint64_t)1<<(INT64BITSIZE-1))) static void save_int(IOSTREAM *fd, int64_t n) { int m; int64_t absn = (n >= 0 ? 
n : -n); if ( n != PLMINLONG ) { if ( absn < ((intptr_t)1 << 5) ) { Sputc((int)(n & 0x3f), fd); return; } else if ( absn < ((intptr_t)1 << 13) ) { Sputc((int)(((n >> 8) & 0x3f) | (1 << 6)), fd); Sputc((int)(n & 0xff), fd); return; } else if ( absn < ((intptr_t)1 << 21) ) { Sputc((int)(((n >> 16) & 0x3f) | (2 << 6)), fd); Sputc((int)((n >> 8) & 0xff), fd); Sputc((int)(n & 0xff), fd); return; } } for(m = sizeof(n); ; m--) { int b = (int)((absn >> (((m-1)*8)-1)) & 0x1ff); if ( b == 0 ) continue; break; } Sputc(m | (3 << 6), fd); for( ; m > 0; m--) { int b = (int)((n >> ((m-1)*8)) & 0xff); Sputc(b, fd); } } #define BYTES_PER_DOUBLE sizeof(double) #ifdef WORDS_BIGENDIAN static const int double_byte_order[] = { 7,6,5,4,3,2,1,0 }; #else static const int double_byte_order[] = { 0,1,2,3,4,5,6,7 }; #endif static int save_double(IOSTREAM *fd, double f) { unsigned char *cl = (unsigned char *)&f; unsigned int i; for(i=0; isaved_size; saved *s; size_t len; const char *chars; unsigned int i; const wchar_t *wchars; for(s=ctx->saved_table[hash]; s; s= s->next) { if ( s->name == a ) { Sputc('X', out); save_int(out, s->as); return TRUE; } } s = rdf_malloc(db, sizeof(*s)); s->name = a; s->as = ctx->saved_id++; s->next = ctx->saved_table[hash]; ctx->saved_table[hash] = s; if ( (chars = PL_atom_nchars(a, &len)) ) { Sputc('A', out); save_int(out, len); for(i=0; iencoding; Sputc('W', out); save_int(out, len); out->encoding = ENC_UTF8; for(i=0; i=0 && c <= 0x10ffff)); Sputcode(c, out); } out->encoding = enc; } else return FALSE; return TRUE; } static void write_triple(rdf_db *db, IOSTREAM *out, triple *t, save_context *ctx) { Sputc('T', out); save_atom(db, out, t->subject, ctx); save_atom(db, out, t->predicate.r->name, ctx); if ( t->object_is_literal ) { literal *lit = t->object.literal; if ( lit->qualifier ) { assert(lit->type_or_lang); Sputc(lit->qualifier == Q_LANG ? 
'l' : 't', out); save_atom(db, out, lit->type_or_lang, ctx); } switch(lit->objtype) { case OBJ_STRING: Sputc('L', out); save_atom(db, out, lit->value.string, ctx); break; case OBJ_INTEGER: Sputc('I', out); save_int(out, lit->value.integer); break; case OBJ_DOUBLE: { Sputc('F', out); save_double(out, lit->value.real); break; } case OBJ_TERM: { const char *s = lit->value.term.record; size_t len = lit->value.term.len; Sputc('T', out); save_int(out, len); while(len-- > 0) Sputc(*s++, out); break; } default: assert(0); } } else { Sputc('R', out); save_atom(db, out, t->object.resource, ctx); } save_atom(db, out, t->graph, ctx); save_int(out, t->line); } static void write_source(rdf_db *db, IOSTREAM *out, atom_t src, save_context *ctx) { graph *s = lookup_graph(db, src, FALSE); if ( s && s->source ) { Sputc('F', out); save_atom(db, out, s->source, ctx); Sputc('t', out); save_double(out, s->modified); } } static void write_md5(rdf_db *db, IOSTREAM *out, atom_t src) { graph *s = lookup_graph(db, src, FALSE); if ( s ) { md5_byte_t *p = s->digest; int i; Sputc('M', out); for(i=0; i<16; i++) Sputc(*p++, out); } } static int save_db(rdf_db *db, IOSTREAM *out, atom_t src) { triple *t; save_context ctx; if ( !RDLOCK(db) ) return FALSE; init_saved(db, &ctx); Sfprintf(out, "%s", SAVE_MAGIC); save_int(out, SAVE_VERSION); if ( src ) { Sputc('S', out); /* start of graph header */ save_atom(db, out, src, &ctx); write_source(db, out, src, &ctx); write_md5(db, out, src); } if ( Sferror(out) ) { RDUNLOCK(db); return FALSE; } for(t = db->by_none; t; t = t->next[BY_NONE]) { if ( !t->erased && (!src || t->graph == src) ) { write_triple(db, out, t, &ctx); if ( Sferror(out) ) return FALSE; } } Sputc('E', out); if ( Sferror(out) ) { RDUNLOCK(db); return FALSE; } destroy_saved(db, &ctx); RDUNLOCK(db); return TRUE; } static foreign_t rdf_save_db(term_t stream, term_t graph) { IOSTREAM *out; atom_t src; if ( !PL_get_stream_handle(stream, &out) ) return type_error(stream, "stream"); if ( 
!get_atom_or_var_ex(graph, &src) ) return FALSE; return save_db(DB, out, src); } static int64_t load_int(IOSTREAM *fd) { int64_t first = Sgetc(fd); int bytes, shift, b; if ( !(first & 0xc0) ) /* 99% of them: speed up a bit */ { first <<= (INT64BITSIZE-6); first >>= (INT64BITSIZE-6); return first; } bytes = (int) ((first >> 6) & 0x3); first &= 0x3f; if ( bytes <= 2 ) { for( b = 0; b < bytes; b++ ) { first <<= 8; first |= Sgetc(fd) & 0xff; } shift = (sizeof(first)-1-bytes)*8 + 2; } else { int m; bytes = (int)first; first = 0L; for(m=0; m>= shift; return first; } static int load_double(IOSTREAM *fd, double *fp) { double f; unsigned char *cl = (unsigned char *)&f; unsigned int i; for(i=0; iloaded_id >= ctx->atoms_size ) { if ( ctx->atoms_size == 0 ) { ctx->atoms_size = 1024; ctx->loaded_atoms = rdf_malloc(db, sizeof(atom_t)*ctx->atoms_size); } else { long obytes = sizeof(atom_t)*ctx->atoms_size; long bytes; ctx->atoms_size *= 2; bytes = sizeof(atom_t)*ctx->atoms_size; ctx->loaded_atoms = rdf_realloc(db, ctx->loaded_atoms, obytes, bytes); } } ctx->loaded_atoms[ctx->loaded_id++] = a; } static atom_t load_atom(rdf_db *db, IOSTREAM *in, ld_context *ctx) { switch(Sgetc(in)) { case 'X': { intptr_t idx = (intptr_t)load_int(in); return ctx->loaded_atoms[idx]; } case 'A': { size_t len = (size_t)load_int(in); atom_t a; if ( len < 1024 ) { char buf[1024]; Sfread(buf, 1, len, in); a = PL_new_atom_nchars(len, buf); } else { char *buf = rdf_malloc(db, len); Sfread(buf, 1, len, in); a = PL_new_atom_nchars(len, buf); rdf_free(db, buf, len); } add_atom(db, a, ctx); return a; } case 'W': { int len = (int)load_int(in); atom_t a; wchar_t buf[1024]; wchar_t *w; IOENC enc = in->encoding; int i; if ( len < 1024 ) w = buf; else w = rdf_malloc(db, len*sizeof(wchar_t)); in->encoding = ENC_UTF8; for(i=0; i=0 && w[i] <= 0x10ffff)); } in->encoding = enc; a = PL_new_atom_wchars(len, w); if ( w != buf ) rdf_free(db, w, len*sizeof(wchar_t)); add_atom(db, a, ctx); return a; } default: { assert(0); 
return 0; } } } static triple * load_triple(rdf_db *db, IOSTREAM *in, ld_context *ctx) { triple *t = new_triple(db); int c; t->subject = load_atom(db, in, ctx); t->predicate.u = load_atom(db, in, ctx); t->resolve_pred = TRUE; if ( (c=Sgetc(in)) == 'R' ) { t->object.resource = load_atom(db, in, ctx); } else { literal *lit = new_literal(db); t->object_is_literal = TRUE; t->object.literal = lit; value: switch(c) { case 'L': lit->objtype = OBJ_STRING; lit->value.string = load_atom(db, in, ctx); break; case 'I': lit->objtype = OBJ_INTEGER; lit->value.integer = load_int(in); break; case 'F': lit->objtype = OBJ_DOUBLE; load_double(in, &lit->value.real); break; case 'T': { unsigned int i; char *s; lit->objtype = OBJ_TERM; lit->value.term.len = (size_t)load_int(in); lit->value.term.record = rdf_malloc(db, lit->value.term.len); lit->term_loaded = TRUE; /* see free_literal() */ s = (char *)lit->value.term.record; for(i=0; ivalue.term.len; i++) s[i] = Sgetc(in); break; } case 'l': lit->qualifier = Q_LANG; lit->type_or_lang = load_atom(db, in, ctx); c = Sgetc(in); goto value; case 't': lit->qualifier = Q_TYPE; lit->type_or_lang = load_atom(db, in, ctx); c = Sgetc(in); goto value; default: assert(0); return NULL; } } t->graph = load_atom(db, in, ctx); t->line = (unsigned long)load_int(in); if ( !ctx->graph ) { if ( !ctx->graph_table ) ctx->graph_table = new_atom_hash(64); add_atom_hash(ctx->graph_table, t->graph); } return t; } static int load_magic(IOSTREAM *in) { char *s = SAVE_MAGIC; for( ; *s; s++) { if ( Sgetc(in) != *s ) return FALSE; } return TRUE; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Note that we have two types of saved states. One holding many named graphs and one holding the content of exactly one named graph. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #define LOAD_ERROR ((triple*)(intptr_t)-1) static triple * load_db(rdf_db *db, IOSTREAM *in, ld_context *ctx) { int version; int c; triple *list = NULL, *tail = NULL; if ( !load_magic(in) ) return LOAD_ERROR; version = (int)load_int(in); while((c=Sgetc(in)) != EOF) { switch(c) { case 'T': { triple *t; if ( !(t=load_triple(db, in, ctx)) ) return FALSE; if ( tail ) { tail->next[BY_NONE] = t; tail = t; } else { list = tail = t; } break; } /* file holding exactly one graph */ case 'S': /* name of the graph */ { ctx->graph = load_atom(db, in, ctx); break; } case 'M': /* MD5 of the graph */ { int i; for(i=0; i<16; i++) ctx->digest[i] = Sgetc(in); ctx->has_digest = TRUE; break; } case 'F': /* file of the graph */ ctx->graph_source = load_atom(db, in, ctx); break; /* end of one-graph handling */ case 't': load_double(in, &ctx->modified); break; case 'E': /* end of file */ return list; default: break; } } PL_warning("Illegal RDF triple file"); return LOAD_ERROR; } static int link_loaded_triples(rdf_db *db, triple *t, ld_context *ctx) { long created0 = db->created; graph *graph; if ( ctx->graph ) /* lookup named graph */ { graph = lookup_graph(db, ctx->graph, TRUE); if ( ctx->graph_source && graph->source != ctx->graph_source ) { if ( graph->source ) PL_unregister_atom(graph->source); graph->source = ctx->graph_source; PL_register_atom(graph->source); graph->modified = ctx->modified; } if ( ctx->has_digest ) { if ( graph->md5 ) { if ( db->tr_first ) { record_md5_transaction(db, graph, NULL); } else { graph->md5 = FALSE; /* kill repetitive MD5 update */ } } else { ctx->has_digest = FALSE; } } } else { graph = NULL; } if ( db->tr_first ) /* loading in a transaction */ { triple *next; for( ; t; t = next ) { next = t->next[BY_NONE]; t->next[BY_NONE] = NULL; lock_atoms(t); record_transaction(db, TR_ASSERT, t); } } else { triple *next; for( ; t; t = next ) { next = t->next[BY_NONE]; t->next[BY_NONE] = 
NULL; lock_atoms(t); if ( link_triple_silent(db, t) ) broadcast(EV_ASSERT_LOAD, t, NULL); } } /* update the graph info */ if ( ctx->has_digest ) { if ( db->tr_first ) { md5_byte_t *d = rdf_malloc(db, sizeof(ctx->digest)); memcpy(d, ctx->digest, sizeof(ctx->digest)); record_md5_transaction(db, graph, d); } else { sum_digest(graph->digest, ctx->digest); } graph->md5 = TRUE; } db->generation += (db->created-created0); return TRUE; } static int append_graph_to_list(ptr_hash_node *node, void *closure) { atom_t graph = (atom_t)node->value; term_t tail = (term_t)closure; term_t head = PL_new_term_ref(); int rc; rc = (PL_unify_list(tail, head, tail) && PL_unify_atom(head, graph)); PL_reset_term_refs(head); return rc; } static foreign_t rdf_load_db(term_t stream, term_t id, term_t graphs) { ld_context ctx; rdf_db *db = DB; IOSTREAM *in; triple *list; int rc; if ( !PL_get_stream_handle(stream, &in) ) return type_error(stream, "stream"); memset(&ctx, 0, sizeof(ctx)); if ( (list=load_db(db, in, &ctx)) == LOAD_ERROR ) return FALSE; if ( !WRLOCK(db, FALSE) ) return FALSE; broadcast(EV_LOAD, (void*)id, (void*)ATOM_begin); if ( (rc=link_loaded_triples(db, list, &ctx)) ) { if ( ctx.graph_table ) { term_t tail = PL_copy_term_ref(graphs); rc = ( for_atom_hash(ctx.graph_table, append_graph_to_list, (void*)tail) && PL_unify_nil(tail) ); destroy_atom_hash(ctx.graph_table); } else { rc = PL_unify_atom(graphs, ctx.graph); } } broadcast(EV_LOAD, (void*)id, (void*)ATOM_end); WRUNLOCK(db); PL_release_stream(in); if ( ctx.loaded_atoms ) { atom_t *ap, *ep; for(ap=ctx.loaded_atoms, ep=ap+ctx.loaded_id; apsubject, &len, NULL); md5_append(&state, (const md5_byte_t *)s, (int)len); md5_append(&state, (const md5_byte_t *)"P", 1); s = PL_blob_data(t->predicate.r->name, &len, NULL); md5_append(&state, (const md5_byte_t *)s, (int)len); tmp[0] = 'O'; if ( t->object_is_literal ) { lit = t->object.literal; tmp[1] = md5_type[lit->objtype]; switch(lit->objtype) { case OBJ_STRING: s = 
PL_blob_data(lit->value.string, &len, NULL); break; case OBJ_INTEGER: /* TBD: byte order issues */ s = (const char *)&lit->value.integer; len = sizeof(lit->value.integer); break; case OBJ_DOUBLE: s = (const char *)&lit->value.real; len = sizeof(lit->value.real); break; case OBJ_TERM: s = (const char *)lit->value.term.record; len = lit->value.term.len; break; default: assert(0); } } else { s = PL_blob_data(t->object.resource, &len, NULL); tmp[1] = 0x1; /* old OBJ_RESOURCE */ lit = NULL; } md5_append(&state, tmp, 2); md5_append(&state, (const md5_byte_t *)s, (int)len); if ( lit && lit->qualifier ) { assert(lit->type_or_lang); md5_append(&state, (const md5_byte_t *)(lit->qualifier == Q_LANG ? "l" : "t"), 1); s = PL_blob_data(lit->type_or_lang, &len, NULL); md5_append(&state, (const md5_byte_t *)s, (int)len); } if ( t->graph ) { md5_append(&state, (const md5_byte_t *)"S", 1); s = PL_blob_data(t->graph, &len, NULL); md5_append(&state, (const md5_byte_t *)s, (int)len); } md5_finish(&state, digest); } static void sum_digest(md5_byte_t *digest, md5_byte_t *add) { md5_byte_t *p, *q; int n; for(p=digest, q=add, n=16; --n>=0; ) *p++ += *q++; } static void dec_digest(md5_byte_t *digest, md5_byte_t *add) { md5_byte_t *p, *q; int n; for(p=digest, q=add, n=16; --n>=0; ) *p++ -= *q++; } static int md5_unify_digest(term_t t, md5_byte_t digest[16]) { char hex_output[16*2]; int di; char *pi; static char hexd[] = "0123456789abcdef"; for(pi=hex_output, di = 0; di < 16; ++di) { *pi++ = hexd[(digest[di] >> 4) & 0x0f]; *pi++ = hexd[digest[di] & 0x0f]; } return PL_unify_atom_nchars(t, 16*2, hex_output); } static foreign_t rdf_md5(term_t graph_name, term_t md5) { atom_t src; int rc; rdf_db *db = DB; if ( !get_atom_or_var_ex(graph_name, &src) ) return FALSE; if ( src ) { graph *s; if ( !RDLOCK(db) ) return FALSE; if ( (s = lookup_graph(db, src, FALSE)) ) { rc = md5_unify_digest(md5, s->digest); } else { md5_byte_t digest[16]; memset(digest, 0, sizeof(digest)); rc = md5_unify_digest(md5, 
digest); } RDUNLOCK(db); } else { md5_byte_t digest[16]; graph **ht; int i; memset(&digest, 0, sizeof(digest)); if ( !RDLOCK(db) ) return FALSE; for(i=0,ht = db->graph_table; igraph_table_size; i++, ht++) { graph *s; for( s = *ht; s; s = s->next ) sum_digest(digest, s->digest); } rc = md5_unify_digest(md5, digest); RDUNLOCK(db); } return rc; } static foreign_t rdf_atom_md5(term_t text, term_t times, term_t md5) { char *s; int n, i; size_t len; md5_byte_t digest[16]; if ( !PL_get_nchars(text, &len, &s, CVT_ALL) ) return type_error(text, "text"); if ( !PL_get_integer(times, &n) ) return type_error(times, "integer"); if ( n < 1 ) return domain_error(times, "positive_integer"); for(i=0; iatoms_locked ) { t->atoms_locked = TRUE; PL_register_atom(t->subject); if ( t->object_is_literal ) { lock_atoms_literal(t->object.literal); } else { PL_register_atom(t->object.resource); } } } static void unlock_atoms(triple *t) { if ( t->atoms_locked ) { t->atoms_locked = FALSE; PL_unregister_atom(t->subject); if ( !t->object_is_literal ) { PL_unregister_atom(t->object.resource); } } } /******************************* * PROLOG CONVERSION * *******************************/ #define LIT_TYPED 0x1 #define LIT_NOERROR 0x2 #define LIT_PARTIAL 0x4 static int get_lit_atom_ex(term_t t, atom_t *a, int flags) { if ( PL_get_atom(t, a) ) return TRUE; if ( (flags & LIT_PARTIAL) && PL_is_variable(t) ) { *a = 0L; return TRUE; } return type_error(t, "atom"); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - get_literal() processes the argument of a literal/1 term passes as object. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* get_literal() allocates the literal of `t' and fills it from `litt'.
   The tests are tried in this order: atom, integer, float, lang(L,Text),
   type(Type,Value), non-ground term, ground term.

   flags:
     LIT_TYPED    set while processing the value of a type(T,V) term, so
                  that a nested type/2 term is not unpacked again.
     LIT_PARTIAL  accept unbound parts (used for search patterns).

   Returns TRUE on success, otherwise the (false) result of the failing
   helper.
*/

static int
get_literal(rdf_db *db, term_t litt, triple *t, int flags)
{ literal *lit;

  alloc_literal_triple(db, t);
  lit = t->object.literal;

  if ( PL_get_atom(litt, &lit->value.string) )
  { lit->objtype = OBJ_STRING;
  } else if ( PL_is_integer(litt) && PL_get_int64(litt, &lit->value.integer) )
  { lit->objtype = OBJ_INTEGER;
  } else if ( PL_get_float(litt, &lit->value.real) )
  { lit->objtype = OBJ_DOUBLE;
  } else if ( PL_is_functor(litt, FUNCTOR_lang2) )
  { term_t a = PL_new_term_ref();

    _PL_get_arg(1, litt, a);            /* the language */
    if ( !get_lit_atom_ex(a, &lit->type_or_lang, flags) )
      return FALSE;
    _PL_get_arg(2, litt, a);            /* the text */
    if ( !get_lit_atom_ex(a, &lit->value.string, flags) )
      return FALSE;

    lit->qualifier = Q_LANG;
    lit->objtype = OBJ_STRING;
  } else if ( PL_is_functor(litt, FUNCTOR_type2) &&
              !(flags & LIT_TYPED) )    /* avoid recursion */
  { term_t a = PL_new_term_ref();

    _PL_get_arg(1, litt, a);            /* the type */
    if ( !get_lit_atom_ex(a, &lit->type_or_lang, flags) )
      return FALSE;
    lit->qualifier = Q_TYPE;
    _PL_get_arg(2, litt, a);            /* the value: processed recursively */

    return get_literal(db, a, t, LIT_TYPED|flags);
  } else if ( !PL_is_ground(litt) )
  { if ( !(flags & LIT_PARTIAL) )
      return type_error(litt, "rdf_object");
    if ( !PL_is_variable(litt) )        /* plain variable: objtype untouched */
      lit->objtype = OBJ_TERM;
  } else
  { lit->value.term.record = PL_record_external(litt, &lit->value.term.len);
    lit->objtype = OBJ_TERM;
  }

  return TRUE;
}


/* get_object() fills the object of `t' from `object', which must be an
   atom (resource) or a literal(Value) term.  Anything else is reported
   through type_error().
*/

static int
get_object(rdf_db *db, term_t object, triple *t)
{ if ( PL_get_atom(object, &t->object.resource) )
  { assert(!t->object_is_literal);
  } else if ( PL_is_functor(object, FUNCTOR_literal1) )
  { term_t a = PL_new_term_ref();

    _PL_get_arg(1, object, a);
    return get_literal(db, a, t, 0);
  } else
    return type_error(object, "rdf_object");

  return TRUE;
}


/* get_src() fills t->graph and t->line from `src' for matching purposes.
   A 0 handle or an unbound `src' leaves `t' untouched.  Accepted are an
   atom (line becomes NO_LINE) and Graph:Line, where both parts may be
   unbound; t->line is only written if Line is an integer.
*/

static int
get_src(term_t src, triple *t)
{ if ( src && !PL_is_variable(src) )
  { if ( PL_get_atom(src, &t->graph) )
    { t->line = NO_LINE;
    } else if ( PL_is_functor(src, FUNCTOR_colon2) )
    { term_t a = PL_new_term_ref();
      long line;

      _PL_get_arg(1, src, a);
      if ( !get_atom_or_var_ex(a, &t->graph) )
        return FALSE;
      _PL_get_arg(2, src, a);
      if ( PL_get_long(a, &line) )
        t->line = line;
      else if ( !PL_is_variable(a) )
        return type_error(a, "integer");
    } else
      return type_error(src, "rdf_graph");
  }

  return TRUE;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Return values:

        -1: exception
         0: no predicate
         1: the predicate

NOTE(review): the exception case returns the result of type_error(); that
this equals -1 cannot be confirmed from this part of the file.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
get_existing_predicate(rdf_db *db, term_t t, predicate **p)
{ atom_t name;

  if ( !PL_get_atom(t, &name ) )
  { if ( PL_is_functor(t, FUNCTOR_literal1) )
      return 0;                         /* rdf(_, literal(_), _) */
    return type_error(t, "atom");
  }

  if ( (*p = existing_predicate(db, name)) )
    return 1;

  DEBUG(5, Sdprintf("No predicate %s\n", PL_atom_chars(name)));
  return 0;                             /* no predicate */
}


/* get_predicate() requires `t' to be an atom and stores the result of
   lookup_predicate() for that name in *p.
*/

static int
get_predicate(rdf_db *db, term_t t, predicate **p)
{ atom_t name;

  if ( !get_atom_ex(t, &name ) )
    return FALSE;

  *p = lookup_predicate(db, name);
  return TRUE;
}


/* get_triple() fills subject, predicate and object of `t' from fully
   specified Prolog terms.  Returns FALSE as soon as one part fails.
*/

static int
get_triple(rdf_db *db,
           term_t subject, term_t predicate, term_t object,
           triple *t)
{ if ( !get_atom_ex(subject, &t->subject) ||
       !get_predicate(db, predicate, &t->predicate.r) ||
       !get_object(db, object, t) )
    return FALSE;

  return TRUE;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
get_partial_triple() creates a triple  for   matching  purposes.  It can
return FALSE for  two  reasons.  Mostly   (type)  errors,  but  also  if
resources are accessed that do not   exist  and therefore the subsequent
matching will always fail. This is  notably   the  case  for predicates,
which are first class citizens to this library.
Return values:

        1: ok
        0: no predicate
       -1: error

NOTE(review): the error paths below return FALSE or the result of
type_error()/domain_error(); no path visible here returns -1 explicitly.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
get_partial_triple(rdf_db *db,
                   term_t subject, term_t predicate, term_t object,
                   term_t src, triple *t)
{ int rc;

  if ( subject && !get_resource_or_var_ex(subject, &t->subject) )
    return FALSE;
  if ( !PL_is_variable(predicate) &&
       (rc=get_existing_predicate(db, predicate, &t->predicate.r)) != 1 )
    return rc;
                                        /* the object */
  if ( object && !PL_is_variable(object) )
  { if ( PL_get_atom(object, &t->object.resource) )
    { assert(!t->object_is_literal);
    } else if ( PL_is_functor(object, FUNCTOR_literal1) )
    { term_t a = PL_new_term_ref();

      _PL_get_arg(1, object, a);
      if ( !get_literal(db, a, t, LIT_PARTIAL) )
        return FALSE;
    } else if ( PL_is_functor(object, FUNCTOR_literal2) )
    { term_t a = PL_new_term_ref();     /* literal(MatchSpec, Value) */
      literal *lit;

      alloc_literal_triple(db, t);
      lit = t->object.literal;

      _PL_get_arg(1, object, a);
      if ( PL_is_functor(a, FUNCTOR_exact1) )
        t->match = STR_MATCH_EXACT;
      else if ( PL_is_functor(a, FUNCTOR_plain1) )
        t->match = STR_MATCH_PLAIN;
      else if ( PL_is_functor(a, FUNCTOR_substring1) )
        t->match = STR_MATCH_SUBSTRING;
      else if ( PL_is_functor(a, FUNCTOR_word1) )
        t->match = STR_MATCH_WORD;
      else if ( PL_is_functor(a, FUNCTOR_prefix1) )
        t->match = STR_MATCH_PREFIX;
      else if ( PL_is_functor(a, FUNCTOR_like1) )
        t->match = STR_MATCH_LIKE;
      else
        return domain_error(a, "match_type");

      _PL_get_arg(1, a, a);             /* the text to match against */
      if ( !get_atom_or_var_ex(a, &lit->value.string) )
        return FALSE;
      lit->objtype = OBJ_STRING;
    } else
      return type_error(object, "rdf_object");
  }
                                        /* the graph */
  if ( !get_src(src, t) )
    return FALSE;
                                        /* collect the known fields */
  if ( t->subject )
    t->indexed |= BY_S;
  if ( t->predicate.r )
    t->indexed |= BY_P;
  if ( t->object_is_literal )
  { literal *lit = t->object.literal;

    if ( lit->objtype == OBJ_STRING &&
         lit->value.string &&
         t->match <= STR_MATCH_EXACT )
      t->indexed |= BY_O;
  } else if ( t->object.resource )
    t->indexed |= BY_O;

  db->indexed[t->indexed]++;            /* statistics */

  switch(t->indexed)                    /* map onto a narrower index */
  { case BY_SPO:
      t->indexed = BY_SP;
      break;
    case BY_SO:
      t->indexed = BY_S;
      break;
  }

  return TRUE;
}


/* inverse_partial_triple() turns pattern `t' into its inverse: subject
   and object are swapped, a known predicate is replaced by its
   inverse_of and t->indexed is mapped through by_inverse[].  This is
   done, and TRUE returned, only if `t' was not inverted before, the
   predicate is unknown or has an inverse, and the object is not a
   literal.
*/

static int
inverse_partial_triple(triple *t)
{ predicate *i = 0;

  if ( !t->inversed &&
       (!t->predicate.r || (i=t->predicate.r->inverse_of)) &&
       !t->object_is_literal )
  { atom_t o = t->object.resource;

    t->object.resource = t->subject;
    t->subject = o;

    if ( t->predicate.r )
      t->predicate.r = i;

    t->indexed = by_inverse[t->indexed];
    t->inversed = TRUE;

    return TRUE;
  }

  return FALSE;
}


/* get_graph() is the strict variant of get_src(): `src' must be an atom
   (line becomes NO_LINE) or Graph:Line with an atom and an integer.
*/

static int
get_graph(term_t src, triple *t)
{ if ( PL_get_atom(src, &t->graph) )
  { t->line = NO_LINE;
    return TRUE;
  }

  if ( PL_is_functor(src, FUNCTOR_colon2) )
  { term_t a = PL_new_term_ref();
    long line;

    _PL_get_arg(1, src, a);
    if ( !get_atom_ex(a, &t->graph) )
      return FALSE;
    _PL_get_arg(2, src, a);
    if ( !get_long_ex(a, &line) )
      return FALSE;
    t->line = line;

    return TRUE;
  }

  return type_error(src, "rdf_graph");
}


/* unify_graph() unifies `src' with the graph of `t'.  An unbound `src'
   becomes Graph, or Graph:Line if the triple has a line.  An atom must
   be equal to the graph.  A compound is unified with Graph:_ or
   Graph:Line.  The PL_VARIABLE case jumps into the PL_TERM case through
   the label `full_term'.
*/

static int
unify_graph(term_t src, triple *t)
{ switch( PL_term_type(src) )
  { case PL_VARIABLE:
    { if ( t->line == NO_LINE )
        return PL_unify_atom(src, t->graph);
      else
        goto full_term;
    }
    case PL_ATOM:
    { atom_t a;
      return (PL_get_atom(src, &a) &&
              a == t->graph);
    }
    case PL_TERM:
    { if ( t->line == NO_LINE )
      { return PL_unify_term(src,
                             PL_FUNCTOR, FUNCTOR_colon2,
                               PL_ATOM, t->graph,
                               PL_VARIABLE);
      } else
      { full_term:
        return PL_unify_term(src,
                             PL_FUNCTOR, FUNCTOR_colon2,
                               PL_ATOM, t->graph,
                               PL_LONG, t->line);
      }
    }
    default:
      return type_error(src, "rdf_graph");
  }
}


/* same_graph() is true if both triples have the same graph and line. */

static int
same_graph(triple *t1, triple *t2)
{ return t1->line == t2->line &&
         t1->graph == t2->graph;
}


/* put_literal_value() stores the plain value of `lit', without its
   language or type qualifier, in the term reference `v'.
*/

static int
put_literal_value(term_t v, literal *lit)
{ switch(lit->objtype)
  { case OBJ_STRING:
      PL_put_atom(v, lit->value.string);
      break;
    case OBJ_INTEGER:
      PL_put_variable(v);
      return PL_unify_int64(v, lit->value.integer);
    case OBJ_DOUBLE:
      return PL_put_float(v, lit->value.real);
    case OBJ_TERM:
      return PL_recorded_external(lit->value.term.record, v);
    default:
      assert(0);
      return FALSE;
  }

  return TRUE;
}


static int
unify_literal(term_t lit, literal *l)
{
term_t v = PL_new_term_ref();

  if ( !put_literal_value(v, l) )
    return FALSE;

  if ( l->qualifier )
  { functor_t qf;

    assert(l->type_or_lang);

    if ( l->qualifier == Q_LANG )
      qf = FUNCTOR_lang2;
    else
      qf = FUNCTOR_type2;
                                        /* first try lang(L,V) / type(T,V) */
    if ( PL_unify_term(lit, PL_FUNCTOR, qf,
                         PL_ATOM, l->type_or_lang,
                         PL_TERM, v) )
      return TRUE;

    return PL_unify(lit, v);            /* allow rdf(X, Y, literal(foo)) */
  } else if ( PL_unify(lit, v) )
  { return TRUE;
  } else if ( PL_is_functor(lit, FUNCTOR_lang2) && l->objtype == OBJ_STRING )
  { term_t a = PL_new_term_ref();       /* unqualified: match the text only */
    _PL_get_arg(2, lit, a);
    return PL_unify(a, v);
  } else if ( PL_is_functor(lit, FUNCTOR_type2) )
  { term_t a = PL_new_term_ref();       /* unqualified: match the value only */
    _PL_get_arg(2, lit, a);
    return PL_unify(a, v);
  } else
    return FALSE;
}


/* unify_object() unifies `object' with the object of `t'.  A literal
   object is unified with the argument of literal(Value), or with the
   second argument if the caller passed literal(MatchSpec, Value).  A
   resource object is unified as an atom.
*/

static int
unify_object(term_t object, triple *t)
{ if ( t->object_is_literal )
  { term_t lit = PL_new_term_ref();

    if ( PL_unify_functor(object, FUNCTOR_literal1) )
      _PL_get_arg(1, object, lit);
    else if ( PL_is_functor(object, FUNCTOR_literal2) )
      _PL_get_arg(2, object, lit);
    else
      return FALSE;

    return unify_literal(lit, t->object.literal);
  } else
  { return PL_unify_atom(object, t->object.resource);
  }
}


/* unify_triple() unifies the four terms with triple `t'.  If `inversed'
   is set, subject and object are swapped and the inverse of the
   predicate of `t' is used; there must be such an inverse.  `src' may be
   0, in which case the graph is not unified.  The unifications run
   inside a foreign frame that is discarded if any of them fails.
*/

static int
unify_triple(term_t subject, term_t pred, term_t object,
             term_t src, triple *t, int inversed)
{ predicate *p = t->predicate.r;
  fid_t fid;

  if ( inversed )
  { term_t tmp = object;
    object = subject;
    subject = tmp;

    if ( !(p = p->inverse_of) )
      return FALSE;
  }

  fid = PL_open_foreign_frame();

  if ( !PL_unify_atom(subject, t->subject) ||
       !PL_unify_atom(pred, p->name) ||
       !unify_object(object, t) ||
       (src && !unify_graph(src, t)) )
  { PL_discard_foreign_frame(fid);
    return FALSE;
  } else
  { PL_close_foreign_frame(fid);
    return TRUE;
  }
}


                 /*******************************
                 *      DUPLICATE HANDLING      *
                 *******************************/

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
According to the RDF specs, duplicate triples  have no meaning, but they
slow down search and often produce duplicate results in search.
Worse, some coding styles proposed in the OWL documents introduce huge amounts of duplicate triples. We cannot simply ignore a triple if it is a duplicate as a subsequent retract would delete the final triple. For example, after loading two files that contain the same triple and unloading one of these files the database would be left without triples. In our solution, if a triple is added as a duplicate, it is flagged as such using the flag is_duplicate. The `principal' triple has a count `duplicates', indicating the number of duplicate triples in the database. It might make sense to introduce the BY_SPO table as fully indexed lookups are frequent with the introduction of duplicate detection. (*) If too many triples are added, it may be time to enlarge the hashtable. Note that we do not call update_hash() blindly as this would cause each triple that modifies the predicate hierarchy to force a rehash. As we are not searching using subPropertyOf semantics during the duplicate update, there is no point updating. If it is incorrect it will be updated on the first real query. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* update_duplicates_add() walks the BY_SP chain of `t' up to `t' itself.
   If a matching triple is found, `t' is flagged as duplicate, the count
   of the matching (principal) triple and of the database are
   incremented and TRUE is returned.  Otherwise FALSE is returned.
*/

static int
update_duplicates_add(rdf_db *db, triple *t)
{ const int indexed = BY_SP;
  triple *cand;

  assert(t->is_duplicate == FALSE);
  assert(t->duplicates == 0);

  if ( WANT_GC(db) )                    /* (*) See above */
    update_hash(db);

  for( cand = db->table[indexed][triple_hash(db, t, indexed)];
       cand && cand != t;
       cand = cand->next[indexed] )
  { if ( !match_triples(cand, t, MATCH_DUPLICATE) )
      continue;

    t->is_duplicate = TRUE;
    assert( !cand->is_duplicate );

    cand->duplicates++;

    DEBUG(2,
          print_triple(t, PRT_SRC);
          Sdprintf(" %p: %d-th duplicate: ", t, cand->duplicates);
          Sdprintf("Principal: %p at", cand);
          print_src(cand);
          Sdprintf("\n"));

    assert(cand->duplicates);           /* check overflow */
    db->duplicates++;

    return TRUE;
  }

  return FALSE;
}


/* update_duplicates_del() is called when `t' is about to be deleted.
   If `t' is a principal triple, the first other matching triple in the
   BY_SP chain becomes the new principal and inherits the remaining
   count.  If `t' is a duplicate, the count of its principal is
   decremented.  In both cases the database count is decremented.
*/

static void                             /* t is about to be deleted */
update_duplicates_del(rdf_db *db, triple *t)
{ const int indexed = BY_SP;
  triple *other;

  if ( t->duplicates )                  /* I am the principal one */
  { DEBUG(2,
          print_triple(t, PRT_SRC);
          Sdprintf(": DEL principal %p, %d duplicates: ", t, t->duplicates));

    db->duplicates--;

    for( other = db->table[indexed][triple_hash(db, t, indexed)];
         other;
         other = other->next[indexed] )
    { if ( other == t || !match_triples(other, t, MATCH_DUPLICATE) )
        continue;

      assert(other->is_duplicate);
      other->is_duplicate = FALSE;
      other->duplicates = t->duplicates-1;

      DEBUG(2,
            Sdprintf("New principal: %p at", other);
            print_src(other);
            Sdprintf("\n"));

      return;
    }

    assert(0);                          /* a principal must have a duplicate */
    return;
  }

  if ( !t->is_duplicate )               /* nothing to maintain */
    return;

                                        /* I am a duplicate */
  DEBUG(2,
        print_triple(t, PRT_SRC);
        Sdprintf(": DEL: is a duplicate: "));

  db->duplicates--;

  for( other = db->table[indexed][triple_hash(db, t, indexed)];
       other;
       other = other->next[indexed] )
  { if ( other == t || !match_triples(other, t, MATCH_DUPLICATE) )
      continue;
    if ( !other->duplicates )           /* not the principal */
      continue;

    other->duplicates--;

    DEBUG(2,
          Sdprintf("Principal %p at ", other);
          print_src(other);
          Sdprintf(" has %d duplicates\n", other->duplicates));

    return;
  }

  Sdprintf("FATAL\n");                  /* no principal found */
  PL_halt(1);
  assert(0);
}


                 /*******************************
                 *          TRANSACTIONS        *
                 *******************************/

static void
append_transaction(rdf_db *db, transaction_record *tr) { if ( db->tr_last ) { tr->next = NULL; tr->previous = db->tr_last; db->tr_last->next = tr; db->tr_last = tr; } else { tr->next = tr->previous = NULL; db->tr_first = db->tr_last = tr; } } static void open_transaction(rdf_db *db) { transaction_record *tr = rdf_malloc(db, sizeof(*tr)); memset(tr, 0, sizeof(*tr)); tr->type = TR_MARK; if ( db->tr_first ) db->tr_nesting++; else db->tr_nesting = 0; append_transaction(db, tr); } static void record_transaction(rdf_db *db, tr_type type, triple *t) { transaction_record *tr = rdf_malloc(db, sizeof(*tr)); memset(tr, 0, sizeof(*tr)); tr->type = type; tr->triple = t; append_transaction(db, tr); } static void record_md5_transaction(rdf_db *db, graph *src, md5_byte_t *digest) { transaction_record *tr = rdf_malloc(db, sizeof(*tr)); memset(tr, 0, sizeof(*tr)); tr->type = TR_UPDATE_MD5, tr->update.md5.graph = src; tr->update.md5.digest = digest; append_transaction(db, tr); } static void record_update_transaction(rdf_db *db, triple *t, triple *new) { transaction_record *tr = rdf_malloc(db, sizeof(*tr)); memset(tr, 0, sizeof(*tr)); tr->type = TR_UPDATE, tr->triple = t; tr->update.triple = new; append_transaction(db, tr); } static void record_update_src_transaction(rdf_db *db, triple *t, atom_t src, unsigned long line) { transaction_record *tr = rdf_malloc(db, sizeof(*tr)); memset(tr, 0, sizeof(*tr)); tr->type = TR_UPDATE_SRC, tr->triple = t; tr->update.src.atom = src; tr->update.src.line = line; append_transaction(db, tr); } static void void_transaction(rdf_db *db, transaction_record *tr) { switch(tr->type) { case TR_ASSERT: free_triple(db, tr->triple); break; case TR_UPDATE: free_triple(db, tr->update.triple); break; case TR_UPDATE_MD5: if ( tr->update.md5.digest ) rdf_free(db, tr->update.md5.digest, sizeof(*tr->update.md5.digest)); break; default: break; } tr->type = TR_VOID; } static void free_transaction(rdf_db *db, transaction_record *tr) { void_transaction(db, tr); 
rdf_free(db, tr, sizeof(*tr)); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - This must deal with multiple operations on the same triple. Most probably the most important thing is to merge update records. We must also make-up our mind with regard to updated records that are erased or records that are erased after updating, etc. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void clean_transaction(rdf_db *db, transaction_record *tr0) { #if 0 transaction_record *tr; for(tr=tr0; tr; tr=tr->next) { if ( TR_RETRACT ) { transaction_record *tr2; for(tr2=tr->next; tr2; tr2=tr2->next) { if ( tr2->triple == tr->triple ) { switch(tr2->type) { case TR_RETRACT: case TR_UPDATE: case TR_UPDATE_SRC: void_transaction(db, tr2); default: ; } } } } } #endif } static void truncate_transaction(rdf_db *db, transaction_record *last) { db->tr_last = last; if ( last ) { db->tr_last->next = NULL; } else { db->tr_first = NULL; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - discard_transaction() simply destroys all actions in the last transaction. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void discard_transaction(rdf_db *db) { transaction_record *tr, *prev; for(tr=db->tr_last; tr; tr = prev) { prev = tr->previous; if ( tr->type == TR_SUB_END ) { if ( tr->update.transaction_id ) PL_erase(tr->update.transaction_id); } if ( tr->type == TR_MARK ) { rdf_free(db, tr, sizeof(*tr)); truncate_transaction(db, prev); db->tr_nesting--; return; } free_transaction(db, tr); } } int put_begin_end(term_t t, functor_t be, int level) { term_t av; return ( (av = PL_new_term_ref()) && PL_put_integer(av, level) && PL_cons_functor_v(t, be, av) ); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Note (*) rdf-monitors can modify the database by opening new transactions. Therefore we first close the transaction to allow opening new ones. 
TBD: get this clear. Monitors have only restricted read access? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int commit_transaction_int(rdf_db *db, term_t id) { transaction_record *tr, *next; int tr_level = 0; /* nesting level */ if ( db->tr_nesting > 0 ) /* commit nested transaction */ { tr=db->tr_last; if ( tr->type == TR_MARK ) /* empty nested transaction */ { truncate_transaction(db, tr->previous); rdf_free(db, tr, sizeof(*tr)); db->tr_nesting--; return TRUE; } for(; tr; tr = tr->previous) /* not the last (tested above) */ { /* not the first (we are nested) */ if ( tr->type == TR_MARK ) { transaction_record *end = rdf_malloc(db, sizeof(*end)); memset(end, 0, sizeof(*end)); end->type = TR_SUB_END; end->update.transaction_id = PL_record(id); append_transaction(db, end); tr->type = TR_SUB_START; tr->update.transaction_id = end->update.transaction_id; db->tr_nesting--; return TRUE; } } assert(0); return FALSE; } while( (tr=db->tr_first) ) /* See above (*) */ { db->tr_first = db->tr_last = NULL; clean_transaction(db, tr); /* real commit */ for(; tr; tr = next) { next = tr->next; switch(tr->type) { case TR_MARK: break; case TR_SUB_START: { term_t id = PL_new_term_ref(); term_t be = PL_new_term_ref(); if ( !PL_recorded(tr->update.transaction_id, id) || !put_begin_end(be, FUNCTOR_begin1, ++tr_level) || !broadcast(EV_TRANSACTION, (void*)id, (void*)be) ) return FALSE; break; } case TR_SUB_END: { term_t id = PL_new_term_ref(); term_t be = PL_new_term_ref(); if ( !PL_recorded(tr->update.transaction_id, id) ) return FALSE; PL_erase(tr->update.transaction_id); if ( !put_begin_end(be, FUNCTOR_end1, tr_level--) || !broadcast(EV_TRANSACTION, (void*)id, (void*)be) ) return FALSE; break; } case TR_ASSERT: link_triple(db, tr->triple); db->generation++; break; case TR_RETRACT: if ( !tr->triple->erased ) /* already erased */ { erase_triple(db, tr->triple); db->generation++; } break; case TR_UPDATE: if ( !tr->triple->erased ) { if ( 
!broadcast(EV_UPDATE, tr->triple, tr->update.triple) ) return FALSE; /* TBD: how to handle? */ if ( !tr->triple->erased ) { erase_triple_silent(db, tr->triple); link_triple_silent(db, tr->update.triple); db->generation++; } } break; case TR_UPDATE_SRC: if ( !tr->triple->erased ) { if ( tr->triple->graph != tr->update.src.atom ) { if ( tr->triple->graph ) unregister_graph(db, tr->triple); tr->triple->graph = tr->update.src.atom; if ( tr->triple->graph ) register_graph(db, tr->triple); } tr->triple->line = tr->update.src.line; db->generation++; } break; case TR_UPDATE_MD5: { graph *src = tr->update.md5.graph; md5_byte_t *digest = tr->update.md5.digest; if ( digest ) { sum_digest(digest, src->digest); src->md5 = TRUE; rdf_free(db, digest, sizeof(md5_byte_t)*16); } else { src->md5 = FALSE; } break; } case TR_RESET: db->tr_reset = FALSE; reset_db(db); break; case TR_VOID: break; default: assert(0); } rdf_free(db, tr, sizeof(*tr)); } } return TRUE; } static int commit_transaction(rdf_db *db, term_t id) { int rc; db->gc_blocked++; rc = commit_transaction_int(db, id); db->gc_blocked--; return rc; } static foreign_t rdf_transaction(term_t goal, term_t id) { int rc; rdf_db *db = DB; active_transaction me; if ( !WRLOCK(db, TRUE) ) return FALSE; open_transaction(db); me.parent = db->tr_active; me.id = id; db->tr_active = &me; rc = PL_call_predicate(NULL, PL_Q_PASS_EXCEPTION, PRED_call1, goal); if ( rc ) { int empty = (db->tr_last == NULL || db->tr_last->type == TR_MARK); if ( empty || db->tr_nesting > 0 ) { commit_transaction(db, id); } else { term_t be; if ( !(be=PL_new_term_ref()) || !put_begin_end(be, FUNCTOR_begin1, 0) || !broadcast(EV_TRANSACTION, (void*)id, (void*)be) || !put_begin_end(be, FUNCTOR_end1, 0) ) return FALSE; if ( !LOCKOUT_READERS(db) ) /* interrupt, timeout */ { broadcast(EV_TRANSACTION, (void*)id, (void*)be); rc = FALSE; goto discard; } commit_transaction(db, id); REALLOW_READERS(db); if ( !broadcast(EV_TRANSACTION, (void*)id, (void*)be) ) return FALSE; } 
} else { discard: discard_transaction(db); } db->tr_active = me.parent; WRUNLOCK(db); return rc; } static foreign_t rdf_active_transactions(term_t list) { rdf_db *db = DB; term_t tail = PL_copy_term_ref(list); term_t head = PL_new_term_ref(); active_transaction *ot; for(ot = db->tr_active; ot; ot=ot->parent) { if ( !PL_unify_list(tail, head, tail) || !PL_unify(head, ot->id) ) return FALSE; } return PL_unify_nil(tail); } /******************************* * PREDICATES * *******************************/ static foreign_t rdf_assert4(term_t subject, term_t predicate, term_t object, term_t src) { rdf_db *db = DB; triple *t = new_triple(db); if ( !get_triple(db, subject, predicate, object, t) ) { free_triple(db, t); return FALSE; } if ( src ) { if ( !get_graph(src, t) ) { free_triple(db, t); return FALSE; } } else { t->graph = ATOM_user; t->line = NO_LINE; } lock_atoms(t); if ( !WRLOCK(db, FALSE) ) { free_triple(db, t); return FALSE; } if ( db->tr_first ) { record_transaction(db, TR_ASSERT, t); } else { link_triple(db, t); db->generation++; } WRUNLOCK(db); return TRUE; } static foreign_t rdf_assert3(term_t subject, term_t predicate, term_t object) { return rdf_assert4(subject, predicate, object, 0); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - inc_active_queries(rdf_db *db); dec_active_queries(rdf_db *db); TBD: Either delete this or use atomic inc/dec. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static void inc_active_queries(rdf_db *db) { LOCK_MISC(db); db->active_queries++; UNLOCK_MISC(db); } static void dec_active_queries(rdf_db *db) { LOCK_MISC(db); db->active_queries--; assert(db->active_queries>=0); UNLOCK_MISC(db); } typedef struct search_state { rdf_db *db; /* our database */ term_t subject; /* Prolog term references */ term_t object; term_t predicate; term_t src; term_t realpred; unsigned locked : 1; /* State has been locked */ unsigned allocated : 1; /* State has been allocated */ unsigned flags; /* Misc flags controlling search */ atom_t prefix; /* prefix and like search */ avl_enum *literal_state; /* Literal search state */ literal *literal_cursor; /* pointer in current literal */ literal_ex lit_ex; /* extended literal for fast compare */ triple *cursor; /* Pointer in triple DB */ triple pattern; /* Pattern triple */ } search_state; static void free_search_state(search_state *state); static void init_cursor_from_literal(search_state *state, literal *cursor) { triple *p = &state->pattern; unsigned long iv; int i; DEBUG(3, Sdprintf("Trying literal search for "); print_literal(cursor); Sdprintf("\n")); p->indexed |= BY_O; p->indexed &= ~BY_S; /* we do not have index BY_SO */ switch(p->indexed) { case BY_O: iv = literal_hash(cursor); break; case BY_OP: iv = predicate_hash(p->predicate.r) ^ literal_hash(cursor); break; default: iv = 0; /* make compiler silent */ assert(0); } i = (int)(iv % (long)state->db->table_size[p->indexed]); state->cursor = state->db->table[p->indexed][i]; state->literal_cursor = cursor; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (*) update_hash() is there to update the hash after a change to the predicate organization. If we do not have a predicate or we do not search using rdf_has/3, this is not needed. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* init_search_state() prepares `state' for the first call of
   next_search_state(): it builds the pattern triple from the Prolog
   arguments, takes the read lock and selects the first hash chain.

   Returns TRUE on success.  On failure everything acquired so far has
   been released: the pattern only while the lock is not yet held, the
   whole state through free_search_state() afterwards.
*/

static int
init_search_state(search_state *state)
{ triple *p = &state->pattern;

  if ( get_partial_triple(state->db,
                          state->subject, state->predicate, state->object,
                          state->src, p) != TRUE )
  { free_triple(state->db, p);
    return FALSE;
  }

  if ( !RDLOCK(state->db) )
  { free_triple(state->db, p);
    return FALSE;
  }
  state->locked = TRUE;
  if ( p->predicate.r && (state->flags & MATCH_SUBPROPERTY) ) /* See (*) */
  { if ( !update_hash(state->db) )
    { free_search_state(state);
      return FALSE;
    }
  }

                                        /* prefix/like search that is not */
                                        /* indexed BY_SP: enumerate the */
                                        /* literals starting at the prefix */
  if ( (p->match == STR_MATCH_PREFIX || p->match == STR_MATCH_LIKE) &&
       p->indexed != BY_SP &&
       (state->prefix = first_atom(p->object.literal->value.string, p->match)))
  { literal lit;
    literal **rlitp;

    lit = *p->object.literal;
    lit.value.string = state->prefix;
    state->literal_state = rdf_malloc(state->db,
                                      sizeof(*state->literal_state));
                                        /* NOTE(review): `lit' is a local, */
                                        /* so lit_ex.literal is only valid */
                                        /* until this function returns */
    state->lit_ex.literal = &lit;
    prepare_literal_ex(&state->lit_ex);
    rlitp = avlfindfirst(&state->db->literals, &state->lit_ex, state->literal_state);
    if ( rlitp )
    { init_cursor_from_literal(state, *rlitp);
    } else
    { free_search_state(state);
      return FALSE;
    }
  } else
  { state->cursor = state->db->table[p->indexed]
                                    [triple_hash(state->db, p, p->indexed)];
  }

  return TRUE;
}


/* free_search_state() releases everything held by `state': the read lock
   if it was taken, the pattern triple, the registration of the prefix
   atom and the literal enumeration state.  If the state was copied to
   the heap by allow_retry_state(), the active query count is decremented
   and the copy itself is freed.
*/

static void
free_search_state(search_state *state)
{ if ( state->locked )
  { RDUNLOCK(state->db);
  }

  free_triple(state->db, &state->pattern);
  if ( state->prefix )
    PL_unregister_atom(state->prefix);
  if ( state->literal_state )
    rdf_free(state->db, state->literal_state, sizeof(*state->literal_state));
  if ( state->allocated )               /* also means redo! */
  { dec_active_queries(state->db);
    rdf_free(state->db, state, sizeof(*state));
  }
}


/* allow_retry_state() hands `state' to Prolog as the context for a redo.
   A state that is not yet heap-allocated is copied first and the active
   query count is incremented.  There is no explicit return statement;
   PL_retry_address() is expected to return from this function.
*/

static foreign_t
allow_retry_state(search_state *state)
{ if ( !state->allocated )
  { search_state *copy = rdf_malloc(state->db, sizeof(*copy));
    *copy = *state;
    copy->allocated = TRUE;

    inc_active_queries(state->db);

    state = copy;
  }

  PL_retry_address(state);
}


/* TBD: simplify.
Maybe split for resource and literal search, as both
   involve mutual exclusive complications to this routine,
*/

/* next_search_state() unifies the Prolog arguments of `state' with the
   next matching triple, starting at state->cursor.

   After a successful unification it scans ahead for another candidate:
   if one exists it is stored in state->cursor, otherwise state->cursor
   is set to NULL.  When a chain is exhausted the pattern is inverted
   once if MATCH_INVERSE is set, and a literal enumeration moves on to
   the next literal.

   Returns TRUE if a triple was unified and FALSE otherwise.
*/

static int
next_search_state(search_state *state)
{ triple *t = state->cursor;
  triple *p = &state->pattern;

retry:
  for( ; t; t = t->next[p->indexed])
  { if ( t->is_duplicate && !state->src )
      continue;
                                        /* hash-collision, skip */
    if ( state->literal_state )
    { if ( !(t->object_is_literal &&
             t->object.literal == state->literal_cursor) )
        continue;
    }

    if ( match_triples(t, p, state->flags) )
    { term_t retpred = state->realpred ? state->realpred : state->predicate;
      if ( !unify_triple(state->subject, retpred, state->object,
                         state->src, t, p->inversed) )
        continue;
      if ( state->realpred && PL_is_variable(state->predicate) )
      { if ( !PL_unify(state->predicate, retpred) )
          return FALSE;
      }

                                        /* look ahead for a next candidate */
      t=t->next[p->indexed];
    inv_alt:
      for(; t; t = t->next[p->indexed])
      { if ( state->literal_state )
        { if ( !(t->object_is_literal &&
                 t->object.literal == state->literal_cursor) )
            continue;
        }

        if ( match_triples(t, p, state->flags) )
        { state->cursor = t;

          return TRUE;                  /* non-deterministic */
        }
      }

                                        /* none left: try the inverse */
      if ( (state->flags & MATCH_INVERSE) && inverse_partial_triple(p) )
      { t = state->db->table[p->indexed][triple_hash(state->db, p, p->indexed)];
        goto inv_alt;
      }

      state->cursor = NULL;             /* deterministic */
      return TRUE;
    }
  }

                                        /* chain exhausted without a result */
  if ( (state->flags & MATCH_INVERSE) && inverse_partial_triple(p) )
  { t = state->db->table[p->indexed][triple_hash(state->db, p, p->indexed)];
    goto retry;
  }

  if ( state->literal_state )           /* move on to the next literal */
  { literal **litp;

    if ( (litp = avlfindnext(state->literal_state)) )
    { if ( state->prefix )
      { literal *lit = *litp;
        if ( !match_atoms(STR_MATCH_PREFIX, state->prefix, lit->value.string) )
        { DEBUG(1,
                Sdprintf("Terminated literal iteration from ");
                print_literal(lit);
                Sdprintf("\n"));
          return FALSE;                 /* no longer a prefix */
        }
      }
      init_cursor_from_literal(state, *litp);
      t = state->cursor;

      goto retry;
    }
  }

  return FALSE;
}


static foreign_t
rdf(term_t subject, term_t predicate, term_t object,
    term_t src, term_t realpred,
control_t h, unsigned flags) { rdf_db *db = DB; search_state *state; switch(PL_foreign_control(h)) { case PL_FIRST_CALL: { search_state buf; state = &buf; memset(state, 0, sizeof(*state)); state->db = db; state->subject = subject; state->object = object; state->predicate = predicate; state->src = src; state->realpred = realpred; state->flags = flags; if ( !init_search_state(state) ) return FALSE; goto search; } case PL_REDO: { int rc; state = PL_foreign_context_address(h); assert(state->subject == subject); search: if ( (rc=next_search_state(state)) ) { if ( state->cursor || state->literal_state ) return allow_retry_state(state); } free_search_state(state); return rc; } case PL_CUTTED: { search_state *state = PL_foreign_context_address(h); free_search_state(state); return TRUE; } default: assert(0); return FALSE; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rdf(Subject, Predicate, Object) Search specifications: Predicate: subPropertyOf(X) = P Object: literal(substring(X), L) literal(word(X), L) literal(exact(X), L) literal(prefix(X), L) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static foreign_t rdf3(term_t subject, term_t predicate, term_t object, control_t h) { return rdf(subject, predicate, object, 0, 0, h, MATCH_EXACT); } static foreign_t rdf4(term_t subject, term_t predicate, term_t object, term_t src, control_t h) { return rdf(subject, predicate, object, src, 0, h, MATCH_EXACT|MATCH_SRC); } static foreign_t rdf_has(term_t subject, term_t predicate, term_t object, term_t realpred, control_t h) { return rdf(subject, predicate, object, 0, realpred, h, MATCH_SUBPROPERTY|MATCH_INVERSE); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rdf_estimate_complexity(+S,+P,+O,-C) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static foreign_t rdf_estimate_complexity(term_t subject, term_t predicate, term_t object, term_t complexity) { triple t; 
long c; rdf_db *db = DB; int rc; memset(&t, 0, sizeof(t)); if ( (rc=get_partial_triple(db, subject, predicate, object, 0, &t)) != TRUE ) { if ( rc == -1 ) { return FALSE; /* error */ } else { return PL_unify_integer(complexity, 0); /* no predicate */ } } if ( !RDLOCK(db) ) return FALSE; if ( !update_hash(db) ) /* or ignore this problem? */ { RDUNLOCK(db); free_triple(db, &t); return FALSE; } if ( t.indexed == BY_NONE ) { c = db->created - db->erased; /* = totale triple count */ #if 0 } else if ( t.indexed == BY_P ) { c = t.predicate.r->triple_count; /* must sum over children */ #endif } else { c = db->counts[t.indexed][triple_hash(db, &t, t.indexed)]; } rc = PL_unify_integer(complexity, c); RDUNLOCK(db); free_triple(db, &t); return rc; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - current_literal(?Literals) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static foreign_t rdf_current_literal(term_t t, control_t h) { rdf_db *db = DB; literal **data; avl_enum *state; int rc; switch(PL_foreign_control(h)) { case PL_FIRST_CALL: if ( PL_is_variable(t) ) { state = rdf_malloc(db, sizeof(*state)); RDLOCK(db); inc_active_queries(db); data = avlfindfirst(&db->literals, NULL, state); goto next; } else { return FALSE; /* TBD */ } case PL_REDO: state = PL_foreign_context_address(h); data = avlfindnext(state); next: for(; data; data=avlfindnext(state)) { literal *lit = *data; if ( unify_literal(t, lit) ) { PL_retry_address(state); } } rc = FALSE; goto cleanup; case PL_CUTTED: rc = TRUE; cleanup: state = PL_foreign_context_address(h); avlfinddestroy(state); rdf_free(db, state, sizeof(*state)); RDUNLOCK(db); dec_active_queries(db); return rc; default: assert(0); return FALSE; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rdf_update(+Subject, +Predicate, +Object, +Action) Update a triple. 
Please note this is actually erase+assert as the triple needs to be updated in the linked lists while erase simply flags a triple as `erases' without deleting it to support queries which active choicepoints. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int update_triple(rdf_db *db, term_t action, triple *t) { term_t a = PL_new_term_ref(); triple tmp, *new; int i; /* Create copy in local memory */ tmp = *t; tmp.allocated = FALSE; tmp.atoms_locked = FALSE; if ( t->object_is_literal ) tmp.object.literal = copy_literal(db, t->object.literal); if ( !PL_get_arg(1, action, a) ) return type_error(action, "rdf_action"); if ( PL_is_functor(action, FUNCTOR_subject1) ) { atom_t s; if ( !get_atom_ex(a, &s) ) return FALSE; if ( tmp.subject == s ) return TRUE; /* no change */ tmp.subject = s; } else if ( PL_is_functor(action, FUNCTOR_predicate1) ) { predicate *p; if ( !get_predicate(db, a, &p) ) return FALSE; if ( tmp.predicate.r == p ) return TRUE; /* no change */ tmp.predicate.r = p; } else if ( PL_is_functor(action, FUNCTOR_object1) ) { triple t2; memset(&t2, 0, sizeof(t2)); if ( !get_object(db, a, &t2) ) { free_triple(db, &t2); return FALSE; } if ( match_object(&t2, &tmp, MATCH_QUAL) ) { free_triple(db, &t2); return TRUE; } if ( tmp.object_is_literal ) free_literal(db, tmp.object.literal); if ( (tmp.object_is_literal = t2.object_is_literal) ) { tmp.object.literal = t2.object.literal; } else { tmp.object.resource = t2.object.resource; } } else if ( PL_is_functor(action, FUNCTOR_graph1) ) { triple t2; if ( !get_graph(a, &t2) ) return FALSE; if ( t2.graph == t->graph && t2.line == t->line ) return TRUE; if ( db->tr_first ) { record_update_src_transaction(db, t, t2.graph, t2.line); } else { if ( t->graph ) unregister_graph(db, t); t->graph = t2.graph; t->line = t2.line; if ( t->graph ) register_graph(db, t); } return TRUE; /* considered no change */ } else return domain_error(action, "rdf_action"); for(i=0; isubject = tmp.subject; 
new->predicate.r = tmp.predicate.r; if ( (new->object_is_literal = tmp.object_is_literal) ) { new->object.literal = copy_literal(db, tmp.object.literal); } else { new->object.resource = tmp.object.resource; } new->graph = tmp.graph; new->line = tmp.line; free_triple(db, &tmp); lock_atoms(new); if ( db->tr_first ) { record_update_transaction(db, t, new); } else { broadcast(EV_UPDATE, t, new); erase_triple_silent(db, t); link_triple_silent(db, new); db->generation++; } return TRUE; } static foreign_t rdf_update5(term_t subject, term_t predicate, term_t object, term_t src, term_t action) { triple t, *p; int indexed = BY_SP; int done = 0; rdf_db *db = DB; memset(&t, 0, sizeof(t)); if ( !get_src(src, &t) || !get_triple(db, subject, predicate, object, &t) ) return FALSE; if ( !WRLOCK(db, FALSE) ) { free_triple(db, &t); return FALSE; } if ( !update_hash(db) ) { WRUNLOCK(db); free_triple(db, &t); return FALSE; } p = db->table[indexed][triple_hash(db, &t, indexed)]; for( ; p; p = p->next[indexed]) { if ( match_triples(p, &t, MATCH_EXACT) ) { if ( !update_triple(db, action, p) ) { WRUNLOCK(db); free_triple(db, &t); return FALSE; /* type errors */ } done++; } } free_triple(db, &t); WRUNLOCK(db); return done ? 
TRUE : FALSE; } static foreign_t rdf_update(term_t subject, term_t predicate, term_t object, term_t action) { return rdf_update5(subject, predicate, object, 0, action); } static foreign_t rdf_retractall4(term_t subject, term_t predicate, term_t object, term_t src) { triple t, *p; rdf_db *db = DB; memset(&t, 0, sizeof(t)); switch( get_partial_triple(db, subject, predicate, object, src, &t) ) { case 0: /* no such predicate */ return TRUE; case -1: /* error */ return FALSE; } if ( t.graph ) /* speedup for rdf_retractall(_,_,_,DB) */ { graph *gr = lookup_graph(db, t.graph, FALSE); if ( !gr || gr->triple_count == 0 ) return TRUE; } if ( !WRLOCK(db, FALSE) ) return FALSE; /* No need, as we do not search with subPropertyOf if ( !update_hash(db) ) { WRUNLOCK(db); return FALSE; } */ p = db->table[t.indexed][triple_hash(db, &t, t.indexed)]; for( ; p; p = p->next[t.indexed]) { if ( match_triples(p, &t, MATCH_EXACT|MATCH_SRC) ) { if ( t.object_is_literal && t.object.literal->objtype == OBJ_TERM ) { fid_t fid = PL_open_foreign_frame(); int rc = unify_object(object, p); PL_discard_foreign_frame(fid); if ( !rc ) continue; } if ( db->tr_first ) { if ( db->tr_reset ) { WRUNLOCK(db); return permission_error("retract", "triple", "", "rdf_retractall cannot follow " "rdf_reset_db in one transaction"); } record_transaction(db, TR_RETRACT, p); } else { erase_triple(db, p); db->generation++; } } } WRUNLOCK(db); free_triple(db, &t); return TRUE; } static foreign_t rdf_retractall3(term_t subject, term_t predicate, term_t object) { return rdf_retractall4(subject, predicate, object, 0); } /******************************* * MONITOR * *******************************/ typedef struct broadcast_callback { struct broadcast_callback *next; predicate_t pred; long mask; } broadcast_callback; static long joined_mask = 0L; static broadcast_callback *callback_list; static broadcast_callback *callback_tail; static void do_broadcast(term_t term, long mask) { if ( callback_list ) { broadcast_callback *cb; 
for(cb = callback_list; cb; cb = cb->next) { qid_t qid; term_t ex; if ( !(cb->mask & mask) ) continue; qid = PL_open_query(NULL, PL_Q_CATCH_EXCEPTION, cb->pred, term); if ( !PL_next_solution(qid) && (ex = PL_exception(qid)) ) { term_t av = PL_new_term_refs(2); PL_cut_query(qid); PL_put_atom(av+0, ATOM_error); PL_put_term(av+1, ex); PL_call_predicate(NULL, PL_Q_NORMAL, PL_predicate("print_message", 2, "user"), av); } else { PL_close_query(qid); } } } } /* No longer used, but we keep it for if we need it again static foreign_t rdf_broadcast(term_t term, term_t mask) { long msk; if ( !get_long_ex(mask, &msk) ) return FALSE; do_broadcast(term, msk); return TRUE; } */ static int broadcast(broadcast_id id, void *a1, void *a2) { if ( (joined_mask & id) ) { fid_t fid; term_t term; functor_t funct; if ( !(fid = PL_open_foreign_frame()) || !(term = PL_new_term_ref()) ) return FALSE; switch(id) { case EV_ASSERT: case EV_ASSERT_LOAD: funct = FUNCTOR_assert4; goto assert_retract; case EV_RETRACT: funct = FUNCTOR_retract4; assert_retract: { triple *t = a1; term_t tmp; if ( !(tmp = PL_new_term_refs(4)) || !PL_put_atom(tmp+0, t->subject) || !PL_put_atom(tmp+1, t->predicate.r->name) || !unify_object(tmp+2, t) || !unify_graph(tmp+3, t) || !PL_cons_functor_v(term, funct, tmp) ) return FALSE; break; } case EV_UPDATE: { triple *t = a1; triple *new = a2; term_t tmp, a; functor_t action; int rc; if ( !(tmp = PL_new_term_refs(5)) || !(a = PL_new_term_ref()) || !PL_put_atom(tmp+0, t->subject) || !PL_put_atom(tmp+1, t->predicate.r->name) || !unify_object(tmp+2, t) || !unify_graph(tmp+3, t) ) return FALSE; if ( t->subject != new->subject ) { action = FUNCTOR_subject1; rc = PL_put_atom(a, new->subject); } else if ( t->predicate.r != new->predicate.r ) { action = FUNCTOR_predicate1; rc = PL_put_atom(a, new->predicate.r->name); } else if ( !match_object(t, new, MATCH_QUAL) ) { action = FUNCTOR_object1; rc = unify_object(a, new); } else if ( !same_graph(t, new) ) { action = FUNCTOR_graph1; rc = 
unify_graph(a, new); } else { return TRUE; /* no change */ } if ( !rc || !PL_cons_functor_v(tmp+4, action, a) || !PL_cons_functor_v(term, FUNCTOR_update5, tmp) ) return FALSE; break; } case EV_NEW_LITERAL: { literal *lit = a1; term_t tmp; if ( !(tmp = PL_new_term_refs(1)) || !unify_literal(tmp, lit) || !PL_cons_functor_v(term, FUNCTOR_new_literal1, tmp) ) return FALSE; break; } case EV_OLD_LITERAL: { literal *lit = a1; term_t tmp; if ( !(tmp = PL_new_term_refs(1)) || !unify_literal(tmp, lit) || !PL_cons_functor_v(term, FUNCTOR_old_literal1, tmp) ) return FALSE; break; } case EV_LOAD: { term_t ctx = (term_t)a1; atom_t be = (atom_t)a2; term_t tmp; if ( !(tmp = PL_new_term_refs(2)) || !PL_put_atom(tmp+0, be) || /* begin/end */ !PL_put_term(tmp+1, ctx) || !PL_cons_functor_v(term, FUNCTOR_load2, tmp) ) return FALSE; break; } case EV_TRANSACTION: { term_t ctx = (term_t)a1; term_t be = (term_t)a2; term_t tmp; if ( !(tmp = PL_new_term_refs(2)) || !PL_put_term(tmp+0, be) || /* begin/end */ !PL_put_term(tmp+1, ctx) || !PL_cons_functor_v(term, FUNCTOR_transaction2, tmp) ) return FALSE; break; } case EV_REHASH: { atom_t be = (atom_t)a1; term_t tmp = PL_new_term_refs(1); if ( !(tmp = PL_new_term_refs(1)) || !PL_put_atom(tmp+0, be) || !PL_cons_functor_v(term, FUNCTOR_rehash1, tmp) ) return FALSE; break; } default: assert(0); } do_broadcast(term, id); PL_discard_foreign_frame(fid); } return TRUE; } static foreign_t rdf_monitor(term_t goal, term_t mask) { atom_t name; broadcast_callback *cb; predicate_t p; long msk; module_t m = NULL; PL_strip_module(goal, &m, goal); if ( !get_atom_ex(goal, &name) || !get_long_ex(mask, &msk) ) return FALSE; p = PL_pred(PL_new_functor(name, 1), m); for(cb=callback_list; cb; cb = cb->next) { if ( cb->pred == p ) { broadcast_callback *cb2; cb->mask = msk; joined_mask = 0L; for(cb2=callback_list; cb2; cb2 = cb2->next) joined_mask |= cb2->mask; DEBUG(2, Sdprintf("Set mask to 0x%x\n", joined_mask)); return TRUE; } } cb = PL_malloc(sizeof(*cb)); cb->next 
= NULL; cb->mask = msk; cb->pred = p; if ( callback_list ) { callback_tail->next = cb; callback_tail = cb; } else { callback_list = callback_tail = cb; } joined_mask |= msk; return TRUE; } /******************************* * QUERY * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Enumerate the known subjects. This uses the `first' flag on triples to avoid returning the same resource multiple times. As the `by_none' is never re-hashed, we don't mark this query in the `active_queries'. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static foreign_t rdf_subject(term_t subject, control_t h) { triple *t; rdf_db *db = DB; switch(PL_foreign_control(h)) { case PL_FIRST_CALL: { if ( PL_is_variable(subject) ) { t = db->table[BY_NONE][0]; goto next; } else { atom_t a; if ( get_atom_ex(subject, &a) ) { if ( first(db, a) ) return TRUE; return FALSE; } return FALSE; } } case PL_REDO: t = PL_foreign_context_address(h); next: for(; t; t = t->next[BY_NONE]) { if ( t->first && !t->erased ) { if ( !PL_unify_atom(subject, t->subject) ) return FALSE; t = t->next[BY_NONE]; if ( t ) PL_retry_address(t); return TRUE; } } return FALSE; case PL_CUTTED: return TRUE; default: assert(0); return FALSE; } } static foreign_t rdf_set_predicate(term_t pred, term_t option) { predicate *p; rdf_db *db = DB; if ( !get_predicate(db, pred, &p) ) return FALSE; if ( PL_is_functor(option, FUNCTOR_symmetric1) ) { int val; if ( !get_bool_arg_ex(1, option, &val) ) return FALSE; p->inverse_of = p; return TRUE; } else if ( PL_is_functor(option, FUNCTOR_inverse_of1) ) { term_t a = PL_new_term_ref(); predicate *i; _PL_get_arg(1, option, a); if ( PL_get_nil(a) ) { if ( p->inverse_of ) { p->inverse_of->inverse_of = NULL; p->inverse_of = NULL; } } else { if ( !get_predicate(db, a, &i) ) return FALSE; p->inverse_of = i; i->inverse_of = p; } return TRUE; } else if ( PL_is_functor(option, FUNCTOR_transitive1) ) { int val; if ( 
!get_bool_arg_ex(1, option, &val) ) return FALSE; p->transitive = val; return TRUE; } else return type_error(option, "predicate_option"); } #define PRED_PROPERTY_COUNT 9 static functor_t predicate_key[PRED_PROPERTY_COUNT]; static int unify_predicate_property(rdf_db *db, predicate *p, term_t option, functor_t f) { if ( f == FUNCTOR_symmetric1 ) return PL_unify_term(option, PL_FUNCTOR, f, PL_BOOL, p->inverse_of == p ? TRUE : FALSE); else if ( f == FUNCTOR_inverse_of1 ) { if ( p->inverse_of ) return PL_unify_term(option, PL_FUNCTOR, f, PL_ATOM, p->inverse_of->name); else return FALSE; } else if ( f == FUNCTOR_transitive1 ) { return PL_unify_term(option, PL_FUNCTOR, f, PL_BOOL, p->transitive); } else if ( f == FUNCTOR_triples1 ) { return PL_unify_term(option, PL_FUNCTOR, f, PL_LONG, p->triple_count); } else if ( f == FUNCTOR_rdf_subject_branch_factor1 ) { return PL_unify_term(option, PL_FUNCTOR, f, PL_FLOAT, subject_branch_factor(db, p, DISTINCT_DIRECT)); } else if ( f == FUNCTOR_rdf_object_branch_factor1 ) { return PL_unify_term(option, PL_FUNCTOR, f, PL_FLOAT, object_branch_factor(db, p, DISTINCT_DIRECT)); } else if ( f == FUNCTOR_rdfs_subject_branch_factor1 ) { return PL_unify_term(option, PL_FUNCTOR, f, PL_FLOAT, subject_branch_factor(db, p, DISTINCT_SUB)); } else if ( f == FUNCTOR_rdfs_object_branch_factor1 ) { return PL_unify_term(option, PL_FUNCTOR, f, PL_FLOAT, object_branch_factor(db, p, DISTINCT_SUB)); } else { assert(0); return FALSE; } } static foreign_t rdf_current_predicates(term_t preds) { rdf_db *db = DB; int i; term_t head = PL_new_term_ref(); term_t tail = PL_copy_term_ref(preds); LOCK_MISC(db); for(i=0; ipred_table_size; i++) { predicate *p; for(p=db->pred_table[i]; p; p = p->next) { if ( !PL_unify_list(tail, head, tail) || !PL_unify_atom(head, p->name) ) { UNLOCK_MISC(db); return FALSE; } } } UNLOCK_MISC(db); return PL_unify_nil(tail); } static foreign_t rdf_predicate_property(term_t pred, term_t option, control_t h) { int n; predicate *p; rdf_db 
*db = DB; if ( !predicate_key[0] ) { int i = 0; predicate_key[i++] = FUNCTOR_symmetric1; predicate_key[i++] = FUNCTOR_inverse_of1; predicate_key[i++] = FUNCTOR_transitive1; predicate_key[i++] = FUNCTOR_triples1; predicate_key[i++] = FUNCTOR_rdf_subject_branch_factor1; predicate_key[i++] = FUNCTOR_rdf_object_branch_factor1; predicate_key[i++] = FUNCTOR_rdfs_subject_branch_factor1; predicate_key[i++] = FUNCTOR_rdfs_object_branch_factor1; assert(i < PRED_PROPERTY_COUNT); } switch(PL_foreign_control(h)) { case PL_FIRST_CALL: { functor_t f; if ( PL_is_variable(option) ) { n = 0; goto redo; } else if ( PL_get_functor(option, &f) ) { for(n=0; predicate_key[n]; n++) { if ( predicate_key[n] == f ) { if ( !get_predicate(db, pred, &p) ) return FALSE; return unify_predicate_property(db, p, option, f); } } return domain_error(option, "rdf_predicate_property"); } else return type_error(option, "rdf_predicate_property"); } case PL_REDO: n = (int)PL_foreign_context(h); redo: if ( !get_predicate(db, pred, &p) ) return FALSE; for( ; predicate_key[n]; n++ ) { if ( unify_predicate_property(db, p, option, predicate_key[n]) ) { n++; if ( predicate_key[n] ) PL_retry(n); return TRUE; } } return FALSE; case PL_CUTTED: return TRUE; default: assert(0); return TRUE; } } /******************************* * TRANSITIVE RELATIONS * *******************************/ typedef struct visited { struct visited *next; /* next in list */ struct visited *hash_link; /* next in hashed link */ atom_t resource; /* visited resource */ uintptr_t distance; /* Distance */ } visited; #define AGENDA_LOCAL_MAGIC 742736360 #define AGENDA_SAVED_MAGIC 742736362 typedef struct agenda { visited *head; /* visited list */ visited *tail; /* tail of visited list */ visited *to_expand; /* next to expand */ visited *to_return; /* next to return */ visited **hash; /* hash-table for cycle detection */ int magic; /* AGENDA_*_MAGIC */ int hash_size; int size; /* size of the agenda */ uintptr_t max_d; /* max distance */ triple 
pattern; /* partial triple used as pattern */ atom_t target; /* resource we are seaching for */ struct chunk *chunk; /* node-allocation chunks */ } agenda; #ifndef offsetof #define offsetof(structure, field) ((size_t) &(((structure *)NULL)->field)) #endif #define CHUNK_SIZE(n) offsetof(chunk, nodes[n]) typedef struct chunk { struct chunk *next; int used; /* # used elements */ int size; /* size of the chunk */ struct visited nodes[1]; /* nodes in the chunk */ } chunk; static visited * alloc_node_agenda(rdf_db *db, agenda *a) { chunk *c; int size; if ( (c=a->chunk) ) { if ( c->used < c->size ) { visited *v = &c->nodes[c->used++]; return v; } } size = (a->size == 0 ? 8 : 1024); c = rdf_malloc(db, CHUNK_SIZE(size)); c->size = size; c->used = 1; c->next = a->chunk; a->chunk = c; return &c->nodes[0]; } static void empty_agenda(rdf_db *db, agenda *a) { chunk *c, *n; for(c=a->chunk; c; c = n) { n = c->next; rdf_free(db, c, CHUNK_SIZE(c->size)); } if ( a->hash ) rdf_free(db, a->hash, sizeof(visited*)*a->hash_size); if ( a->magic == AGENDA_SAVED_MAGIC ) { a->magic = 0; rdf_free(db, a, sizeof(*a)); } else { a->magic = 0; } } static void unlock_and_empty_agenda(rdf_db *db, agenda *a) { RDUNLOCK(db); empty_agenda(db, a); } static agenda * save_agenda(rdf_db *db, agenda *a) { agenda *r = rdf_malloc(db, sizeof(*r)); assert(a->magic == AGENDA_LOCAL_MAGIC); *r = *a; r->magic = AGENDA_SAVED_MAGIC; return r; } static void hash_agenda(rdf_db *db, agenda *a, int size) { if ( a->hash ) rdf_free(db, a->hash, sizeof(*a->hash)); if ( size > 0 ) { visited *v; a->hash = rdf_malloc(db, sizeof(visited*)*size); memset(a->hash, 0, sizeof(visited*)*size); a->hash_size = size; for(v=a->head; v; v = v->next) { int key = atom_hash(v->resource)&(size-1); v->hash_link = a->hash[key]; a->hash[key] = v; } } } static int in_aganda(agenda *a, atom_t resource) { visited *v; if ( a->hash ) { int key = atom_hash(resource)&(a->hash_size-1); v = a->hash[key]; for( ; v; v = v->hash_link ) { if ( v->resource == 
resource ) return TRUE; } } else { v = a->head; for( ; v; v = v->next ) { if ( v->resource == resource ) return TRUE; } } return FALSE; } static visited * append_agenda(rdf_db *db, agenda *a, atom_t res, uintptr_t d) { visited *v = a->head; if ( in_aganda(a, res) ) return NULL; db->agenda_created++; /* statistics */ a->size++; if ( !a->hash_size && a->size > 32 ) hash_agenda(db, a, 64); else if ( a->size > a->hash_size * 4 ) hash_agenda(db, a, a->hash_size * 4); v = alloc_node_agenda(db, a); v->resource = res; v->distance = d; v->next = NULL; if ( a->tail ) { a->tail->next = v; a->tail = v; } else { a->head = a->tail = v; } if ( a->hash_size ) { int key = atom_hash(res)&(a->hash_size-1); v->hash_link = a->hash[key]; a->hash[key] = v; } return v; } static int can_reach_target(rdf_db *db, agenda *a) { int indexed = a->pattern.indexed; int rc = FALSE; triple *p; if ( indexed & BY_S ) /* subj ---> */ { a->pattern.object.resource = a->target; indexed |= BY_O; } else { a->pattern.subject = a->target; indexed |= BY_S; } p = db->table[indexed][triple_hash(db, &a->pattern, indexed)]; for( ; p; p = p->next[indexed]) { if ( match_triples(p, &a->pattern, MATCH_SUBPROPERTY) ) { rc = TRUE; break; } } if ( a->pattern.indexed & BY_S ) { a->pattern.object.resource = 0; } else { a->pattern.subject = 0; } return rc; } static visited * bf_expand(rdf_db *db, agenda *a, atom_t resource, uintptr_t d) { triple *p; int indexed = a->pattern.indexed; visited *rc = NULL; if ( indexed & BY_S ) /* subj ---> */ { a->pattern.subject = resource; } else { a->pattern.object.resource = resource; } if ( a->target && can_reach_target(db, a) ) { return append_agenda(db, a, a->target, d); } p = db->table[indexed][triple_hash(db, &a->pattern, indexed)]; for( ; p; p = p->next[indexed]) { if ( match_triples(p, &a->pattern, MATCH_SUBPROPERTY) ) { atom_t found; visited *v; if ( indexed & BY_S ) { if ( p->object_is_literal ) continue; found = p->object.resource; } else { found = p->subject; } v = 
append_agenda(db, a, found, d); if ( !rc ) rc = v; if ( found == a->target ) break; } } /* TBD: handle owl:inverseOf */ /* TBD: handle owl:sameAs */ return rc; } static visited * next_agenda(rdf_db *db, agenda *a) { visited *v; if ( (v=a->to_return) ) { ok: a->to_return = a->to_return->next; return v; } while( a->to_expand ) { uintptr_t next_d = a->to_expand->distance+1; if ( next_d >= a->max_d ) return NULL; a->to_return = bf_expand(db, a, a->to_expand->resource, next_d); a->to_expand = a->to_expand->next; if ( (v=a->to_return) ) goto ok; } return NULL; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rdf_reachable(+Subject, +Predicate, -Object) rdf_reachable(-Subject, +Predicate, ?Object) Examine transitive relations, reporting all `Object' that can be reached from `Subject' using Predicate without going into a loop if the relation is cyclic. directly_attached() deals with the posibility that the predicate is not defined and Subject and Object are the same. Should use clean error handling, but that means a lot of changes. For now this will do. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int directly_attached(term_t pred, term_t from, term_t to) { if ( PL_is_atom(pred) && PL_is_atom(from) ) return PL_unify(to, from); return FALSE; } static int unify_distance(term_t d, uintptr_t dist) { if ( d ) return PL_unify_integer(d, dist); return TRUE; } static foreign_t rdf_reachable(term_t subj, term_t pred, term_t obj, term_t max_d, term_t d, control_t h) { rdf_db *db = DB; switch(PL_foreign_control(h)) { case PL_FIRST_CALL: { agenda a; visited *v; term_t target_term; int is_det = FALSE; if ( PL_is_variable(pred) ) return instantiation_error(pred); memset(&a, 0, sizeof(a)); a.magic = AGENDA_LOCAL_MAGIC; if ( max_d ) { long md; atom_t inf; if ( PL_get_atom(max_d, &inf) && inf == ATOM_infinite ) a.max_d = (uintptr_t)-1; if ( !get_long_ex(max_d, &md) || md < 0 ) return FALSE; a.max_d = md; } else { a.max_d = (uintptr_t)-1; } if ( !PL_is_variable(subj) ) /* subj .... obj */ { switch(get_partial_triple(db, subj, pred, 0, 0, &a.pattern)) { case 0: return directly_attached(pred, subj, obj) && unify_distance(d, 0); case -1: return FALSE; } is_det = PL_is_ground(obj); target_term = obj; } else if ( !PL_is_variable(obj) ) /* obj .... subj */ { switch(get_partial_triple(db, 0, pred, obj, 0, &a.pattern)) { case 0: return directly_attached(pred, obj, subj); case -1: return FALSE; } if ( a.pattern.object_is_literal ) return FALSE; /* rdf_reachable(-,+,literal(...)) */ target_term = subj; } else return instantiation_error(subj); if ( !RDLOCK(db) ) return FALSE; if ( !update_hash(db) ) return FALSE; if ( (a.pattern.indexed & BY_S) ) /* subj ... 
*/ append_agenda(db, &a, a.pattern.subject, 0); else append_agenda(db, &a, a.pattern.object.resource, 0); a.to_return = a.head; a.to_expand = a.head; while( (v=next_agenda(db, &a)) ) { if ( PL_unify_atom(target_term, v->resource) ) { if ( is_det ) /* mode(+, +, +) */ { int rc = unify_distance(d, v->distance); unlock_and_empty_agenda(db, &a); return rc; } else if ( unify_distance(d, v->distance) ) { /* mode(+, +, -) or mode(-, +, +) */ agenda *ra = save_agenda(db, &a); inc_active_queries(db); DEBUG(9, Sdprintf("Saved agenta to %p\n", ra)); PL_retry_address(ra); } } } unlock_and_empty_agenda(db, &a); return FALSE; } case PL_REDO: { agenda *a = PL_foreign_context_address(h); term_t target_term; visited *v; assert(a->magic == AGENDA_SAVED_MAGIC); if ( !PL_is_variable(subj) ) /* +, +, - */ target_term = obj; else target_term = subj; /* -, +, + */ while( (v=next_agenda(db, a)) ) { if ( PL_unify_atom(target_term, v->resource) && unify_distance(d, v->distance) ) { assert(a->magic == AGENDA_SAVED_MAGIC); PL_retry_address(a); } } dec_active_queries(db); unlock_and_empty_agenda(db, a); return FALSE; } case PL_CUTTED: { agenda *a = PL_foreign_context_address(h); DEBUG(9, Sdprintf("Cutted; agenda = %p\n", a)); assert(a->magic == AGENDA_SAVED_MAGIC); dec_active_queries(db); unlock_and_empty_agenda(db, a); return TRUE; } default: assert(0); return FALSE; } } static foreign_t rdf_reachable3(term_t subj, term_t pred, term_t obj, control_t h) { return rdf_reachable(subj, pred, obj, 0, 0, h); } static foreign_t rdf_reachable5(term_t subj, term_t pred, term_t obj, term_t max_d, term_t d, control_t h) { return rdf_reachable(subj, pred, obj, max_d, d, h); } /******************************* * STATISTICS * *******************************/ static functor_t keys[16]; /* initialised in install_rdf_db() */ static int unify_statistics(rdf_db *db, term_t key, functor_t f) { int64_t v; if ( f == FUNCTOR_triples1 ) { v = db->created - db->erased; } else if ( f == FUNCTOR_subjects1 ) { v = 
db->subjects; } else if ( f == FUNCTOR_predicates1 ) { v = db->pred_count; } else if ( f == FUNCTOR_core1 ) { v = db->core; } else if ( f == FUNCTOR_indexed8 ) { int i; term_t a = PL_new_term_ref(); if ( !PL_unify_functor(key, FUNCTOR_indexed8) ) return FALSE; for(i=0; i<8; i++) { if ( !PL_get_arg(i+1, key, a) || !PL_unify_integer(a, db->indexed[i]) ) return FALSE; } return TRUE; } else if ( f == FUNCTOR_searched_nodes1 ) { v = db->agenda_created; } else if ( f == FUNCTOR_duplicates1 ) { v = db->duplicates; } else if ( f == FUNCTOR_literals1 ) { v = db->literals.count; } else if ( f == FUNCTOR_triples2 && PL_is_functor(key, f) ) { graph *src; term_t a = PL_new_term_ref(); atom_t name; _PL_get_arg(1, key, a); if ( !PL_get_atom(a, &name) ) return type_error(a, "atom"); if ( (src = lookup_graph(db, name, FALSE)) ) v = src->triple_count; else v = 0; _PL_get_arg(2, key, a); return PL_unify_int64(a, v); } else if ( f == FUNCTOR_gc2 ) { return PL_unify_term(key, PL_FUNCTOR, f, PL_INT, db->gc_count, PL_FLOAT, db->gc_time); /* time spent */ } else if ( f == FUNCTOR_rehash2 ) { return PL_unify_term(key, PL_FUNCTOR, f, PL_INT, db->rehash_count, PL_FLOAT, db->rehash_time); } else assert(0); return PL_unify_term(key, PL_FUNCTOR, f, PL_INT64, v); } static foreign_t rdf_statistics(term_t key, control_t h) { int n; rdf_db *db = DB; switch(PL_foreign_control(h)) { case PL_FIRST_CALL: { functor_t f; if ( PL_is_variable(key) ) { n = 0; goto redo; } else if ( PL_get_functor(key, &f) ) { for(n=0; keys[n]; n++) { if ( keys[n] == f ) return unify_statistics(db, key, f); } return domain_error(key, "rdf_statistics"); } else return type_error(key, "rdf_statistics"); } case PL_REDO: n = (int)PL_foreign_context(h); redo: unify_statistics(db, key, keys[n]); n++; if ( keys[n] ) PL_retry(n); case PL_CUTTED: return TRUE; default: assert(0); return TRUE; } } static foreign_t rdf_generation(term_t t) { rdf_db *db = DB; return PL_unify_integer(t, db->generation); } /******************************* * 
RESET * *******************************/ static void erase_triples(rdf_db *db) { triple *t, *n; int i; for(t=db->by_none; t; t=n) { n = t->next[BY_NONE]; free_triple(db, t); db->freed++; } db->by_none = db->by_none_tail = NULL; for(i=BY_S; i<=BY_OP; i++) { if ( db->table[i] ) { int bytes = sizeof(triple*) * db->table_size[i]; memset(db->table[i], 0, bytes); memset(db->tail[i], 0, bytes); } } db->created = 0; db->erased = 0; db->freed = 0; db->erased = 0; db->subjects = 0; db->rehash_count = 0; memset(db->indexed, 0, sizeof(db->indexed)); db->duplicates = 0; db->generation = 0; } static void erase_predicates(rdf_db *db) { predicate **ht; int i; for(i=0,ht = db->pred_table; ipred_table_size; i++, ht++) { predicate *p, *n; for( p = *ht; p; p = n ) { n = p->next; free_list(db, &p->subPropertyOf); free_list(db, &p->siblings); if ( ++p->cloud->deleted == p->cloud->size ) free_predicate_cloud(db, p->cloud); rdf_free(db, p, sizeof(*p)); } *ht = NULL; } db->pred_count = 0; db->next_hash = 0; } static void reset_db(rdf_db *db) { db->resetting = TRUE; erase_triples(db); erase_predicates(db); erase_graphs(db); db->need_update = FALSE; db->agenda_created = 0; avlfree(&db->literals); init_literal_table(db); db->resetting = FALSE; } static foreign_t rdf_reset_db() { rdf_db *db = DB; if ( !WRLOCK(db, FALSE) ) return FALSE; if ( db->tr_first ) { record_transaction(db, TR_RESET, NULL); db->tr_reset = TRUE; } else reset_db(db); WRUNLOCK(db); return TRUE; } /******************************* * MATCH * *******************************/ static foreign_t match_label(term_t how, term_t search, term_t label) { atom_t h, f, l; int type; if ( !get_atom_ex(how, &h) || !get_atom_ex(search, &f) || !get_atom_ex(label, &l) ) return FALSE; if ( h == ATOM_exact ) type = STR_MATCH_EXACT; else if ( h == ATOM_substring ) type = STR_MATCH_SUBSTRING; else if ( h == ATOM_word ) type = STR_MATCH_WORD; else if ( h == ATOM_prefix ) type = STR_MATCH_PREFIX; else if ( h == ATOM_like ) type = STR_MATCH_LIKE; else 
return domain_error(how, "search_method"); return match_atoms(type, f, l); } static foreign_t lang_matches(term_t lang, term_t pattern) { atom_t l, p; if ( !get_atom_ex(lang, &l) || !get_atom_ex(pattern, &p) ) return FALSE; return atom_lang_matches(l, p); } /******************************* * VERSION * *******************************/ static foreign_t rdf_version(term_t v) { return PL_unify_integer(v, RDF_VERSION); } /******************************* * MORE STUFF * *******************************/ #include "quote.c" /******************************* * REGISTER * *******************************/ #define MKFUNCTOR(n, a) \ FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a) #define NDET PL_FA_NONDETERMINISTIC #define META PL_FA_TRANSPARENT install_t install_rdf_db() { int i=0; extern install_t install_atom_map(void); MKFUNCTOR(literal, 1); MKFUNCTOR(error, 2); MKFUNCTOR(type_error, 2); MKFUNCTOR(domain_error, 2); MKFUNCTOR(triples, 1); MKFUNCTOR(triples, 2); MKFUNCTOR(subjects, 1); MKFUNCTOR(predicates, 1); MKFUNCTOR(subject, 1); MKFUNCTOR(predicate, 1); MKFUNCTOR(object, 1); MKFUNCTOR(graph, 1); MKFUNCTOR(indexed, 8); MKFUNCTOR(exact, 1); MKFUNCTOR(plain, 1); MKFUNCTOR(substring, 1); MKFUNCTOR(word, 1); MKFUNCTOR(prefix, 1); MKFUNCTOR(like, 1); MKFUNCTOR(literal, 2); MKFUNCTOR(searched_nodes, 1); MKFUNCTOR(duplicates, 1); MKFUNCTOR(literals, 1); MKFUNCTOR(symmetric, 1); MKFUNCTOR(transitive, 1); MKFUNCTOR(inverse_of, 1); MKFUNCTOR(lang, 2); MKFUNCTOR(type, 2); MKFUNCTOR(rdf_subject_branch_factor, 1); MKFUNCTOR(rdf_object_branch_factor, 1); MKFUNCTOR(rdfs_subject_branch_factor, 1); MKFUNCTOR(rdfs_object_branch_factor, 1); MKFUNCTOR(gc, 2); MKFUNCTOR(rehash, 2); MKFUNCTOR(core, 1); MKFUNCTOR(assert, 4); MKFUNCTOR(retract, 4); MKFUNCTOR(update, 5); MKFUNCTOR(new_literal, 1); MKFUNCTOR(old_literal, 1); MKFUNCTOR(transaction, 2); MKFUNCTOR(load, 2); MKFUNCTOR(rehash, 1); MKFUNCTOR(begin, 1); MKFUNCTOR(end, 1); FUNCTOR_colon2 = PL_new_functor(PL_new_atom(":"), 2); 
ATOM_user = PL_new_atom("user"); ATOM_exact = PL_new_atom("exact"); ATOM_plain = PL_new_atom("plain"); ATOM_prefix = PL_new_atom("prefix"); ATOM_like = PL_new_atom("like"); ATOM_substring = PL_new_atom("substring"); ATOM_word = PL_new_atom("word"); ATOM_subPropertyOf = PL_new_atom(URL_subPropertyOf); ATOM_error = PL_new_atom("error"); ATOM_begin = PL_new_atom("begin"); ATOM_end = PL_new_atom("end"); ATOM_infinite = PL_new_atom("infinite"); PRED_call1 = PL_predicate("call", 1, "user"); /* statistics */ keys[i++] = FUNCTOR_triples1; keys[i++] = FUNCTOR_subjects1; keys[i++] = FUNCTOR_indexed8; keys[i++] = FUNCTOR_predicates1; keys[i++] = FUNCTOR_searched_nodes1; keys[i++] = FUNCTOR_duplicates1; keys[i++] = FUNCTOR_literals1; keys[i++] = FUNCTOR_triples2; keys[i++] = FUNCTOR_gc2; keys[i++] = FUNCTOR_rehash2; keys[i++] = FUNCTOR_core1; keys[i++] = 0; /* setup the database */ DB = new_db(); PL_register_foreign("rdf_version", 1, rdf_version, 0); PL_register_foreign("rdf_assert", 3, rdf_assert3, 0); PL_register_foreign("rdf_assert", 4, rdf_assert4, 0); PL_register_foreign("rdf_update", 4, rdf_update, 0); PL_register_foreign("rdf_update", 5, rdf_update5, 0); PL_register_foreign("rdf_retractall", 3, rdf_retractall3, 0); PL_register_foreign("rdf_retractall", 4, rdf_retractall4, 0); PL_register_foreign("rdf_subject", 1, rdf_subject, NDET); PL_register_foreign("rdf", 3, rdf3, NDET); PL_register_foreign("rdf", 4, rdf4, NDET); PL_register_foreign("rdf_has", 4, rdf_has, NDET); PL_register_foreign("rdf_statistics_",1, rdf_statistics, NDET); PL_register_foreign("rdf_generation", 1, rdf_generation, 0); PL_register_foreign("rdf_match_label",3, match_label, 0); PL_register_foreign("rdf_save_db_", 2, rdf_save_db, 0); PL_register_foreign("rdf_load_db_", 3, rdf_load_db, 0); PL_register_foreign("rdf_reachable", 3, rdf_reachable3, NDET); PL_register_foreign("rdf_reachable", 5, rdf_reachable5, NDET); PL_register_foreign("rdf_reset_db_", 0, rdf_reset_db, 0); 
PL_register_foreign("rdf_set_predicate", 2, rdf_set_predicate, 0); PL_register_foreign("rdf_predicate_property_", 2, rdf_predicate_property, NDET); PL_register_foreign("rdf_current_predicates", 1, rdf_current_predicates, 0); PL_register_foreign("rdf_current_literal", 1, rdf_current_literal, NDET); PL_register_foreign("rdf_graphs_", 1, rdf_graphs, 0); PL_register_foreign("rdf_set_graph_source", 3, rdf_set_graph_source, 0); PL_register_foreign("rdf_unset_graph_source", 1, rdf_unset_graph_source, 0); PL_register_foreign("rdf_graph_source_", 3, rdf_graph_source, 0); PL_register_foreign("rdf_estimate_complexity", 4, rdf_estimate_complexity, 0); PL_register_foreign("rdf_transaction_",2, rdf_transaction, META); PL_register_foreign("rdf_active_transactions_", 1, rdf_active_transactions, 0); PL_register_foreign("rdf_monitor_", 2, rdf_monitor, META); /*PL_register_foreign("rdf_broadcast_", 2, rdf_broadcast, 0);*/ #ifdef WITH_MD5 PL_register_foreign("rdf_md5", 2, rdf_md5, 0); PL_register_foreign("rdf_atom_md5", 3, rdf_atom_md5, 0); #endif PL_register_foreign("rdf_quote_uri", 2, rdf_quote_uri, 0); #ifdef O_DEBUG PL_register_foreign("rdf_debug", 1, rdf_debug, 0); PL_register_foreign("rdf_print_predicate_cloud", 1, rdf_print_predicate_cloud, 0); #endif #ifdef O_SECURE PL_register_foreign("rdf_dump_literals", 0, dump_literals, 0); PL_register_foreign("rdf_check_literals", 0, check_transitivity, 0); #endif PL_register_foreign("lang_matches", 2, lang_matches, 0); install_atom_map(); }