/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker E-mail: wielemak@science.uva.nl WWW: http://www.swi-prolog.org Copyright (C): 1985-2007, University of Amsterdam This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef RDFDB_H_INCLUDED #define RDFDB_H_INCLUDED #include "avl.h" #ifdef WITH_MD5 #include "md5.h" #endif #include "lock.h" #define RDF_VERSION 20800 /* 2.8.0 */ #define URL_subPropertyOf \ "http://www.w3.org/2000/01/rdf-schema#subPropertyOf" /******************************* * C * *******************************/ /* Keep consistent with md5_type[] in rdf_db.c! */ #define OBJ_UNTYPED 0x0 /* partial: don't know */ #define OBJ_INTEGER 0x1 #define OBJ_DOUBLE 0x2 #define OBJ_STRING 0x3 #define OBJ_TERM 0x4 #define Q_NONE 0x0 #define Q_TYPE 0x1 #define Q_LANG 0x2 #define BY_NONE 0x00 /* 0 */ #define BY_S 0x01 /* 1 */ #define BY_P 0x02 /* 2 */ #define BY_O 0x04 /* 4 */ #define BY_SP (BY_S|BY_P) /* 3 */ #define BY_SO (BY_S|BY_O) /* 5 */ #define BY_OP (BY_P|BY_O) /* 6 */ #define BY_SPO (BY_S|BY_P|BY_O) /* 7 */ #define INDEX_TABLES 7 #define INITIAL_TABLE_SIZE 8*1024 #define INITIAL_PREDICATE_TABLE_SIZE 1024 #define INITIAL_GRAPH_TABLE_SIZE 64 #define MAX_HASH_FACTOR 8 /* factor to trigger re-hash */ #define MIN_HASH_FACTOR 4 /* factor after re-hash */ #define NO_LINE (0) typedef struct cell { void * value; /* represented resource */ struct cell *next; /* next in chain */ } cell; typedef struct list { cell *head; /* first in list */ cell *tail; /* tail of list */ } list; typedef struct bitmatrix { size_t width; size_t heigth; int bits[1]; } bitmatrix; #define DISTINCT_DIRECT 0 /* for ->distinct_subjects, etc */ #define DISTINCT_SUB 1 typedef struct predicate { atom_t name; /* name of the predicate */ struct predicate *next; /* next in hash-table */ /* hierarchy */ list subPropertyOf; /* the one I'm subPropertyOf */ list siblings; /* reverse of subPropertyOf */ int label; /* Numeric label in cloud */ struct predicate_cloud *cloud; /* cloud I belong to */ unsigned int hash; /* key used for hashing (=hash if ->cloud is up-to-date) */ /* properties */ struct predicate *inverse_of; /* my inverse predicate */ unsigned transitive : 1; /* P(a,b)&P(b,c) --> P(a,c) */ /* statistics */ long triple_count; /* # triples on this predicate */ long distinct_updated[2];/* Is count still valid? */ long distinct_count[2]; /* Triple count at last update */ long distinct_subjects[2];/* # distinct subject values */ long distinct_objects[2];/* # distinct object values */ } predicate; typedef struct predicate_cloud { predicate **members; /* member predicates */ unsigned int hash; /* hash-code */ size_t size; /* size of the cloud */ size_t deleted; /* See erase_predicates() */ bitmatrix *reachable; /* cloud reachability matrix */ unsigned dirty : 1; /* predicate hash not synchronised */ } predicate_cloud; typedef struct graph { struct graph *next; /* next in table */ atom_t name; /* name of the graph */ atom_t source; /* URL graph was loaded from */ double modified; /* Modified time of source URL */ int triple_count; /* # triples associated to it */ #ifdef WITH_MD5 unsigned md5 : 1; /* do/don't record MD5 */ md5_byte_t digest[16]; /* MD5 digest */ #endif } graph; typedef struct literal { union { atom_t string; int64_t integer; double real; struct { record_t record; size_t len; } term; /* external record */ } value; atom_t type_or_lang; /* Type or language for literals */ unsigned int hash; /* saved hash */ unsigned objtype : 3; unsigned qualifier : 2; /* Lang/Type qualifier */ unsigned shared : 1; /* member of shared table */ unsigned term_loaded : 1; /* OBJ_TERM from quick save file */ unsigned atoms_locked : 1; /* Atoms have been locked */ unsigned references : 24; /* # references to me */ } literal; #define t_match next[0] typedef struct triple { atom_t subject; union { predicate* r; /* resolved: normal DB */ atom_t u; /* used by rdf_load_db_/3 */ } predicate; union { literal * literal; atom_t resource; } object; atom_t graph; /* where it comes from */ unsigned long line; /* graph-line number */ /* indexing */ struct triple*next[INDEX_TABLES]; /* hash-table next links */ /* flags */ unsigned object_is_literal : 1; /* Object is a literal */ unsigned resolve_pred : 1; /* predicates needs to be resolved */ unsigned indexed : 3; /* Partials: BY_* */ unsigned erased : 1; /* If TRUE, triple is erased */ unsigned first : 1; /* I'm the first on subject */ unsigned match : 3; /* How to match literals */ unsigned inversed : 1; /* Partials: using inverse match */ unsigned is_duplicate : 1; /* I'm a duplicate */ unsigned allocated : 1; /* Triple is allocated */ unsigned atoms_locked : 1; /* Atoms have been locked */ unsigned duplicates : 16; /* Duplicate count */ /* Total: 32 */ } triple; typedef enum { TR_MARK, /* mark start for nesting */ TR_SUB_START, /* start nested transaction */ TR_SUB_END, /* end nested transaction */ TR_ASSERT, /* rdf_assert */ TR_RETRACT, /* rdf_retractall */ TR_UPDATE, /* rdf_update */ TR_UPDATE_SRC, /* rdf_update */ TR_UPDATE_MD5, /* update md5 src */ TR_RESET, /* rdf_reset_db */ TR_VOID /* no-op */ } tr_type; typedef struct transaction_record { struct transaction_record *previous; struct transaction_record *next; tr_type type; triple *triple; /* new/deleted triple */ union { triple *triple; /* used for update */ struct { atom_t atom; unsigned long line; } src; struct { graph *graph; md5_byte_t *digest; } md5; record_t transaction_id; } update; } transaction_record; typedef struct active_transaction { struct active_transaction *parent; term_t id; } active_transaction; typedef struct rdf_db { triple *by_none, *by_none_tail; triple **table[INDEX_TABLES]; triple **tail[INDEX_TABLES]; int *counts[INDEX_TABLES]; int table_size[INDEX_TABLES]; long created; /* #triples created */ long erased; /* #triples erased */ long freed; /* #triples actually erased */ long subjects; /* subjects (unique first) */ long indexed[8]; /* Count calls */ int rehash_count; /* # rehashes */ int gc_count; /* # garbage collections */ int gc_blocked; /* GC is blocked; */ double rehash_time; /* time spent in rehash */ double gc_time; /* time spent in GC */ size_t core; /* core in use */ predicate **pred_table; /* Hash-table of predicates */ int pred_table_size; /* #entries in the table */ int pred_count; /* #predicates */ unsigned long next_hash; /* cloud hash keys */ int active_queries; /* Calls with choicepoints */ int need_update; /* We need to update */ long agenda_created; /* #visited nodes in agenda */ long duplicates; /* #duplicate triples */ long generation; /* generation-id of the database */ graph **graph_table; /* Hash table of sources */ int graph_table_size; /* Entries in table */ graph *last_graph; /* last accessed graph */ active_transaction *tr_active; /* open transactions */ transaction_record *tr_first; /* first transaction record */ transaction_record *tr_last; /* last transaction record */ int tr_nesting; /* nesting depth of transactions */ int tr_reset; /* transaction contains reset */ int resetting; /* We are in rdf_reset_db() */ rwlock lock; /* threaded access */ avl_tree literals; } rdf_db; #endif /*RDFDB_H_INCLUDED*/