276 lines
8.4 KiB
C
276 lines
8.4 KiB
C
/* $Id$
|
|
|
|
Part of SWI-Prolog
|
|
|
|
Author: Jan Wielemaker
|
|
E-mail: wielemak@science.uva.nl
|
|
WWW: http://www.swi-prolog.org
|
|
Copyright (C): 1985-2007, University of Amsterdam
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
#ifndef RDFDB_H_INCLUDED
|
|
#define RDFDB_H_INCLUDED
|
|
#include "avl.h"
|
|
#ifdef WITH_MD5
|
|
#include "md5.h"
|
|
#endif
|
|
#include "lock.h"
|
|
|
|
/* Library version, encoded as a decimal number: 20800 stands for 2.8.0 */
#define RDF_VERSION 20800

/* Full URI of the RDF-Schema subPropertyOf property */
#define URL_subPropertyOf "http://www.w3.org/2000/01/rdf-schema#subPropertyOf"
|
|
|
|
|
|
/*******************************
|
|
* C *
|
|
*******************************/
|
|
|
|
/* Object (literal) value types.
   Keep consistent with md5_type[] in rdf_db.c! */
#define OBJ_UNTYPED	0x0		/* partial: don't know */
#define OBJ_INTEGER	0x1
#define OBJ_DOUBLE	0x2
#define OBJ_STRING	0x3
#define OBJ_TERM	0x4

/* Literal qualifier kinds (none, datatype, language) */
#define Q_NONE		0x0
#define Q_TYPE		0x1
#define Q_LANG		0x2

/* Index selectors.  One bit per triple field (Subject, Predicate,
   Object); the combined names are the bitwise OR of the single-field
   ones, giving the values 0..7. */
#define BY_NONE		0x00		/* 0 */
#define BY_S		0x01		/* 1 */
#define BY_P		0x02		/* 2 */
#define BY_O		0x04		/* 4 */
#define BY_SP		(BY_S|BY_P)	/* 3 */
#define BY_SO		(BY_S|BY_O)	/* 5 */
#define BY_OP		(BY_P|BY_O)	/* 6 */
#define BY_SPO		(BY_S|BY_P|BY_O) /* 7 */

/* Number of hash-table slots in the per-index arrays and the initial
   sizes of the various tables.  INITIAL_TABLE_SIZE is parenthesized so
   that it expands safely inside larger expressions such as
   `n / INITIAL_TABLE_SIZE` or `n % INITIAL_TABLE_SIZE`. */
#define INDEX_TABLES			7
#define INITIAL_TABLE_SIZE		(8*1024)
#define INITIAL_PREDICATE_TABLE_SIZE	1024
#define INITIAL_GRAPH_TABLE_SIZE	64

#define MAX_HASH_FACTOR 8		/* factor to trigger re-hash */
#define MIN_HASH_FACTOR 4		/* factor after re-hash */

/* Value used when no line number is available */
#define NO_LINE	(0)
|
|
|
|
/* One element of a singly linked chain carrying an untyped payload. */
typedef struct cell {
  void        *value;		/* the resource this cell represents */
  struct cell *next;		/* following cell in the chain */
} cell;
|
|
|
|
|
|
typedef struct list
|
|
{ cell *head; /* first in list */
|
|
cell *tail; /* tail of list */
|
|
} list;
|
|
|
|
|
|
/* Two-dimensional bit matrix: a small header followed by the bit store.
   `bits` is declared with a single element.
   NOTE(review): presumably the structure is over-allocated so that
   `bits` can hold the whole matrix -- confirm against the allocator. */
typedef struct bitmatrix {
  size_t width;
  size_t heigth;		/* (sic) the spelling is part of the interface */
  int    bits[1];
} bitmatrix;
|
|
|
|
|
|
/* Slot numbers for the two-element statistics arrays of a predicate
   (->distinct_subjects, etc) */
#define DISTINCT_DIRECT	0
#define DISTINCT_SUB	1
|
|
|
|
typedef struct predicate
|
|
{ atom_t name; /* name of the predicate */
|
|
struct predicate *next; /* next in hash-table */
|
|
/* hierarchy */
|
|
list subPropertyOf; /* the one I'm subPropertyOf */
|
|
list siblings; /* reverse of subPropertyOf */
|
|
int label; /* Numeric label in cloud */
|
|
struct predicate_cloud *cloud; /* cloud I belong to */
|
|
unsigned int hash; /* key used for hashing
|
|
(=hash if ->cloud is up-to-date) */
|
|
/* properties */
|
|
struct predicate *inverse_of; /* my inverse predicate */
|
|
unsigned transitive : 1; /* P(a,b)&P(b,c) --> P(a,c) */
|
|
/* statistics */
|
|
long triple_count; /* # triples on this predicate */
|
|
long distinct_updated[2];/* Is count still valid? */
|
|
long distinct_count[2]; /* Triple count at last update */
|
|
long distinct_subjects[2];/* # distinct subject values */
|
|
long distinct_objects[2];/* # distinct object values */
|
|
} predicate;
|
|
|
|
|
|
typedef struct predicate_cloud
|
|
{ predicate **members; /* member predicates */
|
|
unsigned int hash; /* hash-code */
|
|
size_t size; /* size of the cloud */
|
|
size_t deleted; /* See erase_predicates() */
|
|
bitmatrix *reachable; /* cloud reachability matrix */
|
|
unsigned dirty : 1; /* predicate hash not synchronised */
|
|
} predicate_cloud;
|
|
|
|
|
|
typedef struct graph
|
|
{ struct graph *next; /* next in table */
|
|
atom_t name; /* name of the graph */
|
|
atom_t source; /* URL graph was loaded from */
|
|
double modified; /* Modified time of source URL */
|
|
int triple_count; /* # triples associated to it */
|
|
#ifdef WITH_MD5
|
|
unsigned md5 : 1; /* do/don't record MD5 */
|
|
md5_byte_t digest[16]; /* MD5 digest */
|
|
#endif
|
|
} graph;
|
|
|
|
|
|
typedef struct literal
|
|
{ union
|
|
{ atom_t string;
|
|
int64_t integer;
|
|
double real;
|
|
struct
|
|
{ record_t record;
|
|
size_t len;
|
|
} term; /* external record */
|
|
} value;
|
|
atom_t type_or_lang; /* Type or language for literals */
|
|
unsigned int hash; /* saved hash */
|
|
unsigned objtype : 3;
|
|
unsigned qualifier : 2; /* Lang/Type qualifier */
|
|
unsigned shared : 1; /* member of shared table */
|
|
unsigned term_loaded : 1; /* OBJ_TERM from quick save file */
|
|
unsigned atoms_locked : 1; /* Atoms have been locked */
|
|
unsigned references : 24; /* # references to me */
|
|
} literal;
|
|
|
|
|
|
#define t_match next[0]
|
|
|
|
typedef struct triple
|
|
{ atom_t subject;
|
|
union
|
|
{ predicate* r; /* resolved: normal DB */
|
|
atom_t u; /* used by rdf_load_db_/3 */
|
|
} predicate;
|
|
union
|
|
{ literal * literal;
|
|
atom_t resource;
|
|
} object;
|
|
atom_t graph; /* where it comes from */
|
|
unsigned long line; /* graph-line number */
|
|
/* indexing */
|
|
struct triple*next[INDEX_TABLES]; /* hash-table next links */
|
|
/* flags */
|
|
unsigned object_is_literal : 1; /* Object is a literal */
|
|
unsigned resolve_pred : 1; /* predicates needs to be resolved */
|
|
unsigned indexed : 3; /* Partials: BY_* */
|
|
unsigned erased : 1; /* If TRUE, triple is erased */
|
|
unsigned first : 1; /* I'm the first on subject */
|
|
unsigned match : 3; /* How to match literals */
|
|
unsigned inversed : 1; /* Partials: using inverse match */
|
|
unsigned is_duplicate : 1; /* I'm a duplicate */
|
|
unsigned allocated : 1; /* Triple is allocated */
|
|
unsigned atoms_locked : 1; /* Atoms have been locked */
|
|
unsigned duplicates : 16; /* Duplicate count */
|
|
/* Total: 32 */
|
|
} triple;
|
|
|
|
|
|
/* Kinds of entries that can appear in the transaction log.  The
   enumerators are numbered implicitly from 0 in the order given. */
typedef enum {
  TR_MARK,				/* mark start for nesting */
  TR_SUB_START,				/* start nested transaction */
  TR_SUB_END,				/* end nested transaction */
  TR_ASSERT,				/* rdf_assert */
  TR_RETRACT,				/* rdf_retractall */
  TR_UPDATE,				/* rdf_update */
  TR_UPDATE_SRC,			/* rdf_update */
  TR_UPDATE_MD5,			/* update md5 src */
  TR_RESET,				/* rdf_reset_db */
  TR_VOID				/* no-op */
} tr_type;
|
|
|
|
|
|
typedef struct transaction_record
|
|
{ struct transaction_record *previous;
|
|
struct transaction_record *next;
|
|
tr_type type;
|
|
triple *triple; /* new/deleted triple */
|
|
union
|
|
{ triple *triple; /* used for update */
|
|
struct
|
|
{ atom_t atom;
|
|
unsigned long line;
|
|
} src;
|
|
struct
|
|
{ graph *graph;
|
|
md5_byte_t *digest;
|
|
} md5;
|
|
record_t transaction_id;
|
|
} update;
|
|
} transaction_record;
|
|
|
|
|
|
typedef struct active_transaction
|
|
{ struct active_transaction *parent;
|
|
term_t id;
|
|
} active_transaction;
|
|
|
|
|
|
typedef struct rdf_db
|
|
{ triple *by_none, *by_none_tail;
|
|
triple **table[INDEX_TABLES];
|
|
triple **tail[INDEX_TABLES];
|
|
int *counts[INDEX_TABLES];
|
|
int table_size[INDEX_TABLES];
|
|
long created; /* #triples created */
|
|
long erased; /* #triples erased */
|
|
long freed; /* #triples actually erased */
|
|
long subjects; /* subjects (unique first) */
|
|
long indexed[8]; /* Count calls */
|
|
int rehash_count; /* # rehashes */
|
|
int gc_count; /* # garbage collections */
|
|
int gc_blocked; /* GC is blocked; */
|
|
double rehash_time; /* time spent in rehash */
|
|
double gc_time; /* time spent in GC */
|
|
size_t core; /* core in use */
|
|
predicate **pred_table; /* Hash-table of predicates */
|
|
int pred_table_size; /* #entries in the table */
|
|
int pred_count; /* #predicates */
|
|
unsigned long next_hash; /* cloud hash keys */
|
|
int active_queries; /* Calls with choicepoints */
|
|
int need_update; /* We need to update */
|
|
long agenda_created; /* #visited nodes in agenda */
|
|
long duplicates; /* #duplicate triples */
|
|
long generation; /* generation-id of the database */
|
|
graph **graph_table; /* Hash table of sources */
|
|
int graph_table_size; /* Entries in table */
|
|
|
|
graph *last_graph; /* last accessed graph */
|
|
active_transaction *tr_active; /* open transactions */
|
|
transaction_record *tr_first; /* first transaction record */
|
|
transaction_record *tr_last; /* last transaction record */
|
|
int tr_nesting; /* nesting depth of transactions */
|
|
int tr_reset; /* transaction contains reset */
|
|
int resetting; /* We are in rdf_reset_db() */
|
|
|
|
rwlock lock; /* threaded access */
|
|
|
|
avl_tree literals;
|
|
} rdf_db;
|
|
|
|
#endif /*RDFDB_H_INCLUDED*/
|