new more compact/better hashing version of exo.c
This commit is contained in:
parent
b88dcb86ab
commit
4dac867fb8
151
C/exo.c
151
C/exo.c
@ -40,50 +40,67 @@
|
|||||||
|
|
||||||
#define MAX_ARITY 256
|
#define MAX_ARITY 256
|
||||||
|
|
||||||
|
#define FNV32_PRIME 16777619
|
||||||
|
#define FNV64_PRIME ((UInt)1099511628211)
|
||||||
|
|
||||||
|
#define FNV32_OFFSET 2166136261
|
||||||
|
#define FNV64_OFFSET ((UInt)14695981039346656037)
|
||||||
|
|
||||||
|
|
||||||
/* Simple hash function:
|
/* Simple hash function:
|
||||||
first component is the base key.
|
first component is the base key.
|
||||||
hash0 spreads extensions coming from different elements.
|
hash0 spreads extensions coming from different elements.
|
||||||
spread over j quadrants.
|
spread over j quadrants.
|
||||||
*/
|
*/
|
||||||
static UInt
|
static BITS32
|
||||||
HASH(UInt hash0, UInt j, CELL *cl, struct index_t *it)
|
HASH(UInt arity, CELL *cl, UInt bnds[], UInt sz)
|
||||||
{
|
{
|
||||||
Term t = cl[j];
|
UInt hash;
|
||||||
UInt sz = it->hsize;
|
UInt j=0;
|
||||||
if (IsIntTerm(t))
|
|
||||||
return (17*(IntOfTerm(t) + (hash0+1)*j ) ) % sz;
|
hash = FNV32_OFFSET;
|
||||||
return (17*(((UInt)AtomOfTerm(t)>>5) + (hash0+1)*j ) ) % sz;
|
while (j < arity) {
|
||||||
|
if (bnds[j]) {
|
||||||
|
unsigned char *i=(unsigned char*)(cl+j);
|
||||||
|
unsigned char *m=(unsigned char*)(cl+(j+1));
|
||||||
|
|
||||||
|
while (i < m) {
|
||||||
|
hash = hash ^ i[0];
|
||||||
|
hash = hash * FNV32_PRIME;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
static UInt
|
static BITS32
|
||||||
NEXT(UInt hash, Term t, UInt j, struct index_t *it)
|
NEXT(UInt hash)
|
||||||
{
|
{
|
||||||
return (hash+(j+1)*997) % (it->hsize);
|
return (hash*997);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* search for matching elements */
|
/* search for matching elements */
|
||||||
static int
|
static int
|
||||||
MATCH(CELL *clp, CELL *kvp, UInt j, struct index_t *it, UInt bnds[])
|
MATCH(CELL *clp, CELL *kvp, UInt arity, UInt bnds[])
|
||||||
{
|
{
|
||||||
if ((kvp - it->cls)%it->arity != j)
|
UInt j = 0;
|
||||||
return FALSE;
|
while (j< arity) {
|
||||||
do {
|
if ( bnds[j] && clp[j] != kvp[j])
|
||||||
if ( bnds[j] && *clp != *kvp)
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
clp--;
|
j++;
|
||||||
kvp--;
|
}
|
||||||
} while (j-- != 0);
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ADD_TO_TRY_CHAIN(CELL *kvp, CELL *cl, struct index_t *it)
|
ADD_TO_TRY_CHAIN(CELL *kvp, CELL *cl, struct index_t *it)
|
||||||
{
|
{
|
||||||
UInt old = (kvp-it->cls)/it->arity;
|
BITS32 old = (kvp-it->cls)/it->arity;
|
||||||
UInt new = (cl-it->cls)/it->arity;
|
BITS32 new = (cl-it->cls)/it->arity;
|
||||||
UInt *links = it->links;
|
BITS32 *links = it->links;
|
||||||
UInt tmp = links[old]; /* points to the end of the chain */
|
BITS32 tmp = links[old]; /* points to the end of the chain */
|
||||||
|
|
||||||
if (!tmp) {
|
if (!tmp) {
|
||||||
links[old] = links[new] = new;
|
links[old] = links[new] = new;
|
||||||
@ -112,49 +129,28 @@ ADD_TO_TRY_CHAIN(CELL *kvp, CELL *cl, struct index_t *it)
|
|||||||
* else
|
* else
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
INSERT(CELL *cl, struct index_t *it, UInt arity, UInt base, UInt hash0, UInt bnds[])
|
INSERT(CELL *cl, struct index_t *it, UInt arity, UInt base, UInt bnds[])
|
||||||
{
|
{
|
||||||
UInt j = base;
|
|
||||||
CELL *kvp;
|
CELL *kvp;
|
||||||
UInt hash;
|
BITS32 hash;
|
||||||
|
|
||||||
/* skip over argument */
|
|
||||||
while (!bnds[j]) {
|
hash = HASH(arity, cl, bnds, it->hsize);
|
||||||
j++;
|
|
||||||
}
|
|
||||||
/* j is the firs bound element */
|
|
||||||
/* check if we match */
|
|
||||||
hash = hash0 = HASH(hash0, j, cl, it);
|
|
||||||
//if (exo_write) printf("h=%ld j=%ld %lx\n", hash, j, cl[j]);
|
|
||||||
next:
|
next:
|
||||||
/* loop to insert element */
|
kvp = EXO_OFFSET_TO_ADDRESS(it, it->key [hash % it->hsize]);
|
||||||
kvp = it->key[hash];
|
|
||||||
if (kvp == NULL) {
|
if (kvp == NULL) {
|
||||||
/* simple case, new entry */
|
/* simple case, new entry */
|
||||||
it->nentries++;
|
it->nentries++;
|
||||||
it->key[hash] = cl+j;
|
it->key[hash % it->hsize ] = EXO_ADDRESS_TO_OFFSET(it, cl);
|
||||||
return;
|
return;
|
||||||
} else if (MATCH(cl+j, kvp, j, it, bnds)) {
|
} else if (MATCH(kvp, cl, arity, bnds)) {
|
||||||
/* collision */
|
|
||||||
UInt k;
|
|
||||||
CELL *target;
|
|
||||||
|
|
||||||
for (k =j+1, target = kvp+1; k < arity; k++,target++ ) {
|
|
||||||
if (bnds[k]) {
|
|
||||||
if (*target != cl[k]) {
|
|
||||||
/* found a new forking point */
|
|
||||||
// printf("j=%ld hash0=%ld cl[j]=%lx\n", j, hash0, cl[j]);
|
|
||||||
INSERT(cl, it, arity, k, hash0, bnds);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
it->ntrys++;
|
it->ntrys++;
|
||||||
ADD_TO_TRY_CHAIN(kvp, cl, it);
|
ADD_TO_TRY_CHAIN(kvp, cl, it);
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
it->ncollisions++;
|
it->ncollisions++;
|
||||||
hash = NEXT(hash, cl[j], j, it);
|
// printf("#");
|
||||||
|
hash = NEXT(hash);
|
||||||
//if (exo_write) printf("N=%ld\n", hash);
|
//if (exo_write) printf("N=%ld\n", hash);
|
||||||
goto next;
|
goto next;
|
||||||
}
|
}
|
||||||
@ -165,40 +161,26 @@ LOOKUP(struct index_t *it, UInt arity, UInt j, UInt bnds[])
|
|||||||
{
|
{
|
||||||
CACHE_REGS
|
CACHE_REGS
|
||||||
CELL *kvp;
|
CELL *kvp;
|
||||||
UInt hash, hash0 = 0;
|
BITS32 hash;
|
||||||
|
|
||||||
/* j is the firs bound element */
|
/* j is the firs bound element */
|
||||||
/* check if we match */
|
/* check if we match */
|
||||||
hash:
|
hash = HASH(arity, XREGS+1, bnds, it->hsize);
|
||||||
hash = hash0 = HASH(hash0, j, XREGS+1, it);
|
|
||||||
next:
|
next:
|
||||||
/* loop to insert element */
|
/* loop to insert element */
|
||||||
kvp = it->key[hash];
|
kvp = EXO_OFFSET_TO_ADDRESS(it, it->key[hash % it->hsize]);
|
||||||
if (kvp == NULL) {
|
if (kvp == NULL) {
|
||||||
/* simple case, no element */
|
/* simple case, no element */
|
||||||
return FAILCODE;
|
return FAILCODE;
|
||||||
} else if (MATCH(XREGS+(j+1), kvp, j, it, bnds)) {
|
} else if (MATCH(kvp, XREGS+1, arity, bnds)) {
|
||||||
/* found element */
|
S = kvp;
|
||||||
UInt k;
|
|
||||||
CELL *target;
|
|
||||||
|
|
||||||
for (k =j+1, target = kvp+1; k < arity; k++ ) {
|
|
||||||
if (bnds[k]) {
|
|
||||||
if (*target != XREGS[k+1]) {
|
|
||||||
j = k;
|
|
||||||
goto hash;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
target++;
|
|
||||||
}
|
|
||||||
S = target-arity;
|
|
||||||
if (!it->is_key && it->links[(S-it->cls)/arity])
|
if (!it->is_key && it->links[(S-it->cls)/arity])
|
||||||
return it->code;
|
return it->code;
|
||||||
else
|
else
|
||||||
return NEXTOP(NEXTOP(it->code,lp),lp);
|
return NEXTOP(NEXTOP(it->code,lp),lp);
|
||||||
} else {
|
} else {
|
||||||
/* collision */
|
/* collision */
|
||||||
hash = NEXT(hash, XREGS[j+1], j, it);
|
hash = NEXT(hash);
|
||||||
goto next;
|
goto next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -211,12 +193,12 @@ fill_hash(UInt bmap, struct index_t *it, UInt bnds[])
|
|||||||
CELL *cl = it->cls;
|
CELL *cl = it->cls;
|
||||||
|
|
||||||
for (i=0; i < it->nels; i++) {
|
for (i=0; i < it->nels; i++) {
|
||||||
INSERT(cl, it, arity, 0, 0, bnds);
|
INSERT(cl, it, arity, 0, bnds);
|
||||||
cl += arity;
|
cl += arity;
|
||||||
}
|
}
|
||||||
for (i=0; i < it->hsize; i++) {
|
for (i=0; i < it->hsize; i++) {
|
||||||
if (it->key[i]) {
|
if (it->key[i]) {
|
||||||
UInt offset = (it->key[i]-it->cls)/arity;
|
UInt offset = it->key[i]/arity;
|
||||||
UInt last = it->links[offset];
|
UInt last = it->links[offset];
|
||||||
if (last) {
|
if (last) {
|
||||||
/* the chain used to point straight to the last, and the last back to the origibal first */
|
/* the chain used to point straight to the last, and the last back to the origibal first */
|
||||||
@ -255,7 +237,7 @@ add_index(struct index_t **ip, UInt bmap, PredEntry *ap, UInt count, UInt bnds[]
|
|||||||
i->is_key = FALSE;
|
i->is_key = FALSE;
|
||||||
i->hsize = 2*ncls;
|
i->hsize = 2*ncls;
|
||||||
if (count) {
|
if (count) {
|
||||||
if (!(base = (CELL *)Yap_AllocCodeSpace(sizeof(CELL)*(ncls+i->hsize)))) {
|
if (!(base = (CELL *)Yap_AllocCodeSpace(sizeof(BITS32)*(ncls+i->hsize)))) {
|
||||||
CACHE_REGS
|
CACHE_REGS
|
||||||
save_machine_regs();
|
save_machine_regs();
|
||||||
LOCAL_Error_Size = sizeof(CELL)*(ncls+i->hsize);
|
LOCAL_Error_Size = sizeof(CELL)*(ncls+i->hsize);
|
||||||
@ -267,7 +249,7 @@ add_index(struct index_t **ip, UInt bmap, PredEntry *ap, UInt count, UInt bnds[]
|
|||||||
bzero(base, sizeof(CELL)*(ncls+i->hsize));
|
bzero(base, sizeof(CELL)*(ncls+i->hsize));
|
||||||
}
|
}
|
||||||
i->size = sizeof(CELL)*(ncls+i->hsize)+sz+sizeof(struct index_t);
|
i->size = sizeof(CELL)*(ncls+i->hsize)+sz+sizeof(struct index_t);
|
||||||
i->key = (CELL **)base;
|
i->key = (CELL *)base;
|
||||||
i->links = (CELL *)(base+i->hsize);
|
i->links = (CELL *)(base+i->hsize);
|
||||||
i->ncollisions = i->nentries = i->ntrys = 0;
|
i->ncollisions = i->nentries = i->ntrys = 0;
|
||||||
i->cls = (CELL *)((ADDR)ap->cs.p_code.FirstClause+2*sizeof(struct index_t *));
|
i->cls = (CELL *)((ADDR)ap->cs.p_code.FirstClause+2*sizeof(struct index_t *));
|
||||||
@ -337,14 +319,11 @@ Yap_ExoLookup(PredEntry *ap USES_REGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (i) {
|
while (i) {
|
||||||
if (i->is_key) {
|
// if (i->is_key && (i->bmap & bmap) == i->bmap) {
|
||||||
if ((i->bmap & bmap) == i->bmap) {
|
// break;
|
||||||
break;
|
// }
|
||||||
}
|
if (i->bmap == bmap) {
|
||||||
} else {
|
break;
|
||||||
if (i->bmap == bmap) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ip = &i->next;
|
ip = &i->next;
|
||||||
i = i->next;
|
i = i->next;
|
||||||
@ -362,9 +341,9 @@ CELL
|
|||||||
Yap_NextExo(choiceptr cptr, struct index_t *it)
|
Yap_NextExo(choiceptr cptr, struct index_t *it)
|
||||||
{
|
{
|
||||||
CACHE_REGS
|
CACHE_REGS
|
||||||
CELL offset = EXO_ADDRESS_TO_OFFSET(it,(CELL *)((CELL *)(B+1))[it->arity]);
|
CELL offset = ADDRESS_TO_LINK(it,(CELL *)((CELL *)(B+1))[it->arity]);
|
||||||
CELL next = it->links[offset];
|
CELL next = it->links[offset];
|
||||||
((CELL *)(B+1))[it->arity] = (CELL)EXO_OFFSET_TO_ADDRESS(it, next);
|
((CELL *)(B+1))[it->arity] = (CELL)LINK_TO_ADDRESS(it, next);
|
||||||
S = it->cls+it->arity*offset;
|
S = it->cls+it->arity*offset;
|
||||||
return next;
|
return next;
|
||||||
}
|
}
|
||||||
|
30
H/clause.h
30
H/clause.h
@ -170,25 +170,43 @@ typedef struct index_t {
|
|||||||
UInt ntrys;
|
UInt ntrys;
|
||||||
UInt nentries;
|
UInt nentries;
|
||||||
UInt hsize;
|
UInt hsize;
|
||||||
CELL **key;
|
BITS32 *key;
|
||||||
CELL *cls;
|
CELL *cls;
|
||||||
CELL *links;
|
BITS32 *links;
|
||||||
size_t size;
|
size_t size;
|
||||||
yamop *code;
|
yamop *code;
|
||||||
} Index_t;
|
} Index_t;
|
||||||
|
|
||||||
INLINE_ONLY EXTERN inline UInt EXO_ADDRESS_TO_OFFSET(struct index_t *it, CELL *ptr);
|
INLINE_ONLY EXTERN inline BITS32 EXO_ADDRESS_TO_OFFSET(struct index_t *it, CELL *ptr);
|
||||||
|
|
||||||
INLINE_ONLY EXTERN inline UInt
|
INLINE_ONLY EXTERN inline BITS32
|
||||||
EXO_ADDRESS_TO_OFFSET(struct index_t *it, CELL* ptr)
|
EXO_ADDRESS_TO_OFFSET(struct index_t *it, CELL* ptr)
|
||||||
{
|
{
|
||||||
return ptr-it->links;
|
return 1+(ptr-it->cls);
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE_ONLY EXTERN inline CELL *EXO_OFFSET_TO_ADDRESS(struct index_t *it, UInt off);
|
INLINE_ONLY EXTERN inline CELL *EXO_OFFSET_TO_ADDRESS(struct index_t *it, UInt off);
|
||||||
|
|
||||||
INLINE_ONLY EXTERN inline CELL *
|
INLINE_ONLY EXTERN inline CELL *
|
||||||
EXO_OFFSET_TO_ADDRESS(struct index_t *it, UInt off)
|
EXO_OFFSET_TO_ADDRESS(struct index_t *it, BITS32 off)
|
||||||
|
{
|
||||||
|
if (off == 0L)
|
||||||
|
return NULL;
|
||||||
|
return (it->cls-1)+off;
|
||||||
|
}
|
||||||
|
|
||||||
|
INLINE_ONLY EXTERN inline BITS32 ADDRESS_TO_LINK(struct index_t *it, CELL *ptr);
|
||||||
|
|
||||||
|
INLINE_ONLY EXTERN inline BITS32
|
||||||
|
ADDRESS_TO_LINK(struct index_t *it, CELL* ptr)
|
||||||
|
{
|
||||||
|
return ptr-it->links;
|
||||||
|
}
|
||||||
|
|
||||||
|
INLINE_ONLY EXTERN inline CELL *LINK_TO_ADDRESS(struct index_t *it, BITS32 off);
|
||||||
|
|
||||||
|
INLINE_ONLY EXTERN inline CELL *
|
||||||
|
LINK_TO_ADDRESS(struct index_t *it, BITS32 off)
|
||||||
{
|
{
|
||||||
return it->links+off;
|
return it->links+off;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user