New version of term_hash, based on ideas from SWI-Prolog.

It handles infinite terms, and it seems to be a better hash.
This commit is contained in:
Vitor Santos Costa
2008-11-18 11:28:11 +00:00
parent 1b98de440d
commit 99c5cb3e36

View File

@@ -1338,6 +1338,9 @@ static Int var_in_complex_term(register CELL *pt0,
var_in_term_nvar: var_in_term_nvar:
{ {
if (IsPairTerm(d0)) { if (IsPairTerm(d0)) {
if (to_visit + 1024 >= (CELL **)AuxSp) {
goto aux_overflow;
}
#ifdef RATIONAL_TREES #ifdef RATIONAL_TREES
to_visit[0] = pt0; to_visit[0] = pt0;
to_visit[1] = pt0_end; to_visit[1] = pt0_end;
@@ -1353,6 +1356,7 @@ static Int var_in_complex_term(register CELL *pt0,
#endif #endif
pt0 = RepPair(d0) - 1; pt0 = RepPair(d0) - 1;
pt0_end = RepPair(d0) + 1; pt0_end = RepPair(d0) + 1;
continue;
} else if (IsApplTerm(d0)) { } else if (IsApplTerm(d0)) {
register Functor f; register Functor f;
register CELL *ap2; register CELL *ap2;
@@ -1364,6 +1368,9 @@ static Int var_in_complex_term(register CELL *pt0,
continue; continue;
} }
if (to_visit + 1024 >= (CELL **)AuxSp) {
goto aux_overflow;
}
#ifdef RATIONAL_TREES #ifdef RATIONAL_TREES
to_visit[0] = pt0; to_visit[0] = pt0;
to_visit[1] = pt0_end; to_visit[1] = pt0_end;
@@ -1412,6 +1419,18 @@ static Int var_in_complex_term(register CELL *pt0,
} }
clean_tr(TR0); clean_tr(TR0);
return FALSE; return FALSE;
aux_overflow:
/* unwind stack */
#ifdef RATIONAL_TREES
while (to_visit > to_visit0) {
to_visit -= 3;
pt0 = to_visit[0];
*pt0 = (CELL)to_visit[2];
}
#endif
return -1;
} }
static Int static Int
@@ -1441,84 +1460,288 @@ p_var_in_term(void)
/* This code with max_depth == -1 will loop for infinite trees */ /* This code with max_depth == -1 will loop for infinite trees */
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
//
// MurmurHashNeutral2: the byte-wise variant of MurmurHash2.  Every 32-bit
// word is assembled explicitly from four bytes (lowest address = least
// significant byte), so the key is never read through a wider pointer: it
// may sit at any alignment and the result does not depend on the byte order
// of the host.
//
// Assumptions and limitations:
//   1. unsigned int is 32 bits wide (the mixing relies on wrap-around).
//   2. It does not work incrementally: the whole key is hashed in one call.
//
//   key  - first byte of the data to hash
//   len  - number of bytes at key; nothing is read when len <= 0
//   seed - initial value mixed into the hash
//
// Returns the 32-bit hash of the key.

static unsigned int
MurmurHashNeutral2 ( const void * key, int len, unsigned int seed )
{
  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  unsigned int h = seed ^ len;

  const unsigned char * data = (const unsigned char *)key;

  /* body: mix one 4-byte word at a time */
  while(len >= 4)
    {
      unsigned int k;

      /* widen each byte to unsigned before shifting: an unsigned char is
	 promoted to (signed) int, and moving a byte >= 0x80 up by 24 bits
	 would shift into the sign bit */
      k  = (unsigned int)data[0];
      k |= (unsigned int)data[1] << 8;
      k |= (unsigned int)data[2] << 16;
      k |= (unsigned int)data[3] << 24;

      k *= m;
      k ^= k >> r;
      k *= m;

      h *= m;
      h ^= k;

      data += 4;
      len -= 4;
    }

  /* tail: the last 1..3 bytes; each case deliberately falls into the next */
  switch(len)
    {
    case 3: h ^= (unsigned int)data[2] << 16;
      /* fallthrough */
    case 2: h ^= (unsigned int)data[1] << 8;
      /* fallthrough */
    case 1: h ^= (unsigned int)data[0];
      h *= m;
      break;
    default:
      break;
    }

  /* final avalanche */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
/*
 * Append the characters of atom `at` to the hash buffer starting at `st`.
 *
 * The name (wide or narrow, without its terminating NUL) is copied into
 * whole cells: the last cell used is cleared first, so the bytes after the
 * end of the name are always zero.
 *
 * Returns the first free cell after the copied name.  An atom with an empty
 * name writes nothing and returns `st` unchanged; without that guard the
 * unsigned cell count would be 0 and st[len-1] would index far outside the
 * buffer.
 *
 * The caller must make sure the buffer has room for the whole name.
 */
static CELL *
AddAtomToHash(CELL *st, Atom at)
{
  unsigned int len;

  if (IsWideAtom(at)) {
    wchar_t *c = RepAtom(at)->WStrOfAE;
    int ulen = wcslen(c);
    unsigned int nbytes = ulen*sizeof(wchar_t);

    /* cells needed, rounding up */
    len = nbytes/CellSize;
    if (nbytes % CellSize != 0) {
      len++;
    }
    if (len == 0) {
      return st;
    }
    /* zero the padding of the last cell before copying */
    st[len-1] = 0L;
    memcpy((void *)st, (const void *)c, nbytes);
  } else {
    char *c = RepAtom(at)->StrOfAE;
    int ulen = strlen(c);

    /* cells needed, rounding up */
    len = ulen/CellSize;
    if (ulen % CellSize != 0) {
      len++;
    }
    if (len == 0) {
      return st;
    }
    /* zero the padding of the last cell before copying */
    st[len-1] = 0L;
    memcpy((void *)st, (const void *)c, ulen);
  }
  return st+len;
}
/*
 * Serialise the term cells in (pt0, pt0_end] into the cell buffer at `st`,
 * so that the caller can hash the resulting image.
 *
 * Atoms and functor names are appended with AddAtomToHash(), other
 * primitive terms through IntOfTerm(), and extension functors as handled in
 * the switch below.  Compound terms are walked iteratively; pending
 * siblings are kept on the stack obtained from Yap_PreAllocCodeSpace().
 * Under RATIONAL_TREES every compound cell being expanded is temporarily
 * overwritten with TermFoundVar (which is skipped when met again) and is
 * put back when its frame is popped.
 *
 * depth: a sub-term is not expanded when depth == 1.  depth is decremented
 *   on every descent and restored when the walk returns to saved siblings;
 *   a value that never reaches 1 imposes no limit.
 *
 * Returns the first free cell after the written data, or
 *   NULL        when an unbound variable is found,
 *   (CELL *)-1  when to_visit gets within 1024 entries of AuxSp,
 *   (CELL *)-2  when st gets within 1024 cells of ASP.
 * Every failure return first restores the cells still marked by
 * RATIONAL_TREES, so the term is left as it was found.
 */
static CELL *
hash_complex_term(register CELL *pt0,
		  register CELL *pt0_end,
		  Int depth,
		  CELL *st)
{
  register CELL **to_visit0, **to_visit = (CELL **)Yap_PreAllocCodeSpace();
  CELL *failure;

  to_visit0 = to_visit;
 loop:
  while (pt0 < pt0_end) {
    register CELL d0;
    register CELL *ptd0;

    ++ pt0;
    ptd0 = pt0;
    d0 = *ptd0;
    deref_head(d0, hash_complex_unk);
  hash_complex_nvar:
    {
      if (st + 1024 >= ASP) {
	goto global_overflow;
      }
      if (IsPrimitiveTerm(d0)) {
	/* TermFoundVar marks a cell that is already being expanded */
	if (d0 != TermFoundVar) {
	  if (IsAtomTerm(d0)) {
	    st = AddAtomToHash(st, AtomOfTerm(d0));
	  } else {
	    *st++ = IntOfTerm(d0);
	  }
	}
	continue;
      } else if (IsPairTerm(d0)) {
	st = AddAtomToHash(st, AtomDot);
	if (depth == 1)
	  continue;
	if (to_visit + 1024 >= (CELL **)AuxSp) {
	  goto aux_overflow;
	}
#ifdef RATIONAL_TREES
	to_visit[0] = pt0;
	to_visit[1] = pt0_end;
	to_visit[2] = (CELL *)*pt0;
	to_visit[3] = (CELL *)depth;
	to_visit += 4;
	*pt0 = TermFoundVar;
#else
	/* nothing to come back to when this was the last sibling */
	if (pt0 < pt0_end) {
	  to_visit[0] = pt0;
	  to_visit[1] = pt0_end;
	  to_visit[2] = (CELL *)depth;
	  to_visit += 3;
	}
#endif
	/* one level deeper, whether or not a frame was pushed */
	depth--;
	pt0 = RepPair(d0) - 1;
	pt0_end = RepPair(d0) + 1;
	continue;
      } else if (IsApplTerm(d0)) {
	register Functor f;
	register CELL *ap2;

	ap2 = RepAppl(d0);
	f = (Functor)(*ap2);

	if (IsExtensionFunctor(f)) {
	  /* NOTE(review): fc is taken from NameOfFunctor(f) yet it is
	     compared against Functor constants below -- confirm that
	     (CELL)f was not the intended value. */
	  CELL fc = (CELL)NameOfFunctor(f);

	  switch(fc) {
	  case (CELL)FunctorDBRef:
	    *st++ = fc;
	    break;
	  case (CELL)FunctorLongInt:
	    *st++ = LongIntOfTerm(d0);
	    break;
#ifdef USE_GMP
	  case (CELL)FunctorBigInt:
	    {
	      CELL *pt = RepAppl(d0);
	      Int sz =
		sizeof(MP_INT)+
		(((MP_INT *)(pt+1))->_mp_alloc*sizeof(mp_limb_t));
	      /* whole cells needed for sz bytes, rounding up */
	      Int cells = (sz + CellSize - 1)/CellSize;

	      if (st + (1024 + 1 + cells) >= ASP) {
		goto global_overflow;
	      }
	      /* first the tag ... */
	      *st++ = fc;
	      /* then the actual number, with the padding of the last cell
		 cleared so no stale bytes enter the hash */
	      st[cells-1] = 0L;
	      memcpy((void *)st, (void *)(pt+1), sz);
	      st += cells;
	    }
	    break;
#endif
	  case (CELL)FunctorDouble:
	    {
	      CELL *pt = RepAppl(d0);
	      *st++ = pt[1];
#if SIZEOF_DOUBLE == 2*SIZEOF_LONG_INT
	      *st++ = pt[2];
#endif
	      break;
	    }
	  default:
	    break;
	  }
	  continue;
	}
	st = AddAtomToHash(st, NameOfFunctor(f));
	if (depth == 1)
	  continue;
	if (to_visit + 1024 >= (CELL **)AuxSp) {
	  goto aux_overflow;
	}
#ifdef RATIONAL_TREES
	to_visit[0] = pt0;
	to_visit[1] = pt0_end;
	to_visit[2] = (CELL *)*pt0;
	to_visit[3] = (CELL *)depth;
	to_visit += 4;
	*pt0 = TermFoundVar;
#else
	/* store the terms to visit */
	if (pt0 < pt0_end) {
	  to_visit[0] = pt0;
	  to_visit[1] = pt0_end;
	  to_visit[2] = (CELL *)depth;
	  to_visit += 3;
	}
#endif
	/* one level deeper, whether or not a frame was pushed */
	depth--;
	d0 = ArityOfFunctor(f);
	pt0 = ap2;
	pt0_end = ap2 + d0;
      }
      continue;
    }

    deref_body(d0, ptd0, hash_complex_unk, hash_complex_nvar);
    /* unbound variable: give up, but undo the marks first */
    failure = NULL;
    goto unwind;
  }
  /* Do we still have compound terms to visit */
  if (to_visit > to_visit0) {
#ifdef RATIONAL_TREES
    to_visit -= 4;
    pt0 = to_visit[0];
    pt0_end = to_visit[1];
    *pt0 = (CELL)to_visit[2];
    depth = (CELL)to_visit[3];
#else
    to_visit -= 3;
    pt0 = to_visit[0];
    pt0_end = to_visit[1];
    depth = (CELL)to_visit[2];
#endif
    goto loop;
  }
  return st;

 aux_overflow:
  failure = (CELL *)-1;
  goto unwind;

 global_overflow:
  failure = (CELL *)-2;

 unwind:
  /* put back every cell that is still overwritten with TermFoundVar */
#ifdef RATIONAL_TREES
  while (to_visit > to_visit0) {
    to_visit -= 4;
    pt0 = to_visit[0];
    *pt0 = (CELL)to_visit[2];
  }
#endif
  return failure;
}
static Int static Int
GvNTermHash(void) p_term_hash(void)
{ {
unsigned int i1,i2,i3; unsigned int i1;
Term t1 = Deref(ARG1); Term t1 = Deref(ARG1);
Term t2 = Deref(ARG2); Term t2 = Deref(ARG2);
Term t3 = Deref(ARG3); Term t3 = Deref(ARG3);
Term result; Term result;
Int size, depth; Int size, depth;
if (IsVarTerm(t2)) { if (IsVarTerm(t2)) {
Yap_Error(INSTANTIATION_ERROR,t2,"term_hash/4"); Yap_Error(INSTANTIATION_ERROR,t2,"term_hash/4");
return(FALSE); return(FALSE);
@@ -1541,16 +1764,28 @@ GvNTermHash(void)
return(FALSE); return(FALSE);
} }
size = IntegerOfTerm(t3); size = IntegerOfTerm(t3);
GvNht[0] = 0; while (TRUE) {
GvNht[1] = 0; CELL *ar = hash_complex_term(&t1-1, &t1, depth, H);
GvNht[2] = 0; if (ar == (CELL *)-1) {
if (!Yap_ExpandPreAllocCodeSpace(0, NULL)) {
if (TermHash(t1,depth,0) == -1) return(TRUE); Yap_Error(OUT_OF_AUXSPACE_ERROR, ARG1, "overflow in term_hash");
return FALSE;
i1 = GvNht[0]; }
i2 = GvNht[1]; t1 = Deref(ARG1);
i3 = GvNht[2]; } else if(ar == (CELL *)-2) {
i2 ^= i3; i1 ^= i2; i1 = (((i3 << 7) + i2) << 7) + i1; if (!Yap_gcl((ASP-H)*sizeof(CELL), 4, ENV, gc_P(P,CP))) {
Yap_Error(OUT_OF_STACK_ERROR, TermNil, "in term_hash");
return FALSE;
}
t1 = Deref(ARG1);
} else if (ar == NULL) {
return FALSE;
} else {
i1 = MurmurHashNeutral2((const void *)H, CellSize*(ar-H),0x1a3be34a);
break;
}
}
/* got the seed and hash from SWI-Prolog */
result = MkIntegerTerm(i1 % size); result = MkIntegerTerm(i1 % size);
return Yap_unify(ARG4,result); return Yap_unify(ARG4,result);
} }
@@ -2109,7 +2344,7 @@ void Yap_InitUtilCPreds(void)
Yap_InitCPred("term_variables", 3, p_term_variables3, 0); Yap_InitCPred("term_variables", 3, p_term_variables3, 0);
CurrentModule = TERMS_MODULE; CurrentModule = TERMS_MODULE;
Yap_InitCPred("variable_in_term", 2, p_var_in_term, SafePredFlag); Yap_InitCPred("variable_in_term", 2, p_var_in_term, SafePredFlag);
Yap_InitCPred("term_hash", 4, GvNTermHash, SafePredFlag); Yap_InitCPred("term_hash", 4, p_term_hash, SafePredFlag);
Yap_InitCPred("variant", 2, p_variant, 0); Yap_InitCPred("variant", 2, p_variant, 0);
Yap_InitCPred("subsumes", 2, p_subsumes, SafePredFlag); Yap_InitCPred("subsumes", 2, p_subsumes, SafePredFlag);
Yap_InitCPred("protected_unifiable", 3, p_unifiable, 0); Yap_InitCPred("protected_unifiable", 3, p_unifiable, 0);