cuda fixes by Carlos and other diverse (fact init, configure).
This commit is contained in:
parent
0dc78f106b
commit
691e977a68
4
configure
vendored
4
configure
vendored
@ -6969,12 +6969,12 @@ fi
|
|||||||
then
|
then
|
||||||
ENABLE_CUDA="@# "
|
ENABLE_CUDA="@# "
|
||||||
else
|
else
|
||||||
ENABLE_CUDA="@# "
|
ENABLE_CUDA=""
|
||||||
case "$target_os" in
|
case "$target_os" in
|
||||||
*darwin*)
|
*darwin*)
|
||||||
CUDA_LDFLAGS="$LDFLAGS"
|
CUDA_LDFLAGS="$LDFLAGS"
|
||||||
CUDA_CPPFLAGS="-shared -arch=sm_20 -Xcompiler -fPIC -O3 "
|
CUDA_CPPFLAGS="-shared -arch=sm_20 -Xcompiler -fPIC -O3 "
|
||||||
CUDA_SHLIB_LD="$NVCC -Xcompiler -dynamiclib -L../.. -lYAP "
|
CUDA_SHLIB_LD="$NVCC -Xcompiler -dynamiclib -L../.. -lYap "
|
||||||
;;
|
;;
|
||||||
**)
|
**)
|
||||||
CUDA_LDFLAGS="$LDFLAGS $LIBS"
|
CUDA_LDFLAGS="$LDFLAGS $LIBS"
|
||||||
|
@ -915,7 +915,7 @@ else
|
|||||||
*darwin*)
|
*darwin*)
|
||||||
CUDA_LDFLAGS="$LDFLAGS"
|
CUDA_LDFLAGS="$LDFLAGS"
|
||||||
CUDA_CPPFLAGS="-shared -arch=sm_20 -Xcompiler -fPIC -O3 "
|
CUDA_CPPFLAGS="-shared -arch=sm_20 -Xcompiler -fPIC -O3 "
|
||||||
CUDA_SHLIB_LD="$NVCC -Xcompiler -dynamiclib -L../.. -lYAP "
|
CUDA_SHLIB_LD="$NVCC -Xcompiler -dynamiclib -L../.. -lYap "
|
||||||
;;
|
;;
|
||||||
**)
|
**)
|
||||||
CUDA_LDFLAGS="$LDFLAGS $LIBS"
|
CUDA_LDFLAGS="$LDFLAGS $LIBS"
|
||||||
|
@ -85,7 +85,6 @@ in_hash(ADDR key)
|
|||||||
static inline atom_t
|
static inline atom_t
|
||||||
AtomToSWIAtom(Atom at)
|
AtomToSWIAtom(Atom at)
|
||||||
{
|
{
|
||||||
atom_t ats;
|
|
||||||
TranslationEntry *p;
|
TranslationEntry *p;
|
||||||
|
|
||||||
if ((p = Yap_GetTranslationProp(at)) != NULL)
|
if ((p = Yap_GetTranslationProp(at)) != NULL)
|
||||||
@ -96,7 +95,7 @@ AtomToSWIAtom(Atom at)
|
|||||||
static inline Atom
|
static inline Atom
|
||||||
SWIAtomToAtom(atom_t at)
|
SWIAtomToAtom(atom_t at)
|
||||||
{
|
{
|
||||||
if ((CELL)at < 2*N_SWI_ATOMS+1)
|
if ((CELL)at & 1)
|
||||||
return SWI_Atoms[at/2];
|
return SWI_Atoms[at/2];
|
||||||
return (Atom)at;
|
return (Atom)at;
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int bpreds(int *dop1, int rows, int *bin, int3 numpreds, int **ret)
|
int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
|
||||||
{
|
{
|
||||||
int *temp;
|
int *temp;
|
||||||
int tmplen = rows + 1;
|
int tmplen = rows + 1;
|
||||||
@ -124,6 +124,8 @@ int bpreds(int *dop1, int rows, int *bin, int3 numpreds, int **ret)
|
|||||||
|
|
||||||
liberar(dhead, hsize);
|
liberar(dhead, hsize);
|
||||||
liberar(temp, size);
|
liberar(temp, size);
|
||||||
|
liberar(dop1, rows * cols * sizeof(int));
|
||||||
|
|
||||||
*ret = fres;
|
*ret = fres;
|
||||||
return num;
|
return num;
|
||||||
}
|
}
|
||||||
|
@ -158,6 +158,69 @@ load_facts( void ) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int currentFact = 0;
|
||||||
|
static predicate *currentPred = NULL;
|
||||||
|
|
||||||
|
static int
|
||||||
|
cuda_init_facts( void ) {
|
||||||
|
|
||||||
|
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||||
|
int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
|
||||||
|
int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
|
||||||
|
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
|
||||||
|
predicate *pred;
|
||||||
|
|
||||||
|
if (!mat)
|
||||||
|
return FALSE;
|
||||||
|
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||||
|
// new
|
||||||
|
pred = (predicate *)malloc(sizeof(predicate));
|
||||||
|
} else {
|
||||||
|
pred = (predicate *)YAP_IntOfTerm(YAP_ARG4);
|
||||||
|
if (pred->address_host_table)
|
||||||
|
free( pred->address_host_table );
|
||||||
|
}
|
||||||
|
pred->name = pname;
|
||||||
|
pred->num_rows = nrows;
|
||||||
|
pred->num_columns = ncols;
|
||||||
|
pred->is_fact = TRUE;
|
||||||
|
pred->address_host_table = mat;
|
||||||
|
currentPred = pred;
|
||||||
|
currentFact = 0;
|
||||||
|
|
||||||
|
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||||
|
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
|
||||||
|
} else {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
cuda_load_fact( void ) {
|
||||||
|
YAP_Term th = YAP_ARG1;
|
||||||
|
|
||||||
|
int i, j;
|
||||||
|
int ncols = currentPred->num_columns;
|
||||||
|
int *mat = currentPred->address_host_table;
|
||||||
|
i = currentFact;
|
||||||
|
for (j = 0; j < ncols; j++) {
|
||||||
|
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||||
|
if (YAP_IsAtomTerm(ta)) {
|
||||||
|
mat[i*ncols+j] = YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||||
|
} else {
|
||||||
|
mat[i*ncols+j] = YAP_IntOfTerm(ta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
if (i == currentPred->num_rows) {
|
||||||
|
Cuda_NewFacts(currentPred);
|
||||||
|
currentPred = NULL;
|
||||||
|
currentFact = 0;
|
||||||
|
} else {
|
||||||
|
currentFact = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
load_rule( void ) {
|
load_rule( void ) {
|
||||||
// maximum of 2K symbols per rule, should be enough for ILP
|
// maximum of 2K symbols per rule, should be enough for ILP
|
||||||
@ -264,7 +327,8 @@ cuda_eval( void )
|
|||||||
}
|
}
|
||||||
out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);
|
out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);
|
||||||
}
|
}
|
||||||
free( mat );
|
if (n > 0)
|
||||||
|
free( mat );
|
||||||
return YAP_Unify(YAP_ARG2, out);
|
return YAP_Unify(YAP_ARG2, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -276,10 +340,16 @@ cuda_coverage( void )
|
|||||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
||||||
int32_t ncols = ptr->num_columns;
|
int32_t ncols = ptr->num_columns;
|
||||||
int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
|
int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
|
||||||
int32_t i = n/2, min = 0, max = n-1, t0 = mat[0], t1 = mat[(n-1)*2];
|
int32_t i = n/2, min = 0, max = n-1;
|
||||||
|
int32_t t0, t1;
|
||||||
|
|
||||||
if (n < 0)
|
if (n < 0)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
if (n == 0) {
|
||||||
|
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm(0)) &&
|
||||||
|
YAP_Unify(YAP_ARG3, YAP_MkIntTerm(0));
|
||||||
|
}
|
||||||
|
t0 = mat[0], t1 = mat[(n-1)*2];
|
||||||
if (t0 == t1) { /* all sametype */
|
if (t0 == t1) { /* all sametype */
|
||||||
free( mat );
|
free( mat );
|
||||||
/* all pos */
|
/* all pos */
|
||||||
@ -337,6 +407,8 @@ init_cuda(void)
|
|||||||
AtomLe = YAP_LookupAtom("=<");
|
AtomLe = YAP_LookupAtom("=<");
|
||||||
AtomDf = YAP_LookupAtom("\\=");
|
AtomDf = YAP_LookupAtom("\\=");
|
||||||
YAP_UserCPredicate("load_facts", load_facts, 4);
|
YAP_UserCPredicate("load_facts", load_facts, 4);
|
||||||
|
YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
|
||||||
|
YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
|
||||||
YAP_UserCPredicate("load_rule", load_rule, 4);
|
YAP_UserCPredicate("load_rule", load_rule, 4);
|
||||||
YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
|
YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
|
||||||
YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
|
YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
|
||||||
|
@ -21,9 +21,21 @@ cuda_inline(P, Q) :-
|
|||||||
cuda_extensional( Call, IdFacts) :-
|
cuda_extensional( Call, IdFacts) :-
|
||||||
strip_module(Call, Mod, Name/Arity),
|
strip_module(Call, Mod, Name/Arity),
|
||||||
functor(S, Name, Arity),
|
functor(S, Name, Arity),
|
||||||
findall( S, Mod:S, L),
|
count_answers( Mod:S, N),
|
||||||
length( L, N ),
|
% reserve space
|
||||||
load_facts( N, Arity, L, IdFacts ).
|
cuda_init_facts( N, Arity, Name, IdFacts ),
|
||||||
|
% fill it out
|
||||||
|
( Mod:S, cuda_load_fact(S), fail ; true ).
|
||||||
|
|
||||||
|
count_answers(G, N) :-
|
||||||
|
S = count(0),
|
||||||
|
(
|
||||||
|
G,
|
||||||
|
arg(1, S, I0),
|
||||||
|
I is I0+1,
|
||||||
|
nb_setarg(1, S, I),
|
||||||
|
fail ;
|
||||||
|
S = count(N) ).
|
||||||
|
|
||||||
cuda_rule((Head :- Body) , IdRules) :-
|
cuda_rule((Head :- Body) , IdRules) :-
|
||||||
body_to_list( Body, L, [], 1, N),
|
body_to_list( Body, L, [], 1, N),
|
||||||
|
@ -264,7 +264,7 @@ int2 columnsproject(int *first, int tam, int *rule, int ini, int fin, int sini,
|
|||||||
temp = first[x];
|
temp = first[x];
|
||||||
for(y = 0; y < ini; y++)
|
for(y = 0; y < ini; y++)
|
||||||
{
|
{
|
||||||
if(temp == rule[y])
|
if(temp == rule[y] && temp > 0) /*added condition to avoid constants*/
|
||||||
{
|
{
|
||||||
if(notin(temp, pv, ret.y))
|
if(notin(temp, pv, ret.y))
|
||||||
{
|
{
|
||||||
@ -279,7 +279,7 @@ int2 columnsproject(int *first, int tam, int *rule, int ini, int fin, int sini,
|
|||||||
continue;
|
continue;
|
||||||
for(y = sfin + 1; y < fin; y++)
|
for(y = sfin + 1; y < fin; y++)
|
||||||
{
|
{
|
||||||
if(temp == rule[y])
|
if(temp == rule[y] && temp > 0)
|
||||||
{
|
{
|
||||||
if(notin(temp, pv, ret.y))
|
if(notin(temp, pv, ret.y))
|
||||||
{
|
{
|
||||||
@ -298,7 +298,7 @@ int2 columnsproject(int *first, int tam, int *rule, int ini, int fin, int sini,
|
|||||||
temp = rule[x];
|
temp = rule[x];
|
||||||
for(y = 0; y < ini; y++)
|
for(y = 0; y < ini; y++)
|
||||||
{
|
{
|
||||||
if(temp == rule[y])
|
if(temp == rule[y] && temp > 0)
|
||||||
{
|
{
|
||||||
if(notin(temp, pv, ret.y))
|
if(notin(temp, pv, ret.y))
|
||||||
{
|
{
|
||||||
@ -313,7 +313,7 @@ int2 columnsproject(int *first, int tam, int *rule, int ini, int fin, int sini,
|
|||||||
continue;
|
continue;
|
||||||
for(y = sfin + 1; y < fin; y++)
|
for(y = sfin + 1; y < fin; y++)
|
||||||
{
|
{
|
||||||
if(temp == rule[y])
|
if(temp == rule[y] && temp > 0)
|
||||||
{
|
{
|
||||||
if(notin(temp, pv, ret.y))
|
if(notin(temp, pv, ret.y))
|
||||||
{
|
{
|
||||||
@ -1083,6 +1083,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
//cout << "resrows = " << res_rows << endl;
|
//cout << "resrows = " << res_rows << endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(x == num_refs)
|
if(x == num_refs)
|
||||||
{
|
{
|
||||||
if(rul_act->num_bpreds.x > 0) /*Built-in predicates*/
|
if(rul_act->num_bpreds.x > 0) /*Built-in predicates*/
|
||||||
@ -1094,7 +1095,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
cudaEventRecord(start3, 0);
|
cudaEventRecord(start3, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
res_rows = bpreds(res, res_rows, rul_act->builtin, rul_act->num_bpreds, &res);
|
res_rows = bpreds(res, res_rows, rul_act->num_columns, rul_act->builtin, rul_act->num_bpreds, &res);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
cudaEventRecord(stop3, 0);
|
cudaEventRecord(stop3, 0);
|
||||||
@ -1115,7 +1116,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
cudaEventRecord(start2, 0);
|
cudaEventRecord(start2, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
res_rows = unir(res, res_rows, rul_act->num_columns, &res); /*Duplicate Elimination*/
|
res_rows = unir(res, res_rows, rul_act->num_columns); /*Duplicate Elimination*/
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
cudaEventRecord(stop2, 0);
|
cudaEventRecord(stop2, 0);
|
||||||
@ -1223,6 +1224,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
|
|
||||||
itr++;
|
itr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
tmprule.name = qname;
|
tmprule.name = qname;
|
||||||
qposr = lower_bound(rul_str, fin, tmprule, comparer);
|
qposr = lower_bound(rul_str, fin, tmprule, comparer);
|
||||||
if(qposr != fin && qposr->name == qname)
|
if(qposr != fin && qposr->name == qname)
|
||||||
@ -1246,14 +1248,19 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
res = dop1;
|
res = dop1;
|
||||||
res_rows = rows1;
|
res_rows = rows1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
res_rows = selectproyect(dop1, rows1, cols1, tmprule.num_columns, tmprule.select[0], tmprule.numsel[0], tmprule.selfjoin[0], tmprule.numselfj[0], tmprule.project[0], &res);
|
res_rows = selectproyect(dop1, rows1, cols1, tmprule.num_columns, tmprule.select[0], tmprule.numsel[0], tmprule.selfjoin[0], tmprule.numselfj[0], tmprule.project[0], &res);
|
||||||
|
if(qposr != fin && qposr->name == qname)
|
||||||
|
cudaFree(dop1);
|
||||||
|
}
|
||||||
|
|
||||||
cols1 = tmprule.num_columns;
|
cols1 = tmprule.num_columns;
|
||||||
tipo = res_rows * cols1 * sizeof(int);
|
tipo = res_rows * cols1 * sizeof(int);
|
||||||
hres = (int *)malloc(tipo);
|
hres = (int *)malloc(tipo);
|
||||||
cudaMemcpy(hres, res, tipo, cudaMemcpyDeviceToHost);
|
cudaMemcpy(hres, res, tipo, cudaMemcpyDeviceToHost);
|
||||||
cudaFree(res);
|
if(res_rows > 0 && tmprule.numsel[0] != 0 && tmprule.numselfj[0] != 0)
|
||||||
|
cudaFree(res);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
res_rows = 0;
|
res_rows = 0;
|
||||||
|
@ -187,13 +187,21 @@ void reservar(int **ptr, int size)
|
|||||||
//cudaMemGetInfo( &free, &total );
|
//cudaMemGetInfo( &free, &total );
|
||||||
// cerr << "R " << free << " " << size << endl;
|
// cerr << "R " << free << " " << size << endl;
|
||||||
|
|
||||||
|
if (size == 0) {
|
||||||
|
*ptr = NULL;
|
||||||
|
return;
|
||||||
|
}
|
||||||
while(avmem < size)
|
while(avmem < size)
|
||||||
limpiar("not enough memory");
|
limpiar("not enough memory");
|
||||||
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
||||||
limpiar("error in memory allocation");
|
limpiar("error in memory allocation");
|
||||||
if (! *ptr )
|
if (! *ptr ) {
|
||||||
exit(0);
|
size_t free, total;
|
||||||
|
cudaMemGetInfo( &free, &total );
|
||||||
|
cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;
|
||||||
|
cerr << "Exiting CUDA...." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
// cerr << *ptr << " " << size;
|
// cerr << *ptr << " " << size;
|
||||||
avmem -= size;
|
avmem -= size;
|
||||||
|
|
||||||
|
@ -1126,7 +1126,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
reservar(&d_Rout, resSize);
|
reservar(&d_Rout, resSize);
|
||||||
if(numj > 2)
|
if(numj > 2)
|
||||||
{
|
{
|
||||||
cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||||
multiJoinWithWrite<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
|
multiJoinWithWrite<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -77,7 +77,7 @@ struct o3
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int unir(int *res, int rows, int tipo, int **ret)
|
int unir(int *res, int rows, int tipo)
|
||||||
{
|
{
|
||||||
thrust::device_ptr<int> pt, re;
|
thrust::device_ptr<int> pt, re;
|
||||||
thrust::device_ptr<s2> pt2, re2;
|
thrust::device_ptr<s2> pt2, re2;
|
||||||
|
Reference in New Issue
Block a user