distributed config file

This commit is contained in:
Vítor Santos Costa 2013-11-03 14:13:08 +00:00
parent e423fc28e5
commit 1967e0c434
4 changed files with 358 additions and 34 deletions

View File

@ -0,0 +1,47 @@
dnl Configuration of the optional CUDA package (packages/cuda).
dnl
dnl   --with-cuda        look for nvcc in /usr/bin
dnl   --with-cuda=DIR    look for nvcc in DIR/bin
dnl   --without-cuda     do not build the package (this is the default)
dnl
dnl Output variables:
dnl   ENABLE_CUDA     empty when the package is built, "@# " otherwise
dnl   NVCC            full path of nvcc, or "no" when it was not found
dnl   CUDA_SHLIB_LD   command used to link the shared object
dnl   CUDA_CPPFLAGS   compilation flags handed to nvcc
dnl   CUDA_LDFLAGS    link flags
AC_ARG_WITH(cuda,
[  --with-cuda=DIR         build the CUDA package with the toolkit installed in DIR],
if test "$withval" = yes; then
    yap_cv_cuda=/usr
elif test "$withval" = no; then
    yap_cv_cuda=no
else
    yap_cv_cuda="$withval"
fi,
[yap_cv_cuda=no])

dnl Give every substituted variable a defined value, even when CUDA is disabled.
CUDA_LDFLAGS=""
CUDA_CPPFLAGS=""
CUDA_SHLIB_LD=""

if test "$yap_cv_cuda" = no
then
    ENABLE_CUDA="@# "
else
    AC_PATH_PROG(NVCC, [nvcc], [no], [$yap_cv_cuda/bin])
    dnl CUDA was requested at this point, so the thing left to verify is
    dnl that the compiler really exists: AC_PATH_PROG sets NVCC to "no"
    dnl when nvcc is not in the given directory.
    if test "$NVCC" = no
    then
        AC_MSG_WARN([nvcc not found in $yap_cv_cuda/bin: the CUDA package is disabled])
        ENABLE_CUDA="@# "
    else
        ENABLE_CUDA=""
        case "$target_os" in
        *darwin*)
            CUDA_LDFLAGS="$LDFLAGS"
            CUDA_CPPFLAGS="-arch=sm_20 -Xcompiler -fPIC -O3 "
            CUDA_SHLIB_LD="$NVCC -Xcompiler -dynamiclib -L../.. -lYap "
            ;;
        *)
            CUDA_LDFLAGS="$LDFLAGS $LIBS"
            CUDA_CPPFLAGS=" -arch=sm_20 -Xcompiler -fPIC -O3 "
            CUDA_SHLIB_LD="$NVCC -Xcompiler -export-dynamic"
            ;;
        esac
    fi
fi

AC_SUBST(ENABLE_CUDA)
AC_SUBST(NVCC)
AC_SUBST(CUDA_SHLIB_LD)
AC_SUBST(CUDA_CPPFLAGS)
AC_SUBST(CUDA_LDFLAGS)
AC_CONFIG_FILES([packages/cuda/Makefile])

View File

@ -926,7 +926,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
fin = rules.end();
nombres(rul_str, fin); /*preprocessing*/
movebpreds(rul_str, fin);
//movebpreds(rul_str, fin);
referencias(L.begin(), L.end(), rul_str, fin);
seleccion(rul_str, fin);
selfjoin(rul_str, fin);
@ -984,7 +984,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
rows1 = cargar(name1, filas1, cols1, isfact1, table1, &dop1, itr);
//cout << "rows1 = " << rows1 << endl;
// cout << "rows1 = " << rows1 << endl;
if(rows1 == 0)
{
@ -1012,7 +1012,6 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
{
/*int x, y;
cout << "antes = " << cols1 << " " << rows1 << endl;
int *hop1 = (int *)malloc(cols1 * rows1 * sizeof(int));
cudaMemcpy(hop1, dop1, cols1 * rows1 * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < rows1; x++)
@ -1086,6 +1085,27 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
num_refs = rul_act->num_rows - 1;
for(x = 2; x < num_refs; x++)
{
if (rul_act->address_host_table[x] < 0) {
#ifdef TIMER
cudaEvent_t start3, stop3;
cudaEventCreate(&start3);
cudaEventCreate(&stop3);
cudaEventRecord(start3, 0);
#endif
res_rows = bpreds(res, res_rows, rul_act->projpos[x-2].y, rul_act->builtin, rul_act->num_bpreds, &res);
#ifdef TIMER
cudaEventRecord(stop3, 0);
cudaEventSynchronize(stop3);
cudaEventElapsedTime(&time, start3, stop3);
cudaEventDestroy(start3);
cudaEventDestroy(stop3);
//cout << "Predicados = " << time << endl;
cuda_stats.pred_time += time;
#endif
continue;
}
tipo = rul_act->referencias[x];
if(tipo < 0)
{
@ -1108,42 +1128,43 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
rows2 = cargar(name2, filas2, cols2, isfact2, table2, &dop2, itr);
//cout << "rows = " << x << " " << rows2 << endl;
//out << "rows = " << x << " " << rows2 << endl;
if(rows2 == 0)
break;
cout << x << ": join = " << res_rows << "/" << rul_act->projpos[x-2].y << " " << rows2 << "/" << cols2 << endl;
res_rows = join(res, dop2, res_rows, rows2, rul_act->projpos[x-2].y, cols2, rul_act, x-1, 0, &res);
if(res_rows == 0)
break;
//cout << "resrows = " << res_rows << endl;
cout << x << ": resrows before = " << res_rows << " cols = " << rul_act->projpos[x-1].y << endl;
if (x < num_refs-1 && res_rows > 32) {
#ifdef TIMER
cudaEvent_t start2, stop2;
cudaEventCreate(&start2);
cudaEventCreate(&stop2);
cudaEventRecord(start2, 0);
#endif
res_rows = unir(res, res_rows, rul_act->projpos[x-1].y); /*Duplicate Elimination*/
#ifdef TIMER
cudaEventRecord(stop2, 0);
cudaEventSynchronize(stop2);
cudaEventElapsedTime(&time, start2, stop2);
cudaEventDestroy(start2);
cudaEventDestroy(stop2);
//cout << "Union = " << time << endl;
cuda_stats.union_time += time;
#endif
cout << "resrows after = " << res_rows << endl;
}
}
if(x == num_refs)
{
if(rul_act->num_bpreds.x > 0) /*Built-in predicates*/
{
#ifdef TIMER
cudaEvent_t start3, stop3;
cudaEventCreate(&start3);
cudaEventCreate(&stop3);
cudaEventRecord(start3, 0);
#endif
res_rows = bpreds(res, res_rows, rul_act->num_columns, rul_act->builtin, rul_act->num_bpreds, &res);
#ifdef TIMER
cudaEventRecord(stop3, 0);
cudaEventSynchronize(stop3);
cudaEventElapsedTime(&time, start3, stop3);
cudaEventDestroy(start3);
cudaEventDestroy(stop3);
//cout << "Predicados = " << time << endl;
cuda_stats.pred_time += time;
#endif
}
//cout << "antes de unir = " << res_rows << endl;
#ifdef TIMER

View File

@ -861,11 +861,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
memSizeS = newLen * sizeof(int);
reservar(&d_S, memSizeS);
#ifdef DEBUG_MEM
#if DEBUG_MEM
cerr << "+ " << d_S << " d_S " << memSizeS << endl;
#endif
reservar(&posS, memSizeS);
#ifdef DEBUG_MEM
#if DEBUG_MEM
cerr << "+ " << posS << " posS " << memSizeS << endl;
#endif
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);

View File

@ -13,6 +13,26 @@ typedef struct n3
int v[3];
}s3;
/* Row records of 4 to 7 int columns. unir() reinterprets its int buffer as an
   array of these so that a whole row can be sorted and compared as one element. */
typedef struct n4 { int v[4]; } s4;
typedef struct n5 { int v[5]; } s5;
typedef struct n6 { int v[6]; } s6;
typedef struct n7 { int v[7]; } s7;
struct p2
{
__host__ __device__
@ -77,13 +97,136 @@ struct o3
}
};
/* Equality predicate for 4-column rows: true only when every column of r1
   matches the corresponding column of r2. Passed to thrust::unique in unir(). */
struct p4
{
    __host__ __device__
    bool operator()(const s4 &r1, const s4 &r2)
    {
        int col = 0;
        /* Advance past the common prefix of the two rows. */
        while(col < 4 && r1.v[col] == r2.v[col])
            ++col;
        return col == 4;
    }
};
/* Ordering for 4-column rows: returns true when r1 is lexicographically
   greater than r2, and false when it is smaller or equal. Passed to
   thrust::sort in unir(). */
struct o4
{
    __host__ __device__
    bool operator()(const s4 &r1, const s4 &r2)
    {
        int col = 0;
        /* The first column where the rows differ decides the order. */
        while(col < 4 && r1.v[col] == r2.v[col])
            ++col;
        return col < 4 && r1.v[col] > r2.v[col];
    }
};
/* Equality predicate for 5-column rows: true only when every column of r1
   matches the corresponding column of r2. Passed to thrust::unique in unir(). */
struct p5
{
    __host__ __device__
    bool operator()(const s5 &r1, const s5 &r2)
    {
        int col = 0;
        /* Advance past the common prefix of the two rows. */
        while(col < 5 && r1.v[col] == r2.v[col])
            ++col;
        return col == 5;
    }
};
/* Ordering for 5-column rows: returns true when r1 is lexicographically
   greater than r2, and false when it is smaller or equal. Passed to
   thrust::sort in unir(). */
struct o5
{
    __host__ __device__
    bool operator()(const s5 &r1, const s5 &r2)
    {
        int col = 0;
        /* The first column where the rows differ decides the order. */
        while(col < 5 && r1.v[col] == r2.v[col])
            ++col;
        return col < 5 && r1.v[col] > r2.v[col];
    }
};
/* Equality predicate for 6-column rows: true only when every column of r1
   matches the corresponding column of r2. Passed to thrust::unique in unir(). */
struct p6
{
    __host__ __device__
    bool operator()(const s6 &r1, const s6 &r2)
    {
        int col = 0;
        /* Advance past the common prefix of the two rows. */
        while(col < 6 && r1.v[col] == r2.v[col])
            ++col;
        return col == 6;
    }
};
/* Ordering for 6-column rows: returns true when r1 is lexicographically
   greater than r2, and false when it is smaller or equal. Passed to
   thrust::sort in unir(). */
struct o6
{
    __host__ __device__
    bool operator()(const s6 &r1, const s6 &r2)
    {
        int col = 0;
        /* The first column where the rows differ decides the order. */
        while(col < 6 && r1.v[col] == r2.v[col])
            ++col;
        return col < 6 && r1.v[col] > r2.v[col];
    }
};
/* Equality predicate for 7-column rows: true only when every column of r1
   matches the corresponding column of r2. Passed to thrust::unique in unir(). */
struct p7
{
    __host__ __device__
    bool operator()(const s7 &r1, const s7 &r2)
    {
        int col = 0;
        /* Advance past the common prefix of the two rows. */
        while(col < 7 && r1.v[col] == r2.v[col])
            ++col;
        return col == 7;
    }
};
/* Ordering for 7-column rows: returns true when r1 is lexicographically
   greater than r2, and false when it is smaller or equal. Passed to
   thrust::sort in unir(). */
struct o7
{
    __host__ __device__
    bool operator()(const s7 &r1, const s7 &r2)
    {
        int col = 0;
        /* The first column where the rows differ decides the order. */
        while(col < 7 && r1.v[col] == r2.v[col])
            ++col;
        return col < 7 && r1.v[col] > r2.v[col];
    }
};
int unir(int *res, int rows, int tipo)
{
thrust::device_ptr<int> pt, re;
thrust::device_ptr<s2> pt2, re2;
thrust::device_ptr<s3> pt3, re3;
s2 *t2;
s3 *t3;
int flag, nrows;
#if TIMER
@ -93,6 +236,8 @@ int unir(int *res, int rows, int tipo)
{
case 1:
{
thrust::device_ptr<int> pt, re;
pt = thrust::device_pointer_cast(res);
flag = 0;
while(flag != 1)
@ -116,6 +261,8 @@ int unir(int *res, int rows, int tipo)
}
case 2:
{
thrust::device_ptr<s2> pt2, re2;
s2 *t2;
t2 = (s2*)res;
/*int *a, x, y;
@ -169,6 +316,8 @@ int unir(int *res, int rows, int tipo)
}
case 3:
{
thrust::device_ptr<s3> pt3, re3;
s3 *t3;
t3 = (s3*)res;
pt3 = thrust::device_pointer_cast(t3);
flag = 0;
@ -191,6 +340,113 @@ int unir(int *res, int rows, int tipo)
iVec.shrink_to_fit();
return nrows;
}
case 4:
{
    /* Duplicate elimination for rows of 4 columns: the int buffer is
       viewed as an array of s4 records so each row is one element. */
    thrust::device_ptr<s4> pt4, re4;
    pt4 = thrust::device_pointer_cast((s4*)res);
    flag = 0;
    while(flag != 1)
    {
        try
        {
            /* Sorting makes equal rows adjacent; unique then compacts
               the buffer in place and returns the new logical end. */
            thrust::sort(pt4, pt4 + rows, o4());
            re4 = thrust::unique(pt4, pt4 + rows, p4());
            flag = 1;
        }
        catch(const std::bad_alloc &)
        {
            /* Thrust could not allocate its temporary storage; limpiar is
               presumably expected to release device memory (confirm against
               its definition) before the operation is retried. */
            limpiar("sort/unique in unir", 0);
        }
    }
    /* The rows kept by unique occupy [pt4, re4). No temporary device_vector
       copy is made here: such a copy would be destroyed on return without
       touching res, while costing an extra device allocation that can itself
       throw bad_alloc outside the retry loop. */
    nrows = (int)thrust::distance(pt4, re4);
    return nrows;
}
case 5:
{
    /* Duplicate elimination for rows of 5 columns: the int buffer is
       viewed as an array of s5 records so each row is one element. */
    thrust::device_ptr<s5> pt5, re5;
    pt5 = thrust::device_pointer_cast((s5*)res);
    flag = 0;
    while(flag != 1)
    {
        try
        {
            /* Sorting makes equal rows adjacent; unique then compacts
               the buffer in place and returns the new logical end. */
            thrust::sort(pt5, pt5 + rows, o5());
            re5 = thrust::unique(pt5, pt5 + rows, p5());
            flag = 1;
        }
        catch(const std::bad_alloc &)
        {
            /* Thrust could not allocate its temporary storage; limpiar is
               presumably expected to release device memory (confirm against
               its definition) before the operation is retried. */
            limpiar("sort/unique in unir", 0);
        }
    }
    /* The rows kept by unique occupy [pt5, re5). No temporary device_vector
       copy is made here: such a copy would be destroyed on return without
       touching res, while costing an extra device allocation that can itself
       throw bad_alloc outside the retry loop. */
    nrows = (int)thrust::distance(pt5, re5);
    return nrows;
}
case 6:
{
    /* Duplicate elimination for rows of 6 columns: the int buffer is
       viewed as an array of s6 records so each row is one element. */
    thrust::device_ptr<s6> pt6, re6;
    pt6 = thrust::device_pointer_cast((s6*)res);
    flag = 0;
    while(flag != 1)
    {
        try
        {
            /* Sorting makes equal rows adjacent; unique then compacts
               the buffer in place and returns the new logical end. */
            thrust::sort(pt6, pt6 + rows, o6());
            re6 = thrust::unique(pt6, pt6 + rows, p6());
            flag = 1;
        }
        catch(const std::bad_alloc &)
        {
            /* Thrust could not allocate its temporary storage; limpiar is
               presumably expected to release device memory (confirm against
               its definition) before the operation is retried. */
            limpiar("sort/unique in unir", 0);
        }
    }
    /* The rows kept by unique occupy [pt6, re6). No temporary device_vector
       copy is made here: such a copy would be destroyed on return without
       touching res, while costing an extra device allocation that can itself
       throw bad_alloc outside the retry loop. */
    nrows = (int)thrust::distance(pt6, re6);
    return nrows;
}
case 7:
{
    /* Duplicate elimination for rows of 7 columns: the int buffer is
       viewed as an array of s7 records so each row is one element. */
    thrust::device_ptr<s7> pt7, re7;
    pt7 = thrust::device_pointer_cast((s7*)res);
    flag = 0;
    while(flag != 1)
    {
        try
        {
            /* Sorting makes equal rows adjacent; unique then compacts
               the buffer in place and returns the new logical end. */
            thrust::sort(pt7, pt7 + rows, o7());
            re7 = thrust::unique(pt7, pt7 + rows, p7());
            flag = 1;
        }
        catch(const std::bad_alloc &)
        {
            /* Thrust could not allocate its temporary storage; limpiar is
               presumably expected to release device memory (confirm against
               its definition) before the operation is retried. */
            limpiar("sort/unique in unir", 0);
        }
    }
    /* The rows kept by unique occupy [pt7, re7). No temporary device_vector
       copy is made here: such a copy would be destroyed on return without
       touching res, while costing an extra device allocation that can itself
       throw bad_alloc outside the retry loop. */
    nrows = (int)thrust::distance(pt7, re7);
    return nrows;
}
default:
cerr << "Union: " << tipo << " columns are too many." << endl;
exit(1);
}
return 0;
}