distributed config file

2013-11-03 14:13:08 +00:00 · 2013-11-03 14:13:08 +00:00 · 1967e0c434
commit 1967e0c434
parent e423fc28e5
4 changed files with 358 additions and 34 deletions
--- a/packages/cuda/configure.in
+++ b/packages/cuda/configure.in
@ -0,0 +1,47 @@
+dnl Optional CUDA support for the cuda package.
+dnl   --with-cuda        look for the CUDA toolkit under /usr
+dnl   --with-cuda=DIR    look for the CUDA toolkit under DIR
+dnl   --without-cuda     disable the package, which is also the default
+dnl The result is kept in yap_cv_cuda, either the toolkit prefix or the word no.
+AC_ARG_WITH(cuda,
+	[ --with-cuda=DIR         build the CUDA package using the toolkit installed under DIR],
+	if test "$withval" = yes; then
+		yap_cv_cuda=/usr
+	elif test "$withval" = no; then
+		yap_cv_cuda=no
+	else
+		yap_cv_cuda="$withval"
+	fi,
+	[yap_cv_cuda=no])
+
+
+CUDA_LDFLAGS=""
+CUDA_CPPFLAGS=""
+if test "$yap_cv_cuda" = no
+then
+  # Package not requested: ENABLE_CUDA becomes a prefix starting with a
+  # comment marker, presumably to switch off the guarded Makefile lines.
+  # NOTE(review): confirm against the Makefile template that consumes it.
+  ENABLE_CUDA="@# "
+else
+  # Look for nvcc only inside the bin directory of the requested prefix.
+  # NVCC is set to the word no when the compiler is not found there.
+  AC_PATH_PROG(NVCC, [nvcc], [no], [$yap_cv_cuda/bin])
+  if test "$NVCC" = no
+  then
+    # Without the compiler the package cannot be built, so disable it
+    # instead of emitting build rules that would invoke a program named no.
+    AC_MSG_WARN([nvcc not found in $yap_cv_cuda/bin, disabling the cuda package])
+    ENABLE_CUDA="@# "
+  else
+    ENABLE_CUDA=""
+    # Flags and shared library link command differ per target platform.
+    case "$target_os" in
+      *darwin*)
+        CUDA_LDFLAGS="$LDFLAGS"
+        CUDA_CPPFLAGS="-arch=sm_20  -Xcompiler -fPIC -O3 "
+        CUDA_SHLIB_LD="$NVCC  -Xcompiler -dynamiclib  -L../.. -lYap "
+        ;;
+      *)
+        CUDA_LDFLAGS="$LDFLAGS $LIBS"
+        CUDA_CPPFLAGS=" -arch=sm_20  -Xcompiler -fPIC -O3 "
+        CUDA_SHLIB_LD="$NVCC -Xcompiler -export-dynamic"
+        ;;
+    esac
+  fi
+fi
+
+dnl Export the results for substitution into the generated Makefile.
+AC_SUBST(ENABLE_CUDA)
+AC_SUBST(NVCC)
+AC_SUBST(CUDA_SHLIB_LD)
+AC_SUBST(CUDA_CPPFLAGS)
+AC_SUBST(CUDA_LDFLAGS)
+
+AC_CONFIG_FILES([packages/cuda/Makefile])
+
--- a/packages/cuda/lista.cu
+++ b/packages/cuda/lista.cu
@ -926,7 +926,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 	fin = rules.end();

 	nombres(rul_str, fin); /*preprocessing*/
-	movebpreds(rul_str, fin);
+	//movebpreds(rul_str, fin);
 	referencias(L.begin(), L.end(), rul_str, fin);
 	seleccion(rul_str, fin);
 	selfjoin(rul_str, fin);
@ -984,7 +984,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,

 			rows1 = cargar(name1, filas1, cols1, isfact1, table1, &dop1, itr);

-			//cout << "rows1 = " << rows1 << endl;
+			// cout << "rows1 = " << rows1  << endl;

 			if(rows1 == 0)
 			{
@ -1012,7 +1012,6 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 				{

 					/*int x, y;
-					cout << "antes = " << cols1 << " " << rows1 << endl;
 					int *hop1 = (int *)malloc(cols1 * rows1 * sizeof(int));
 					cudaMemcpy(hop1, dop1, cols1 * rows1 * sizeof(int), cudaMemcpyDeviceToHost);
 					for(x = 0; x < rows1; x++)
@ -1086,6 +1085,27 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 			num_refs = rul_act->num_rows - 1;
 			for(x = 2; x < num_refs; x++)
 			{
+			  if (rul_act->address_host_table[x] < 0) {
+					#ifdef TIMER
+					cudaEvent_t start3, stop3;
+					cudaEventCreate(&start3);
+					cudaEventCreate(&stop3);
+					cudaEventRecord(start3, 0);
+					#endif					
+				
+					res_rows = bpreds(res, res_rows, rul_act->projpos[x-2].y, rul_act->builtin, rul_act->num_bpreds, &res);
+
+					#ifdef TIMER
+					cudaEventRecord(stop3, 0);
+					cudaEventSynchronize(stop3);
+					cudaEventElapsedTime(&time, start3, stop3);
+					cudaEventDestroy(start3);
+					cudaEventDestroy(stop3);
+					//cout << "Predicados = " << time << endl;
+					cuda_stats.pred_time += time;
+					#endif
+			    continue;
+			  }
 				tipo = rul_act->referencias[x];
 				if(tipo < 0)
 				{
@ -1108,42 +1128,43 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,

 				rows2 = cargar(name2, filas2, cols2, isfact2, table2, &dop2, itr);

-				//cout << "rows = " << x << " " << rows2 << endl;
+				//out << "rows = " << x << " " << rows2 << endl;

 				if(rows2 == 0)
 					break;
+				cout << x << ": join = " << res_rows << "/" <<  rul_act->projpos[x-2].y << " " << rows2 << "/" << cols2 << endl;
 				res_rows = join(res, dop2, res_rows, rows2, rul_act->projpos[x-2].y, cols2, rul_act, x-1, 0, &res);
 				if(res_rows == 0)
 					break;
 				
-				//cout << "resrows = " << res_rows << endl;
+				cout << x << ": resrows before = " << res_rows << " cols = " <<  rul_act->projpos[x-1].y << endl;
+				if (x < num_refs-1 && res_rows > 32) {
+				  
+#ifdef TIMER
+				  cudaEvent_t start2, stop2;
+				  cudaEventCreate(&start2);
+				  cudaEventCreate(&stop2);
+				  cudaEventRecord(start2, 0);
+#endif
+
+				  res_rows = unir(res, res_rows, rul_act->projpos[x-1].y); /*Duplicate Elimination*/
+#ifdef TIMER
+				  cudaEventRecord(stop2, 0);
+				  cudaEventSynchronize(stop2);
+				  cudaEventElapsedTime(&time, start2, stop2);
+				  cudaEventDestroy(start2);
+				  cudaEventDestroy(stop2);
+				  //cout << "Union = " << time << endl;
+				  cuda_stats.union_time += time;
+#endif					
+	
+				  cout << "resrows after = " << res_rows << endl;
+				}

 			}

 			if(x == num_refs)
 			{
-				if(rul_act->num_bpreds.x > 0) /*Built-in predicates*/
-				{
-					#ifdef TIMER
-					cudaEvent_t start3, stop3;
-					cudaEventCreate(&start3);
-					cudaEventCreate(&stop3);
-					cudaEventRecord(start3, 0);
-					#endif					
-				
-					res_rows = bpreds(res, res_rows, rul_act->num_columns, rul_act->builtin, rul_act->num_bpreds, &res);
-
-					#ifdef TIMER
-					cudaEventRecord(stop3, 0);
-					cudaEventSynchronize(stop3);
-					cudaEventElapsedTime(&time, start3, stop3);
-					cudaEventDestroy(start3);
-					cudaEventDestroy(stop3);
-					//cout << "Predicados = " << time << endl;
-					cuda_stats.pred_time += time;
-					#endif
-				}
-
 				//cout << "antes de unir = " << res_rows << endl;

 				#ifdef TIMER
--- a/packages/cuda/treeb.cu
+++ b/packages/cuda/treeb.cu
@ -861,11 +861,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:

 		memSizeS = newLen * sizeof(int);
 		reservar(&d_S, memSizeS);
-#ifdef DEBUG_MEM
+#if DEBUG_MEM
 		cerr << "+ " << d_S << " d_S  " << memSizeS << endl;
 #endif
 		reservar(&posS, memSizeS);
-#ifdef DEBUG_MEM
+#if DEBUG_MEM
 		cerr << "+ " << posS << " posS  " << memSizeS << endl;
 #endif
 		llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
--- a/packages/cuda/union2.cu
+++ b/packages/cuda/union2.cu
@ -13,6 +13,26 @@ typedef struct n3
 	int v[3];
 }s3;

+/* Fixed-width row of four int columns; unir() reinterprets the int result
+   buffer as an array of these when it is called with tipo == 4. */
+typedef struct n4 { int v[4]; } s4;
+
+/* Fixed-width row of five int columns; unir() reinterprets the int result
+   buffer as an array of these when it is called with tipo == 5. */
+typedef struct n5 { int v[5]; } s5;
+
+/* Fixed-width row of six int columns; unir() reinterprets the int result
+   buffer as an array of these when it is called with tipo == 6. */
+typedef struct n6 { int v[6]; } s6;
+
+/* Fixed-width row of seven int columns; unir() reinterprets the int result
+   buffer as an array of these when it is called with tipo == 7. */
+typedef struct n7 { int v[7]; } s7;
+
 struct p2
 {
 	__host__ __device__
@ -77,13 +97,136 @@ struct o3
    	}
 };

+/* Equality predicate over s4 rows: true only when all four columns match.
+   unir() passes it to thrust::unique for 4-column results. */
+struct p4
+{
+	__host__ __device__
+	bool operator()(const s4 &r1, const s4 &r2)
+	{
+		bool same = true;
+		/* Stop scanning as soon as one column differs. */
+		for(int col = 0; same && col < 4; ++col)
+			same = (r1.v[col] == r2.v[col]);
+		return same;
+	}
+};
+
+/* Ordering functor over s4 rows, compared column by column.
+   Returns true when r1 holds the larger value at the first column where the
+   rows differ, and false when the rows are equal; as the comparator of
+   thrust::sort in unir() this places larger rows first. */
+struct o4
+{
+	__host__ __device__
+	bool operator()(const s4 &r1, const s4 &r2)
+	{
+		int col = 0;
+		/* Skip the common prefix of equal columns. */
+		while(col < 4 && r1.v[col] == r2.v[col])
+			++col;
+		/* col == 4 means the rows are identical, which is not "before". */
+		return col < 4 && r1.v[col] > r2.v[col];
+	}
+};
+
+/* Equality predicate over s5 rows: true only when all five columns match.
+   unir() passes it to thrust::unique for 5-column results. */
+struct p5
+{
+	__host__ __device__
+	bool operator()(const s5 &r1, const s5 &r2)
+	{
+		bool same = true;
+		/* Stop scanning as soon as one column differs. */
+		for(int col = 0; same && col < 5; ++col)
+			same = (r1.v[col] == r2.v[col]);
+		return same;
+	}
+};
+
+/* Ordering functor over s5 rows, compared column by column.
+   Returns true when r1 holds the larger value at the first column where the
+   rows differ, and false when the rows are equal; as the comparator of
+   thrust::sort in unir() this places larger rows first. */
+struct o5
+{
+	__host__ __device__
+	bool operator()(const s5 &r1, const s5 &r2)
+	{
+		int col = 0;
+		/* Skip the common prefix of equal columns. */
+		while(col < 5 && r1.v[col] == r2.v[col])
+			++col;
+		/* col == 5 means the rows are identical, which is not "before". */
+		return col < 5 && r1.v[col] > r2.v[col];
+	}
+};
+
+/* Equality predicate over s6 rows: true only when all six columns match.
+   unir() passes it to thrust::unique for 6-column results. */
+struct p6
+{
+	__host__ __device__
+	bool operator()(const s6 &r1, const s6 &r2)
+	{
+		bool same = true;
+		/* Stop scanning as soon as one column differs. */
+		for(int col = 0; same && col < 6; ++col)
+			same = (r1.v[col] == r2.v[col]);
+		return same;
+	}
+};
+
+/* Ordering functor over s6 rows, compared column by column.
+   Returns true when r1 holds the larger value at the first column where the
+   rows differ, and false when the rows are equal; as the comparator of
+   thrust::sort in unir() this places larger rows first. */
+struct o6
+{
+	__host__ __device__
+	bool operator()(const s6 &r1, const s6 &r2)
+	{
+		int col = 0;
+		/* Skip the common prefix of equal columns. */
+		while(col < 6 && r1.v[col] == r2.v[col])
+			++col;
+		/* col == 6 means the rows are identical, which is not "before". */
+		return col < 6 && r1.v[col] > r2.v[col];
+	}
+};
+
+/* Equality predicate over s7 rows: true only when all seven columns match.
+   unir() passes it to thrust::unique for 7-column results. */
+struct p7
+{
+	__host__ __device__
+	bool operator()(const s7 &r1, const s7 &r2)
+	{
+		bool same = true;
+		/* Stop scanning as soon as one column differs. */
+		for(int col = 0; same && col < 7; ++col)
+			same = (r1.v[col] == r2.v[col]);
+		return same;
+	}
+};
+
+/* Ordering functor over s7 rows, compared column by column.
+   Returns true when r1 holds the larger value at the first column where the
+   rows differ, and false when the rows are equal; as the comparator of
+   thrust::sort in unir() this places larger rows first. */
+struct o7
+{
+	__host__ __device__
+	bool operator()(const s7 &r1, const s7 &r2)
+	{
+		int col = 0;
+		/* Skip the common prefix of equal columns. */
+		while(col < 7 && r1.v[col] == r2.v[col])
+			++col;
+		/* col == 7 means the rows are identical, which is not "before". */
+		return col < 7 && r1.v[col] > r2.v[col];
+	}
+};
+
 int unir(int *res, int rows, int tipo)
 {
-	thrust::device_ptr<int> pt, re;
-	thrust::device_ptr<s2> pt2, re2;
-	thrust::device_ptr<s3> pt3, re3;
-	s2 *t2;
-	s3 *t3;
 	int flag, nrows;

 #if TIMER
@ -93,6 +236,8 @@ int unir(int *res, int rows, int tipo)
 	{
 		case 1: 
 		{
+			thrust::device_ptr<int> pt, re;
+
 			pt = thrust::device_pointer_cast(res);
 			flag = 0;
 			while(flag != 1)
@ -116,6 +261,8 @@ int unir(int *res, int rows, int tipo)
 		}			
 		case 2: 
 		{
+			thrust::device_ptr<s2> pt2, re2;
+			s2 *t2;
 			t2 = (s2*)res;
 			
 			/*int *a, x, y;
@ -169,6 +316,8 @@ int unir(int *res, int rows, int tipo)
 		}
 		case 3: 
 		{
+			thrust::device_ptr<s3> pt3, re3;
+			s3 *t3;
 			t3 = (s3*)res;
 			pt3 = thrust::device_pointer_cast(t3);
 			flag = 0;
@ -191,6 +340,113 @@ int unir(int *res, int rows, int tipo)
 			iVec.shrink_to_fit();
 			return nrows;
 		}
+		case 4: 
+		{
+			thrust::device_ptr<s4> pt4, re4;
+			s4 *t4;
+			t4 = (s4*)res;
+			pt4 = thrust::device_pointer_cast(t4);
+			flag = 0;
+			while(flag != 1)
+			{
+				try
+				{
+					thrust::sort(pt4, pt4 + rows, o4());
+					re4 = thrust::unique(pt4, pt4 + rows, p4());
+					flag = 1;
+				}
+				catch(std::bad_alloc &e)
+				{
+					limpiar("sort/unique in unir", 0);
+				}				
+			}
+			nrows = thrust::distance(pt4, re4);
+			thrust::device_vector<s4> iVec(pt4, pt4 + rows);
+			iVec.resize(nrows);
+			iVec.shrink_to_fit();
+			return nrows;
+		}
+		case 5: 
+		{
+			thrust::device_ptr<s5> pt5, re5;
+			s5 *t5;
+			t5 = (s5*)res;
+			pt5 = thrust::device_pointer_cast(t5);
+			flag = 0;
+			while(flag != 1)
+			{
+				try
+				{
+					thrust::sort(pt5, pt5 + rows, o5());
+					re5 = thrust::unique(pt5, pt5 + rows, p5());
+					flag = 1;
+				}
+				catch(std::bad_alloc &e)
+				{
+					limpiar("sort/unique in unir", 0);
+				}				
+			}
+			nrows = thrust::distance(pt5, re5);
+			thrust::device_vector<s5> iVec(pt5, pt5 + rows);
+			iVec.resize(nrows);
+			iVec.shrink_to_fit();
+			return nrows;
+		}
+		case 6: 
+		{
+			thrust::device_ptr<s6> pt6, re6;
+			s6 *t6;
+			t6 = (s6*)res;
+			pt6 = thrust::device_pointer_cast(t6);
+			flag = 0;
+			while(flag != 1)
+			{
+				try
+				{
+					thrust::sort(pt6, pt6 + rows, o6());
+					re6 = thrust::unique(pt6, pt6 + rows, p6());
+					flag = 1;
+				}
+				catch(std::bad_alloc &e)
+				{
+					limpiar("sort/unique in unir", 0);
+				}				
+			}
+			nrows = thrust::distance(pt6, re6);
+			thrust::device_vector<s6> iVec(pt6, pt6 + rows);
+			iVec.resize(nrows);
+			iVec.shrink_to_fit();
+			return nrows;
+		}
+		case 7: 
+		{
+			thrust::device_ptr<s7> pt7, re7;
+			s7 *t7;
+			t7 = (s7*)res;
+			pt7 = thrust::device_pointer_cast(t7);
+			flag = 0;
+			while(flag != 1)
+			{
+				try
+				{
+					thrust::sort(pt7, pt7 + rows, o7());
+					re7 = thrust::unique(pt7, pt7 + rows, p7());
+					flag = 1;
+				}
+				catch(std::bad_alloc &e)
+				{
+					limpiar("sort/unique in unir", 0);
+				}				
+			}
+			nrows = thrust::distance(pt7, re7);
+			thrust::device_vector<s7> iVec(pt7, pt7 + rows);
+			iVec.resize(nrows);
+			iVec.shrink_to_fit();
+			return nrows;
+		}
+	default:
+	  cerr << "Union: " << tipo << " columns are too many." << endl;
+	  exit(1);
 	}
 	return 0;
 }