From f6bc5ab918defbfa6caa0f5bcb145e5bda676042 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=ADtor=20Santos=20Costa?=
Date: Wed, 9 Oct 2013 11:23:45 +0100
Subject: [PATCH] more fixes; get coverage quickly

---
 packages/cuda/cuda.c    | 43 +++++++++++++++++++++
 packages/cuda/cuda.yap  |  1 +
 packages/cuda/lista.cu  | 85 ++++++++++++++++++++++++++++++++++++++++-
 packages/cuda/memory.cu | 45 +++++++++++-----------
 4 files changed, 149 insertions(+), 25 deletions(-)

diff --git a/packages/cuda/cuda.c b/packages/cuda/cuda.c
index 7885fd2a5..0010b7eac 100644
--- a/packages/cuda/cuda.c
+++ b/packages/cuda/cuda.c
@@ -268,6 +268,48 @@ cuda_eval( void )
   return YAP_Unify(YAP_ARG2, out);
 }
 
+/*
+ * cuda_coverage(Query, PosType, NPos, NNeg): run Query through Cuda_Eval and
+ * unify NPos with the number of result rows whose first column equals PosType
+ * and NNeg with the number of remaining rows.  Rows are read two ints apart and
+ * are assumed to be grouped by first column (TODO confirm), hence the bisection.
+ */
+static int
+cuda_coverage( void )
+{
+  int32_t *mat;
+  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
+  int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
+  int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
+  int32_t min = 0, max = n-1, t0, nfirst;
+
+  if (n < 0)
+    return FALSE;
+  if (n == 0) /* no rows: mat must not be read (nor assumed to be valid) */
+    return YAP_Unify(YAP_ARG3, YAP_MkIntTerm(0)) &&
+      YAP_Unify(YAP_ARG4, YAP_MkIntTerm(0));
+  t0 = mat[0];
+  if (t0 == mat[(n-1)*2]) { /* all rows have the same type */
+    nfirst = n;
+  } else {
+    /* invariant: row min has type t0, row max has the other type */
+    while (min+1 < max) {
+      int32_t i = min+(max-min)/2;
+      if (mat[i*2] == t0)
+        min = i;
+      else
+        max = i;
+    }
+    nfirst = max;
+  }
+  free( mat );
+  if (t0 == post) /* the leading group holds the positives */
+    return YAP_Unify(YAP_ARG3, YAP_MkIntTerm(nfirst)) &&
+      YAP_Unify(YAP_ARG4, YAP_MkIntTerm(n-nfirst));
+  return YAP_Unify(YAP_ARG4, YAP_MkIntTerm(nfirst)) &&
+    YAP_Unify(YAP_ARG3, YAP_MkIntTerm(n-nfirst));
+}
+
 static int cuda_count( void )
 {
   int32_t *mat;
@@ -298,6 +340,7 @@ init_cuda(void)
   YAP_UserCPredicate("load_rule", load_rule, 4);
   YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
   YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
+  YAP_UserCPredicate("cuda_coverage",
cuda_coverage, 4);
   YAP_UserCPredicate("cuda_count", cuda_count, 2);
 }
 
diff --git a/packages/cuda/cuda.yap b/packages/cuda/cuda.yap
index a965586c1..52dd03c3d 100644
--- a/packages/cuda/cuda.yap
+++ b/packages/cuda/cuda.yap
@@ -3,6 +3,7 @@
 	cuda_rule/2,
 	cuda_erase/1,
 	cuda_eval/2,
+	cuda_coverage/4,
 	cuda_count/2]).
 
 tell_warning :-
diff --git a/packages/cuda/lista.cu b/packages/cuda/lista.cu
index f5f5b0560..4f4bb6131 100644
--- a/packages/cuda/lista.cu
+++ b/packages/cuda/lista.cu
@@ -67,6 +67,73 @@ void buscarreglas(vector *facts, vector *rules)
 	}
 }
 
+/*
+ * movebpreds: for every rule in [rules, end) whose num_bpreds.x is greater
+ * than zero, rewrites address_host_table so that the entries whose first
+ * value is not positive come after all the other entries (the relative order
+ * inside each group is kept) and then rebuilds rule_names[1..total], the
+ * offsets at which the entries start.
+ *
+ * Every entry is expected to end with a 0 and to hold no other 0: the new
+ * offsets are found by looking for those terminators.  That a non-positive
+ * first value marks a built-in predicate is an assumption taken from the name
+ * of this function (TODO confirm).
+ *
+ * rules, end: range of rules; each element must provide num_rows,
+ *             num_bpreds.x, rule_names and address_host_table.
+ */
+template<class InputIterator>
+void movebpreds(InputIterator rules, InputIterator end)
+{
+	for(; rules != end; rules++)
+	{
+		if(rules->num_bpreds.x <= 0)
+			continue;
+
+		int total = rules->num_rows + rules->num_bpreds.x;
+		int *table = rules->address_host_table;
+
+		/* Growable buffers: no fixed number of slots per moved entry is
+		   assumed and nothing has to be released by hand. */
+		vector<int> rest, move;
+
+		/* split the entries into the ones that stay and the ones that move */
+		for(int x = 0; x < total; x++)
+		{
+			int *ini = table + rules->rule_names[x];
+			int *fin = table + rules->rule_names[x + 1];
+
+			/* skip empty or inconsistent ranges instead of copying garbage */
+			if(fin <= ini)
+				continue;
+			if(*ini > 0)
+				rest.insert(rest.end(), ini, fin);
+			else
+				move.insert(move.end(), ini, fin);
+		}
+		rest.insert(rest.end(), move.begin(), move.end());
+		if(rest.empty())
+			continue;
+
+		/* the first entry starts at offset 0 of the rebuilt table; find where
+		   each of the following ones starts */
+		int size = (int)rest.size();
+		int pos = 1;
+		for(int x = 1; x <= total; x++)
+		{
+			/* never read past the copied data, even if a 0 is missing */
+			while(pos < size && rest[pos] != 0)
+				pos++;
+			if(pos < size)
+				pos++;
+			rules->rule_names[x] = pos;
+		}
+
+		/* write the reordered entries back over the original table */
+		memcpy(table, &rest[0], sizeof(int) * size);
+	}
+}
+
 template
 void nombres(InputIterator rules, InputIterator end)
 {
@@ -454,7 +521,7 @@ void proyeccion(InputIterator actual, InputIterator end)
 			pos = columnsproject(pv, pos.y, actual->address_host_table, ini, fin, rulestart, ruleend, &res, &pv);
 			actual->project[numjoins] = res;
 			actual->projpos[numjoins] = pos;
-			actual->num_bpreds.y = pos.y; /*para guardar el tamaƱo de la union final*/
+			actual->num_bpreds.y = pos.y; /*stores the size of the final union*/
 			actual->num_bpreds.z = builtinpredicates(pv, pos.y, actual->address_host_table, ruleend + 1, actual->rule_names[total] - 1, &res);
 			actual->builtin = res;
 		}
@@ -797,6 +864,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 	for(x = 0; x < ninpr; x++)
 		L.push_back(*inprules[x]);
 
+	/*cout << "NAMES" << endl;
+	for(x = 0; x < (ninpf+ninpr); x++)
+		cout << L[x].name << endl;
+	cout << "NAMESEND" << endl;*/
+
 	qname = inpquery->name;
 	query = inpquery->address_host_table;
 	qsize = inpquery->num_columns;
@@ -822,7 +894,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 	rul_str = rules.begin();
 	fin = rules.end();
 
-	nombres(rul_str, fin);
+	nombres(rul_str, fin); /*preprocessing*/
+	movebpreds(rul_str, fin);
 	referencias(L.begin(), L.end(), rul_str, fin);
 	seleccion(rul_str, fin);
 	selfjoin(rul_str, fin);
@@ -1027,6 +1100,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 		cudaEventRecord(stop3, 0);
 		cudaEventSynchronize(stop3);
 		cudaEventElapsedTime(&time, start3, stop3);
+		cudaEventDestroy(start3);
+		cudaEventDestroy(stop3);
 		cout << "Predicados = " << time << endl;
 #endif
 	}
@@ -1046,6 +1121,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 		cudaEventRecord(stop2, 0);
 		cudaEventSynchronize(stop2);
 		cudaEventElapsedTime(&time, start2, stop2);
+
cudaEventDestroy(start2);
+		cudaEventDestroy(stop2);
 		cout << "Union = " << time << endl;
 #endif
 
@@ -1175,6 +1252,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 		tipo = res_rows * cols1 * sizeof(int);
 		hres = (int *)malloc(tipo);
 		cudaMemcpy(hres, res, tipo, cudaMemcpyDeviceToHost);
+		cudaFree(res);
 	}
 	else
 		res_rows = 0;
@@ -1182,6 +1260,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 	cudaEventRecord(stop, 0);
 	cudaEventSynchronize(stop);
 	cudaEventElapsedTime(&time, start, stop);
+	cudaEventDestroy(start);
+	cudaEventDestroy(stop);
 
 	if(showr == 1)
 	{
@@ -1199,6 +1279,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
 	cout << "Size = " << res_rows << endl;
 	cout << "Iterations = " << itr << endl;
 
+	clear_memory();
 	*result = hres;
 
 	return res_rows;
diff --git a/packages/cuda/memory.cu b/packages/cuda/memory.cu
index ce29061c3..413562396 100644
--- a/packages/cuda/memory.cu
+++ b/packages/cuda/memory.cu
@@ -182,7 +182,7 @@ void liberar(int *ptr, int size)
 
 void reservar(int **ptr, int size)
 {
-	//cout << "R " << avmem << " " << size;
+	// cout << "R " << avmem << " " << size
 	while(avmem < size)
 		limpiar();
 
@@ -190,7 +190,7 @@ void reservar(int **ptr, int size)
 		limpiar();
 
 	avmem -= size;
-	//cout << " " << avmem << endl;
+	// cout << " " << avmem << endl;
 }
 
 void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
@@ -346,9 +346,6 @@ int cargafinal(int name, int cols, int **ptr)
 	}
 	cout << "select finala" << endl;*/
 
-	GPUmem.clear();
-	CPUmem.clear();
-
 	*ptr = ini;
 	return cont;
 }
@@ -463,22 +460,24 @@ void resultados(vector::iterator first, vector::iterator las
 	cout << cont << endl;
 }
 
-/*device_vector reservar_vector(int size)
+/*
+ * clear_memory: releases every buffer tracked in the GPUmem list with
+ * cudaFree and every buffer tracked in the CPUmem list with free, and then
+ * empties both lists.
+ * NOTE(review): the avmem counter is not updated here -- confirm that the
+ * callers do not rely on it after this call.
+ */
+void clear_memory()
 {
-	limpiar(size * sizeof(int));
-	device_vector ret(size);
-	return ret;
-}*/
-/*
-void reservar_resultado(InputIterator req, int *ptr, int size)
-{
-	limpiar(size);
-	memnode temp;
-	temp.name = req->name;
-	temp.size = size;
-	cudaMalloc(&temp.dev_address, size);
-	temp.in_use = 1;
-	GPUmem.push_back(temp);
-	avmem -= size;
-	ptr = temp.dev_address;
-}*/
+	list<memnode>::iterator ini;
+
+	/* device buffers */
+	for(ini = GPUmem.begin(); ini != GPUmem.end(); ini++)
+		cudaFree(ini->dev_address);
+	/* host buffers */
+	for(ini = CPUmem.begin(); ini != CPUmem.end(); ini++)
+		free(ini->dev_address);
+
+	GPUmem.clear();
+	CPUmem.clear();
+}