diff --git a/packages/cuda/CC_CSSTree.cu b/packages/cuda/CC_CSSTree.cu new file mode 100755 index 000000000..9993585d7 --- /dev/null +++ b/packages/cuda/CC_CSSTree.cu @@ -0,0 +1,30 @@ +#include "CC_CSSTree.h" + +//return the start position of searching the key. +int CC_CSSTree::search(int key) +{ + int i=0; + int curIndex=0; + int curNode=0; + int j=0; + //search + for(i=0;inumRecord) curIndex=numRecord-1; + //cout<<"I: "< +#include +using namespace std; + +#define divRoundUp(n,s) (((n) / (s)) + ((((n) % (s)) > 0) ? 1 : 0)) +#define CSS_TREE_FANOUT 33 +//we use implicit pointer to perform the addressing. + +typedef int Record; + +class CC_GenericTree +{ +public: + int numRecord; + Record *data; + //we use the BFS layout as the default layout. + int numNode; + int level; + int gResult; + CC_GenericTree(){} + //we assume that numR=2^i. Otherwise, we pad the array with -1 from the beginning. + //we also assume that the record are sorted by the key. + CC_GenericTree(Record *d, int numR) + { + data=d; + numRecord=numR; + } + virtual ~CC_GenericTree() + { + } + virtual int search(int key)=0; + +}; + +class CC_CSSTree:public CC_GenericTree +{ +public: + int *ntree; + int fanout; + int blockSize; + int *vStart; + int *vG;//vG[0] is used in computing the position for level 1. + int numKey; + CC_CSSTree(Record *d, int numR, int f):CC_GenericTree(d,numR) + { + fanout=f; + blockSize=fanout-1; + int numLeaf=divRoundUp(numR,blockSize); + level=1; + int temp=numLeaf; + while(temp>1) + { + temp=divRoundUp(temp, fanout); + level++; + } + numNode=(int)((pow((double)fanout,(double)level)-1)/(fanout-1)); + numKey=numNode*blockSize; + ntree=new int[numKey]; + vStart=new int[level]; + vG=new int[level]; +#ifdef DEBUG + cout<, i.e., the leaf level. [start,end] + for(i=0;i=numRecord) + curIndex=numRecord-1; + ntree[k]=data[curIndex]; + } + else + break; + } + } + } + } + ~CC_CSSTree() + { + delete [] ntree; + delete [] vStart; + delete [] vG; + } + virtual int search(int key); + void print() + { + int i=0, j=0; + int k=0; + int startNode=0; + int endNode=0; + int startKey, endKey; + for(i=0;i vec[NUM_T]; + + for(x = 0; x < NUM_T; x++) + vec[x].reserve(INISIZE); + + //omp_set_num_threads(NUM_T); + div = rows / NUM_T; + ini[0] = 0; + for(x = 1; x < NUM_T; x++) + ini[x] = div * x; + ini[NUM_T] = rows; + + #pragma omp parallel for private(x,rowact,y,fin,op1,op2) firstprivate(flag,total) + for(i = 0; i < NUM_T; i++) + { + fin = ini[i+1]; + for(x = ini[i]; x < fin; x++) + { + rowact = x * numpreds.y; + for(y = 0; y < predn; y += 3) + { + op1 = bin[y+1]; + if(op1 < 0) + op1 *= -1; + else + op1 = dop1[rowact + op1]; + op2 = bin[y+2]; + if(op2 < 0) + op2 *= -1; + else + op2 = dop1[rowact + op2]; + switch(bin[y]) + { + case SBG_EQ: if(op1 != op2) + flag = 1; + break; + case SBG_GT: if(op1 <= op2) + flag = 1; + break; + case SBG_LT: if(op1 >= op2) + flag = 1; + break; + case SBG_GE: if(op1 < op2) + flag = 1; + break; + case SBG_LE: if(op1 > op2) + flag = 1; + break; + case SBG_DF: if(op1 == op2) + flag = 1; + } + if(flag) + break; + } + if(flag != 1) + { + for(y = predn; y < total; y++) + vec[i].push_back(dop1[rowact+bin[y]]); + } + else + flag = 0; + } + } + for(x = 0; x < NUM_T; x++) + { + ini[x] = vec[x].size(); + size += ini[x]; + } + fres = (int *)malloc(size * sizeof(int)); + ptr = fres; + for(x = 0; x < NUM_T; x++) + { + memcpy(ptr, vec[x].data(), ini[x] * sizeof(int)); + ptr += ini[x]; + } + *ret = fres; + return size / numpreds.z; +} diff --git a/packages/cuda/creator2.c b/packages/cuda/creator2.c new file mode 100755 index 000000000..54534bd61 --- /dev/null +++ b/packages/cuda/creator2.c @@ -0,0 +1,197 @@ +#include +#include +#include +#include + +/*Program used to generate union2.cu and union2.h. A new pointer and all its operations are generated for each set (pairs, triplets, etc.). + Arguments are the cardinality of the biggest set and the name of the cuda file. For example, executing "creator2 20 union2" will generate + all pointers and operations for all sets from 1 to 20 in the files union2.cu and union2.h.*/ + +int main(int argc, char *argv[]) +{ + int num = atoi(argv[1]); + int x; + char *str = (char *)malloc((strlen(argv[2]) + 4) * sizeof(char)); + sprintf(str, "%s.cu", argv[2]); + FILE *cuda = fopen(str, "w"); + + fprintf(cuda, "/*Computer generated file to remove duplicates. Since Thrust's unique and sort, unlike their std's counterparts, don't have a way to specify the size of each element in\n"); + fprintf(cuda, "the array, comparing pairs, triplets and other sets is not possible without defining a new pointer and all related operations for each set. If you have a better idea to do\n"); + fprintf(cuda, "this, please don't hesitate to email us.*/\n\n"); + fprintf(cuda, "#include \n"); + fprintf(cuda, "#include \n"); + fprintf(cuda, "#include \n"); + fprintf(cuda, "#include \n"); + fprintf(cuda, "#include \n"); + fprintf(cuda, "#include \"memory.h\"\n"); + fprintf(cuda, "#include \"%s.h\"\n\n", argv[2]); + fprintf(cuda, "int unir(int *res, int rows, int tipo, int **ret, int final)\n"); + fprintf(cuda, "{\n"); + fprintf(cuda, "\tthrust::device_ptr pt, re;\n"); + for(x = 2; x <= num; x++) + fprintf(cuda, "\tthrust::device_ptr pt%d, re%d;\n", x, x ,x); + for(x = 2; x <= num; x++) + fprintf(cuda, "\ts%d *t%d;\n", x, x); + fprintf(cuda, "\tint flag, nrows, *nres, size;\n\n"); + fprintf(cuda, "#if TIMER\n"); + fprintf(cuda, "\tcuda_stats.unions++;\n"); + fprintf(cuda, "#endif\n\n"); + fprintf(cuda, "\tswitch(tipo)\n"); + fprintf(cuda, "\t{\n"); + fprintf(cuda, "\t\tcase 1:\n"); + fprintf(cuda, "\t\t{\n"); + fprintf(cuda, "\t\t\tpt = thrust::device_pointer_cast(res);\n"); + fprintf(cuda, "\t\t\tflag = 0;\n"); + fprintf(cuda, "\t\t\twhile(flag != 1)\n"); + fprintf(cuda, "\t\t\t{\n"); + fprintf(cuda, "\t\t\t\ttry\n"); + fprintf(cuda, "\t\t\t\t{\n"); + fprintf(cuda, "\t\t\t\t\tthrust::sort(pt, pt + rows);\n"); + fprintf(cuda, "\t\t\t\t\tif(final)\n"); + fprintf(cuda, "\t\t\t\t\t{\n"); + fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, pt + rows, q1());\n"); + fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, re);\n"); + fprintf(cuda, "\t\t\t\t\t}\n"); + fprintf(cuda, "\t\t\t\t\telse\n"); + fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, pt + rows);\n"); + fprintf(cuda, "\t\t\t\t\tflag = 1;\n"); + fprintf(cuda, "\t\t\t\t}\n"); + fprintf(cuda, "\t\t\t\tcatch(std::bad_alloc &e)\n"); + fprintf(cuda, "\t\t\t\t{\n"); + fprintf(cuda, "\t\t\t\t\tlimpiar(\"sort/unique in unir\", 0);\n"); + fprintf(cuda, "\t\t\t\t}\n"); + fprintf(cuda, "\t\t\t}\n"); + fprintf(cuda, "\t\t\tnrows = thrust::distance(pt, re);\n"); + fprintf(cuda, "\t\t\tif(nrows < rows / 2)\n"); + fprintf(cuda, "\t\t\t{\n"); + fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n"); + fprintf(cuda, "\t\t\t\treservar(&nres, size);\n"); + fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n"); + fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n"); + fprintf(cuda, "\t\t\t\t*ret = nres;\n"); + fprintf(cuda, "\t\t\t}\n"); + fprintf(cuda, "\t\t\treturn nrows;\n"); + fprintf(cuda, "\t\t}\n"); + for(x = 2; x <= num; x++) + { + fprintf(cuda, "\t\tcase %d:\n", x); + fprintf(cuda, "\t\t{\n"); + fprintf(cuda, "\t\t\tt%d = (s%d*)res;\n", x, x); + fprintf(cuda, "\t\t\tpt%d = thrust::device_pointer_cast(t%d);\n", x, x); + fprintf(cuda, "\t\t\tflag = 0;\n"); + fprintf(cuda, "\t\t\twhile(flag != 1)\n"); + fprintf(cuda, "\t\t\t{\n"); + fprintf(cuda, "\t\t\t\ttry\n"); + fprintf(cuda, "\t\t\t\t{\n"); + fprintf(cuda, "\t\t\t\t\tthrust::sort(pt%d, pt%d + rows, o%d());\n", x, x, x); + fprintf(cuda, "\t\t\t\t\tif(final)\n"); + fprintf(cuda, "\t\t\t\t\t{\n"); + fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, pt%d + rows, q%d());\n", x, x, x, x); + fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, re%d, p%d());\n", x, x, x, x); + fprintf(cuda, "\t\t\t\t\t}\n"); + fprintf(cuda, "\t\t\t\t\telse\n"); + fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, pt%d + rows, p%d());\n", x, x, x, x); + fprintf(cuda, "\t\t\t\t\tflag = 1;\n"); + fprintf(cuda, "\t\t\t\t}\n"); + fprintf(cuda, "\t\t\t\tcatch(std::bad_alloc &e)\n"); + fprintf(cuda, "\t\t\t\t{\n"); + fprintf(cuda, "\t\t\t\t\tlimpiar(\"sort/unique in unir\", 0);\n"); + fprintf(cuda, "\t\t\t\t}\n"); + fprintf(cuda, "\t\t\t}\n"); + fprintf(cuda, "\t\t\tnrows = thrust::distance(pt%d, re%d);\n", x, x); + fprintf(cuda, "\t\t\tif(nrows < rows / 2)\n"); + fprintf(cuda, "\t\t\t{\n"); + fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n"); + fprintf(cuda, "\t\t\t\treservar(&nres, size);\n"); + fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n"); + fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n"); + fprintf(cuda, "\t\t\t\t*ret = nres;\n"); + fprintf(cuda, "\t\t\t}\n"); + fprintf(cuda, "\t\t\treturn nrows;\n"); + fprintf(cuda, "\t\t}\n"); + } + fprintf(cuda, "\t}\n"); + fprintf(cuda, "\treturn 0;\n"); + fprintf(cuda, "}\n"); + + fclose(cuda); + sprintf(str, "%s.h", argv[2]); + cuda = fopen(str, "w"); /*tipo de archivo cambiar*/ + + fprintf(cuda, "#ifndef _"); + for(x = 0; x < strlen(argv[2]); x++) + fprintf(cuda, "%c", toupper(argv[2][x])); + fprintf(cuda, "_H_\n"); + fprintf(cuda, "#define _"); + for(x = 0; x < strlen(argv[2]); x++) + fprintf(cuda, "%c", toupper(argv[2][x])); + fprintf(cuda, "_H_\n\n"); + fprintf(cuda, "int unir(int *res, int rows, int tipo, int **ret, int final);\n\n"); + for(x = 2; x <= num; x++) + { + fprintf(cuda, "typedef struct n%d\n", x); + fprintf(cuda, "{\n"); + fprintf(cuda, "\tint v[%d];\n", x); + fprintf(cuda, "}s%d;\n\n", x); + } + fprintf(cuda, "struct q1\n"); + fprintf(cuda, "{\n"); + fprintf(cuda, "\t__host__ __device__\n"); + fprintf(cuda, "\tbool operator()(const int &r1, const int &r2)\n"); + fprintf(cuda, "\t{\n"); + fprintf(cuda, "\t\tif(r1 != r2)\n"); + fprintf(cuda, "\t\t\treturn true;\n"); + fprintf(cuda, "\t\treturn false;\n"); + fprintf(cuda, "\t}\n"); + fprintf(cuda, "};\n\n"); + for(x = 2; x <= num; x++) + { + fprintf(cuda, "struct p%d\n", x); + fprintf(cuda, "{\n"); + fprintf(cuda, "\t__host__ __device__\n"); + fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x); + fprintf(cuda, "\t{\n"); + fprintf(cuda, "\t\tint x;\n"); + fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x); + fprintf(cuda, "\t\t{\n"); + fprintf(cuda, "\t\t\tif(r1.v[x] != r2.v[x])\n"); + fprintf(cuda, "\t\t\t\treturn false;\n"); + fprintf(cuda, "\t\t}\n"); + fprintf(cuda, "\t\treturn true;\n"); + fprintf(cuda, "\t}\n"); + fprintf(cuda, "};\n\n"); + fprintf(cuda, "struct q%d\n", x); + fprintf(cuda, "{\n"); + fprintf(cuda, "\t__host__ __device__\n"); + fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x); + fprintf(cuda, "\t{\n"); + fprintf(cuda, "\t\tint x;\n"); + fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x); + fprintf(cuda, "\t\t{\n"); + fprintf(cuda, "\t\t\tif(r1.v[x] != r2.v[x])\n"); + fprintf(cuda, "\t\t\t\treturn true;\n"); + fprintf(cuda, "\t\t}\n"); + fprintf(cuda, "\t\treturn false;\n"); + fprintf(cuda, "\t}\n"); + fprintf(cuda, "};\n\n"); + fprintf(cuda, "struct o%d\n", x); + fprintf(cuda, "{\n"); + fprintf(cuda, "\t__host__ __device__\n"); + fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x); + fprintf(cuda, "\t{\n"); + fprintf(cuda, "\t\tint x;\n"); + fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x); + fprintf(cuda, "\t\t{\n"); + fprintf(cuda, "\t\t\tif(r1.v[x] > r2.v[x])\n"); + fprintf(cuda, "\t\t\t\treturn true;\n"); + fprintf(cuda, "\t\t\tif(r1.v[x] < r2.v[x])\n"); + fprintf(cuda, "\t\t\t\treturn false;\n"); + fprintf(cuda, "\t\t}\n"); + fprintf(cuda, "\t\treturn false;\n"); + fprintf(cuda, "\t}\n"); + fprintf(cuda, "};\n\n"); + } + fprintf(cuda, "#endif\n"); + fclose(cuda); + free(str); +} diff --git a/packages/cuda/dbio.cu b/packages/cuda/dbio.cu new file mode 100644 index 000000000..6b0fe78b4 --- /dev/null +++ b/packages/cuda/dbio.cu @@ -0,0 +1,603 @@ +#include +#include +#include +#include "memory.h" +#include "union2.h" +#include "dbio.h" + +#ifdef DATALOG +//template +//void datalogWrite(int query, InputIterator rul_str, InputIterator fin, int finalDR, int **result) +void datalogWrite(int query, vector::iterator rul_str, vector::iterator fin, int finalDR, int **result) +{ + rulenode tmprule; + vector::iterator qposr; + int *dop1, *hres; + int cols1, res_rows, tipo; + tmprule.name = query; + qposr = lower_bound(rul_str, fin, tmprule, comparer); + cols1 = qposr->num_columns; + res_rows = cargafinal(query, cols1, &dop1); + + if(res_rows != 0) + { + if(res_rows > 0) + { + if(finalDR) + res_rows = unir(dop1, res_rows, cols1, &dop1, 0); + tipo = res_rows * cols1 * sizeof(int); + hres = (int *)malloc(tipo); + cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost); + cudaFree(dop1); + *result = hres; + } + else + { + res_rows *= -1; + if(finalDR) + { + int *dop2; + tipo = res_rows * cols1 * sizeof(int); + reservar(&dop2, tipo); + cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice); + free(dop1); + res_rows = unir(dop2, res_rows, cols1, &dop2, 0); + tipo = res_rows * cols1 * sizeof(int); + hres = (int *)malloc(tipo); + cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost); + cudaFree(dop2); + *result = hres; + } + else + *result = dop1; + } + } +} +#endif + +#ifdef TUFFY +void postgresRead(PGconn **ret, vector *L, int *inpquery, char *names, int finalDR) +{ + PGresult *pgr; + int x, y; + int *mat, *mat2; + char *tok, sel[1024], **qrs; + int w, z = 0, numt, numc, numc2, start = 0, start2, val; + PGconn *conn = PQconnectdb("host=localhost port=5432 dbname = prueba user=tuffer password=root"); + if(PQstatus(conn) != CONNECTION_OK) + { + fprintf(stderr, "Connection to database failed: %s", PQerrorMessage(conn)); + exit(1); + } + + pgr = PQexec(conn, "Select nspname from pg_catalog.pg_namespace where oid = (select max(oid) from pg_catalog.pg_namespace)"); + sprintf(sel, "SET search_path = %s", PQgetvalue(pgr, 0, 0)); + PQclear(pgr); + PQexec(conn, sel); + tok = strtok(names, " "); + if(finalDR) + { + qrs = (char **)malloc(100 * sizeof(char *)); + while(tok != NULL) + { + sprintf(sel, "Select * from %s limit 0", tok); + pgr = PQexec(conn, sel); + numc = L->at(z).num_columns; + if(tok[0] == 'c') + { + sprintf(sel, "Select "); + numt = numc + 1; + for(x = 1; x < numt; x++) + { + strcat(sel, PQfname(pgr, x)); + strcat(sel, ", "); + } + sel[strlen(sel)-2] = '\0'; + sprintf(sel, "%s from %s", sel, tok); + } + else + { + sprintf(sel, "Select id, Club, "); + numt = numc + 6; + for(x = 8; x < numt; x++) + { + strcat(sel, PQfname(pgr, x)); + strcat(sel, ", "); + } + sel[strlen(sel)-2] = '\0'; + sprintf(sel, "%s from %s", sel, tok); + } + PQclear(pgr); + pgr = PQexec(conn, sel); + numt = PQntuples(pgr); + mat = (int *)malloc(numt * numc * sizeof(int)); + if(tok[0] == 'c') + { + for(x = 0; x < numt; x++) + { + start = x * numc; + for(y = 0; y < numc; y++) + mat[start + y] = atoi(PQgetvalue(pgr, x, y)); + } + } + else + { + numc2 = numc - 2; + mat2 = (int *)malloc(numt * numc2 * sizeof(int)); + start = 0; + start2 = 0; + for(x = 0; x < numt; x++) + { + w = atoi(PQgetvalue(pgr, x, 1)); + if(w < 2) + { + mat[start] = atoi(PQgetvalue(pgr, x, 0)); + start++; + mat[start] = w; + start++; + if(w > 0) + { + for(y = 2; y < numc; y++) + { + val = atoi(PQgetvalue(pgr, x, y)); + mat[start] = val; + mat2[start2] = val; + start++; + start2++; + } + } + else + { + for(y = 2; y < numc; y++) + { + val = atoi(PQgetvalue(pgr, x, y)); + mat[start] = val; + start++; + } + } + } + else + { + for(y = 2; y < numc; y++) + { + val = atoi(PQgetvalue(pgr, x, y)); + mat2[start2] = val; + start2++; + } + } + } + L->at(z+1).address_host_table = mat2; + L->at(z+1).num_rows = start2 / numc2; + } + L->at(z).address_host_table = mat; + L->at(z).num_rows = start / numc; + PQclear(pgr); + + x = 1; + while(inpquery[x] != -1) + { + if(L->at(z).name == inpquery[x]) + { + numt = (strlen(tok) + 1) * sizeof(char); + qrs[x] = (char *)malloc(numt); + memcpy(qrs[x], tok, numt); + } + x += 2; + } + if(tok[0] == 'c') + { + tok = strtok(NULL, " "); + z++; + } + else + { + strtok(NULL, " "); + tok = strtok(NULL, " "); + z += 2; + } + } + } + else + { + while(tok != NULL) + { + sprintf(sel, "Select * from %s limit 0", tok); + pgr = PQexec(conn, sel); + numc = L->at(z).num_columns; + if(tok[0] == 'c') + { + sprintf(sel, "Select weight, myid, "); + start = 1; + numt = numc + 1; + } + else + { + sprintf(sel, "Select truth, Club, atomID, "); + start = 8; + numt = numc + 5; + } + for(x = start; x < numt; x++) + { + strcat(sel, PQfname(pgr, x)); + strcat(sel, ", "); + } + sel[strlen(sel)-2] = '\0'; + sprintf(sel, "%s from %s", sel, tok); + PQclear(pgr); + pgr = PQexec(conn, sel); + numt = PQntuples(pgr); + mat = (int *)malloc(numt * numc * sizeof(int)); + L->at(z).weight = (double *)malloc(numt * sizeof(double)); + L->at(z).num_rows = numt; + + for(x = 0; x < numt; x++) + { + start = x * numc; + for(y = 1; y < numc; y++) + mat[start + y] = atoi(PQgetvalue(pgr, x, y)); + } + + numt *= numc; + double flo; + if(tok[0] == 'c') + { + for(x = 0, y = 0; x < numt; x+=numc, y++) + { + flo = atof(PQgetvalue(pgr, y, 0)); + L->at(z).weight[y] = flo; + if(flo > 0) + mat[x] = y + 1; + else + mat[x] = -y - 1; + } + } + else + { + for(x = 0, y = 0; x < numt; x+=numc, y++) + { + if(PQgetvalue(pgr, y, 0)[0] == 't') + mat[x] = 2; + else + mat[x] = 1; + } + } + L->at(z).address_host_table = mat; + numc = (strlen(tok) + 1) * sizeof(char); + L->at(z).predname = (char *)malloc(numc); + memcpy(L->at(z).predname, tok, numc); + PQclear(pgr); + tok = strtok(NULL, " "); + z++; + } + } + *ret = conn; +} + +void postgresWrite(int *inpquery, int ninpf, vector::iterator rul_str, vector::iterator fin, vector *L, PGconn *conn, int finalDR) +{ + char sel[1024]; + double *matw = NULL; + int qname, cols1, res_rows, tipo, *dop1; + int x, w, z, y, *hres; + rulenode tmprule; + vector::iterator qposr; + if(finalDR) + { + char file[] = "/dev/shm/mln0_atoms.csv"; + z = 0; + int seqid = 1; + FILE *fp; + fp = fopen(file, "w"); + if(fp == NULL) + { + cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl; + sprintf(file, "./temp/mln0_atoms.csv"); + fp = fopen(file, "w"); + if(fp == NULL) + { + cerr << "Failed to create main memory temporary file" << endl; + exit(1); + } + } + while((qname = inpquery[z]) != -1) + { + tmprule.name = qname; + qposr = lower_bound(rul_str, fin, tmprule, comparer); + cols1 = qposr->num_columns; + res_rows = cargafinal(qname, cols1, &dop1); + + if(res_rows != 0) + { + if(res_rows < 0) + res_rows = unir(dop1, -res_rows, cols1, &dop1, 0); /*duplicate elimination on result*/ + else + res_rows = unir(dop1, res_rows, cols1, &dop1, finalDR); + + tipo = res_rows * cols1 * sizeof(int); + hres = (int *)malloc(tipo); + cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost); + cudaFree(dop1); + w = z + 1; + + strtok(qposr->rulename, "_"); + strtok(NULL, "_"); + int prid = atoi(strtok(NULL, "_")); + + for(x = 0, w = 0; x < res_rows; x++, w+=2) + { + if(hres[w+1]) + fprintf(fp, "%d,%d,%d,true\n", seqid, hres[w], prid); + else + fprintf(fp, "%d,%d,%d,false\n", seqid, hres[w], prid); + seqid++; + } + free(hres); + } + z += 2; + } + fclose(fp); + sprintf(sel, "Copy mln0_atoms(atomid,tupleID,predID,isquery) from '%s' CSV", file); + PQexec(conn, sel); + } + else + { + while(rul_str != fin) + { + cols1 = rul_str->num_columns; + res_rows = cargafinal(rul_str->name, cols1, &dop1); + if(res_rows == 0) + { + rul_str++; + continue; + } + res_rows = abs(res_rows); + tipo = res_rows * cols1 * sizeof(int); + hres = (int *)malloc(tipo); + cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost); + cudaFree(dop1); + + char file[] = "/dev/shm/buffer.csv"; + FILE *fp; + fp = fopen(file, "w"); + if(fp == NULL) + { + cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl; + sprintf(file, "./temp/buffer.csv"); + fp = fopen(file, "w"); + if(fp == NULL) + { + cerr << "Failed to create main memory temporary file" << endl; + exit(1); + } + } + + if(rul_str->rulename[0] == 'z') + { + char *name = rul_str->rulename + 1; + for(x = 0; x < ninpf; x++) + { + if(strncmp(L->at(x).predname, name, strlen(name)) == 0) + { + matw = L->at(x).weight; + break; + } + } + + cols1 -= 3; + for(x = 0, z = 0; x < res_rows; x++, z+=3) + { + for(y = 0; y < cols1; y++, z++) + fprintf(fp, "%d,", hres[z]); + fprintf(fp, "%d,%lf,%d\n", hres[z], matw[abs(hres[z+1])-1], hres[z+2]); + } + fclose(fp); + sprintf(sel, "Copy %s from '%s' CSV", name, file); + PQexec(conn, sel); + } + else + { + cols1--; + for(x = 0, z = 0; x < res_rows; x++, z++) + { + for(y = 0; y < cols1; y++, z++) + fprintf(fp, "%d,", hres[z]); + fprintf(fp, "%d\n", hres[z]); + } + fclose(fp); + sprintf(sel, "Copy %s from '%s' CSV", rul_str->rulename, file); + PQexec(conn, sel); + } + free(hres); + rul_str++; + } + } + PQfinish(conn); + if(finalDR) + clear_memory_all(); +} +#endif + +#ifdef ROCKIT +void mysqlRead(MYSQL **ret, int *qrs, vector *L, int ninpf, char *names, int finalDR) +{ + char *tok, sel[1024]; + int w, x, y, z = 0, numt, numc; + int *mat; + MYSQL *con = mysql_init(NULL); + if(con == NULL) + { + fprintf(stderr, "mysql_init() failed\n"); + exit(1); + } + mysql_options(con, MYSQL_OPT_LOCAL_INFILE, NULL); + mysql_real_connect(con, "localhost", "root", "root", "rockit", 0, NULL, 0); + if(finalDR) + { + y = 0; + while(qrs[y] != 0) + { + for(z = 0; z < ninpf; z++) + { + if(qrs[y] == L->at(z).name) + { + MYSQL_ROW row; + sprintf(sel, "Select count(*) from %s", L->at(z).predname); + mysql_query(con, sel); + MYSQL_RES *result = mysql_store_result(con); + row = mysql_fetch_row(result); + numt = atoi(row[0]); + mysql_free_result(result); + + if(numt != L->at(z).num_rows) + { + liberar(L->at(z).name); + numc = L->at(z).num_columns; + sprintf(sel, "Select * from %s", L->at(z).predname); + mysql_query(con, sel); + MYSQL_RES *result = mysql_store_result(con); + mat = (int *)malloc(numt * numc * sizeof(int)); + w = 0; + while ((row = mysql_fetch_row(result))) + { + for(x = 0; x < numc; x++, w++) + mat[w] = atoi(row[x]); + } + + mysql_free_result(result); + if(L->at(z).address_host_table != NULL) + free(L->at(z).address_host_table); + L->at(z).address_host_table = mat; + L->at(z).num_rows = numt; + } + } + } + y++; + } + } + else + { + tok = strtok(names, " "); + while(tok != NULL) + { + numc = L->at(z).num_columns; + sprintf(sel, "Select * from %s", tok); + mysql_query(con, sel); + MYSQL_RES *result = mysql_store_result(con); + numt = mysql_num_rows(result); + + MYSQL_ROW row; + mat = (int *)malloc(numt * numc * sizeof(int)); + w = 0; + if(tok[0] == 'f' && tok[1] >= '0' && tok[1] <= '9') + { + while ((row = mysql_fetch_row(result))) + { + for(x = 1; x <= numc; x++, w++) + mat[w] = atoi(row[x]); + } + } + else + { + while ((row = mysql_fetch_row(result))) + { + for(x = 0; x < numc; x++, w++) + mat[w] = atoi(row[x]); + } + } + mysql_free_result(result); + L->at(z).address_host_table = mat; + L->at(z).num_rows = numt; + + numc = (strlen(tok) + 1) * sizeof(char); + L->at(z).predname = (char *)malloc(numc); + strcpy(L->at(z).predname, tok); + tok = strtok(NULL, " "); + z++; + } + } + *ret = con; +} + +void mysqlWrite(vector::iterator rul_str, vector::iterator fin, vector *L, MYSQL *con) +{ + int x, y, z, cols1, cols2, res_rows, tipo; + int *hres, *dop1; + char *id, *sign, *q1, *q2; + char sel[1024], weight[1024]; + gpunode tmpfact; + while(rul_str != fin) + { + cols1 = rul_str->num_columns; + res_rows = cargafinal(rul_str->name, cols1, &dop1); + id = strtok(rul_str->rulename, "_"); + sprintf(sel, "create table if not exists %s(weight double, ", id); + for(x = 0; x < cols1; x++) + { + sprintf(weight, "a%d char(10), ", x); + strcat(sel, weight); + } + sel[strlen(sel)-2] = ')'; + strcat(sel, "ENGINE = MEMORY DEFAULT CHARSET=latin1"); + mysql_query(con, sel); + sprintf(sel, "truncate %s", id); + mysql_query(con, sel); + + if(res_rows == 0) + { + rul_str++; + continue; + } + + if(res_rows > 0) + { + tmpfact = L->at(-rul_str->referencias[rul_str->num_rows - 2] - 1); + sign = tmpfact.predname; + tipo = res_rows * cols1 * sizeof(int); + hres = (int *)malloc(tipo); + cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost); + if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9') + sumar(tmpfact.name, dop1, cols1, res_rows); + } + else + { + hres = dop1; + res_rows = -res_rows; + } + + sign = strtok(NULL, "_"); + q1 = strtok(NULL, "_"); + q2 = strtok(NULL, "_"); + if(sign[0] == '0') + sprintf(weight, "%s.%s", q1, q2); + else + sprintf(weight, "-%s.%s", q1, q2); + + FILE *fp; + char file[512]; + sprintf(file, "/dev/shm/%s.tsv", id); + fp = fopen(file, "w"); + if(fp == NULL) + { + cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl; + sprintf(file, "./temp/%s.tsv", id); + fp = fopen(file, "w"); + } + + cols2 = cols1 - 1; + for(x = 0, z = 0; x < res_rows; x++, z++) + { + fprintf(fp, "%s\t", weight); + for(y = 0; y < cols2; y++, z++) + fprintf(fp, "%d\t", hres[z]); + fprintf(fp, "%d\n", hres[z]); + } + fclose(fp); + + sprintf(sel, "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s", file, id); + mysql_query(con, sel); + rul_str++; + } + mysql_close(con); +} +#endif + diff --git a/packages/cuda/dbio.h b/packages/cuda/dbio.h new file mode 100644 index 000000000..1491e6380 --- /dev/null +++ b/packages/cuda/dbio.h @@ -0,0 +1,28 @@ +#ifndef _DBIO_H_ +#define _DBIO_H_ + +#include "pred.h" +#ifdef TUFFY +#include +#endif +#ifdef ROCKIT +#include +#endif +#include +#include "lista.h" + +using namespace std; + +#ifdef TUFFY +void postgresRead(PGconn **ret, vector *L, int *inpquery, char *names, int finalDR); +void postgresWrite(int *inpquery, int ninpf, vector::iterator rul_str, vector::iterator fin, vector *L, PGconn *conn, int finalDR); +#endif +#ifdef ROCKIT +void mysqlRead(MYSQL **ret, int *qrs, vector *L, int ninpf, char *names, int finalDR); +void mysqlWrite(vector::iterator rul_str, vector::iterator fin, vector *L, MYSQL *con); +#endif +#ifdef DATALOG +void datalogWrite(int query, vector::iterator rul_str, vector::iterator fin, int finalDR, int **result); +#endif + +#endif diff --git a/packages/cuda/joincpu.cpp b/packages/cuda/joincpu.cpp new file mode 100755 index 000000000..391b0d9f8 --- /dev/null +++ b/packages/cuda/joincpu.cpp @@ -0,0 +1,500 @@ +#include "CC_CSSTree.h" +#include +#include +#include +#include "pred.h" + +void partInlj(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int wj, int halfrul, int lenrul) +{ + //set_thread_affinity(cpuid,NUM_T); + int i=0; + int k=0; + int curIndex=0; + int keyForSearch; + int y, posS, posR; + for(k=startS; ksearch(keyForSearch); + for(i=curIndex-1;i>0;i--) + { + if(keyForSearch == R[i]) + { + + //cout << keyForSearch << endl; + + posR = perm[i] * of1; + for(y = 0; y < halfrul; y++) + res->push_back(p1[posR + proj[y]]); + for(; y < lenrul; y++) + res->push_back(p2[posS + proj[y]]); + } + else + if(R[i]push_back(p1[posR + proj[y]]); + for(; y < lenrul; y++) + res->push_back(p2[posS + proj[y]]); + } + else + if(R[i]>keyForSearch) + break; + } + } +} + +void partInlj2(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int cols, int wj) +{ + //set_thread_affinity(cpuid,NUM_T); + int i=0; + int k=0; + int curIndex=0; + int keyForSearch; + int y, cond, posS, posR; + for(k=startS; ksearch(keyForSearch); + for(i=curIndex-1;i>0;i--) + { + if(keyForSearch == R[i]) + { + + //cout << keyForSearch << endl; + + posR = perm[i] * of1 - 1; + for(y = 0; y < cols; y++) + { + cond = proj[y]; + if(cond > 0) + res->push_back(p1[posR + cond]); + else + res->push_back(p2[posS - cond - 1]); + } + } + else + if(R[i] 0) + res->push_back(p1[posR + cond]); + else + res->push_back(p2[posS - cond - 1]); + } + } + else + if(R[i]>keyForSearch) + break; + } + } +} + +void multipartInlj(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int *wj, int numj, int halfrul, int lenrul) +{ + //set_thread_affinity(cpuid,NUM_T); + int i=0; + int k=0; + int curIndex=0; + int keyForSearch; + int y, posS, posR; + for(k=startS; ksearch(keyForSearch); + for(i=curIndex-1;i>0;i--) + { + if(keyForSearch == R[i]) + { + posR = perm[i] * of1; + for(y = 2; y < numj; y += 2) + { + if(p1[posR + wj[y]] != p2[posS + wj[y+1]]) + break; + } + if(y < numj) + continue; + for(y = 0; y < halfrul; y++) + res->push_back(p1[posR + proj[y]]); + for(; y < lenrul; y++) + res->push_back(p2[posS + proj[y]]); + } + else + if(R[i]push_back(p1[posR + proj[y]]); + for(; y < lenrul; y++) + res->push_back(p2[posS + proj[y]]); + } + else + if(R[i]>keyForSearch) + break; + } + } +} + +void multipartInlj2(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int cols, int *wj, int numj) +{ + //set_thread_affinity(cpuid,NUM_T); + int i=0; + int k=0; + int curIndex=0; + int keyForSearch; + int y, cond, posS, posR; + for(k=startS; ksearch(keyForSearch); + for(i=curIndex-1;i>0;i--) + { + if(keyForSearch == R[i]) + { + posR = perm[i] * of1; + for(y = 2; y < numj; y += 2) + { + if(p1[posR + wj[y]] != p2[posS + wj[y+1]]) + break; + } + if(y < numj) + continue; + for(y = 0; y < cols; y++) + { + cond = proj[y]; + if(cond > 0) + res->push_back(p1[posR + cond - 1]); + else + res->push_back(p2[posS - cond - 1]); + } + } + else + if(R[i] 0) + res->push_back(p1[posR + cond - 1]); + else + res->push_back(p2[posS - cond - 1]); + } + } + else + if(R[i]>keyForSearch) + break; + } + } +} + +void inlj_omp(Record *R, int rLen, CC_CSSTree *tree, Record *S, int sLen, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int2 projp, int cols, int* wj, int numj, int tipo) +{ + int i=0; + int j=0; + int *startS=new int[NUM_T]; + int *endS=new int[NUM_T]; + int chunkSize=sLen/NUM_T; + for(i=0;i 2) + multipartInlj2(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, cols, wj, numj); + else + partInlj2(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, cols, wj[1]); + } + else + { + if(numj > 2) + multipartInlj(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, wj, numj, projp.x, projp.y); + else + partInlj(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, wj[1], projp.x, projp.y); + } + } + + //cout << "fin" << endl; + + delete startS; + delete endS; +} + +int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list::iterator rule, int pos, int bothops, int **ret) +{ + int pos2 = pos + 1; + int *sel1, nsel1 = 0; + int *sel2 = rule->select[pos2]; + int nsel2 = rule->numsel[pos2]; + int *proj = rule->project[pos]; + int2 projp = rule->projpos[pos]; + int *sjoin1, nsj1 = 0; + int *sjoin2 = rule->selfjoin[pos2]; + int nsj2 = rule->numselfj[pos2]; + int *wherej = rule->wherejoin[pos]; + int numj = rule->numjoin[pos]; + int size, *fres, ini[NUM_T], *temp; + int x, tipo = 0; + int *Sres = NULL, *Rres, Snl, Rnl, *permutation; + + if(bothops) + { + sel1 = rule->select[pos]; + nsel1 = rule->numsel[pos]; + sjoin1 = rule->selfjoin[pos]; + nsj1 = rule->numselfj[pos]; + } + + #ifdef TIMER + cudaEvent_t start, stop; + float time; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + #endif + + if(nsel1 > 0 || nsj1 > 0) + Rnl = selectproyectcpu2(p1, rLen, of1, sel1, nsel1, sjoin1, nsj1, wherej[0], &Rres, &permutation); + else + { + + /*cout << "sin sel" << endl; + cout << "valores = " << rLen << " " << of1 << " " << wherej[0] << endl; + for(x = 0; x < 100; x++) + cout << p1[x] << " "; + cout << endl; + cout << "ultimo = " << p1[of1 * rLen - 1] << endl;*/ + + Rnl = rLen; + size = Rnl * sizeof(int); + permutation = (int *)malloc(size); + Rres = (int *)malloc(size); + #pragma omp parallel for firstprivate(of1) + for(x = 0; x < Rnl; x++) + { + permutation[x] = x; + Rres[x] = p1[of1 * x + wherej[0]]; + } + + //cout << "sin sel fin" << endl; + + } + + #ifdef TIMER + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time, start, stop); + cuda_stats.select1_time += time; + + cudaEventDestroy(start); + cudaEventDestroy(stop); + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + #endif + + if(nsel2 > 0 || nsj2 > 0) + { + //cout << "con sel S" << endl; + + Snl = selectproyectcpu2(p2, sLen, of2, sel2, nsel2, sjoin2, nsj2, wherej[1], &Sres, NULL); + } + else + Snl = sLen; + + #ifdef TIMER + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time, start, stop); + cuda_stats.select2_time += time; + + cudaEventDestroy(start); + cudaEventDestroy(stop); + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + #endif + + //cout << "antes" << endl; + + /*cout << "antes" << endl; + for(x = 0; x < Rnl; x++) + cout << permutation[x] << " "; + cout << endl; + for(x = 0; x < 100; x++) + cout << Rres[x] << " "; + cout << endl;*/ + + thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation); + + #ifdef TIMER + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time, start, stop); + cuda_stats.sort_time += time; + + cudaEventDestroy(start); + cudaEventDestroy(stop); + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + #endif + + /*cout << "despues" << endl; + for(x = 0; x < Rnl; x++) + cout << permutation[x] << " "; + cout << endl; + for(x = 0; x < Rnl; x++) + cout << Rres[x] << " "; + cout << endl;*/ + + //cout << "despues sort" << endl; + + vector *res = new vector[NUM_T]; + for(x = 0; x < NUM_T; x++) + res[x].reserve(INISIZE); + CC_CSSTree *tree = new CC_CSSTree(Rres, Rnl, CSS_TREE_FANOUT); + if(pos == (rule->num_rows - 3)) // && rule->num_bpreds.x == 0) + tipo = 1; + inlj_omp(Rres, Rnl, tree, Sres, Snl, of1, of2, res, p1, p2, permutation, proj, projp, rule->num_columns, wherej, numj, tipo); + + /*cout << "proj = "; + for(x = 0; x < rule->num_columns; x++) + cout << proj[x] << " "; + cout << endl; + int y,z; + for(x = 0; x < NUM_T; x++) + { + cout << "Thread " << x << endl; + for(y = 0; y < res[x].size() / projp.y; y++) + { + for(z = 0; z < projp.y; z++) + cout << res[x][y * projp.y + z] << " "; + cout << endl; + } + } + cout << "Tamanios" << endl;*/ + + size = 0; + for(x = 0; x < NUM_T; x++) + { + ini[x] = res[x].size(); + size += ini[x]; + + //cout << ini[x] << " " << size << endl; + + } + fres = (int *)malloc(size * sizeof(int)); + temp = fres; + for(x = 0; x < NUM_T; x++) + { + memcpy(temp, res[x].data(), ini[x] * sizeof(int)); + temp += ini[x]; + } + + if(*ret != NULL) + free(*ret); + free(Rres); + free(permutation); + if(Sres != NULL) + free(Sres); + delete tree; + delete [] res; + + *ret = fres; + + #ifdef TIMER + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time, start, stop); + cuda_stats.join_time += time; + #endif + + //cout << "Projp.x = " << projp.x << " projp.y = " << projp.y << endl; + + /*if(numj > 2) + { + cout << "total = " << rLen << " " << size / projp.y << " " << projp.y << " " << rule->num_columns << endl; + exit(1); + }*/ + + return size / projp.y; +} diff --git a/packages/cuda/selectproyectcpu.cpp b/packages/cuda/selectproyectcpu.cpp new file mode 100755 index 000000000..5ae091e13 --- /dev/null +++ b/packages/cuda/selectproyectcpu.cpp @@ -0,0 +1,394 @@ +#include +#include +#include +#include +#include "pred.h" + +using namespace std; + +int selectproyectcpu(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret) +{ + int size = 0, pos, temp; + int i, x, y, z, w; + int *fres, *ptr; + int div, fin, ini[NUM_T + 1]; + vector vec[NUM_T]; + + for(x = 0; x < NUM_T; x++) + vec[x].reserve(INISIZE); + + //omp_set_num_threads(NUM_T); + div = rows / NUM_T; + ini[0] = 0; + for(x = 1; x < NUM_T; x++) + ini[x] = div * x; + ini[NUM_T] = rows; + + if(numselect > 0) + { + #pragma omp parallel for private(x,pos,y,z,fin,temp,w) + for(i = 0; i < NUM_T; i++) + { + fin = ini[i+1]; + for(x = ini[i]; x < fin; x++) + { + pos = x * cols; + for(y = 0; y < numselect; y += 2) + { + if(dop1[pos+select[y]] != select[y+1]) + break; + } + for(z = 0; z < numselfj; z++) + { + temp = dop1[pos+selfjoin[z]]; + w = z + 1; + while(selfjoin[w] > -1) + { + if(temp != dop1[pos+selfjoin[w]]) + break; + w++; + } + z = w; + if(selfjoin[w] != -1) + break; + } + if(y == numselect && z == numselfj) + { + for(y = 0; y < head_size; y++) + vec[i].push_back(dop1[pos+project[y]]); + } + } + } + } + else + { + if(numselfj > 0) + { + #pragma omp parallel for private(x,pos,y,z,fin,w,temp) + for(i = 0; i < NUM_T; i++) + { + fin = ini[i+1]; + for(x = ini[i]; x < fin; x++) + { + pos = x * cols; + for(z = 0; z < numselfj; z++) + { + temp = dop1[pos+selfjoin[z]]; + w = z + 1; + while(selfjoin[w] > -1) + { + if(temp != dop1[pos+selfjoin[w]]) + break; + w++; + } + z = w; + if(selfjoin[w] != -1) + break; + } + if(z == numselfj) + { + for(y = 0; y < head_size; y++) + vec[i].push_back(dop1[pos+project[y]]); + } + } + } + } + else + { + fres = (int *)malloc(rows * cols * sizeof(int)); + #pragma omp parallel for private(pos,y,z) + for(x = 0; x < rows; x++) + { + pos = x * cols; + z = pos; + for(y = 0; y < head_size; y++, z++) + fres[z] = dop1[pos+project[y]]; + } + *ret = fres; + return rows; + } + } + for(x = 0; x < NUM_T; x++) + { + ini[x] = vec[x].size(); + size += ini[x]; + } + fres = (int *)malloc(size * sizeof(int)); + ptr = fres; + for(x = 0; x < NUM_T; x++) + { + memcpy(ptr, vec[x].data(), ini[x] * sizeof(int)); + ptr += ini[x]; + } + *ret = fres; + return size / head_size; +} + +int selectproyectcpu2(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list) +{ + int size = 0, pos, temp; + int i, x, y, z, w; + int *fres, *lres, *ptr; + int div, fin, ini[NUM_T + 1]; + int *vec[NUM_T]; + int *lis[NUM_T]; + int cont[NUM_T]; + + //omp_set_num_threads(NUM_T); + div = rows / NUM_T; + ini[0] = 0; + for(x = 1; x < NUM_T; x++) + ini[x] = div * x; + ini[NUM_T] = rows; + + pos = div + NUM_T; + for(x = 0; x < NUM_T; x++) + { + vec[x] = (int *)malloc(pos * sizeof(int)); + lis[x] = (int *)malloc(pos * sizeof(int)); + cont[x] = 0; + } + + /*cout << "numselect = " << numselect << endl; + for(x = 0; x < numselect; x++) + cout << select[x] << " "; + cout << endl;*/ + + if(numselect > 0) + { + #pragma omp parallel for private(x,pos,y,z,fin,temp,w) + for(i = 0; i < NUM_T; i++) + { + fin = ini[i+1]; + for(x = ini[i]; x < fin; x++) + { + pos = x * cols; + for(y = 0; y < numselect; y += 2) + { + if(dop1[pos+select[y]] != select[y+1]) + break; + } + for(z = 0; z < numselfj; z++) + { + temp = dop1[pos+selfjoin[z]]; + w = z + 1; + while(selfjoin[w] > -1) + { + if(temp != dop1[pos+selfjoin[w]]) + break; + w++; + } + z = w; + if(selfjoin[w] != -1) + break; + } + if(y == numselect && z == numselfj) + { + lis[i][cont[i]] = x; + if(list != NULL) + vec[i][cont[i]] = dop1[pos+wj]; + cont[i]++; + } + } + } + } + else + { + if(numselfj > 0) + { + #pragma omp parallel for private(x,pos,y,z,fin,w,temp) firstprivate(cont) + for(i = 0; i < NUM_T; i++) + { + fin = ini[i+1]; + for(x = ini[i]; x < fin; x++) + { + pos = x * cols; + for(z = 0; z < numselfj; z++) + { + temp = dop1[pos+selfjoin[z]]; + w = z + 1; + while(selfjoin[w] > -1) + { + if(temp != dop1[pos+selfjoin[w]]) + break; + w++; + } + z = w; + if(selfjoin[w] != -1) + break; + } + if(z == numselfj) + { + lis[i][cont[i]] = x; + if(list != NULL) + vec[i][cont[i]] = dop1[pos+wj]; + cont[i]++; + } + } + } + } + } + + //cout << "despues sel" << endl; + + for(x = 0; x < NUM_T; x++) + size += cont[x]; + lres = (int *)malloc(size * sizeof(int)); + ptr = lres; + for(x = 0; x < NUM_T; x++) + { + memcpy(ptr, lis[x], cont[x] * sizeof(int)); + ptr += cont[x]; + } + if(list != NULL) + { + fres = (int *)malloc(size * sizeof(int)); + ptr = fres; + for(x = 0; x < NUM_T; x++) + { + memcpy(ptr, vec[x], cont[x] * sizeof(int)); + ptr += cont[x]; + } + *ret = fres; + *list = lres; + } + else + *ret = lres; + for(x = 0; x < NUM_T; x++) + { + free(lis[x]); + free(vec[x]); + } + return size; +} + +int selectproyectcpu3(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list) +{ + int size = 0, pos, temp; + int i, x, y, z, w; + int *fres, *lres, *ptr; + int div, fin, ini[NUM_T + 1]; + vector vec[NUM_T]; + vector lis[NUM_T]; + + for(x = 0; x < NUM_T; x++) + { + vec[x].reserve(INISIZE); + lis[x].reserve(INISIZE); + } + + //omp_set_num_threads(NUM_T); + div = rows / NUM_T; + ini[0] = 0; + for(x = 1; x < NUM_T; x++) + ini[x] = div * x; + ini[NUM_T] = rows; + + /*cout << "numselect = " << numselect << endl; + for(x = 0; x < numselect; x++) + cout << select[x] << " "; + cout << endl;*/ + + if(numselect > 0) + { + #pragma omp parallel for private(x,pos,y,z,fin,temp,w) + for(i = 0; i < NUM_T; i++) + { + fin = ini[i+1]; + for(x = ini[i]; x < fin; x++) + { + pos = x * cols; + for(y = 0; y < numselect; y += 2) + { + if(dop1[pos+select[y]] != select[y+1]) + break; + } + for(z = 0; z < numselfj; z++) + { + temp = dop1[pos+selfjoin[z]]; + w = z + 1; + while(selfjoin[w] > -1) + { + if(temp != dop1[pos+selfjoin[w]]) + break; + w++; + } + z = w; + if(selfjoin[w] != -1) + break; + } + if(y == numselect && z == numselfj) + { + lis[i].push_back(x); + if(list != NULL) + vec[i].push_back(dop1[pos+wj]); + } + } + } + } + else + { + if(numselfj > 0) + { + #pragma omp parallel for private(x,pos,y,z,fin,w,temp) + for(i = 0; i < NUM_T; i++) + { + fin = ini[i+1]; + for(x = ini[i]; x < fin; x++) + { + pos = x * cols; + for(z = 0; z < numselfj; z++) + { + temp = dop1[pos+selfjoin[z]]; + w = z + 1; + while(selfjoin[w] > -1) + { + if(temp != dop1[pos+selfjoin[w]]) + break; + w++; + } + z = w; + if(selfjoin[w] != -1) + break; + } + if(z == numselfj) + { + lis[i].push_back(x); + if(list != NULL) + vec[i].push_back(dop1[pos+wj]); + } + } + } + } + } + + //cout << "despues sel" << endl; + + for(x = 0; x < NUM_T; x++) + { + ini[x] = lis[x].size(); + size += ini[x]; + } + lres = (int *)malloc(size * sizeof(int)); + ptr = lres; + for(x = 0; x < NUM_T; x++) + { + memcpy(ptr, lis[x].data(), ini[x] * sizeof(int)); + ptr += ini[x]; + } + if(list != NULL) + { + fres = (int *)malloc(size * sizeof(int)); + ptr = fres; + for(x = 0; x < NUM_T; x++) + { + memcpy(ptr, vec[x].data(), ini[x] * sizeof(int)); + ptr += ini[x]; + } + *ret = fres; + *list = lres; + } + else + *ret = lres; + return size; +} diff --git a/packages/cuda/union2.h b/packages/cuda/union2.h new file mode 100755 index 000000000..3eda836b0 --- /dev/null +++ b/packages/cuda/union2.h @@ -0,0 +1,1005 @@ +#ifndef _UNION2_H_ +#define _UNION2_H_ + +int unir(int *res, int rows, int tipo, int **ret, int final); + +typedef struct n2 +{ + int v[2]; +}s2; + +typedef struct n3 +{ + int v[3]; +}s3; + +typedef struct n4 +{ + int v[4]; +}s4; + +typedef struct n5 +{ + int v[5]; +}s5; + +typedef struct n6 +{ + int v[6]; +}s6; + +typedef struct n7 +{ + int v[7]; +}s7; + +typedef struct n8 +{ + int v[8]; +}s8; + +typedef struct n9 +{ + int v[9]; +}s9; + +typedef struct n10 +{ + int v[10]; +}s10; + +typedef struct n11 +{ + int v[11]; +}s11; + +typedef struct n12 +{ + int v[12]; +}s12; + +typedef struct n13 +{ + int v[13]; +}s13; + +typedef struct n14 +{ + int v[14]; +}s14; + +typedef struct n15 +{ + int v[15]; +}s15; + +typedef struct n16 +{ + int v[16]; +}s16; + +typedef struct n17 +{ + int v[17]; +}s17; + +typedef struct n18 +{ + int v[18]; +}s18; + +typedef struct n19 +{ + int v[19]; +}s19; + +typedef struct n20 +{ + int v[20]; +}s20; + +struct q1 +{ + __host__ __device__ + bool operator()(const int &r1, const int &r2) + { + if(r1 != r2) + return true; + return false; + } +}; + +struct p2 +{ + __host__ __device__ + bool operator()(const s2 &r1, const s2 &r2) + { + int x; + for(x = 0; x < 2; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q2 +{ + __host__ __device__ + bool operator()(const s2 &r1, const s2 &r2) + { + int x; + for(x = 0; x < 2; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o2 +{ + __host__ __device__ + bool operator()(const s2 &r1, const s2 &r2) + { + int x; + for(x = 0; x < 2; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p3 +{ + __host__ __device__ + bool operator()(const s3 &r1, const s3 &r2) + { + int x; + for(x = 0; x < 3; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q3 +{ + __host__ __device__ + bool operator()(const s3 &r1, const s3 &r2) + { + int x; + for(x = 0; x < 3; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o3 +{ + __host__ __device__ + bool operator()(const s3 &r1, const s3 &r2) + { + int x; + for(x = 0; x < 3; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p4 +{ + __host__ __device__ + bool operator()(const s4 &r1, const s4 &r2) + { + int x; + for(x = 0; x < 4; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q4 +{ + __host__ __device__ + bool operator()(const s4 &r1, const s4 &r2) + { + int x; + for(x = 0; x < 4; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o4 +{ + __host__ __device__ + bool operator()(const s4 &r1, const s4 &r2) + { + int x; + for(x = 0; x < 4; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p5 +{ + __host__ __device__ + bool operator()(const s5 &r1, const s5 &r2) + { + int x; + for(x = 0; x < 5; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q5 +{ + __host__ __device__ + bool operator()(const s5 &r1, const s5 &r2) + { + int x; + for(x = 0; x < 5; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o5 +{ + __host__ __device__ + bool operator()(const s5 &r1, const s5 &r2) + { + int x; + for(x = 0; x < 5; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p6 +{ + __host__ __device__ + bool operator()(const s6 &r1, const s6 &r2) + { + int x; + for(x = 0; x < 6; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q6 +{ + __host__ __device__ + bool operator()(const s6 &r1, const s6 &r2) + { + int x; + for(x = 0; x < 6; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o6 +{ + __host__ __device__ + bool operator()(const s6 &r1, const s6 &r2) + { + int x; + for(x = 0; x < 6; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p7 +{ + __host__ __device__ + bool operator()(const s7 &r1, const s7 &r2) + { + int x; + for(x = 0; x < 7; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q7 +{ + __host__ __device__ + bool operator()(const s7 &r1, const s7 &r2) + { + int x; + for(x = 0; x < 7; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o7 +{ + __host__ __device__ + bool operator()(const s7 &r1, const s7 &r2) + { + int x; + for(x = 0; x < 7; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p8 +{ + __host__ __device__ + bool operator()(const s8 &r1, const s8 &r2) + { + int x; + for(x = 0; x < 8; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q8 +{ + __host__ __device__ + bool operator()(const s8 &r1, const s8 &r2) + { + int x; + for(x = 0; x < 8; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o8 +{ + __host__ __device__ + bool operator()(const s8 &r1, const s8 &r2) + { + int x; + for(x = 0; x < 8; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p9 +{ + __host__ __device__ + bool operator()(const s9 &r1, const s9 &r2) + { + int x; + for(x = 0; x < 9; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q9 +{ + __host__ __device__ + bool operator()(const s9 &r1, const s9 &r2) + { + int x; + for(x = 0; x < 9; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o9 +{ + __host__ __device__ + bool operator()(const s9 &r1, const s9 &r2) + { + int x; + for(x = 0; x < 9; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p10 +{ + __host__ __device__ + bool operator()(const s10 &r1, const s10 &r2) + { + int x; + for(x = 0; x < 10; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q10 +{ + __host__ __device__ + bool operator()(const s10 &r1, const s10 &r2) + { + int x; + for(x = 0; x < 10; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o10 +{ + __host__ __device__ + bool operator()(const s10 &r1, const s10 &r2) + { + int x; + for(x = 0; x < 10; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p11 +{ + __host__ __device__ + bool operator()(const s11 &r1, const s11 &r2) + { + int x; + for(x = 0; x < 11; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q11 +{ + __host__ __device__ + bool operator()(const s11 &r1, const s11 &r2) + { + int x; + for(x = 0; x < 11; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o11 +{ + __host__ __device__ + bool operator()(const s11 &r1, const s11 &r2) + { + int x; + for(x = 0; x < 11; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p12 +{ + __host__ __device__ + bool operator()(const s12 &r1, const s12 &r2) + { + int x; + for(x = 0; x < 12; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q12 +{ + __host__ __device__ + bool operator()(const s12 &r1, const s12 &r2) + { + int x; + for(x = 0; x < 12; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o12 +{ + __host__ __device__ + bool operator()(const s12 &r1, const s12 &r2) + { + int x; + for(x = 0; x < 12; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p13 +{ + __host__ __device__ + bool operator()(const s13 &r1, const s13 &r2) + { + int x; + for(x = 0; x < 13; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q13 +{ + __host__ __device__ + bool operator()(const s13 &r1, const s13 &r2) + { + int x; + for(x = 0; x < 13; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o13 +{ + __host__ __device__ + bool operator()(const s13 &r1, const s13 &r2) + { + int x; + for(x = 0; x < 13; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p14 +{ + __host__ __device__ + bool operator()(const s14 &r1, const s14 &r2) + { + int x; + for(x = 0; x < 14; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q14 +{ + __host__ __device__ + bool operator()(const s14 &r1, const s14 &r2) + { + int x; + for(x = 0; x < 14; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o14 +{ + __host__ __device__ + bool operator()(const s14 &r1, const s14 &r2) + { + int x; + for(x = 0; x < 14; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p15 +{ + __host__ __device__ + bool operator()(const s15 &r1, const s15 &r2) + { + int x; + for(x = 0; x < 15; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q15 +{ + __host__ __device__ + bool operator()(const s15 &r1, const s15 &r2) + { + int x; + for(x = 0; x < 15; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o15 +{ + __host__ __device__ + bool operator()(const s15 &r1, const s15 &r2) + { + int x; + for(x = 0; x < 15; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p16 +{ + __host__ __device__ + bool operator()(const s16 &r1, const s16 &r2) + { + int x; + for(x = 0; x < 16; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q16 +{ + __host__ __device__ + bool operator()(const s16 &r1, const s16 &r2) + { + int x; + for(x = 0; x < 16; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o16 +{ + __host__ __device__ + bool operator()(const s16 &r1, const s16 &r2) + { + int x; + for(x = 0; x < 16; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p17 +{ + __host__ __device__ + bool operator()(const s17 &r1, const s17 &r2) + { + int x; + for(x = 0; x < 17; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q17 +{ + __host__ __device__ + bool operator()(const s17 &r1, const s17 &r2) + { + int x; + for(x = 0; x < 17; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o17 +{ + __host__ __device__ + bool operator()(const s17 &r1, const s17 &r2) + { + int x; + for(x = 0; x < 17; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p18 +{ + __host__ __device__ + bool operator()(const s18 &r1, const s18 &r2) + { + int x; + for(x = 0; x < 18; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q18 +{ + __host__ __device__ + bool operator()(const s18 &r1, const s18 &r2) + { + int x; + for(x = 0; x < 18; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o18 +{ + __host__ __device__ + bool operator()(const s18 &r1, const s18 &r2) + { + int x; + for(x = 0; x < 18; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p19 +{ + __host__ __device__ + bool operator()(const s19 &r1, const s19 &r2) + { + int x; + for(x = 0; x < 19; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q19 +{ + __host__ __device__ + bool operator()(const s19 &r1, const s19 &r2) + { + int x; + for(x = 0; x < 19; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o19 +{ + __host__ __device__ + bool operator()(const s19 &r1, const s19 &r2) + { + int x; + for(x = 0; x < 19; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +struct p20 +{ + __host__ __device__ + bool operator()(const s20 &r1, const s20 &r2) + { + int x; + for(x = 0; x < 20; x++) + { + if(r1.v[x] != r2.v[x]) + return false; + } + return true; + } +}; + +struct q20 +{ + __host__ __device__ + bool operator()(const s20 &r1, const s20 &r2) + { + int x; + for(x = 0; x < 20; x++) + { + if(r1.v[x] != r2.v[x]) + return true; + } + return false; + } +}; + +struct o20 +{ + __host__ __device__ + bool operator()(const s20 &r1, const s20 &r2) + { + int x; + for(x = 0; x < 20; x++) + { + if(r1.v[x] > r2.v[x]) + return true; + if(r1.v[x] < r2.v[x]) + return false; + } + return false; + } +}; + +#endif diff --git a/packages/cuda/unioncpu2.cpp b/packages/cuda/unioncpu2.cpp new file mode 100755 index 000000000..6f1e3cd81 --- /dev/null +++ b/packages/cuda/unioncpu2.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include "union2.h" + +int unircpu(int *res, int rows, int tipo, int **ret) +{ + + //cout << "En union = " << rows << " " << tipo << endl; + + s2 *t2, *re2; + s3 *t3, *re3; + int nrows, *nres; + //int size; + + switch(tipo) + { + case 1: + { + thrust::sort(thrust::omp::par, res, res + rows); + nres = thrust::unique(thrust::omp::par, res, res + rows); + nrows = thrust::distance(res, nres); + /*if(nrows < rows / 2) + { + size = nrows * tipo * sizeof(int); + nres = (int *)malloc(size); + memcpy(nres, res, size); + free(*ret); + *ret = nres; + }*/ + return nrows; + } + case 2: + { + t2 = (s2*)res; + thrust::sort(thrust::omp::par, t2, t2 + rows, o2()); + re2 = thrust::unique(thrust::omp::par, t2, t2 + rows, p2()); + nrows = thrust::distance(t2, re2); + /*if(nrows < rows / 2) + { + size = nrows * tipo * sizeof(int); + nres = (int *)malloc(size); + memcpy(nres, res, size); + free(*ret); + *ret = nres; + }*/ + return nrows; + } + case 3: + { + t3 = (s3*)res; + thrust::sort(thrust::omp::par, t3, t3 + rows, o3()); + re3 = thrust::unique(thrust::omp::par, t3, t3 + rows, p3()); + nrows = thrust::distance(t3, re3); + /*if(nrows < rows / 2) + { + size = nrows * tipo * sizeof(int); + nres = (int *)malloc(size); + memcpy(nres, res, size); + free(*ret); + *ret = nres; + }*/ + return nrows; + } + } + return 0; +}