CUDA: the missing files

This commit is contained in:
Vitor Santos Costa 2016-04-22 18:21:05 +01:00
parent f966a5b912
commit ea90785c6a
11 changed files with 3071 additions and 0 deletions

30
packages/cuda/CC_CSSTree.cu Executable file
View File

@ -0,0 +1,30 @@
#include "CC_CSSTree.h"
//return the start position of searching the key.
int CC_CSSTree::search(int key)
{
int i=0;
int curIndex=0;
int curNode=0;
int j=0;
//search
for(i=0;i<level;i++)
{
for(j=0;j<blockSize;j++)
{
if(ntree[curIndex+j]==-1)
break;
if(key<=ntree[curIndex+j])
break;
}
curNode=(fanout*(curNode)+j+1);
curIndex=curNode*blockSize;
//#ifdef DEBUG
// cout<<curNode<<", "<<j<<", "<<ntree[curIndex]<<"; ";
//#endif
}
curIndex=(curNode-numNode)*blockSize;
if(curIndex>numRecord) curIndex=numRecord-1;
//cout<<"I: "<<curIndex<<", ";//cout<<endl;
return curIndex;
}

146
packages/cuda/CC_CSSTree.h Executable file
View File

@ -0,0 +1,146 @@
#ifndef CSSTREE_H
#define CSSTREE_H
#include <iostream>
#include <math.h>
using namespace std;
#define divRoundUp(n,s) (((n) / (s)) + ((((n) % (s)) > 0) ? 1 : 0))
#define CSS_TREE_FANOUT 33
//we use implicit pointer to perform the addressing.
typedef int Record;
class CC_GenericTree
{
public:
int numRecord;
Record *data;
//we use the BFS layout as the default layout.
int numNode;
int level;
int gResult;
CC_GenericTree(){}
//we assume that numR=2^i. Otherwise, we pad the array with -1 from the beginning.
//we also assume that the record are sorted by the key.
CC_GenericTree(Record *d, int numR)
{
data=d;
numRecord=numR;
}
virtual ~CC_GenericTree()
{
}
virtual int search(int key)=0;
};
class CC_CSSTree:public CC_GenericTree
{
public:
int *ntree;
int fanout;
int blockSize;
int *vStart;
int *vG;//vG[0] is used in computing the position for level 1.
int numKey;
CC_CSSTree(Record *d, int numR, int f):CC_GenericTree(d,numR)
{
fanout=f;
blockSize=fanout-1;
int numLeaf=divRoundUp(numR,blockSize);
level=1;
int temp=numLeaf;
while(temp>1)
{
temp=divRoundUp(temp, fanout);
level++;
}
numNode=(int)((pow((double)fanout,(double)level)-1)/(fanout-1));
numKey=numNode*blockSize;
ntree=new int[numKey];
vStart=new int[level];
vG=new int[level];
#ifdef DEBUG
cout<<numLeaf<<","<<level<<", "<<numNode<<endl;
#endif
//layout the tree from bottom up.
int i=0,j=0,k=0;
int startNode=0;
int endNode=0;
int startKey, endKey;
int curIndex;
for(i=0;i<numNode;i++)
ntree[i]=-1;
//for <level-1>, i.e., the leaf level. [start,end]
for(i=0;i<level;i++)//level
{
startNode=(int)((pow((double)fanout,(double)i)-1)/(fanout-1));
endNode=(int)((pow((double)fanout,(double)(i+1))-1)/(fanout-1));
for(j= startNode;j< endNode;j++)//which node
{
startKey=j*blockSize;
endKey=startKey+blockSize;
for(k=startKey;k<endKey;k++)
{
curIndex=(int)(blockSize*pow((double)fanout,(double)(level-i-1))*(k+1-startNode*blockSize+(j-startNode))-1);
if(curIndex<numRecord+blockSize)
{
if(curIndex>=numRecord)
curIndex=numRecord-1;
ntree[k]=data[curIndex];
}
else
break;
}
}
}
}
~CC_CSSTree()
{
delete [] ntree;
delete [] vStart;
delete [] vG;
}
virtual int search(int key);
void print()
{
int i=0, j=0;
int k=0;
int startNode=0;
int endNode=0;
int startKey, endKey;
for(i=0;i<level;i++)//level
{
cout<<"Level, "<<i<<endl;
startNode=(int)((pow((double)fanout,(double)i)-1)/(fanout-1));
endNode=(int)((pow((double)fanout,(double)(i+1))-1)/(fanout-1));
for(j= startNode;j< endNode;j++)//which node
{
cout<<"Level, "<<i<<", Node, "<<j<<": ";
startKey=j*blockSize;
endKey=startKey+blockSize;
for(k=startKey;k<endKey;k++)
{
cout<<ntree[k]<<", ";
}
cout<<endl;
}
}
for(i=0;i<numRecord;i++)
{
cout<<data[i]<<", ";
if(i%(fanout-1)==(fanout-2))
cout<<"*"<<endl;
}
}
};
#endif

12
packages/cuda/bpreds.h Executable file
View File

@ -0,0 +1,12 @@
#ifndef _BPREDS_H_
#define _BPREDS_H_
int maximo(int, ...);
__global__ void bpreds(int*, int*, int, int, int, int*, int, int, int*, int*);
__global__ void bpredsnormal(int*, int, int, int*, int, int*);
__global__ void bpredsnormal2(int*, int, int, int*, int, int*);
__global__ void bpredsOR(int*, int*, int, int, int, int*, int, int, int*, int*);
__global__ void bpredsorlogic(int*, int, int, int*, int, int*);
__global__ void bpredsorlogic2(int*, int, int, int*, int, int*);
#endif

88
packages/cuda/bpredscpu.cpp Executable file
View File

@ -0,0 +1,88 @@
#include "pred.h"
int bpredscpu(int *dop1, int rows, int *bin, int3 numpreds, int **ret)
{
int i, x, y, op1, op2;
int size = 0, rowact, flag = 0;
int predn = numpreds.x * 3;
int total = predn + numpreds.z;
int *fres, *ptr;
int div, fin, ini[NUM_T + 1];
vector<int> vec[NUM_T];
for(x = 0; x < NUM_T; x++)
vec[x].reserve(INISIZE);
//omp_set_num_threads(NUM_T);
div = rows / NUM_T;
ini[0] = 0;
for(x = 1; x < NUM_T; x++)
ini[x] = div * x;
ini[NUM_T] = rows;
#pragma omp parallel for private(x,rowact,y,fin,op1,op2) firstprivate(flag,total)
for(i = 0; i < NUM_T; i++)
{
fin = ini[i+1];
for(x = ini[i]; x < fin; x++)
{
rowact = x * numpreds.y;
for(y = 0; y < predn; y += 3)
{
op1 = bin[y+1];
if(op1 < 0)
op1 *= -1;
else
op1 = dop1[rowact + op1];
op2 = bin[y+2];
if(op2 < 0)
op2 *= -1;
else
op2 = dop1[rowact + op2];
switch(bin[y])
{
case SBG_EQ: if(op1 != op2)
flag = 1;
break;
case SBG_GT: if(op1 <= op2)
flag = 1;
break;
case SBG_LT: if(op1 >= op2)
flag = 1;
break;
case SBG_GE: if(op1 < op2)
flag = 1;
break;
case SBG_LE: if(op1 > op2)
flag = 1;
break;
case SBG_DF: if(op1 == op2)
flag = 1;
}
if(flag)
break;
}
if(flag != 1)
{
for(y = predn; y < total; y++)
vec[i].push_back(dop1[rowact+bin[y]]);
}
else
flag = 0;
}
}
for(x = 0; x < NUM_T; x++)
{
ini[x] = vec[x].size();
size += ini[x];
}
fres = (int *)malloc(size * sizeof(int));
ptr = fres;
for(x = 0; x < NUM_T; x++)
{
memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));
ptr += ini[x];
}
*ret = fres;
return size / numpreds.z;
}

197
packages/cuda/creator2.c Executable file
View File

@ -0,0 +1,197 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/*Program used to generate union2.cu and union2.h. A new pointer and all its operations are generated for each set (pairs, triplets, etc.).
Arguments are the cardinality of the biggest set and the name of the cuda file. For example, executing "creator2 20 union2" will generate
all pointers and operations for all sets from 1 to 20 in the files union2.cu and union2.h.*/
int main(int argc, char *argv[])
{
int num = atoi(argv[1]);
int x;
char *str = (char *)malloc((strlen(argv[2]) + 4) * sizeof(char));
sprintf(str, "%s.cu", argv[2]);
FILE *cuda = fopen(str, "w");
fprintf(cuda, "/*Computer generated file to remove duplicates. Since Thrust's unique and sort, unlike their std's counterparts, don't have a way to specify the size of each element in\n");
fprintf(cuda, "the array, comparing pairs, triplets and other sets is not possible without defining a new pointer and all related operations for each set. If you have a better idea to do\n");
fprintf(cuda, "this, please don't hesitate to email us.*/\n\n");
fprintf(cuda, "#include <thrust/device_vector.h>\n");
fprintf(cuda, "#include <thrust/unique.h>\n");
fprintf(cuda, "#include <thrust/distance.h>\n");
fprintf(cuda, "#include <thrust/sort.h>\n");
fprintf(cuda, "#include <iostream>\n");
fprintf(cuda, "#include \"memory.h\"\n");
fprintf(cuda, "#include \"%s.h\"\n\n", argv[2]);
fprintf(cuda, "int unir(int *res, int rows, int tipo, int **ret, int final)\n");
fprintf(cuda, "{\n");
fprintf(cuda, "\tthrust::device_ptr<int> pt, re;\n");
for(x = 2; x <= num; x++)
fprintf(cuda, "\tthrust::device_ptr<s%d> pt%d, re%d;\n", x, x ,x);
for(x = 2; x <= num; x++)
fprintf(cuda, "\ts%d *t%d;\n", x, x);
fprintf(cuda, "\tint flag, nrows, *nres, size;\n\n");
fprintf(cuda, "#if TIMER\n");
fprintf(cuda, "\tcuda_stats.unions++;\n");
fprintf(cuda, "#endif\n\n");
fprintf(cuda, "\tswitch(tipo)\n");
fprintf(cuda, "\t{\n");
fprintf(cuda, "\t\tcase 1:\n");
fprintf(cuda, "\t\t{\n");
fprintf(cuda, "\t\t\tpt = thrust::device_pointer_cast(res);\n");
fprintf(cuda, "\t\t\tflag = 0;\n");
fprintf(cuda, "\t\t\twhile(flag != 1)\n");
fprintf(cuda, "\t\t\t{\n");
fprintf(cuda, "\t\t\t\ttry\n");
fprintf(cuda, "\t\t\t\t{\n");
fprintf(cuda, "\t\t\t\t\tthrust::sort(pt, pt + rows);\n");
fprintf(cuda, "\t\t\t\t\tif(final)\n");
fprintf(cuda, "\t\t\t\t\t{\n");
fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, pt + rows, q1());\n");
fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, re);\n");
fprintf(cuda, "\t\t\t\t\t}\n");
fprintf(cuda, "\t\t\t\t\telse\n");
fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, pt + rows);\n");
fprintf(cuda, "\t\t\t\t\tflag = 1;\n");
fprintf(cuda, "\t\t\t\t}\n");
fprintf(cuda, "\t\t\t\tcatch(std::bad_alloc &e)\n");
fprintf(cuda, "\t\t\t\t{\n");
fprintf(cuda, "\t\t\t\t\tlimpiar(\"sort/unique in unir\", 0);\n");
fprintf(cuda, "\t\t\t\t}\n");
fprintf(cuda, "\t\t\t}\n");
fprintf(cuda, "\t\t\tnrows = thrust::distance(pt, re);\n");
fprintf(cuda, "\t\t\tif(nrows < rows / 2)\n");
fprintf(cuda, "\t\t\t{\n");
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
fprintf(cuda, "\t\t\t}\n");
fprintf(cuda, "\t\t\treturn nrows;\n");
fprintf(cuda, "\t\t}\n");
for(x = 2; x <= num; x++)
{
fprintf(cuda, "\t\tcase %d:\n", x);
fprintf(cuda, "\t\t{\n");
fprintf(cuda, "\t\t\tt%d = (s%d*)res;\n", x, x);
fprintf(cuda, "\t\t\tpt%d = thrust::device_pointer_cast(t%d);\n", x, x);
fprintf(cuda, "\t\t\tflag = 0;\n");
fprintf(cuda, "\t\t\twhile(flag != 1)\n");
fprintf(cuda, "\t\t\t{\n");
fprintf(cuda, "\t\t\t\ttry\n");
fprintf(cuda, "\t\t\t\t{\n");
fprintf(cuda, "\t\t\t\t\tthrust::sort(pt%d, pt%d + rows, o%d());\n", x, x, x);
fprintf(cuda, "\t\t\t\t\tif(final)\n");
fprintf(cuda, "\t\t\t\t\t{\n");
fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, pt%d + rows, q%d());\n", x, x, x, x);
fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, re%d, p%d());\n", x, x, x, x);
fprintf(cuda, "\t\t\t\t\t}\n");
fprintf(cuda, "\t\t\t\t\telse\n");
fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, pt%d + rows, p%d());\n", x, x, x, x);
fprintf(cuda, "\t\t\t\t\tflag = 1;\n");
fprintf(cuda, "\t\t\t\t}\n");
fprintf(cuda, "\t\t\t\tcatch(std::bad_alloc &e)\n");
fprintf(cuda, "\t\t\t\t{\n");
fprintf(cuda, "\t\t\t\t\tlimpiar(\"sort/unique in unir\", 0);\n");
fprintf(cuda, "\t\t\t\t}\n");
fprintf(cuda, "\t\t\t}\n");
fprintf(cuda, "\t\t\tnrows = thrust::distance(pt%d, re%d);\n", x, x);
fprintf(cuda, "\t\t\tif(nrows < rows / 2)\n");
fprintf(cuda, "\t\t\t{\n");
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
fprintf(cuda, "\t\t\t}\n");
fprintf(cuda, "\t\t\treturn nrows;\n");
fprintf(cuda, "\t\t}\n");
}
fprintf(cuda, "\t}\n");
fprintf(cuda, "\treturn 0;\n");
fprintf(cuda, "}\n");
fclose(cuda);
sprintf(str, "%s.h", argv[2]);
cuda = fopen(str, "w"); /*tipo de archivo cambiar*/
fprintf(cuda, "#ifndef _");
for(x = 0; x < strlen(argv[2]); x++)
fprintf(cuda, "%c", toupper(argv[2][x]));
fprintf(cuda, "_H_\n");
fprintf(cuda, "#define _");
for(x = 0; x < strlen(argv[2]); x++)
fprintf(cuda, "%c", toupper(argv[2][x]));
fprintf(cuda, "_H_\n\n");
fprintf(cuda, "int unir(int *res, int rows, int tipo, int **ret, int final);\n\n");
for(x = 2; x <= num; x++)
{
fprintf(cuda, "typedef struct n%d\n", x);
fprintf(cuda, "{\n");
fprintf(cuda, "\tint v[%d];\n", x);
fprintf(cuda, "}s%d;\n\n", x);
}
fprintf(cuda, "struct q1\n");
fprintf(cuda, "{\n");
fprintf(cuda, "\t__host__ __device__\n");
fprintf(cuda, "\tbool operator()(const int &r1, const int &r2)\n");
fprintf(cuda, "\t{\n");
fprintf(cuda, "\t\tif(r1 != r2)\n");
fprintf(cuda, "\t\t\treturn true;\n");
fprintf(cuda, "\t\treturn false;\n");
fprintf(cuda, "\t}\n");
fprintf(cuda, "};\n\n");
for(x = 2; x <= num; x++)
{
fprintf(cuda, "struct p%d\n", x);
fprintf(cuda, "{\n");
fprintf(cuda, "\t__host__ __device__\n");
fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
fprintf(cuda, "\t{\n");
fprintf(cuda, "\t\tint x;\n");
fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
fprintf(cuda, "\t\t{\n");
fprintf(cuda, "\t\t\tif(r1.v[x] != r2.v[x])\n");
fprintf(cuda, "\t\t\t\treturn false;\n");
fprintf(cuda, "\t\t}\n");
fprintf(cuda, "\t\treturn true;\n");
fprintf(cuda, "\t}\n");
fprintf(cuda, "};\n\n");
fprintf(cuda, "struct q%d\n", x);
fprintf(cuda, "{\n");
fprintf(cuda, "\t__host__ __device__\n");
fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
fprintf(cuda, "\t{\n");
fprintf(cuda, "\t\tint x;\n");
fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
fprintf(cuda, "\t\t{\n");
fprintf(cuda, "\t\t\tif(r1.v[x] != r2.v[x])\n");
fprintf(cuda, "\t\t\t\treturn true;\n");
fprintf(cuda, "\t\t}\n");
fprintf(cuda, "\t\treturn false;\n");
fprintf(cuda, "\t}\n");
fprintf(cuda, "};\n\n");
fprintf(cuda, "struct o%d\n", x);
fprintf(cuda, "{\n");
fprintf(cuda, "\t__host__ __device__\n");
fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
fprintf(cuda, "\t{\n");
fprintf(cuda, "\t\tint x;\n");
fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
fprintf(cuda, "\t\t{\n");
fprintf(cuda, "\t\t\tif(r1.v[x] > r2.v[x])\n");
fprintf(cuda, "\t\t\t\treturn true;\n");
fprintf(cuda, "\t\t\tif(r1.v[x] < r2.v[x])\n");
fprintf(cuda, "\t\t\t\treturn false;\n");
fprintf(cuda, "\t\t}\n");
fprintf(cuda, "\t\treturn false;\n");
fprintf(cuda, "\t}\n");
fprintf(cuda, "};\n\n");
}
fprintf(cuda, "#endif\n");
fclose(cuda);
free(str);
}

603
packages/cuda/dbio.cu Normal file
View File

@ -0,0 +1,603 @@
#include <iostream>
#include <algorithm>
#include <stdio.h>
#include "memory.h"
#include "union2.h"
#include "dbio.h"
#ifdef DATALOG
//template<class InputIterator>
//void datalogWrite(int query, InputIterator rul_str, InputIterator fin, int finalDR, int **result)
void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, int finalDR, int **result)
{
rulenode tmprule;
vector<rulenode>::iterator qposr;
int *dop1, *hres;
int cols1, res_rows, tipo;
tmprule.name = query;
qposr = lower_bound(rul_str, fin, tmprule, comparer);
cols1 = qposr->num_columns;
res_rows = cargafinal(query, cols1, &dop1);
if(res_rows != 0)
{
if(res_rows > 0)
{
if(finalDR)
res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
cudaFree(dop1);
*result = hres;
}
else
{
res_rows *= -1;
if(finalDR)
{
int *dop2;
tipo = res_rows * cols1 * sizeof(int);
reservar(&dop2, tipo);
cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice);
free(dop1);
res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost);
cudaFree(dop2);
*result = hres;
}
else
*result = dop1;
}
}
}
#endif
#ifdef TUFFY
void postgresRead(PGconn **ret, vector<gpunode> *L, int *inpquery, char *names, int finalDR)
{
PGresult *pgr;
int x, y;
int *mat, *mat2;
char *tok, sel[1024], **qrs;
int w, z = 0, numt, numc, numc2, start = 0, start2, val;
PGconn *conn = PQconnectdb("host=localhost port=5432 dbname = prueba user=tuffer password=root");
if(PQstatus(conn) != CONNECTION_OK)
{
fprintf(stderr, "Connection to database failed: %s", PQerrorMessage(conn));
exit(1);
}
pgr = PQexec(conn, "Select nspname from pg_catalog.pg_namespace where oid = (select max(oid) from pg_catalog.pg_namespace)");
sprintf(sel, "SET search_path = %s", PQgetvalue(pgr, 0, 0));
PQclear(pgr);
PQexec(conn, sel);
tok = strtok(names, " ");
if(finalDR)
{
qrs = (char **)malloc(100 * sizeof(char *));
while(tok != NULL)
{
sprintf(sel, "Select * from %s limit 0", tok);
pgr = PQexec(conn, sel);
numc = L->at(z).num_columns;
if(tok[0] == 'c')
{
sprintf(sel, "Select ");
numt = numc + 1;
for(x = 1; x < numt; x++)
{
strcat(sel, PQfname(pgr, x));
strcat(sel, ", ");
}
sel[strlen(sel)-2] = '\0';
sprintf(sel, "%s from %s", sel, tok);
}
else
{
sprintf(sel, "Select id, Club, ");
numt = numc + 6;
for(x = 8; x < numt; x++)
{
strcat(sel, PQfname(pgr, x));
strcat(sel, ", ");
}
sel[strlen(sel)-2] = '\0';
sprintf(sel, "%s from %s", sel, tok);
}
PQclear(pgr);
pgr = PQexec(conn, sel);
numt = PQntuples(pgr);
mat = (int *)malloc(numt * numc * sizeof(int));
if(tok[0] == 'c')
{
for(x = 0; x < numt; x++)
{
start = x * numc;
for(y = 0; y < numc; y++)
mat[start + y] = atoi(PQgetvalue(pgr, x, y));
}
}
else
{
numc2 = numc - 2;
mat2 = (int *)malloc(numt * numc2 * sizeof(int));
start = 0;
start2 = 0;
for(x = 0; x < numt; x++)
{
w = atoi(PQgetvalue(pgr, x, 1));
if(w < 2)
{
mat[start] = atoi(PQgetvalue(pgr, x, 0));
start++;
mat[start] = w;
start++;
if(w > 0)
{
for(y = 2; y < numc; y++)
{
val = atoi(PQgetvalue(pgr, x, y));
mat[start] = val;
mat2[start2] = val;
start++;
start2++;
}
}
else
{
for(y = 2; y < numc; y++)
{
val = atoi(PQgetvalue(pgr, x, y));
mat[start] = val;
start++;
}
}
}
else
{
for(y = 2; y < numc; y++)
{
val = atoi(PQgetvalue(pgr, x, y));
mat2[start2] = val;
start2++;
}
}
}
L->at(z+1).address_host_table = mat2;
L->at(z+1).num_rows = start2 / numc2;
}
L->at(z).address_host_table = mat;
L->at(z).num_rows = start / numc;
PQclear(pgr);
x = 1;
while(inpquery[x] != -1)
{
if(L->at(z).name == inpquery[x])
{
numt = (strlen(tok) + 1) * sizeof(char);
qrs[x] = (char *)malloc(numt);
memcpy(qrs[x], tok, numt);
}
x += 2;
}
if(tok[0] == 'c')
{
tok = strtok(NULL, " ");
z++;
}
else
{
strtok(NULL, " ");
tok = strtok(NULL, " ");
z += 2;
}
}
}
else
{
while(tok != NULL)
{
sprintf(sel, "Select * from %s limit 0", tok);
pgr = PQexec(conn, sel);
numc = L->at(z).num_columns;
if(tok[0] == 'c')
{
sprintf(sel, "Select weight, myid, ");
start = 1;
numt = numc + 1;
}
else
{
sprintf(sel, "Select truth, Club, atomID, ");
start = 8;
numt = numc + 5;
}
for(x = start; x < numt; x++)
{
strcat(sel, PQfname(pgr, x));
strcat(sel, ", ");
}
sel[strlen(sel)-2] = '\0';
sprintf(sel, "%s from %s", sel, tok);
PQclear(pgr);
pgr = PQexec(conn, sel);
numt = PQntuples(pgr);
mat = (int *)malloc(numt * numc * sizeof(int));
L->at(z).weight = (double *)malloc(numt * sizeof(double));
L->at(z).num_rows = numt;
for(x = 0; x < numt; x++)
{
start = x * numc;
for(y = 1; y < numc; y++)
mat[start + y] = atoi(PQgetvalue(pgr, x, y));
}
numt *= numc;
double flo;
if(tok[0] == 'c')
{
for(x = 0, y = 0; x < numt; x+=numc, y++)
{
flo = atof(PQgetvalue(pgr, y, 0));
L->at(z).weight[y] = flo;
if(flo > 0)
mat[x] = y + 1;
else
mat[x] = -y - 1;
}
}
else
{
for(x = 0, y = 0; x < numt; x+=numc, y++)
{
if(PQgetvalue(pgr, y, 0)[0] == 't')
mat[x] = 2;
else
mat[x] = 1;
}
}
L->at(z).address_host_table = mat;
numc = (strlen(tok) + 1) * sizeof(char);
L->at(z).predname = (char *)malloc(numc);
memcpy(L->at(z).predname, tok, numc);
PQclear(pgr);
tok = strtok(NULL, " ");
z++;
}
}
*ret = conn;
}
void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, PGconn *conn, int finalDR)
{
char sel[1024];
double *matw = NULL;
int qname, cols1, res_rows, tipo, *dop1;
int x, w, z, y, *hres;
rulenode tmprule;
vector<rulenode>::iterator qposr;
if(finalDR)
{
char file[] = "/dev/shm/mln0_atoms.csv";
z = 0;
int seqid = 1;
FILE *fp;
fp = fopen(file, "w");
if(fp == NULL)
{
cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
sprintf(file, "./temp/mln0_atoms.csv");
fp = fopen(file, "w");
if(fp == NULL)
{
cerr << "Failed to create main memory temporary file" << endl;
exit(1);
}
}
while((qname = inpquery[z]) != -1)
{
tmprule.name = qname;
qposr = lower_bound(rul_str, fin, tmprule, comparer);
cols1 = qposr->num_columns;
res_rows = cargafinal(qname, cols1, &dop1);
if(res_rows != 0)
{
if(res_rows < 0)
res_rows = unir(dop1, -res_rows, cols1, &dop1, 0); /*duplicate elimination on result*/
else
res_rows = unir(dop1, res_rows, cols1, &dop1, finalDR);
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
cudaFree(dop1);
w = z + 1;
strtok(qposr->rulename, "_");
strtok(NULL, "_");
int prid = atoi(strtok(NULL, "_"));
for(x = 0, w = 0; x < res_rows; x++, w+=2)
{
if(hres[w+1])
fprintf(fp, "%d,%d,%d,true\n", seqid, hres[w], prid);
else
fprintf(fp, "%d,%d,%d,false\n", seqid, hres[w], prid);
seqid++;
}
free(hres);
}
z += 2;
}
fclose(fp);
sprintf(sel, "Copy mln0_atoms(atomid,tupleID,predID,isquery) from '%s' CSV", file);
PQexec(conn, sel);
}
else
{
while(rul_str != fin)
{
cols1 = rul_str->num_columns;
res_rows = cargafinal(rul_str->name, cols1, &dop1);
if(res_rows == 0)
{
rul_str++;
continue;
}
res_rows = abs(res_rows);
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
cudaFree(dop1);
char file[] = "/dev/shm/buffer.csv";
FILE *fp;
fp = fopen(file, "w");
if(fp == NULL)
{
cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
sprintf(file, "./temp/buffer.csv");
fp = fopen(file, "w");
if(fp == NULL)
{
cerr << "Failed to create main memory temporary file" << endl;
exit(1);
}
}
if(rul_str->rulename[0] == 'z')
{
char *name = rul_str->rulename + 1;
for(x = 0; x < ninpf; x++)
{
if(strncmp(L->at(x).predname, name, strlen(name)) == 0)
{
matw = L->at(x).weight;
break;
}
}
cols1 -= 3;
for(x = 0, z = 0; x < res_rows; x++, z+=3)
{
for(y = 0; y < cols1; y++, z++)
fprintf(fp, "%d,", hres[z]);
fprintf(fp, "%d,%lf,%d\n", hres[z], matw[abs(hres[z+1])-1], hres[z+2]);
}
fclose(fp);
sprintf(sel, "Copy %s from '%s' CSV", name, file);
PQexec(conn, sel);
}
else
{
cols1--;
for(x = 0, z = 0; x < res_rows; x++, z++)
{
for(y = 0; y < cols1; y++, z++)
fprintf(fp, "%d,", hres[z]);
fprintf(fp, "%d\n", hres[z]);
}
fclose(fp);
sprintf(sel, "Copy %s from '%s' CSV", rul_str->rulename, file);
PQexec(conn, sel);
}
free(hres);
rul_str++;
}
}
PQfinish(conn);
if(finalDR)
clear_memory_all();
}
#endif
#ifdef ROCKIT
void mysqlRead(MYSQL **ret, int *qrs, vector<gpunode> *L, int ninpf, char *names, int finalDR)
{
char *tok, sel[1024];
int w, x, y, z = 0, numt, numc;
int *mat;
MYSQL *con = mysql_init(NULL);
if(con == NULL)
{
fprintf(stderr, "mysql_init() failed\n");
exit(1);
}
mysql_options(con, MYSQL_OPT_LOCAL_INFILE, NULL);
mysql_real_connect(con, "localhost", "root", "root", "rockit", 0, NULL, 0);
if(finalDR)
{
y = 0;
while(qrs[y] != 0)
{
for(z = 0; z < ninpf; z++)
{
if(qrs[y] == L->at(z).name)
{
MYSQL_ROW row;
sprintf(sel, "Select count(*) from %s", L->at(z).predname);
mysql_query(con, sel);
MYSQL_RES *result = mysql_store_result(con);
row = mysql_fetch_row(result);
numt = atoi(row[0]);
mysql_free_result(result);
if(numt != L->at(z).num_rows)
{
liberar(L->at(z).name);
numc = L->at(z).num_columns;
sprintf(sel, "Select * from %s", L->at(z).predname);
mysql_query(con, sel);
MYSQL_RES *result = mysql_store_result(con);
mat = (int *)malloc(numt * numc * sizeof(int));
w = 0;
while ((row = mysql_fetch_row(result)))
{
for(x = 0; x < numc; x++, w++)
mat[w] = atoi(row[x]);
}
mysql_free_result(result);
if(L->at(z).address_host_table != NULL)
free(L->at(z).address_host_table);
L->at(z).address_host_table = mat;
L->at(z).num_rows = numt;
}
}
}
y++;
}
}
else
{
tok = strtok(names, " ");
while(tok != NULL)
{
numc = L->at(z).num_columns;
sprintf(sel, "Select * from %s", tok);
mysql_query(con, sel);
MYSQL_RES *result = mysql_store_result(con);
numt = mysql_num_rows(result);
MYSQL_ROW row;
mat = (int *)malloc(numt * numc * sizeof(int));
w = 0;
if(tok[0] == 'f' && tok[1] >= '0' && tok[1] <= '9')
{
while ((row = mysql_fetch_row(result)))
{
for(x = 1; x <= numc; x++, w++)
mat[w] = atoi(row[x]);
}
}
else
{
while ((row = mysql_fetch_row(result)))
{
for(x = 0; x < numc; x++, w++)
mat[w] = atoi(row[x]);
}
}
mysql_free_result(result);
L->at(z).address_host_table = mat;
L->at(z).num_rows = numt;
numc = (strlen(tok) + 1) * sizeof(char);
L->at(z).predname = (char *)malloc(numc);
strcpy(L->at(z).predname, tok);
tok = strtok(NULL, " ");
z++;
}
}
*ret = con;
}
void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, MYSQL *con)
{
int x, y, z, cols1, cols2, res_rows, tipo;
int *hres, *dop1;
char *id, *sign, *q1, *q2;
char sel[1024], weight[1024];
gpunode tmpfact;
while(rul_str != fin)
{
cols1 = rul_str->num_columns;
res_rows = cargafinal(rul_str->name, cols1, &dop1);
id = strtok(rul_str->rulename, "_");
sprintf(sel, "create table if not exists %s(weight double, ", id);
for(x = 0; x < cols1; x++)
{
sprintf(weight, "a%d char(10), ", x);
strcat(sel, weight);
}
sel[strlen(sel)-2] = ')';
strcat(sel, "ENGINE = MEMORY DEFAULT CHARSET=latin1");
mysql_query(con, sel);
sprintf(sel, "truncate %s", id);
mysql_query(con, sel);
if(res_rows == 0)
{
rul_str++;
continue;
}
if(res_rows > 0)
{
tmpfact = L->at(-rul_str->referencias[rul_str->num_rows - 2] - 1);
sign = tmpfact.predname;
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
sumar(tmpfact.name, dop1, cols1, res_rows);
}
else
{
hres = dop1;
res_rows = -res_rows;
}
sign = strtok(NULL, "_");
q1 = strtok(NULL, "_");
q2 = strtok(NULL, "_");
if(sign[0] == '0')
sprintf(weight, "%s.%s", q1, q2);
else
sprintf(weight, "-%s.%s", q1, q2);
FILE *fp;
char file[512];
sprintf(file, "/dev/shm/%s.tsv", id);
fp = fopen(file, "w");
if(fp == NULL)
{
cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
sprintf(file, "./temp/%s.tsv", id);
fp = fopen(file, "w");
}
cols2 = cols1 - 1;
for(x = 0, z = 0; x < res_rows; x++, z++)
{
fprintf(fp, "%s\t", weight);
for(y = 0; y < cols2; y++, z++)
fprintf(fp, "%d\t", hres[z]);
fprintf(fp, "%d\n", hres[z]);
}
fclose(fp);
sprintf(sel, "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s", file, id);
mysql_query(con, sel);
rul_str++;
}
mysql_close(con);
}
#endif

28
packages/cuda/dbio.h Normal file
View File

@ -0,0 +1,28 @@
#ifndef _DBIO_H_
#define _DBIO_H_
#include "pred.h"
#ifdef TUFFY
#include <libpq-fe.h>
#endif
#ifdef ROCKIT
#include <mysql/mysql.h>
#endif
#include <vector>
#include "lista.h"
using namespace std;
#ifdef TUFFY
void postgresRead(PGconn **ret, vector<gpunode> *L, int *inpquery, char *names, int finalDR);
void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, PGconn *conn, int finalDR);
#endif
#ifdef ROCKIT
void mysqlRead(MYSQL **ret, int *qrs, vector<gpunode> *L, int ninpf, char *names, int finalDR);
void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, MYSQL *con);
#endif
#ifdef DATALOG
void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, int finalDR, int **result);
#endif
#endif

500
packages/cuda/joincpu.cpp Executable file
View File

@ -0,0 +1,500 @@
#include "CC_CSSTree.h"
#include <vector>
#include <thrust/sort.h>
#include <thrust/system/omp/execution_policy.h>
#include "pred.h"
void partInlj(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int wj, int halfrul, int lenrul)
{
//set_thread_affinity(cpuid,NUM_T);
int i=0;
int k=0;
int curIndex=0;
int keyForSearch;
int y, posS, posR;
for(k=startS; k<endS; k++)
{
if(S == NULL)
posS = k * of2;
else
posS = S[k] * of2;
keyForSearch=p2[posS + wj];
curIndex=tree->search(keyForSearch);
for(i=curIndex-1;i>0;i--)
{
if(keyForSearch == R[i])
{
//cout << keyForSearch << endl;
posR = perm[i] * of1;
for(y = 0; y < halfrul; y++)
res->push_back(p1[posR + proj[y]]);
for(; y < lenrul; y++)
res->push_back(p2[posS + proj[y]]);
}
else
if(R[i]<keyForSearch)
break;
}
for(i=curIndex;i<rLen;i++)
{
if(keyForSearch == R[i])
{
//cout << -i << " " << keyForSearch << endl;
posR = perm[i] * of1;
for(y = 0; y < halfrul; y++)
res->push_back(p1[posR + proj[y]]);
for(; y < lenrul; y++)
res->push_back(p2[posS + proj[y]]);
}
else
if(R[i]>keyForSearch)
break;
}
}
}
void partInlj2(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int cols, int wj)
{
//set_thread_affinity(cpuid,NUM_T);
int i=0;
int k=0;
int curIndex=0;
int keyForSearch;
int y, cond, posS, posR;
for(k=startS; k<endS; k++)
{
if(S == NULL)
posS = k * of2;
else
posS = S[k] * of2;
keyForSearch=p2[posS + wj];
curIndex=tree->search(keyForSearch);
for(i=curIndex-1;i>0;i--)
{
if(keyForSearch == R[i])
{
//cout << keyForSearch << endl;
posR = perm[i] * of1 - 1;
for(y = 0; y < cols; y++)
{
cond = proj[y];
if(cond > 0)
res->push_back(p1[posR + cond]);
else
res->push_back(p2[posS - cond - 1]);
}
}
else
if(R[i]<keyForSearch)
break;
}
for(i=curIndex;i<rLen;i++)
{
if(keyForSearch == R[i])
{
//cout << -i << " " << keyForSearch << endl;
posR = perm[i] * of1 - 1;
for(y = 0; y < cols; y++)
{
cond = proj[y];
if(cond > 0)
res->push_back(p1[posR + cond]);
else
res->push_back(p2[posS - cond - 1]);
}
}
else
if(R[i]>keyForSearch)
break;
}
}
}
void multipartInlj(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int *wj, int numj, int halfrul, int lenrul)
{
//set_thread_affinity(cpuid,NUM_T);
int i=0;
int k=0;
int curIndex=0;
int keyForSearch;
int y, posS, posR;
for(k=startS; k<endS; k++)
{
if(S == NULL)
posS = k * of2;
else
posS = S[k] * of2;
keyForSearch=p2[posS + wj[1]];
curIndex=tree->search(keyForSearch);
for(i=curIndex-1;i>0;i--)
{
if(keyForSearch == R[i])
{
posR = perm[i] * of1;
for(y = 2; y < numj; y += 2)
{
if(p1[posR + wj[y]] != p2[posS + wj[y+1]])
break;
}
if(y < numj)
continue;
for(y = 0; y < halfrul; y++)
res->push_back(p1[posR + proj[y]]);
for(; y < lenrul; y++)
res->push_back(p2[posS + proj[y]]);
}
else
if(R[i]<keyForSearch)
break;
}
for(i=curIndex;i<rLen;i++)
{
if(keyForSearch == R[i])
{
posR = perm[i] * of1;
for(y = 2; y < numj; y += 2)
{
if(p1[posR + wj[y]] != p2[posS + wj[y+1]])
break;
}
if(y < numj)
continue;
for(y = 0; y < halfrul; y++)
res->push_back(p1[posR + proj[y]]);
for(; y < lenrul; y++)
res->push_back(p2[posS + proj[y]]);
}
else
if(R[i]>keyForSearch)
break;
}
}
}
void multipartInlj2(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int cols, int *wj, int numj)
{
//set_thread_affinity(cpuid,NUM_T);
int i=0;
int k=0;
int curIndex=0;
int keyForSearch;
int y, cond, posS, posR;
for(k=startS; k<endS; k++)
{
if(S == NULL)
posS = k * of2;
else
posS = S[k] * of2;
keyForSearch=p2[posS + wj[1]];
curIndex=tree->search(keyForSearch);
for(i=curIndex-1;i>0;i--)
{
if(keyForSearch == R[i])
{
posR = perm[i] * of1;
for(y = 2; y < numj; y += 2)
{
if(p1[posR + wj[y]] != p2[posS + wj[y+1]])
break;
}
if(y < numj)
continue;
for(y = 0; y < cols; y++)
{
cond = proj[y];
if(cond > 0)
res->push_back(p1[posR + cond - 1]);
else
res->push_back(p2[posS - cond - 1]);
}
}
else
if(R[i]<keyForSearch)
break;
}
for(i=curIndex;i<rLen;i++)
{
if(keyForSearch == R[i])
{
posR = perm[i] * of1;
for(y = 2; y < numj; y += 2)
{
if(p1[posR + wj[y]] != p2[posS + wj[y+1]])
break;
}
if(y < numj)
continue;
for(y = 0; y < cols; y++)
{
cond = proj[y];
if(cond > 0)
res->push_back(p1[posR + cond - 1]);
else
res->push_back(p2[posS - cond - 1]);
}
}
else
if(R[i]>keyForSearch)
break;
}
}
}
void inlj_omp(Record *R, int rLen, CC_CSSTree *tree, Record *S, int sLen, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int2 projp, int cols, int* wj, int numj, int tipo)
{
int i=0;
int j=0;
int *startS=new int[NUM_T];
int *endS=new int[NUM_T];
int chunkSize=sLen/NUM_T;
for(i=0;i<NUM_T;i++)
{
startS[i]=i*chunkSize;
if(i==(NUM_T-1))
endS[i]=sLen;
else
endS[i]=(i+1)*chunkSize;
//cout<<"T"<<i<<", "<<endS[i]-startS[i]<<"; ";
}
//cout<<endl;
//omp_set_num_threads(NUM_T);
//cout << "inicio" << endl;
#pragma omp parallel for
for(j=0;j<NUM_T;j++)
{
if(tipo)
{
if(numj > 2)
multipartInlj2(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, cols, wj, numj);
else
partInlj2(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, cols, wj[1]);
}
else
{
if(numj > 2)
multipartInlj(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, wj, numj, projp.x, projp.y);
else
partInlj(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, wj[1], projp.x, projp.y);
}
}
//cout << "fin" << endl;
delete startS;
delete endS;
}
int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>::iterator rule, int pos, int bothops, int **ret)
{
int pos2 = pos + 1;
int *sel1, nsel1 = 0;
int *sel2 = rule->select[pos2];
int nsel2 = rule->numsel[pos2];
int *proj = rule->project[pos];
int2 projp = rule->projpos[pos];
int *sjoin1, nsj1 = 0;
int *sjoin2 = rule->selfjoin[pos2];
int nsj2 = rule->numselfj[pos2];
int *wherej = rule->wherejoin[pos];
int numj = rule->numjoin[pos];
int size, *fres, ini[NUM_T], *temp;
int x, tipo = 0;
int *Sres = NULL, *Rres, Snl, Rnl, *permutation;
if(bothops)
{
sel1 = rule->select[pos];
nsel1 = rule->numsel[pos];
sjoin1 = rule->selfjoin[pos];
nsj1 = rule->numselfj[pos];
}
#ifdef TIMER
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
#endif
if(nsel1 > 0 || nsj1 > 0)
Rnl = selectproyectcpu2(p1, rLen, of1, sel1, nsel1, sjoin1, nsj1, wherej[0], &Rres, &permutation);
else
{
/*cout << "sin sel" << endl;
cout << "valores = " << rLen << " " << of1 << " " << wherej[0] << endl;
for(x = 0; x < 100; x++)
cout << p1[x] << " ";
cout << endl;
cout << "ultimo = " << p1[of1 * rLen - 1] << endl;*/
Rnl = rLen;
size = Rnl * sizeof(int);
permutation = (int *)malloc(size);
Rres = (int *)malloc(size);
#pragma omp parallel for firstprivate(of1)
for(x = 0; x < Rnl; x++)
{
permutation[x] = x;
Rres[x] = p1[of1 * x + wherej[0]];
}
//cout << "sin sel fin" << endl;
}
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cuda_stats.select1_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
#endif
if(nsel2 > 0 || nsj2 > 0)
{
//cout << "con sel S" << endl;
Snl = selectproyectcpu2(p2, sLen, of2, sel2, nsel2, sjoin2, nsj2, wherej[1], &Sres, NULL);
}
else
Snl = sLen;
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cuda_stats.select2_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
#endif
//cout << "antes" << endl;
/*cout << "antes" << endl;
for(x = 0; x < Rnl; x++)
cout << permutation[x] << " ";
cout << endl;
for(x = 0; x < 100; x++)
cout << Rres[x] << " ";
cout << endl;*/
thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation);
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cuda_stats.sort_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
#endif
/*cout << "despues" << endl;
for(x = 0; x < Rnl; x++)
cout << permutation[x] << " ";
cout << endl;
for(x = 0; x < Rnl; x++)
cout << Rres[x] << " ";
cout << endl;*/
//cout << "despues sort" << endl;
vector<int> *res = new vector<int>[NUM_T];
for(x = 0; x < NUM_T; x++)
res[x].reserve(INISIZE);
CC_CSSTree *tree = new CC_CSSTree(Rres, Rnl, CSS_TREE_FANOUT);
if(pos == (rule->num_rows - 3)) // && rule->num_bpreds.x == 0)
tipo = 1;
inlj_omp(Rres, Rnl, tree, Sres, Snl, of1, of2, res, p1, p2, permutation, proj, projp, rule->num_columns, wherej, numj, tipo);
/*cout << "proj = ";
for(x = 0; x < rule->num_columns; x++)
cout << proj[x] << " ";
cout << endl;
int y,z;
for(x = 0; x < NUM_T; x++)
{
cout << "Thread " << x << endl;
for(y = 0; y < res[x].size() / projp.y; y++)
{
for(z = 0; z < projp.y; z++)
cout << res[x][y * projp.y + z] << " ";
cout << endl;
}
}
cout << "Tamanios" << endl;*/
size = 0;
for(x = 0; x < NUM_T; x++)
{
ini[x] = res[x].size();
size += ini[x];
//cout << ini[x] << " " << size << endl;
}
fres = (int *)malloc(size * sizeof(int));
temp = fres;
for(x = 0; x < NUM_T; x++)
{
memcpy(temp, res[x].data(), ini[x] * sizeof(int));
temp += ini[x];
}
if(*ret != NULL)
free(*ret);
free(Rres);
free(permutation);
if(Sres != NULL)
free(Sres);
delete tree;
delete [] res;
*ret = fres;
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cuda_stats.join_time += time;
#endif
//cout << "Projp.x = " << projp.x << " projp.y = " << projp.y << endl;
/*if(numj > 2)
{
cout << "total = " << rLen << " " << size / projp.y << " " << projp.y << " " << rule->num_columns << endl;
exit(1);
}*/
return size / projp.y;
}

View File

@ -0,0 +1,394 @@
#include <stdlib.h>
#include <string.h>
#include <omp.h>
#include <vector>
#include "pred.h"
using namespace std;
int selectproyectcpu(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
{
int size = 0, pos, temp;
int i, x, y, z, w;
int *fres, *ptr;
int div, fin, ini[NUM_T + 1];
vector<int> vec[NUM_T];
for(x = 0; x < NUM_T; x++)
vec[x].reserve(INISIZE);
//omp_set_num_threads(NUM_T);
div = rows / NUM_T;
ini[0] = 0;
for(x = 1; x < NUM_T; x++)
ini[x] = div * x;
ini[NUM_T] = rows;
if(numselect > 0)
{
#pragma omp parallel for private(x,pos,y,z,fin,temp,w)
for(i = 0; i < NUM_T; i++)
{
fin = ini[i+1];
for(x = ini[i]; x < fin; x++)
{
pos = x * cols;
for(y = 0; y < numselect; y += 2)
{
if(dop1[pos+select[y]] != select[y+1])
break;
}
for(z = 0; z < numselfj; z++)
{
temp = dop1[pos+selfjoin[z]];
w = z + 1;
while(selfjoin[w] > -1)
{
if(temp != dop1[pos+selfjoin[w]])
break;
w++;
}
z = w;
if(selfjoin[w] != -1)
break;
}
if(y == numselect && z == numselfj)
{
for(y = 0; y < head_size; y++)
vec[i].push_back(dop1[pos+project[y]]);
}
}
}
}
else
{
if(numselfj > 0)
{
#pragma omp parallel for private(x,pos,y,z,fin,w,temp)
for(i = 0; i < NUM_T; i++)
{
fin = ini[i+1];
for(x = ini[i]; x < fin; x++)
{
pos = x * cols;
for(z = 0; z < numselfj; z++)
{
temp = dop1[pos+selfjoin[z]];
w = z + 1;
while(selfjoin[w] > -1)
{
if(temp != dop1[pos+selfjoin[w]])
break;
w++;
}
z = w;
if(selfjoin[w] != -1)
break;
}
if(z == numselfj)
{
for(y = 0; y < head_size; y++)
vec[i].push_back(dop1[pos+project[y]]);
}
}
}
}
else
{
fres = (int *)malloc(rows * cols * sizeof(int));
#pragma omp parallel for private(pos,y,z)
for(x = 0; x < rows; x++)
{
pos = x * cols;
z = pos;
for(y = 0; y < head_size; y++, z++)
fres[z] = dop1[pos+project[y]];
}
*ret = fres;
return rows;
}
}
for(x = 0; x < NUM_T; x++)
{
ini[x] = vec[x].size();
size += ini[x];
}
fres = (int *)malloc(size * sizeof(int));
ptr = fres;
for(x = 0; x < NUM_T; x++)
{
memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));
ptr += ini[x];
}
*ret = fres;
return size / head_size;
}
int selectproyectcpu2(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list)
{
int size = 0, pos, temp;
int i, x, y, z, w;
int *fres, *lres, *ptr;
int div, fin, ini[NUM_T + 1];
int *vec[NUM_T];
int *lis[NUM_T];
int cont[NUM_T];
//omp_set_num_threads(NUM_T);
div = rows / NUM_T;
ini[0] = 0;
for(x = 1; x < NUM_T; x++)
ini[x] = div * x;
ini[NUM_T] = rows;
pos = div + NUM_T;
for(x = 0; x < NUM_T; x++)
{
vec[x] = (int *)malloc(pos * sizeof(int));
lis[x] = (int *)malloc(pos * sizeof(int));
cont[x] = 0;
}
/*cout << "numselect = " << numselect << endl;
for(x = 0; x < numselect; x++)
cout << select[x] << " ";
cout << endl;*/
if(numselect > 0)
{
#pragma omp parallel for private(x,pos,y,z,fin,temp,w)
for(i = 0; i < NUM_T; i++)
{
fin = ini[i+1];
for(x = ini[i]; x < fin; x++)
{
pos = x * cols;
for(y = 0; y < numselect; y += 2)
{
if(dop1[pos+select[y]] != select[y+1])
break;
}
for(z = 0; z < numselfj; z++)
{
temp = dop1[pos+selfjoin[z]];
w = z + 1;
while(selfjoin[w] > -1)
{
if(temp != dop1[pos+selfjoin[w]])
break;
w++;
}
z = w;
if(selfjoin[w] != -1)
break;
}
if(y == numselect && z == numselfj)
{
lis[i][cont[i]] = x;
if(list != NULL)
vec[i][cont[i]] = dop1[pos+wj];
cont[i]++;
}
}
}
}
else
{
if(numselfj > 0)
{
#pragma omp parallel for private(x,pos,y,z,fin,w,temp) firstprivate(cont)
for(i = 0; i < NUM_T; i++)
{
fin = ini[i+1];
for(x = ini[i]; x < fin; x++)
{
pos = x * cols;
for(z = 0; z < numselfj; z++)
{
temp = dop1[pos+selfjoin[z]];
w = z + 1;
while(selfjoin[w] > -1)
{
if(temp != dop1[pos+selfjoin[w]])
break;
w++;
}
z = w;
if(selfjoin[w] != -1)
break;
}
if(z == numselfj)
{
lis[i][cont[i]] = x;
if(list != NULL)
vec[i][cont[i]] = dop1[pos+wj];
cont[i]++;
}
}
}
}
}
//cout << "despues sel" << endl;
for(x = 0; x < NUM_T; x++)
size += cont[x];
lres = (int *)malloc(size * sizeof(int));
ptr = lres;
for(x = 0; x < NUM_T; x++)
{
memcpy(ptr, lis[x], cont[x] * sizeof(int));
ptr += cont[x];
}
if(list != NULL)
{
fres = (int *)malloc(size * sizeof(int));
ptr = fres;
for(x = 0; x < NUM_T; x++)
{
memcpy(ptr, vec[x], cont[x] * sizeof(int));
ptr += cont[x];
}
*ret = fres;
*list = lres;
}
else
*ret = lres;
for(x = 0; x < NUM_T; x++)
{
free(lis[x]);
free(vec[x]);
}
return size;
}
int selectproyectcpu3(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list)
{
int size = 0, pos, temp;
int i, x, y, z, w;
int *fres, *lres, *ptr;
int div, fin, ini[NUM_T + 1];
vector<int> vec[NUM_T];
vector<int> lis[NUM_T];
for(x = 0; x < NUM_T; x++)
{
vec[x].reserve(INISIZE);
lis[x].reserve(INISIZE);
}
//omp_set_num_threads(NUM_T);
div = rows / NUM_T;
ini[0] = 0;
for(x = 1; x < NUM_T; x++)
ini[x] = div * x;
ini[NUM_T] = rows;
/*cout << "numselect = " << numselect << endl;
for(x = 0; x < numselect; x++)
cout << select[x] << " ";
cout << endl;*/
if(numselect > 0)
{
#pragma omp parallel for private(x,pos,y,z,fin,temp,w)
for(i = 0; i < NUM_T; i++)
{
fin = ini[i+1];
for(x = ini[i]; x < fin; x++)
{
pos = x * cols;
for(y = 0; y < numselect; y += 2)
{
if(dop1[pos+select[y]] != select[y+1])
break;
}
for(z = 0; z < numselfj; z++)
{
temp = dop1[pos+selfjoin[z]];
w = z + 1;
while(selfjoin[w] > -1)
{
if(temp != dop1[pos+selfjoin[w]])
break;
w++;
}
z = w;
if(selfjoin[w] != -1)
break;
}
if(y == numselect && z == numselfj)
{
lis[i].push_back(x);
if(list != NULL)
vec[i].push_back(dop1[pos+wj]);
}
}
}
}
else
{
if(numselfj > 0)
{
#pragma omp parallel for private(x,pos,y,z,fin,w,temp)
for(i = 0; i < NUM_T; i++)
{
fin = ini[i+1];
for(x = ini[i]; x < fin; x++)
{
pos = x * cols;
for(z = 0; z < numselfj; z++)
{
temp = dop1[pos+selfjoin[z]];
w = z + 1;
while(selfjoin[w] > -1)
{
if(temp != dop1[pos+selfjoin[w]])
break;
w++;
}
z = w;
if(selfjoin[w] != -1)
break;
}
if(z == numselfj)
{
lis[i].push_back(x);
if(list != NULL)
vec[i].push_back(dop1[pos+wj]);
}
}
}
}
}
//cout << "despues sel" << endl;
for(x = 0; x < NUM_T; x++)
{
ini[x] = lis[x].size();
size += ini[x];
}
lres = (int *)malloc(size * sizeof(int));
ptr = lres;
for(x = 0; x < NUM_T; x++)
{
memcpy(ptr, lis[x].data(), ini[x] * sizeof(int));
ptr += ini[x];
}
if(list != NULL)
{
fres = (int *)malloc(size * sizeof(int));
ptr = fres;
for(x = 0; x < NUM_T; x++)
{
memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));
ptr += ini[x];
}
*ret = fres;
*list = lres;
}
else
*ret = lres;
return size;
}

1005
packages/cuda/union2.h Executable file

File diff suppressed because it is too large Load Diff

68
packages/cuda/unioncpu2.cpp Executable file
View File

@ -0,0 +1,68 @@
#include <thrust/unique.h>
#include <thrust/distance.h>
#include <thrust/system/omp/execution_policy.h>
#include <iostream>
#include "union2.h"
int unircpu(int *res, int rows, int tipo, int **ret)
{
//cout << "En union = " << rows << " " << tipo << endl;
s2 *t2, *re2;
s3 *t3, *re3;
int nrows, *nres;
//int size;
switch(tipo)
{
case 1:
{
thrust::sort(thrust::omp::par, res, res + rows);
nres = thrust::unique(thrust::omp::par, res, res + rows);
nrows = thrust::distance(res, nres);
/*if(nrows < rows / 2)
{
size = nrows * tipo * sizeof(int);
nres = (int *)malloc(size);
memcpy(nres, res, size);
free(*ret);
*ret = nres;
}*/
return nrows;
}
case 2:
{
t2 = (s2*)res;
thrust::sort(thrust::omp::par, t2, t2 + rows, o2());
re2 = thrust::unique(thrust::omp::par, t2, t2 + rows, p2());
nrows = thrust::distance(t2, re2);
/*if(nrows < rows / 2)
{
size = nrows * tipo * sizeof(int);
nres = (int *)malloc(size);
memcpy(nres, res, size);
free(*ret);
*ret = nres;
}*/
return nrows;
}
case 3:
{
t3 = (s3*)res;
thrust::sort(thrust::omp::par, t3, t3 + rows, o3());
re3 = thrust::unique(thrust::omp::par, t3, t3 + rows, p3());
nrows = thrust::distance(t3, re3);
/*if(nrows < rows / 2)
{
size = nrows * tipo * sizeof(int);
nres = (int *)malloc(size);
memcpy(nres, res, size);
free(*ret);
*ret = nres;
}*/
return nrows;
}
}
return 0;
}