CUDA: the missing files
This commit is contained in:
parent
f966a5b912
commit
ea90785c6a
30
packages/cuda/CC_CSSTree.cu
Executable file
30
packages/cuda/CC_CSSTree.cu
Executable file
@ -0,0 +1,30 @@
|
||||
#include "CC_CSSTree.h"
|
||||
|
||||
//return the start position of searching the key.
|
||||
int CC_CSSTree::search(int key)
|
||||
{
|
||||
int i=0;
|
||||
int curIndex=0;
|
||||
int curNode=0;
|
||||
int j=0;
|
||||
//search
|
||||
for(i=0;i<level;i++)
|
||||
{
|
||||
for(j=0;j<blockSize;j++)
|
||||
{
|
||||
if(ntree[curIndex+j]==-1)
|
||||
break;
|
||||
if(key<=ntree[curIndex+j])
|
||||
break;
|
||||
}
|
||||
curNode=(fanout*(curNode)+j+1);
|
||||
curIndex=curNode*blockSize;
|
||||
//#ifdef DEBUG
|
||||
// cout<<curNode<<", "<<j<<", "<<ntree[curIndex]<<"; ";
|
||||
//#endif
|
||||
}
|
||||
curIndex=(curNode-numNode)*blockSize;
|
||||
if(curIndex>numRecord) curIndex=numRecord-1;
|
||||
//cout<<"I: "<<curIndex<<", ";//cout<<endl;
|
||||
return curIndex;
|
||||
}
|
146
packages/cuda/CC_CSSTree.h
Executable file
146
packages/cuda/CC_CSSTree.h
Executable file
@ -0,0 +1,146 @@
|
||||
#ifndef CSSTREE_H
|
||||
#define CSSTREE_H
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
using namespace std;
|
||||
|
||||
#define divRoundUp(n,s) (((n) / (s)) + ((((n) % (s)) > 0) ? 1 : 0))
|
||||
#define CSS_TREE_FANOUT 33
|
||||
//we use implicit pointer to perform the addressing.
|
||||
|
||||
typedef int Record;
|
||||
|
||||
class CC_GenericTree
|
||||
{
|
||||
public:
|
||||
int numRecord;
|
||||
Record *data;
|
||||
//we use the BFS layout as the default layout.
|
||||
int numNode;
|
||||
int level;
|
||||
int gResult;
|
||||
CC_GenericTree(){}
|
||||
//we assume that numR=2^i. Otherwise, we pad the array with -1 from the beginning.
|
||||
//we also assume that the record are sorted by the key.
|
||||
CC_GenericTree(Record *d, int numR)
|
||||
{
|
||||
data=d;
|
||||
numRecord=numR;
|
||||
}
|
||||
virtual ~CC_GenericTree()
|
||||
{
|
||||
}
|
||||
virtual int search(int key)=0;
|
||||
|
||||
};
|
||||
|
||||
class CC_CSSTree:public CC_GenericTree
|
||||
{
|
||||
public:
|
||||
int *ntree;
|
||||
int fanout;
|
||||
int blockSize;
|
||||
int *vStart;
|
||||
int *vG;//vG[0] is used in computing the position for level 1.
|
||||
int numKey;
|
||||
CC_CSSTree(Record *d, int numR, int f):CC_GenericTree(d,numR)
|
||||
{
|
||||
fanout=f;
|
||||
blockSize=fanout-1;
|
||||
int numLeaf=divRoundUp(numR,blockSize);
|
||||
level=1;
|
||||
int temp=numLeaf;
|
||||
while(temp>1)
|
||||
{
|
||||
temp=divRoundUp(temp, fanout);
|
||||
level++;
|
||||
}
|
||||
numNode=(int)((pow((double)fanout,(double)level)-1)/(fanout-1));
|
||||
numKey=numNode*blockSize;
|
||||
ntree=new int[numKey];
|
||||
vStart=new int[level];
|
||||
vG=new int[level];
|
||||
#ifdef DEBUG
|
||||
cout<<numLeaf<<","<<level<<", "<<numNode<<endl;
|
||||
#endif
|
||||
//layout the tree from bottom up.
|
||||
int i=0,j=0,k=0;
|
||||
int startNode=0;
|
||||
int endNode=0;
|
||||
int startKey, endKey;
|
||||
int curIndex;
|
||||
for(i=0;i<numNode;i++)
|
||||
ntree[i]=-1;
|
||||
//for <level-1>, i.e., the leaf level. [start,end]
|
||||
for(i=0;i<level;i++)//level
|
||||
{
|
||||
startNode=(int)((pow((double)fanout,(double)i)-1)/(fanout-1));
|
||||
endNode=(int)((pow((double)fanout,(double)(i+1))-1)/(fanout-1));
|
||||
for(j= startNode;j< endNode;j++)//which node
|
||||
{
|
||||
startKey=j*blockSize;
|
||||
endKey=startKey+blockSize;
|
||||
for(k=startKey;k<endKey;k++)
|
||||
{
|
||||
curIndex=(int)(blockSize*pow((double)fanout,(double)(level-i-1))*(k+1-startNode*blockSize+(j-startNode))-1);
|
||||
if(curIndex<numRecord+blockSize)
|
||||
{
|
||||
if(curIndex>=numRecord)
|
||||
curIndex=numRecord-1;
|
||||
ntree[k]=data[curIndex];
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
~CC_CSSTree()
|
||||
{
|
||||
delete [] ntree;
|
||||
delete [] vStart;
|
||||
delete [] vG;
|
||||
}
|
||||
virtual int search(int key);
|
||||
void print()
|
||||
{
|
||||
int i=0, j=0;
|
||||
int k=0;
|
||||
int startNode=0;
|
||||
int endNode=0;
|
||||
int startKey, endKey;
|
||||
for(i=0;i<level;i++)//level
|
||||
{
|
||||
cout<<"Level, "<<i<<endl;
|
||||
startNode=(int)((pow((double)fanout,(double)i)-1)/(fanout-1));
|
||||
endNode=(int)((pow((double)fanout,(double)(i+1))-1)/(fanout-1));
|
||||
for(j= startNode;j< endNode;j++)//which node
|
||||
{
|
||||
cout<<"Level, "<<i<<", Node, "<<j<<": ";
|
||||
startKey=j*blockSize;
|
||||
endKey=startKey+blockSize;
|
||||
for(k=startKey;k<endKey;k++)
|
||||
{
|
||||
cout<<ntree[k]<<", ";
|
||||
}
|
||||
cout<<endl;
|
||||
}
|
||||
}
|
||||
for(i=0;i<numRecord;i++)
|
||||
{
|
||||
cout<<data[i]<<", ";
|
||||
if(i%(fanout-1)==(fanout-2))
|
||||
cout<<"*"<<endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
12
packages/cuda/bpreds.h
Executable file
12
packages/cuda/bpreds.h
Executable file
@ -0,0 +1,12 @@
|
||||
#ifndef _BPREDS_H_
|
||||
#define _BPREDS_H_
|
||||
|
||||
int maximo(int, ...);
|
||||
__global__ void bpreds(int*, int*, int, int, int, int*, int, int, int*, int*);
|
||||
__global__ void bpredsnormal(int*, int, int, int*, int, int*);
|
||||
__global__ void bpredsnormal2(int*, int, int, int*, int, int*);
|
||||
__global__ void bpredsOR(int*, int*, int, int, int, int*, int, int, int*, int*);
|
||||
__global__ void bpredsorlogic(int*, int, int, int*, int, int*);
|
||||
__global__ void bpredsorlogic2(int*, int, int, int*, int, int*);
|
||||
|
||||
#endif
|
88
packages/cuda/bpredscpu.cpp
Executable file
88
packages/cuda/bpredscpu.cpp
Executable file
@ -0,0 +1,88 @@
|
||||
#include "pred.h"
|
||||
|
||||
int bpredscpu(int *dop1, int rows, int *bin, int3 numpreds, int **ret)
|
||||
{
|
||||
int i, x, y, op1, op2;
|
||||
int size = 0, rowact, flag = 0;
|
||||
int predn = numpreds.x * 3;
|
||||
int total = predn + numpreds.z;
|
||||
int *fres, *ptr;
|
||||
int div, fin, ini[NUM_T + 1];
|
||||
vector<int> vec[NUM_T];
|
||||
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
vec[x].reserve(INISIZE);
|
||||
|
||||
//omp_set_num_threads(NUM_T);
|
||||
div = rows / NUM_T;
|
||||
ini[0] = 0;
|
||||
for(x = 1; x < NUM_T; x++)
|
||||
ini[x] = div * x;
|
||||
ini[NUM_T] = rows;
|
||||
|
||||
#pragma omp parallel for private(x,rowact,y,fin,op1,op2) firstprivate(flag,total)
|
||||
for(i = 0; i < NUM_T; i++)
|
||||
{
|
||||
fin = ini[i+1];
|
||||
for(x = ini[i]; x < fin; x++)
|
||||
{
|
||||
rowact = x * numpreds.y;
|
||||
for(y = 0; y < predn; y += 3)
|
||||
{
|
||||
op1 = bin[y+1];
|
||||
if(op1 < 0)
|
||||
op1 *= -1;
|
||||
else
|
||||
op1 = dop1[rowact + op1];
|
||||
op2 = bin[y+2];
|
||||
if(op2 < 0)
|
||||
op2 *= -1;
|
||||
else
|
||||
op2 = dop1[rowact + op2];
|
||||
switch(bin[y])
|
||||
{
|
||||
case SBG_EQ: if(op1 != op2)
|
||||
flag = 1;
|
||||
break;
|
||||
case SBG_GT: if(op1 <= op2)
|
||||
flag = 1;
|
||||
break;
|
||||
case SBG_LT: if(op1 >= op2)
|
||||
flag = 1;
|
||||
break;
|
||||
case SBG_GE: if(op1 < op2)
|
||||
flag = 1;
|
||||
break;
|
||||
case SBG_LE: if(op1 > op2)
|
||||
flag = 1;
|
||||
break;
|
||||
case SBG_DF: if(op1 == op2)
|
||||
flag = 1;
|
||||
}
|
||||
if(flag)
|
||||
break;
|
||||
}
|
||||
if(flag != 1)
|
||||
{
|
||||
for(y = predn; y < total; y++)
|
||||
vec[i].push_back(dop1[rowact+bin[y]]);
|
||||
}
|
||||
else
|
||||
flag = 0;
|
||||
}
|
||||
}
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
ini[x] = vec[x].size();
|
||||
size += ini[x];
|
||||
}
|
||||
fres = (int *)malloc(size * sizeof(int));
|
||||
ptr = fres;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));
|
||||
ptr += ini[x];
|
||||
}
|
||||
*ret = fres;
|
||||
return size / numpreds.z;
|
||||
}
|
197
packages/cuda/creator2.c
Executable file
197
packages/cuda/creator2.c
Executable file
@ -0,0 +1,197 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
/*Program used to generate union2.cu and union2.h. A new pointer and all its operations are generated for each set (pairs, triplets, etc.).
|
||||
Arguments are the cardinality of the biggest set and the name of the cuda file. For example, executing "creator2 20 union2" will generate
|
||||
all pointers and operations for all sets from 1 to 20 in the files union2.cu and union2.h.*/
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int num = atoi(argv[1]);
|
||||
int x;
|
||||
char *str = (char *)malloc((strlen(argv[2]) + 4) * sizeof(char));
|
||||
sprintf(str, "%s.cu", argv[2]);
|
||||
FILE *cuda = fopen(str, "w");
|
||||
|
||||
fprintf(cuda, "/*Computer generated file to remove duplicates. Since Thrust's unique and sort, unlike their std's counterparts, don't have a way to specify the size of each element in\n");
|
||||
fprintf(cuda, "the array, comparing pairs, triplets and other sets is not possible without defining a new pointer and all related operations for each set. If you have a better idea to do\n");
|
||||
fprintf(cuda, "this, please don't hesitate to email us.*/\n\n");
|
||||
fprintf(cuda, "#include <thrust/device_vector.h>\n");
|
||||
fprintf(cuda, "#include <thrust/unique.h>\n");
|
||||
fprintf(cuda, "#include <thrust/distance.h>\n");
|
||||
fprintf(cuda, "#include <thrust/sort.h>\n");
|
||||
fprintf(cuda, "#include <iostream>\n");
|
||||
fprintf(cuda, "#include \"memory.h\"\n");
|
||||
fprintf(cuda, "#include \"%s.h\"\n\n", argv[2]);
|
||||
fprintf(cuda, "int unir(int *res, int rows, int tipo, int **ret, int final)\n");
|
||||
fprintf(cuda, "{\n");
|
||||
fprintf(cuda, "\tthrust::device_ptr<int> pt, re;\n");
|
||||
for(x = 2; x <= num; x++)
|
||||
fprintf(cuda, "\tthrust::device_ptr<s%d> pt%d, re%d;\n", x, x ,x);
|
||||
for(x = 2; x <= num; x++)
|
||||
fprintf(cuda, "\ts%d *t%d;\n", x, x);
|
||||
fprintf(cuda, "\tint flag, nrows, *nres, size;\n\n");
|
||||
fprintf(cuda, "#if TIMER\n");
|
||||
fprintf(cuda, "\tcuda_stats.unions++;\n");
|
||||
fprintf(cuda, "#endif\n\n");
|
||||
fprintf(cuda, "\tswitch(tipo)\n");
|
||||
fprintf(cuda, "\t{\n");
|
||||
fprintf(cuda, "\t\tcase 1:\n");
|
||||
fprintf(cuda, "\t\t{\n");
|
||||
fprintf(cuda, "\t\t\tpt = thrust::device_pointer_cast(res);\n");
|
||||
fprintf(cuda, "\t\t\tflag = 0;\n");
|
||||
fprintf(cuda, "\t\t\twhile(flag != 1)\n");
|
||||
fprintf(cuda, "\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\ttry\n");
|
||||
fprintf(cuda, "\t\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\t\tthrust::sort(pt, pt + rows);\n");
|
||||
fprintf(cuda, "\t\t\t\t\tif(final)\n");
|
||||
fprintf(cuda, "\t\t\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, pt + rows, q1());\n");
|
||||
fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, re);\n");
|
||||
fprintf(cuda, "\t\t\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\t\t\telse\n");
|
||||
fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, pt + rows);\n");
|
||||
fprintf(cuda, "\t\t\t\t\tflag = 1;\n");
|
||||
fprintf(cuda, "\t\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\t\tcatch(std::bad_alloc &e)\n");
|
||||
fprintf(cuda, "\t\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\t\tlimpiar(\"sort/unique in unir\", 0);\n");
|
||||
fprintf(cuda, "\t\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\tnrows = thrust::distance(pt, re);\n");
|
||||
fprintf(cuda, "\t\t\tif(nrows < rows / 2)\n");
|
||||
fprintf(cuda, "\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
|
||||
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
|
||||
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
|
||||
fprintf(cuda, "\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\treturn nrows;\n");
|
||||
fprintf(cuda, "\t\t}\n");
|
||||
for(x = 2; x <= num; x++)
|
||||
{
|
||||
fprintf(cuda, "\t\tcase %d:\n", x);
|
||||
fprintf(cuda, "\t\t{\n");
|
||||
fprintf(cuda, "\t\t\tt%d = (s%d*)res;\n", x, x);
|
||||
fprintf(cuda, "\t\t\tpt%d = thrust::device_pointer_cast(t%d);\n", x, x);
|
||||
fprintf(cuda, "\t\t\tflag = 0;\n");
|
||||
fprintf(cuda, "\t\t\twhile(flag != 1)\n");
|
||||
fprintf(cuda, "\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\ttry\n");
|
||||
fprintf(cuda, "\t\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\t\tthrust::sort(pt%d, pt%d + rows, o%d());\n", x, x, x);
|
||||
fprintf(cuda, "\t\t\t\t\tif(final)\n");
|
||||
fprintf(cuda, "\t\t\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, pt%d + rows, q%d());\n", x, x, x, x);
|
||||
fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, re%d, p%d());\n", x, x, x, x);
|
||||
fprintf(cuda, "\t\t\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\t\t\telse\n");
|
||||
fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, pt%d + rows, p%d());\n", x, x, x, x);
|
||||
fprintf(cuda, "\t\t\t\t\tflag = 1;\n");
|
||||
fprintf(cuda, "\t\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\t\tcatch(std::bad_alloc &e)\n");
|
||||
fprintf(cuda, "\t\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\t\tlimpiar(\"sort/unique in unir\", 0);\n");
|
||||
fprintf(cuda, "\t\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\tnrows = thrust::distance(pt%d, re%d);\n", x, x);
|
||||
fprintf(cuda, "\t\t\tif(nrows < rows / 2)\n");
|
||||
fprintf(cuda, "\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
|
||||
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
|
||||
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
|
||||
fprintf(cuda, "\t\t\t}\n");
|
||||
fprintf(cuda, "\t\t\treturn nrows;\n");
|
||||
fprintf(cuda, "\t\t}\n");
|
||||
}
|
||||
fprintf(cuda, "\t}\n");
|
||||
fprintf(cuda, "\treturn 0;\n");
|
||||
fprintf(cuda, "}\n");
|
||||
|
||||
fclose(cuda);
|
||||
sprintf(str, "%s.h", argv[2]);
|
||||
cuda = fopen(str, "w"); /*tipo de archivo cambiar*/
|
||||
|
||||
fprintf(cuda, "#ifndef _");
|
||||
for(x = 0; x < strlen(argv[2]); x++)
|
||||
fprintf(cuda, "%c", toupper(argv[2][x]));
|
||||
fprintf(cuda, "_H_\n");
|
||||
fprintf(cuda, "#define _");
|
||||
for(x = 0; x < strlen(argv[2]); x++)
|
||||
fprintf(cuda, "%c", toupper(argv[2][x]));
|
||||
fprintf(cuda, "_H_\n\n");
|
||||
fprintf(cuda, "int unir(int *res, int rows, int tipo, int **ret, int final);\n\n");
|
||||
for(x = 2; x <= num; x++)
|
||||
{
|
||||
fprintf(cuda, "typedef struct n%d\n", x);
|
||||
fprintf(cuda, "{\n");
|
||||
fprintf(cuda, "\tint v[%d];\n", x);
|
||||
fprintf(cuda, "}s%d;\n\n", x);
|
||||
}
|
||||
fprintf(cuda, "struct q1\n");
|
||||
fprintf(cuda, "{\n");
|
||||
fprintf(cuda, "\t__host__ __device__\n");
|
||||
fprintf(cuda, "\tbool operator()(const int &r1, const int &r2)\n");
|
||||
fprintf(cuda, "\t{\n");
|
||||
fprintf(cuda, "\t\tif(r1 != r2)\n");
|
||||
fprintf(cuda, "\t\t\treturn true;\n");
|
||||
fprintf(cuda, "\t\treturn false;\n");
|
||||
fprintf(cuda, "\t}\n");
|
||||
fprintf(cuda, "};\n\n");
|
||||
for(x = 2; x <= num; x++)
|
||||
{
|
||||
fprintf(cuda, "struct p%d\n", x);
|
||||
fprintf(cuda, "{\n");
|
||||
fprintf(cuda, "\t__host__ __device__\n");
|
||||
fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
|
||||
fprintf(cuda, "\t{\n");
|
||||
fprintf(cuda, "\t\tint x;\n");
|
||||
fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
|
||||
fprintf(cuda, "\t\t{\n");
|
||||
fprintf(cuda, "\t\t\tif(r1.v[x] != r2.v[x])\n");
|
||||
fprintf(cuda, "\t\t\t\treturn false;\n");
|
||||
fprintf(cuda, "\t\t}\n");
|
||||
fprintf(cuda, "\t\treturn true;\n");
|
||||
fprintf(cuda, "\t}\n");
|
||||
fprintf(cuda, "};\n\n");
|
||||
fprintf(cuda, "struct q%d\n", x);
|
||||
fprintf(cuda, "{\n");
|
||||
fprintf(cuda, "\t__host__ __device__\n");
|
||||
fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
|
||||
fprintf(cuda, "\t{\n");
|
||||
fprintf(cuda, "\t\tint x;\n");
|
||||
fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
|
||||
fprintf(cuda, "\t\t{\n");
|
||||
fprintf(cuda, "\t\t\tif(r1.v[x] != r2.v[x])\n");
|
||||
fprintf(cuda, "\t\t\t\treturn true;\n");
|
||||
fprintf(cuda, "\t\t}\n");
|
||||
fprintf(cuda, "\t\treturn false;\n");
|
||||
fprintf(cuda, "\t}\n");
|
||||
fprintf(cuda, "};\n\n");
|
||||
fprintf(cuda, "struct o%d\n", x);
|
||||
fprintf(cuda, "{\n");
|
||||
fprintf(cuda, "\t__host__ __device__\n");
|
||||
fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
|
||||
fprintf(cuda, "\t{\n");
|
||||
fprintf(cuda, "\t\tint x;\n");
|
||||
fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
|
||||
fprintf(cuda, "\t\t{\n");
|
||||
fprintf(cuda, "\t\t\tif(r1.v[x] > r2.v[x])\n");
|
||||
fprintf(cuda, "\t\t\t\treturn true;\n");
|
||||
fprintf(cuda, "\t\t\tif(r1.v[x] < r2.v[x])\n");
|
||||
fprintf(cuda, "\t\t\t\treturn false;\n");
|
||||
fprintf(cuda, "\t\t}\n");
|
||||
fprintf(cuda, "\t\treturn false;\n");
|
||||
fprintf(cuda, "\t}\n");
|
||||
fprintf(cuda, "};\n\n");
|
||||
}
|
||||
fprintf(cuda, "#endif\n");
|
||||
fclose(cuda);
|
||||
free(str);
|
||||
}
|
603
packages/cuda/dbio.cu
Normal file
603
packages/cuda/dbio.cu
Normal file
@ -0,0 +1,603 @@
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <stdio.h>
|
||||
#include "memory.h"
|
||||
#include "union2.h"
|
||||
#include "dbio.h"
|
||||
|
||||
#ifdef DATALOG
|
||||
//template<class InputIterator>
|
||||
//void datalogWrite(int query, InputIterator rul_str, InputIterator fin, int finalDR, int **result)
|
||||
void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, int finalDR, int **result)
|
||||
{
|
||||
rulenode tmprule;
|
||||
vector<rulenode>::iterator qposr;
|
||||
int *dop1, *hres;
|
||||
int cols1, res_rows, tipo;
|
||||
tmprule.name = query;
|
||||
qposr = lower_bound(rul_str, fin, tmprule, comparer);
|
||||
cols1 = qposr->num_columns;
|
||||
res_rows = cargafinal(query, cols1, &dop1);
|
||||
|
||||
if(res_rows != 0)
|
||||
{
|
||||
if(res_rows > 0)
|
||||
{
|
||||
if(finalDR)
|
||||
res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
*result = hres;
|
||||
}
|
||||
else
|
||||
{
|
||||
res_rows *= -1;
|
||||
if(finalDR)
|
||||
{
|
||||
int *dop2;
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
reservar(&dop2, tipo);
|
||||
cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice);
|
||||
free(dop1);
|
||||
res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop2);
|
||||
*result = hres;
|
||||
}
|
||||
else
|
||||
*result = dop1;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TUFFY
|
||||
void postgresRead(PGconn **ret, vector<gpunode> *L, int *inpquery, char *names, int finalDR)
|
||||
{
|
||||
PGresult *pgr;
|
||||
int x, y;
|
||||
int *mat, *mat2;
|
||||
char *tok, sel[1024], **qrs;
|
||||
int w, z = 0, numt, numc, numc2, start = 0, start2, val;
|
||||
PGconn *conn = PQconnectdb("host=localhost port=5432 dbname = prueba user=tuffer password=root");
|
||||
if(PQstatus(conn) != CONNECTION_OK)
|
||||
{
|
||||
fprintf(stderr, "Connection to database failed: %s", PQerrorMessage(conn));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
pgr = PQexec(conn, "Select nspname from pg_catalog.pg_namespace where oid = (select max(oid) from pg_catalog.pg_namespace)");
|
||||
sprintf(sel, "SET search_path = %s", PQgetvalue(pgr, 0, 0));
|
||||
PQclear(pgr);
|
||||
PQexec(conn, sel);
|
||||
tok = strtok(names, " ");
|
||||
if(finalDR)
|
||||
{
|
||||
qrs = (char **)malloc(100 * sizeof(char *));
|
||||
while(tok != NULL)
|
||||
{
|
||||
sprintf(sel, "Select * from %s limit 0", tok);
|
||||
pgr = PQexec(conn, sel);
|
||||
numc = L->at(z).num_columns;
|
||||
if(tok[0] == 'c')
|
||||
{
|
||||
sprintf(sel, "Select ");
|
||||
numt = numc + 1;
|
||||
for(x = 1; x < numt; x++)
|
||||
{
|
||||
strcat(sel, PQfname(pgr, x));
|
||||
strcat(sel, ", ");
|
||||
}
|
||||
sel[strlen(sel)-2] = '\0';
|
||||
sprintf(sel, "%s from %s", sel, tok);
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf(sel, "Select id, Club, ");
|
||||
numt = numc + 6;
|
||||
for(x = 8; x < numt; x++)
|
||||
{
|
||||
strcat(sel, PQfname(pgr, x));
|
||||
strcat(sel, ", ");
|
||||
}
|
||||
sel[strlen(sel)-2] = '\0';
|
||||
sprintf(sel, "%s from %s", sel, tok);
|
||||
}
|
||||
PQclear(pgr);
|
||||
pgr = PQexec(conn, sel);
|
||||
numt = PQntuples(pgr);
|
||||
mat = (int *)malloc(numt * numc * sizeof(int));
|
||||
if(tok[0] == 'c')
|
||||
{
|
||||
for(x = 0; x < numt; x++)
|
||||
{
|
||||
start = x * numc;
|
||||
for(y = 0; y < numc; y++)
|
||||
mat[start + y] = atoi(PQgetvalue(pgr, x, y));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
numc2 = numc - 2;
|
||||
mat2 = (int *)malloc(numt * numc2 * sizeof(int));
|
||||
start = 0;
|
||||
start2 = 0;
|
||||
for(x = 0; x < numt; x++)
|
||||
{
|
||||
w = atoi(PQgetvalue(pgr, x, 1));
|
||||
if(w < 2)
|
||||
{
|
||||
mat[start] = atoi(PQgetvalue(pgr, x, 0));
|
||||
start++;
|
||||
mat[start] = w;
|
||||
start++;
|
||||
if(w > 0)
|
||||
{
|
||||
for(y = 2; y < numc; y++)
|
||||
{
|
||||
val = atoi(PQgetvalue(pgr, x, y));
|
||||
mat[start] = val;
|
||||
mat2[start2] = val;
|
||||
start++;
|
||||
start2++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(y = 2; y < numc; y++)
|
||||
{
|
||||
val = atoi(PQgetvalue(pgr, x, y));
|
||||
mat[start] = val;
|
||||
start++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(y = 2; y < numc; y++)
|
||||
{
|
||||
val = atoi(PQgetvalue(pgr, x, y));
|
||||
mat2[start2] = val;
|
||||
start2++;
|
||||
}
|
||||
}
|
||||
}
|
||||
L->at(z+1).address_host_table = mat2;
|
||||
L->at(z+1).num_rows = start2 / numc2;
|
||||
}
|
||||
L->at(z).address_host_table = mat;
|
||||
L->at(z).num_rows = start / numc;
|
||||
PQclear(pgr);
|
||||
|
||||
x = 1;
|
||||
while(inpquery[x] != -1)
|
||||
{
|
||||
if(L->at(z).name == inpquery[x])
|
||||
{
|
||||
numt = (strlen(tok) + 1) * sizeof(char);
|
||||
qrs[x] = (char *)malloc(numt);
|
||||
memcpy(qrs[x], tok, numt);
|
||||
}
|
||||
x += 2;
|
||||
}
|
||||
if(tok[0] == 'c')
|
||||
{
|
||||
tok = strtok(NULL, " ");
|
||||
z++;
|
||||
}
|
||||
else
|
||||
{
|
||||
strtok(NULL, " ");
|
||||
tok = strtok(NULL, " ");
|
||||
z += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(tok != NULL)
|
||||
{
|
||||
sprintf(sel, "Select * from %s limit 0", tok);
|
||||
pgr = PQexec(conn, sel);
|
||||
numc = L->at(z).num_columns;
|
||||
if(tok[0] == 'c')
|
||||
{
|
||||
sprintf(sel, "Select weight, myid, ");
|
||||
start = 1;
|
||||
numt = numc + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf(sel, "Select truth, Club, atomID, ");
|
||||
start = 8;
|
||||
numt = numc + 5;
|
||||
}
|
||||
for(x = start; x < numt; x++)
|
||||
{
|
||||
strcat(sel, PQfname(pgr, x));
|
||||
strcat(sel, ", ");
|
||||
}
|
||||
sel[strlen(sel)-2] = '\0';
|
||||
sprintf(sel, "%s from %s", sel, tok);
|
||||
PQclear(pgr);
|
||||
pgr = PQexec(conn, sel);
|
||||
numt = PQntuples(pgr);
|
||||
mat = (int *)malloc(numt * numc * sizeof(int));
|
||||
L->at(z).weight = (double *)malloc(numt * sizeof(double));
|
||||
L->at(z).num_rows = numt;
|
||||
|
||||
for(x = 0; x < numt; x++)
|
||||
{
|
||||
start = x * numc;
|
||||
for(y = 1; y < numc; y++)
|
||||
mat[start + y] = atoi(PQgetvalue(pgr, x, y));
|
||||
}
|
||||
|
||||
numt *= numc;
|
||||
double flo;
|
||||
if(tok[0] == 'c')
|
||||
{
|
||||
for(x = 0, y = 0; x < numt; x+=numc, y++)
|
||||
{
|
||||
flo = atof(PQgetvalue(pgr, y, 0));
|
||||
L->at(z).weight[y] = flo;
|
||||
if(flo > 0)
|
||||
mat[x] = y + 1;
|
||||
else
|
||||
mat[x] = -y - 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(x = 0, y = 0; x < numt; x+=numc, y++)
|
||||
{
|
||||
if(PQgetvalue(pgr, y, 0)[0] == 't')
|
||||
mat[x] = 2;
|
||||
else
|
||||
mat[x] = 1;
|
||||
}
|
||||
}
|
||||
L->at(z).address_host_table = mat;
|
||||
numc = (strlen(tok) + 1) * sizeof(char);
|
||||
L->at(z).predname = (char *)malloc(numc);
|
||||
memcpy(L->at(z).predname, tok, numc);
|
||||
PQclear(pgr);
|
||||
tok = strtok(NULL, " ");
|
||||
z++;
|
||||
}
|
||||
}
|
||||
*ret = conn;
|
||||
}
|
||||
|
||||
void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, PGconn *conn, int finalDR)
|
||||
{
|
||||
char sel[1024];
|
||||
double *matw = NULL;
|
||||
int qname, cols1, res_rows, tipo, *dop1;
|
||||
int x, w, z, y, *hres;
|
||||
rulenode tmprule;
|
||||
vector<rulenode>::iterator qposr;
|
||||
if(finalDR)
|
||||
{
|
||||
char file[] = "/dev/shm/mln0_atoms.csv";
|
||||
z = 0;
|
||||
int seqid = 1;
|
||||
FILE *fp;
|
||||
fp = fopen(file, "w");
|
||||
if(fp == NULL)
|
||||
{
|
||||
cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
|
||||
sprintf(file, "./temp/mln0_atoms.csv");
|
||||
fp = fopen(file, "w");
|
||||
if(fp == NULL)
|
||||
{
|
||||
cerr << "Failed to create main memory temporary file" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
while((qname = inpquery[z]) != -1)
|
||||
{
|
||||
tmprule.name = qname;
|
||||
qposr = lower_bound(rul_str, fin, tmprule, comparer);
|
||||
cols1 = qposr->num_columns;
|
||||
res_rows = cargafinal(qname, cols1, &dop1);
|
||||
|
||||
if(res_rows != 0)
|
||||
{
|
||||
if(res_rows < 0)
|
||||
res_rows = unir(dop1, -res_rows, cols1, &dop1, 0); /*duplicate elimination on result*/
|
||||
else
|
||||
res_rows = unir(dop1, res_rows, cols1, &dop1, finalDR);
|
||||
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
w = z + 1;
|
||||
|
||||
strtok(qposr->rulename, "_");
|
||||
strtok(NULL, "_");
|
||||
int prid = atoi(strtok(NULL, "_"));
|
||||
|
||||
for(x = 0, w = 0; x < res_rows; x++, w+=2)
|
||||
{
|
||||
if(hres[w+1])
|
||||
fprintf(fp, "%d,%d,%d,true\n", seqid, hres[w], prid);
|
||||
else
|
||||
fprintf(fp, "%d,%d,%d,false\n", seqid, hres[w], prid);
|
||||
seqid++;
|
||||
}
|
||||
free(hres);
|
||||
}
|
||||
z += 2;
|
||||
}
|
||||
fclose(fp);
|
||||
sprintf(sel, "Copy mln0_atoms(atomid,tupleID,predID,isquery) from '%s' CSV", file);
|
||||
PQexec(conn, sel);
|
||||
}
|
||||
else
|
||||
{
|
||||
while(rul_str != fin)
|
||||
{
|
||||
cols1 = rul_str->num_columns;
|
||||
res_rows = cargafinal(rul_str->name, cols1, &dop1);
|
||||
if(res_rows == 0)
|
||||
{
|
||||
rul_str++;
|
||||
continue;
|
||||
}
|
||||
res_rows = abs(res_rows);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
|
||||
char file[] = "/dev/shm/buffer.csv";
|
||||
FILE *fp;
|
||||
fp = fopen(file, "w");
|
||||
if(fp == NULL)
|
||||
{
|
||||
cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
|
||||
sprintf(file, "./temp/buffer.csv");
|
||||
fp = fopen(file, "w");
|
||||
if(fp == NULL)
|
||||
{
|
||||
cerr << "Failed to create main memory temporary file" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if(rul_str->rulename[0] == 'z')
|
||||
{
|
||||
char *name = rul_str->rulename + 1;
|
||||
for(x = 0; x < ninpf; x++)
|
||||
{
|
||||
if(strncmp(L->at(x).predname, name, strlen(name)) == 0)
|
||||
{
|
||||
matw = L->at(x).weight;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cols1 -= 3;
|
||||
for(x = 0, z = 0; x < res_rows; x++, z+=3)
|
||||
{
|
||||
for(y = 0; y < cols1; y++, z++)
|
||||
fprintf(fp, "%d,", hres[z]);
|
||||
fprintf(fp, "%d,%lf,%d\n", hres[z], matw[abs(hres[z+1])-1], hres[z+2]);
|
||||
}
|
||||
fclose(fp);
|
||||
sprintf(sel, "Copy %s from '%s' CSV", name, file);
|
||||
PQexec(conn, sel);
|
||||
}
|
||||
else
|
||||
{
|
||||
cols1--;
|
||||
for(x = 0, z = 0; x < res_rows; x++, z++)
|
||||
{
|
||||
for(y = 0; y < cols1; y++, z++)
|
||||
fprintf(fp, "%d,", hres[z]);
|
||||
fprintf(fp, "%d\n", hres[z]);
|
||||
}
|
||||
fclose(fp);
|
||||
sprintf(sel, "Copy %s from '%s' CSV", rul_str->rulename, file);
|
||||
PQexec(conn, sel);
|
||||
}
|
||||
free(hres);
|
||||
rul_str++;
|
||||
}
|
||||
}
|
||||
PQfinish(conn);
|
||||
if(finalDR)
|
||||
clear_memory_all();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ROCKIT
|
||||
void mysqlRead(MYSQL **ret, int *qrs, vector<gpunode> *L, int ninpf, char *names, int finalDR)
|
||||
{
|
||||
char *tok, sel[1024];
|
||||
int w, x, y, z = 0, numt, numc;
|
||||
int *mat;
|
||||
MYSQL *con = mysql_init(NULL);
|
||||
if(con == NULL)
|
||||
{
|
||||
fprintf(stderr, "mysql_init() failed\n");
|
||||
exit(1);
|
||||
}
|
||||
mysql_options(con, MYSQL_OPT_LOCAL_INFILE, NULL);
|
||||
mysql_real_connect(con, "localhost", "root", "root", "rockit", 0, NULL, 0);
|
||||
if(finalDR)
|
||||
{
|
||||
y = 0;
|
||||
while(qrs[y] != 0)
|
||||
{
|
||||
for(z = 0; z < ninpf; z++)
|
||||
{
|
||||
if(qrs[y] == L->at(z).name)
|
||||
{
|
||||
MYSQL_ROW row;
|
||||
sprintf(sel, "Select count(*) from %s", L->at(z).predname);
|
||||
mysql_query(con, sel);
|
||||
MYSQL_RES *result = mysql_store_result(con);
|
||||
row = mysql_fetch_row(result);
|
||||
numt = atoi(row[0]);
|
||||
mysql_free_result(result);
|
||||
|
||||
if(numt != L->at(z).num_rows)
|
||||
{
|
||||
liberar(L->at(z).name);
|
||||
numc = L->at(z).num_columns;
|
||||
sprintf(sel, "Select * from %s", L->at(z).predname);
|
||||
mysql_query(con, sel);
|
||||
MYSQL_RES *result = mysql_store_result(con);
|
||||
mat = (int *)malloc(numt * numc * sizeof(int));
|
||||
w = 0;
|
||||
while ((row = mysql_fetch_row(result)))
|
||||
{
|
||||
for(x = 0; x < numc; x++, w++)
|
||||
mat[w] = atoi(row[x]);
|
||||
}
|
||||
|
||||
mysql_free_result(result);
|
||||
if(L->at(z).address_host_table != NULL)
|
||||
free(L->at(z).address_host_table);
|
||||
L->at(z).address_host_table = mat;
|
||||
L->at(z).num_rows = numt;
|
||||
}
|
||||
}
|
||||
}
|
||||
y++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
tok = strtok(names, " ");
|
||||
while(tok != NULL)
|
||||
{
|
||||
numc = L->at(z).num_columns;
|
||||
sprintf(sel, "Select * from %s", tok);
|
||||
mysql_query(con, sel);
|
||||
MYSQL_RES *result = mysql_store_result(con);
|
||||
numt = mysql_num_rows(result);
|
||||
|
||||
MYSQL_ROW row;
|
||||
mat = (int *)malloc(numt * numc * sizeof(int));
|
||||
w = 0;
|
||||
if(tok[0] == 'f' && tok[1] >= '0' && tok[1] <= '9')
|
||||
{
|
||||
while ((row = mysql_fetch_row(result)))
|
||||
{
|
||||
for(x = 1; x <= numc; x++, w++)
|
||||
mat[w] = atoi(row[x]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while ((row = mysql_fetch_row(result)))
|
||||
{
|
||||
for(x = 0; x < numc; x++, w++)
|
||||
mat[w] = atoi(row[x]);
|
||||
}
|
||||
}
|
||||
mysql_free_result(result);
|
||||
L->at(z).address_host_table = mat;
|
||||
L->at(z).num_rows = numt;
|
||||
|
||||
numc = (strlen(tok) + 1) * sizeof(char);
|
||||
L->at(z).predname = (char *)malloc(numc);
|
||||
strcpy(L->at(z).predname, tok);
|
||||
tok = strtok(NULL, " ");
|
||||
z++;
|
||||
}
|
||||
}
|
||||
*ret = con;
|
||||
}
|
||||
|
||||
void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, MYSQL *con)
|
||||
{
|
||||
int x, y, z, cols1, cols2, res_rows, tipo;
|
||||
int *hres, *dop1;
|
||||
char *id, *sign, *q1, *q2;
|
||||
char sel[1024], weight[1024];
|
||||
gpunode tmpfact;
|
||||
while(rul_str != fin)
|
||||
{
|
||||
cols1 = rul_str->num_columns;
|
||||
res_rows = cargafinal(rul_str->name, cols1, &dop1);
|
||||
id = strtok(rul_str->rulename, "_");
|
||||
sprintf(sel, "create table if not exists %s(weight double, ", id);
|
||||
for(x = 0; x < cols1; x++)
|
||||
{
|
||||
sprintf(weight, "a%d char(10), ", x);
|
||||
strcat(sel, weight);
|
||||
}
|
||||
sel[strlen(sel)-2] = ')';
|
||||
strcat(sel, "ENGINE = MEMORY DEFAULT CHARSET=latin1");
|
||||
mysql_query(con, sel);
|
||||
sprintf(sel, "truncate %s", id);
|
||||
mysql_query(con, sel);
|
||||
|
||||
if(res_rows == 0)
|
||||
{
|
||||
rul_str++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(res_rows > 0)
|
||||
{
|
||||
tmpfact = L->at(-rul_str->referencias[rul_str->num_rows - 2] - 1);
|
||||
sign = tmpfact.predname;
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
|
||||
sumar(tmpfact.name, dop1, cols1, res_rows);
|
||||
}
|
||||
else
|
||||
{
|
||||
hres = dop1;
|
||||
res_rows = -res_rows;
|
||||
}
|
||||
|
||||
sign = strtok(NULL, "_");
|
||||
q1 = strtok(NULL, "_");
|
||||
q2 = strtok(NULL, "_");
|
||||
if(sign[0] == '0')
|
||||
sprintf(weight, "%s.%s", q1, q2);
|
||||
else
|
||||
sprintf(weight, "-%s.%s", q1, q2);
|
||||
|
||||
FILE *fp;
|
||||
char file[512];
|
||||
sprintf(file, "/dev/shm/%s.tsv", id);
|
||||
fp = fopen(file, "w");
|
||||
if(fp == NULL)
|
||||
{
|
||||
cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
|
||||
sprintf(file, "./temp/%s.tsv", id);
|
||||
fp = fopen(file, "w");
|
||||
}
|
||||
|
||||
cols2 = cols1 - 1;
|
||||
for(x = 0, z = 0; x < res_rows; x++, z++)
|
||||
{
|
||||
fprintf(fp, "%s\t", weight);
|
||||
for(y = 0; y < cols2; y++, z++)
|
||||
fprintf(fp, "%d\t", hres[z]);
|
||||
fprintf(fp, "%d\n", hres[z]);
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
sprintf(sel, "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s", file, id);
|
||||
mysql_query(con, sel);
|
||||
rul_str++;
|
||||
}
|
||||
mysql_close(con);
|
||||
}
|
||||
#endif
|
||||
|
28
packages/cuda/dbio.h
Normal file
28
packages/cuda/dbio.h
Normal file
@ -0,0 +1,28 @@
|
||||
#ifndef _DBIO_H_
|
||||
#define _DBIO_H_
|
||||
|
||||
#include "pred.h"
|
||||
#ifdef TUFFY
|
||||
#include <libpq-fe.h>
|
||||
#endif
|
||||
#ifdef ROCKIT
|
||||
#include <mysql/mysql.h>
|
||||
#endif
|
||||
#include <vector>
|
||||
#include "lista.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifdef TUFFY
|
||||
void postgresRead(PGconn **ret, vector<gpunode> *L, int *inpquery, char *names, int finalDR);
|
||||
void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, PGconn *conn, int finalDR);
|
||||
#endif
|
||||
#ifdef ROCKIT
|
||||
void mysqlRead(MYSQL **ret, int *qrs, vector<gpunode> *L, int ninpf, char *names, int finalDR);
|
||||
void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, MYSQL *con);
|
||||
#endif
|
||||
#ifdef DATALOG
|
||||
void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, int finalDR, int **result);
|
||||
#endif
|
||||
|
||||
#endif
|
500
packages/cuda/joincpu.cpp
Executable file
500
packages/cuda/joincpu.cpp
Executable file
@ -0,0 +1,500 @@
|
||||
#include "CC_CSSTree.h"
|
||||
#include <vector>
|
||||
#include <thrust/sort.h>
|
||||
#include <thrust/system/omp/execution_policy.h>
|
||||
#include "pred.h"
|
||||
|
||||
void partInlj(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int wj, int halfrul, int lenrul)
|
||||
{
|
||||
//set_thread_affinity(cpuid,NUM_T);
|
||||
int i=0;
|
||||
int k=0;
|
||||
int curIndex=0;
|
||||
int keyForSearch;
|
||||
int y, posS, posR;
|
||||
for(k=startS; k<endS; k++)
|
||||
{
|
||||
if(S == NULL)
|
||||
posS = k * of2;
|
||||
else
|
||||
posS = S[k] * of2;
|
||||
keyForSearch=p2[posS + wj];
|
||||
curIndex=tree->search(keyForSearch);
|
||||
for(i=curIndex-1;i>0;i--)
|
||||
{
|
||||
if(keyForSearch == R[i])
|
||||
{
|
||||
|
||||
//cout << keyForSearch << endl;
|
||||
|
||||
posR = perm[i] * of1;
|
||||
for(y = 0; y < halfrul; y++)
|
||||
res->push_back(p1[posR + proj[y]]);
|
||||
for(; y < lenrul; y++)
|
||||
res->push_back(p2[posS + proj[y]]);
|
||||
}
|
||||
else
|
||||
if(R[i]<keyForSearch)
|
||||
break;
|
||||
}
|
||||
for(i=curIndex;i<rLen;i++)
|
||||
{
|
||||
if(keyForSearch == R[i])
|
||||
{
|
||||
|
||||
//cout << -i << " " << keyForSearch << endl;
|
||||
|
||||
posR = perm[i] * of1;
|
||||
for(y = 0; y < halfrul; y++)
|
||||
res->push_back(p1[posR + proj[y]]);
|
||||
for(; y < lenrul; y++)
|
||||
res->push_back(p2[posS + proj[y]]);
|
||||
}
|
||||
else
|
||||
if(R[i]>keyForSearch)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void partInlj2(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int cols, int wj)
|
||||
{
|
||||
//set_thread_affinity(cpuid,NUM_T);
|
||||
int i=0;
|
||||
int k=0;
|
||||
int curIndex=0;
|
||||
int keyForSearch;
|
||||
int y, cond, posS, posR;
|
||||
for(k=startS; k<endS; k++)
|
||||
{
|
||||
if(S == NULL)
|
||||
posS = k * of2;
|
||||
else
|
||||
posS = S[k] * of2;
|
||||
keyForSearch=p2[posS + wj];
|
||||
curIndex=tree->search(keyForSearch);
|
||||
for(i=curIndex-1;i>0;i--)
|
||||
{
|
||||
if(keyForSearch == R[i])
|
||||
{
|
||||
|
||||
//cout << keyForSearch << endl;
|
||||
|
||||
posR = perm[i] * of1 - 1;
|
||||
for(y = 0; y < cols; y++)
|
||||
{
|
||||
cond = proj[y];
|
||||
if(cond > 0)
|
||||
res->push_back(p1[posR + cond]);
|
||||
else
|
||||
res->push_back(p2[posS - cond - 1]);
|
||||
}
|
||||
}
|
||||
else
|
||||
if(R[i]<keyForSearch)
|
||||
break;
|
||||
}
|
||||
for(i=curIndex;i<rLen;i++)
|
||||
{
|
||||
if(keyForSearch == R[i])
|
||||
{
|
||||
|
||||
//cout << -i << " " << keyForSearch << endl;
|
||||
|
||||
posR = perm[i] * of1 - 1;
|
||||
for(y = 0; y < cols; y++)
|
||||
{
|
||||
cond = proj[y];
|
||||
if(cond > 0)
|
||||
res->push_back(p1[posR + cond]);
|
||||
else
|
||||
res->push_back(p2[posS - cond - 1]);
|
||||
}
|
||||
}
|
||||
else
|
||||
if(R[i]>keyForSearch)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void multipartInlj(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int *wj, int numj, int halfrul, int lenrul)
|
||||
{
|
||||
//set_thread_affinity(cpuid,NUM_T);
|
||||
int i=0;
|
||||
int k=0;
|
||||
int curIndex=0;
|
||||
int keyForSearch;
|
||||
int y, posS, posR;
|
||||
for(k=startS; k<endS; k++)
|
||||
{
|
||||
if(S == NULL)
|
||||
posS = k * of2;
|
||||
else
|
||||
posS = S[k] * of2;
|
||||
keyForSearch=p2[posS + wj[1]];
|
||||
curIndex=tree->search(keyForSearch);
|
||||
for(i=curIndex-1;i>0;i--)
|
||||
{
|
||||
if(keyForSearch == R[i])
|
||||
{
|
||||
posR = perm[i] * of1;
|
||||
for(y = 2; y < numj; y += 2)
|
||||
{
|
||||
if(p1[posR + wj[y]] != p2[posS + wj[y+1]])
|
||||
break;
|
||||
}
|
||||
if(y < numj)
|
||||
continue;
|
||||
for(y = 0; y < halfrul; y++)
|
||||
res->push_back(p1[posR + proj[y]]);
|
||||
for(; y < lenrul; y++)
|
||||
res->push_back(p2[posS + proj[y]]);
|
||||
}
|
||||
else
|
||||
if(R[i]<keyForSearch)
|
||||
break;
|
||||
}
|
||||
for(i=curIndex;i<rLen;i++)
|
||||
{
|
||||
if(keyForSearch == R[i])
|
||||
{
|
||||
posR = perm[i] * of1;
|
||||
for(y = 2; y < numj; y += 2)
|
||||
{
|
||||
if(p1[posR + wj[y]] != p2[posS + wj[y+1]])
|
||||
break;
|
||||
}
|
||||
if(y < numj)
|
||||
continue;
|
||||
for(y = 0; y < halfrul; y++)
|
||||
res->push_back(p1[posR + proj[y]]);
|
||||
for(; y < lenrul; y++)
|
||||
res->push_back(p2[posS + proj[y]]);
|
||||
}
|
||||
else
|
||||
if(R[i]>keyForSearch)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void multipartInlj2(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int cols, int *wj, int numj)
|
||||
{
|
||||
//set_thread_affinity(cpuid,NUM_T);
|
||||
int i=0;
|
||||
int k=0;
|
||||
int curIndex=0;
|
||||
int keyForSearch;
|
||||
int y, cond, posS, posR;
|
||||
for(k=startS; k<endS; k++)
|
||||
{
|
||||
if(S == NULL)
|
||||
posS = k * of2;
|
||||
else
|
||||
posS = S[k] * of2;
|
||||
keyForSearch=p2[posS + wj[1]];
|
||||
curIndex=tree->search(keyForSearch);
|
||||
for(i=curIndex-1;i>0;i--)
|
||||
{
|
||||
if(keyForSearch == R[i])
|
||||
{
|
||||
posR = perm[i] * of1;
|
||||
for(y = 2; y < numj; y += 2)
|
||||
{
|
||||
if(p1[posR + wj[y]] != p2[posS + wj[y+1]])
|
||||
break;
|
||||
}
|
||||
if(y < numj)
|
||||
continue;
|
||||
for(y = 0; y < cols; y++)
|
||||
{
|
||||
cond = proj[y];
|
||||
if(cond > 0)
|
||||
res->push_back(p1[posR + cond - 1]);
|
||||
else
|
||||
res->push_back(p2[posS - cond - 1]);
|
||||
}
|
||||
}
|
||||
else
|
||||
if(R[i]<keyForSearch)
|
||||
break;
|
||||
}
|
||||
for(i=curIndex;i<rLen;i++)
|
||||
{
|
||||
if(keyForSearch == R[i])
|
||||
{
|
||||
posR = perm[i] * of1;
|
||||
for(y = 2; y < numj; y += 2)
|
||||
{
|
||||
if(p1[posR + wj[y]] != p2[posS + wj[y+1]])
|
||||
break;
|
||||
}
|
||||
if(y < numj)
|
||||
continue;
|
||||
for(y = 0; y < cols; y++)
|
||||
{
|
||||
cond = proj[y];
|
||||
if(cond > 0)
|
||||
res->push_back(p1[posR + cond - 1]);
|
||||
else
|
||||
res->push_back(p2[posS - cond - 1]);
|
||||
}
|
||||
}
|
||||
else
|
||||
if(R[i]>keyForSearch)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void inlj_omp(Record *R, int rLen, CC_CSSTree *tree, Record *S, int sLen, int of1, int of2, vector<int> *res, int *p1, int *p2, int *perm, int *proj, int2 projp, int cols, int* wj, int numj, int tipo)
|
||||
{
|
||||
int i=0;
|
||||
int j=0;
|
||||
int *startS=new int[NUM_T];
|
||||
int *endS=new int[NUM_T];
|
||||
int chunkSize=sLen/NUM_T;
|
||||
for(i=0;i<NUM_T;i++)
|
||||
{
|
||||
startS[i]=i*chunkSize;
|
||||
if(i==(NUM_T-1))
|
||||
endS[i]=sLen;
|
||||
else
|
||||
endS[i]=(i+1)*chunkSize;
|
||||
|
||||
//cout<<"T"<<i<<", "<<endS[i]-startS[i]<<"; ";
|
||||
|
||||
}
|
||||
|
||||
//cout<<endl;
|
||||
|
||||
//omp_set_num_threads(NUM_T);
|
||||
|
||||
//cout << "inicio" << endl;
|
||||
|
||||
#pragma omp parallel for
|
||||
for(j=0;j<NUM_T;j++)
|
||||
{
|
||||
if(tipo)
|
||||
{
|
||||
if(numj > 2)
|
||||
multipartInlj2(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, cols, wj, numj);
|
||||
else
|
||||
partInlj2(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, cols, wj[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(numj > 2)
|
||||
multipartInlj(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, wj, numj, projp.x, projp.y);
|
||||
else
|
||||
partInlj(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, wj[1], projp.x, projp.y);
|
||||
}
|
||||
}
|
||||
|
||||
//cout << "fin" << endl;
|
||||
|
||||
delete startS;
|
||||
delete endS;
|
||||
}
|
||||
|
||||
int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>::iterator rule, int pos, int bothops, int **ret)
|
||||
{
|
||||
int pos2 = pos + 1;
|
||||
int *sel1, nsel1 = 0;
|
||||
int *sel2 = rule->select[pos2];
|
||||
int nsel2 = rule->numsel[pos2];
|
||||
int *proj = rule->project[pos];
|
||||
int2 projp = rule->projpos[pos];
|
||||
int *sjoin1, nsj1 = 0;
|
||||
int *sjoin2 = rule->selfjoin[pos2];
|
||||
int nsj2 = rule->numselfj[pos2];
|
||||
int *wherej = rule->wherejoin[pos];
|
||||
int numj = rule->numjoin[pos];
|
||||
int size, *fres, ini[NUM_T], *temp;
|
||||
int x, tipo = 0;
|
||||
int *Sres = NULL, *Rres, Snl, Rnl, *permutation;
|
||||
|
||||
if(bothops)
|
||||
{
|
||||
sel1 = rule->select[pos];
|
||||
nsel1 = rule->numsel[pos];
|
||||
sjoin1 = rule->selfjoin[pos];
|
||||
nsj1 = rule->numselfj[pos];
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEvent_t start, stop;
|
||||
float time;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
if(nsel1 > 0 || nsj1 > 0)
|
||||
Rnl = selectproyectcpu2(p1, rLen, of1, sel1, nsel1, sjoin1, nsj1, wherej[0], &Rres, &permutation);
|
||||
else
|
||||
{
|
||||
|
||||
/*cout << "sin sel" << endl;
|
||||
cout << "valores = " << rLen << " " << of1 << " " << wherej[0] << endl;
|
||||
for(x = 0; x < 100; x++)
|
||||
cout << p1[x] << " ";
|
||||
cout << endl;
|
||||
cout << "ultimo = " << p1[of1 * rLen - 1] << endl;*/
|
||||
|
||||
Rnl = rLen;
|
||||
size = Rnl * sizeof(int);
|
||||
permutation = (int *)malloc(size);
|
||||
Rres = (int *)malloc(size);
|
||||
#pragma omp parallel for firstprivate(of1)
|
||||
for(x = 0; x < Rnl; x++)
|
||||
{
|
||||
permutation[x] = x;
|
||||
Rres[x] = p1[of1 * x + wherej[0]];
|
||||
}
|
||||
|
||||
//cout << "sin sel fin" << endl;
|
||||
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.select1_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
if(nsel2 > 0 || nsj2 > 0)
|
||||
{
|
||||
//cout << "con sel S" << endl;
|
||||
|
||||
Snl = selectproyectcpu2(p2, sLen, of2, sel2, nsel2, sjoin2, nsj2, wherej[1], &Sres, NULL);
|
||||
}
|
||||
else
|
||||
Snl = sLen;
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.select2_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
//cout << "antes" << endl;
|
||||
|
||||
/*cout << "antes" << endl;
|
||||
for(x = 0; x < Rnl; x++)
|
||||
cout << permutation[x] << " ";
|
||||
cout << endl;
|
||||
for(x = 0; x < 100; x++)
|
||||
cout << Rres[x] << " ";
|
||||
cout << endl;*/
|
||||
|
||||
thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation);
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.sort_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
/*cout << "despues" << endl;
|
||||
for(x = 0; x < Rnl; x++)
|
||||
cout << permutation[x] << " ";
|
||||
cout << endl;
|
||||
for(x = 0; x < Rnl; x++)
|
||||
cout << Rres[x] << " ";
|
||||
cout << endl;*/
|
||||
|
||||
//cout << "despues sort" << endl;
|
||||
|
||||
vector<int> *res = new vector<int>[NUM_T];
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
res[x].reserve(INISIZE);
|
||||
CC_CSSTree *tree = new CC_CSSTree(Rres, Rnl, CSS_TREE_FANOUT);
|
||||
if(pos == (rule->num_rows - 3)) // && rule->num_bpreds.x == 0)
|
||||
tipo = 1;
|
||||
inlj_omp(Rres, Rnl, tree, Sres, Snl, of1, of2, res, p1, p2, permutation, proj, projp, rule->num_columns, wherej, numj, tipo);
|
||||
|
||||
/*cout << "proj = ";
|
||||
for(x = 0; x < rule->num_columns; x++)
|
||||
cout << proj[x] << " ";
|
||||
cout << endl;
|
||||
int y,z;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
cout << "Thread " << x << endl;
|
||||
for(y = 0; y < res[x].size() / projp.y; y++)
|
||||
{
|
||||
for(z = 0; z < projp.y; z++)
|
||||
cout << res[x][y * projp.y + z] << " ";
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
cout << "Tamanios" << endl;*/
|
||||
|
||||
size = 0;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
ini[x] = res[x].size();
|
||||
size += ini[x];
|
||||
|
||||
//cout << ini[x] << " " << size << endl;
|
||||
|
||||
}
|
||||
fres = (int *)malloc(size * sizeof(int));
|
||||
temp = fres;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
memcpy(temp, res[x].data(), ini[x] * sizeof(int));
|
||||
temp += ini[x];
|
||||
}
|
||||
|
||||
if(*ret != NULL)
|
||||
free(*ret);
|
||||
free(Rres);
|
||||
free(permutation);
|
||||
if(Sres != NULL)
|
||||
free(Sres);
|
||||
delete tree;
|
||||
delete [] res;
|
||||
|
||||
*ret = fres;
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.join_time += time;
|
||||
#endif
|
||||
|
||||
//cout << "Projp.x = " << projp.x << " projp.y = " << projp.y << endl;
|
||||
|
||||
/*if(numj > 2)
|
||||
{
|
||||
cout << "total = " << rLen << " " << size / projp.y << " " << projp.y << " " << rule->num_columns << endl;
|
||||
exit(1);
|
||||
}*/
|
||||
|
||||
return size / projp.y;
|
||||
}
|
394
packages/cuda/selectproyectcpu.cpp
Executable file
394
packages/cuda/selectproyectcpu.cpp
Executable file
@ -0,0 +1,394 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <omp.h>
|
||||
#include <vector>
|
||||
#include "pred.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int selectproyectcpu(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
|
||||
{
|
||||
int size = 0, pos, temp;
|
||||
int i, x, y, z, w;
|
||||
int *fres, *ptr;
|
||||
int div, fin, ini[NUM_T + 1];
|
||||
vector<int> vec[NUM_T];
|
||||
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
vec[x].reserve(INISIZE);
|
||||
|
||||
//omp_set_num_threads(NUM_T);
|
||||
div = rows / NUM_T;
|
||||
ini[0] = 0;
|
||||
for(x = 1; x < NUM_T; x++)
|
||||
ini[x] = div * x;
|
||||
ini[NUM_T] = rows;
|
||||
|
||||
if(numselect > 0)
|
||||
{
|
||||
#pragma omp parallel for private(x,pos,y,z,fin,temp,w)
|
||||
for(i = 0; i < NUM_T; i++)
|
||||
{
|
||||
fin = ini[i+1];
|
||||
for(x = ini[i]; x < fin; x++)
|
||||
{
|
||||
pos = x * cols;
|
||||
for(y = 0; y < numselect; y += 2)
|
||||
{
|
||||
if(dop1[pos+select[y]] != select[y+1])
|
||||
break;
|
||||
}
|
||||
for(z = 0; z < numselfj; z++)
|
||||
{
|
||||
temp = dop1[pos+selfjoin[z]];
|
||||
w = z + 1;
|
||||
while(selfjoin[w] > -1)
|
||||
{
|
||||
if(temp != dop1[pos+selfjoin[w]])
|
||||
break;
|
||||
w++;
|
||||
}
|
||||
z = w;
|
||||
if(selfjoin[w] != -1)
|
||||
break;
|
||||
}
|
||||
if(y == numselect && z == numselfj)
|
||||
{
|
||||
for(y = 0; y < head_size; y++)
|
||||
vec[i].push_back(dop1[pos+project[y]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(numselfj > 0)
|
||||
{
|
||||
#pragma omp parallel for private(x,pos,y,z,fin,w,temp)
|
||||
for(i = 0; i < NUM_T; i++)
|
||||
{
|
||||
fin = ini[i+1];
|
||||
for(x = ini[i]; x < fin; x++)
|
||||
{
|
||||
pos = x * cols;
|
||||
for(z = 0; z < numselfj; z++)
|
||||
{
|
||||
temp = dop1[pos+selfjoin[z]];
|
||||
w = z + 1;
|
||||
while(selfjoin[w] > -1)
|
||||
{
|
||||
if(temp != dop1[pos+selfjoin[w]])
|
||||
break;
|
||||
w++;
|
||||
}
|
||||
z = w;
|
||||
if(selfjoin[w] != -1)
|
||||
break;
|
||||
}
|
||||
if(z == numselfj)
|
||||
{
|
||||
for(y = 0; y < head_size; y++)
|
||||
vec[i].push_back(dop1[pos+project[y]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fres = (int *)malloc(rows * cols * sizeof(int));
|
||||
#pragma omp parallel for private(pos,y,z)
|
||||
for(x = 0; x < rows; x++)
|
||||
{
|
||||
pos = x * cols;
|
||||
z = pos;
|
||||
for(y = 0; y < head_size; y++, z++)
|
||||
fres[z] = dop1[pos+project[y]];
|
||||
}
|
||||
*ret = fres;
|
||||
return rows;
|
||||
}
|
||||
}
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
ini[x] = vec[x].size();
|
||||
size += ini[x];
|
||||
}
|
||||
fres = (int *)malloc(size * sizeof(int));
|
||||
ptr = fres;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));
|
||||
ptr += ini[x];
|
||||
}
|
||||
*ret = fres;
|
||||
return size / head_size;
|
||||
}
|
||||
|
||||
int selectproyectcpu2(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list)
|
||||
{
|
||||
int size = 0, pos, temp;
|
||||
int i, x, y, z, w;
|
||||
int *fres, *lres, *ptr;
|
||||
int div, fin, ini[NUM_T + 1];
|
||||
int *vec[NUM_T];
|
||||
int *lis[NUM_T];
|
||||
int cont[NUM_T];
|
||||
|
||||
//omp_set_num_threads(NUM_T);
|
||||
div = rows / NUM_T;
|
||||
ini[0] = 0;
|
||||
for(x = 1; x < NUM_T; x++)
|
||||
ini[x] = div * x;
|
||||
ini[NUM_T] = rows;
|
||||
|
||||
pos = div + NUM_T;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
vec[x] = (int *)malloc(pos * sizeof(int));
|
||||
lis[x] = (int *)malloc(pos * sizeof(int));
|
||||
cont[x] = 0;
|
||||
}
|
||||
|
||||
/*cout << "numselect = " << numselect << endl;
|
||||
for(x = 0; x < numselect; x++)
|
||||
cout << select[x] << " ";
|
||||
cout << endl;*/
|
||||
|
||||
if(numselect > 0)
|
||||
{
|
||||
#pragma omp parallel for private(x,pos,y,z,fin,temp,w)
|
||||
for(i = 0; i < NUM_T; i++)
|
||||
{
|
||||
fin = ini[i+1];
|
||||
for(x = ini[i]; x < fin; x++)
|
||||
{
|
||||
pos = x * cols;
|
||||
for(y = 0; y < numselect; y += 2)
|
||||
{
|
||||
if(dop1[pos+select[y]] != select[y+1])
|
||||
break;
|
||||
}
|
||||
for(z = 0; z < numselfj; z++)
|
||||
{
|
||||
temp = dop1[pos+selfjoin[z]];
|
||||
w = z + 1;
|
||||
while(selfjoin[w] > -1)
|
||||
{
|
||||
if(temp != dop1[pos+selfjoin[w]])
|
||||
break;
|
||||
w++;
|
||||
}
|
||||
z = w;
|
||||
if(selfjoin[w] != -1)
|
||||
break;
|
||||
}
|
||||
if(y == numselect && z == numselfj)
|
||||
{
|
||||
lis[i][cont[i]] = x;
|
||||
if(list != NULL)
|
||||
vec[i][cont[i]] = dop1[pos+wj];
|
||||
cont[i]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(numselfj > 0)
|
||||
{
|
||||
#pragma omp parallel for private(x,pos,y,z,fin,w,temp) firstprivate(cont)
|
||||
for(i = 0; i < NUM_T; i++)
|
||||
{
|
||||
fin = ini[i+1];
|
||||
for(x = ini[i]; x < fin; x++)
|
||||
{
|
||||
pos = x * cols;
|
||||
for(z = 0; z < numselfj; z++)
|
||||
{
|
||||
temp = dop1[pos+selfjoin[z]];
|
||||
w = z + 1;
|
||||
while(selfjoin[w] > -1)
|
||||
{
|
||||
if(temp != dop1[pos+selfjoin[w]])
|
||||
break;
|
||||
w++;
|
||||
}
|
||||
z = w;
|
||||
if(selfjoin[w] != -1)
|
||||
break;
|
||||
}
|
||||
if(z == numselfj)
|
||||
{
|
||||
lis[i][cont[i]] = x;
|
||||
if(list != NULL)
|
||||
vec[i][cont[i]] = dop1[pos+wj];
|
||||
cont[i]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//cout << "despues sel" << endl;
|
||||
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
size += cont[x];
|
||||
lres = (int *)malloc(size * sizeof(int));
|
||||
ptr = lres;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
memcpy(ptr, lis[x], cont[x] * sizeof(int));
|
||||
ptr += cont[x];
|
||||
}
|
||||
if(list != NULL)
|
||||
{
|
||||
fres = (int *)malloc(size * sizeof(int));
|
||||
ptr = fres;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
memcpy(ptr, vec[x], cont[x] * sizeof(int));
|
||||
ptr += cont[x];
|
||||
}
|
||||
*ret = fres;
|
||||
*list = lres;
|
||||
}
|
||||
else
|
||||
*ret = lres;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
free(lis[x]);
|
||||
free(vec[x]);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
int selectproyectcpu3(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list)
|
||||
{
|
||||
int size = 0, pos, temp;
|
||||
int i, x, y, z, w;
|
||||
int *fres, *lres, *ptr;
|
||||
int div, fin, ini[NUM_T + 1];
|
||||
vector<int> vec[NUM_T];
|
||||
vector<int> lis[NUM_T];
|
||||
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
vec[x].reserve(INISIZE);
|
||||
lis[x].reserve(INISIZE);
|
||||
}
|
||||
|
||||
//omp_set_num_threads(NUM_T);
|
||||
div = rows / NUM_T;
|
||||
ini[0] = 0;
|
||||
for(x = 1; x < NUM_T; x++)
|
||||
ini[x] = div * x;
|
||||
ini[NUM_T] = rows;
|
||||
|
||||
/*cout << "numselect = " << numselect << endl;
|
||||
for(x = 0; x < numselect; x++)
|
||||
cout << select[x] << " ";
|
||||
cout << endl;*/
|
||||
|
||||
if(numselect > 0)
|
||||
{
|
||||
#pragma omp parallel for private(x,pos,y,z,fin,temp,w)
|
||||
for(i = 0; i < NUM_T; i++)
|
||||
{
|
||||
fin = ini[i+1];
|
||||
for(x = ini[i]; x < fin; x++)
|
||||
{
|
||||
pos = x * cols;
|
||||
for(y = 0; y < numselect; y += 2)
|
||||
{
|
||||
if(dop1[pos+select[y]] != select[y+1])
|
||||
break;
|
||||
}
|
||||
for(z = 0; z < numselfj; z++)
|
||||
{
|
||||
temp = dop1[pos+selfjoin[z]];
|
||||
w = z + 1;
|
||||
while(selfjoin[w] > -1)
|
||||
{
|
||||
if(temp != dop1[pos+selfjoin[w]])
|
||||
break;
|
||||
w++;
|
||||
}
|
||||
z = w;
|
||||
if(selfjoin[w] != -1)
|
||||
break;
|
||||
}
|
||||
if(y == numselect && z == numselfj)
|
||||
{
|
||||
lis[i].push_back(x);
|
||||
if(list != NULL)
|
||||
vec[i].push_back(dop1[pos+wj]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(numselfj > 0)
|
||||
{
|
||||
#pragma omp parallel for private(x,pos,y,z,fin,w,temp)
|
||||
for(i = 0; i < NUM_T; i++)
|
||||
{
|
||||
fin = ini[i+1];
|
||||
for(x = ini[i]; x < fin; x++)
|
||||
{
|
||||
pos = x * cols;
|
||||
for(z = 0; z < numselfj; z++)
|
||||
{
|
||||
temp = dop1[pos+selfjoin[z]];
|
||||
w = z + 1;
|
||||
while(selfjoin[w] > -1)
|
||||
{
|
||||
if(temp != dop1[pos+selfjoin[w]])
|
||||
break;
|
||||
w++;
|
||||
}
|
||||
z = w;
|
||||
if(selfjoin[w] != -1)
|
||||
break;
|
||||
}
|
||||
if(z == numselfj)
|
||||
{
|
||||
lis[i].push_back(x);
|
||||
if(list != NULL)
|
||||
vec[i].push_back(dop1[pos+wj]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//cout << "despues sel" << endl;
|
||||
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
ini[x] = lis[x].size();
|
||||
size += ini[x];
|
||||
}
|
||||
lres = (int *)malloc(size * sizeof(int));
|
||||
ptr = lres;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
memcpy(ptr, lis[x].data(), ini[x] * sizeof(int));
|
||||
ptr += ini[x];
|
||||
}
|
||||
if(list != NULL)
|
||||
{
|
||||
fres = (int *)malloc(size * sizeof(int));
|
||||
ptr = fres;
|
||||
for(x = 0; x < NUM_T; x++)
|
||||
{
|
||||
memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));
|
||||
ptr += ini[x];
|
||||
}
|
||||
*ret = fres;
|
||||
*list = lres;
|
||||
}
|
||||
else
|
||||
*ret = lres;
|
||||
return size;
|
||||
}
|
1005
packages/cuda/union2.h
Executable file
1005
packages/cuda/union2.h
Executable file
File diff suppressed because it is too large
Load Diff
68
packages/cuda/unioncpu2.cpp
Executable file
68
packages/cuda/unioncpu2.cpp
Executable file
@ -0,0 +1,68 @@
|
||||
#include <thrust/unique.h>
|
||||
#include <thrust/distance.h>
|
||||
#include <thrust/system/omp/execution_policy.h>
|
||||
#include <iostream>
|
||||
#include "union2.h"
|
||||
|
||||
int unircpu(int *res, int rows, int tipo, int **ret)
|
||||
{
|
||||
|
||||
//cout << "En union = " << rows << " " << tipo << endl;
|
||||
|
||||
s2 *t2, *re2;
|
||||
s3 *t3, *re3;
|
||||
int nrows, *nres;
|
||||
//int size;
|
||||
|
||||
switch(tipo)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
thrust::sort(thrust::omp::par, res, res + rows);
|
||||
nres = thrust::unique(thrust::omp::par, res, res + rows);
|
||||
nrows = thrust::distance(res, nres);
|
||||
/*if(nrows < rows / 2)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
nres = (int *)malloc(size);
|
||||
memcpy(nres, res, size);
|
||||
free(*ret);
|
||||
*ret = nres;
|
||||
}*/
|
||||
return nrows;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
t2 = (s2*)res;
|
||||
thrust::sort(thrust::omp::par, t2, t2 + rows, o2());
|
||||
re2 = thrust::unique(thrust::omp::par, t2, t2 + rows, p2());
|
||||
nrows = thrust::distance(t2, re2);
|
||||
/*if(nrows < rows / 2)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
nres = (int *)malloc(size);
|
||||
memcpy(nres, res, size);
|
||||
free(*ret);
|
||||
*ret = nres;
|
||||
}*/
|
||||
return nrows;
|
||||
}
|
||||
case 3:
|
||||
{
|
||||
t3 = (s3*)res;
|
||||
thrust::sort(thrust::omp::par, t3, t3 + rows, o3());
|
||||
re3 = thrust::unique(thrust::omp::par, t3, t3 + rows, p3());
|
||||
nrows = thrust::distance(t3, re3);
|
||||
/*if(nrows < rows / 2)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
nres = (int *)malloc(size);
|
||||
memcpy(nres, res, size);
|
||||
free(*ret);
|
||||
*ret = nres;
|
||||
}*/
|
||||
return nrows;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user