Merge branch 'master' of https://github.com/vscosta/yap-6.3
This commit is contained in:
30
packages/cuda/old/CC_CSSTree.cu
Executable file
30
packages/cuda/old/CC_CSSTree.cu
Executable file
@@ -0,0 +1,30 @@
|
||||
#include "CC_CSSTree.h"
|
||||
|
||||
//return the start position of searching the key.
|
||||
int CC_CSSTree::search(int key)
|
||||
{
|
||||
int i=0;
|
||||
int curIndex=0;
|
||||
int curNode=0;
|
||||
int j=0;
|
||||
//search
|
||||
for(i=0;i<level;i++)
|
||||
{
|
||||
for(j=0;j<blockSize;j++)
|
||||
{
|
||||
if(ntree[curIndex+j]==-1)
|
||||
break;
|
||||
if(key<=ntree[curIndex+j])
|
||||
break;
|
||||
}
|
||||
curNode=(fanout*(curNode)+j+1);
|
||||
curIndex=curNode*blockSize;
|
||||
//#ifdef DEBUG
|
||||
// cout<<curNode<<", "<<j<<", "<<ntree[curIndex]<<"; ";
|
||||
//#endif
|
||||
}
|
||||
curIndex=(curNode-numNode)*blockSize;
|
||||
if(curIndex>numRecord) curIndex=numRecord-1;
|
||||
//cout<<"I: "<<curIndex<<", ";//cout<<endl;
|
||||
return curIndex;
|
||||
}
|
||||
146
packages/cuda/old/CC_CSSTree.h
Executable file
146
packages/cuda/old/CC_CSSTree.h
Executable file
@@ -0,0 +1,146 @@
|
||||
#ifndef CSSTREE_H
|
||||
#define CSSTREE_H
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
using namespace std;
|
||||
|
||||
#define divRoundUp(n,s) (((n) / (s)) + ((((n) % (s)) > 0) ? 1 : 0))
|
||||
#define CSS_TREE_FANOUT 33
|
||||
//we use implicit pointer to perform the addressing.
|
||||
|
||||
typedef int Record;
|
||||
|
||||
class CC_GenericTree
|
||||
{
|
||||
public:
|
||||
int numRecord;
|
||||
Record *data;
|
||||
//we use the BFS layout as the default layout.
|
||||
int numNode;
|
||||
int level;
|
||||
int gResult;
|
||||
CC_GenericTree(){}
|
||||
//we assume that numR=2^i. Otherwise, we pad the array with -1 from the beginning.
|
||||
//we also assume that the record are sorted by the key.
|
||||
CC_GenericTree(Record *d, int numR)
|
||||
{
|
||||
data=d;
|
||||
numRecord=numR;
|
||||
}
|
||||
virtual ~CC_GenericTree()
|
||||
{
|
||||
}
|
||||
virtual int search(int key)=0;
|
||||
|
||||
};
|
||||
|
||||
class CC_CSSTree:public CC_GenericTree
|
||||
{
|
||||
public:
|
||||
int *ntree;
|
||||
int fanout;
|
||||
int blockSize;
|
||||
int *vStart;
|
||||
int *vG;//vG[0] is used in computing the position for level 1.
|
||||
int numKey;
|
||||
CC_CSSTree(Record *d, int numR, int f):CC_GenericTree(d,numR)
|
||||
{
|
||||
fanout=f;
|
||||
blockSize=fanout-1;
|
||||
int numLeaf=divRoundUp(numR,blockSize);
|
||||
level=1;
|
||||
int temp=numLeaf;
|
||||
while(temp>1)
|
||||
{
|
||||
temp=divRoundUp(temp, fanout);
|
||||
level++;
|
||||
}
|
||||
numNode=(int)((pow((double)fanout,(double)level)-1)/(fanout-1));
|
||||
numKey=numNode*blockSize;
|
||||
ntree=new int[numKey];
|
||||
vStart=new int[level];
|
||||
vG=new int[level];
|
||||
#ifdef DEBUG
|
||||
cout<<numLeaf<<","<<level<<", "<<numNode<<endl;
|
||||
#endif
|
||||
//layout the tree from bottom up.
|
||||
int i=0,j=0,k=0;
|
||||
int startNode=0;
|
||||
int endNode=0;
|
||||
int startKey, endKey;
|
||||
int curIndex;
|
||||
for(i=0;i<numNode;i++)
|
||||
ntree[i]=-1;
|
||||
//for <level-1>, i.e., the leaf level. [start,end]
|
||||
for(i=0;i<level;i++)//level
|
||||
{
|
||||
startNode=(int)((pow((double)fanout,(double)i)-1)/(fanout-1));
|
||||
endNode=(int)((pow((double)fanout,(double)(i+1))-1)/(fanout-1));
|
||||
for(j= startNode;j< endNode;j++)//which node
|
||||
{
|
||||
startKey=j*blockSize;
|
||||
endKey=startKey+blockSize;
|
||||
for(k=startKey;k<endKey;k++)
|
||||
{
|
||||
curIndex=(int)(blockSize*pow((double)fanout,(double)(level-i-1))*(k+1-startNode*blockSize+(j-startNode))-1);
|
||||
if(curIndex<numRecord+blockSize)
|
||||
{
|
||||
if(curIndex>=numRecord)
|
||||
curIndex=numRecord-1;
|
||||
ntree[k]=data[curIndex];
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
~CC_CSSTree()
|
||||
{
|
||||
delete [] ntree;
|
||||
delete [] vStart;
|
||||
delete [] vG;
|
||||
}
|
||||
virtual int search(int key);
|
||||
void print()
|
||||
{
|
||||
int i=0, j=0;
|
||||
int k=0;
|
||||
int startNode=0;
|
||||
int endNode=0;
|
||||
int startKey, endKey;
|
||||
for(i=0;i<level;i++)//level
|
||||
{
|
||||
cout<<"Level, "<<i<<endl;
|
||||
startNode=(int)((pow((double)fanout,(double)i)-1)/(fanout-1));
|
||||
endNode=(int)((pow((double)fanout,(double)(i+1))-1)/(fanout-1));
|
||||
for(j= startNode;j< endNode;j++)//which node
|
||||
{
|
||||
cout<<"Level, "<<i<<", Node, "<<j<<": ";
|
||||
startKey=j*blockSize;
|
||||
endKey=startKey+blockSize;
|
||||
for(k=startKey;k<endKey;k++)
|
||||
{
|
||||
cout<<ntree[k]<<", ";
|
||||
}
|
||||
cout<<endl;
|
||||
}
|
||||
}
|
||||
for(i=0;i<numRecord;i++)
|
||||
{
|
||||
cout<<data[i]<<", ";
|
||||
if(i%(fanout-1)==(fanout-2))
|
||||
cout<<"*"<<endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
460
packages/cuda/old/bpreds.cu
Executable file
460
packages/cuda/old/bpreds.cu
Executable file
@@ -0,0 +1,460 @@
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <cstdarg>
|
||||
#include "pred.h"
|
||||
|
||||
/*Returns the largest of the `count` int arguments that follow `count`.
The running maximum starts at 0, so the result is never negative: with no
arguments, or when every argument is negative, 0 is returned.*/
int maximo(int count, ...)
{
	va_list args;
	int best = 0;

	va_start(args, count);
	for(int remaining = count; remaining > 0; remaining--)
	{
		const int candidate = va_arg(args, int);
		best = (candidate > best) ? candidate : best;
	}
	va_end(args);

	return best;
}
|
||||
|
||||
|
||||
/*Marks the rows that satisfy every comparison predicate in cons (conjunction).

Launch layout: 1D grid and 1D blocks, one thread per row (threads with
id >= rows do no row work). Dynamic shared memory must hold the numc ints of
cons.

cons is read as triples (operator, operand1, operand2):
  - triples in [nx, numc): the operator is stored with BPOFFSET added; a
    negative operand o reads column (-o - 1) of the thread's row in dop1 (row
    width of1), a non-negative operand o reads column o of its row in dop2 (row
    width of2). If all of them hold and res2 is not NULL, res2[id] is set to 1.
  - triples in [0, nx): a negative operand o is the constant -o, a non-negative
    operand o reads column o of the row in dop2. If these hold as well, res[id]
    is set to 1.
Rows that fail a predicate are left untouched in res/res2. Triples whose
operator matches none of the SBG_* codes are skipped.*/
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, rowact1, op1, op2;
	bool holds;

	/*Block-strided copy: stages all numc entries even when numc is larger than
	the block size (the former `threadIdx.x < numc` test copied at most
	blockDim.x of them).*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	__syncthreads();

	if(id >= rows)
		return;

	rowact1 = id * of1;
	rowact = id * of2;

	for(x = nx; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? dop1[rowact1 - op1 - 1] : dop2[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? dop1[rowact1 - op2 - 1] : dop2[rowact + op2];

		holds = true;
		switch(shared[x] - BPOFFSET)
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(!holds)
			return;
	}

	if(res2 != NULL)
		res2[id] = 1;

	for(x = 0; x < nx; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop2[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop2[rowact + op2];

		holds = true;
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(!holds)
			return;
	}

	res[id] = 1;
}
|
||||
|
||||
/*Mark all rows that comply with the comparison predicates.

Launch layout: 1D grid and 1D blocks, one thread per row. Dynamic shared memory
must hold the numc ints of cons.

cons is read as triples (operator, operand1, operand2). A negative operand o is
the constant -o; a non-negative operand o reads column o of the thread's row in
dop1 (row width of1). res[id] is set to 1 only when every predicate holds; rows
that fail are left untouched. Triples whose operator matches none of the SBG_*
codes are skipped.*/
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, op1, op2;
	bool holds;

	/*Block-strided copy: stages all numc entries even when numc is larger than
	the block size (the former `threadIdx.x < numc` test copied at most
	blockDim.x of them).*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	__syncthreads();

	if(id >= rows)
		return;

	rowact = id * of1;
	for(x = 0; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop1[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop1[rowact + op2];

		holds = true;
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(!holds)
			return;
	}

	res[id] = 1;
}
|
||||
|
||||
/*Unmark all rows that do not comply with the comparison predicates.

Launch layout: 1D grid and 1D blocks, one thread per row. Dynamic shared memory
must hold the numc ints of cons.

Rows whose res[id] is already 0 are skipped. For the others, cons is read as
triples (operator, operand1, operand2): a negative operand o is the constant
-o, a non-negative operand o reads column o of the thread's row in dop1 (row
width of1). The first predicate that fails sets res[id] to 0. Triples whose
operator matches none of the SBG_* codes are skipped.*/
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, op1, op2;
	bool holds;

	/*Block-strided copy: stages all numc entries even when numc is larger than
	the block size (the former `threadIdx.x < numc` test copied at most
	blockDim.x of them).*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	__syncthreads();

	if(id >= rows)
		return;
	if(res[id] == 0)
		return;

	rowact = id * of1;
	for(x = 0; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop1[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop1[rowact + op2];

		holds = true;
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(!holds)
		{
			res[id] = 0;
			return;
		}
	}
}
|
||||
|
||||
/*Marks the rows that satisfy at least one comparison predicate (disjunction).

Launch layout: 1D grid and 1D blocks, one thread per row. Dynamic shared memory
must hold the numc ints of cons.

cons is read as triples (operator, operand1, operand2):
  - triples in [nx, numc): the operator is stored with BPOFFSET added; a
    negative operand o reads column (-o - 1) of the thread's row in dop1 (row
    width of1), a non-negative operand o reads column o of its row in dop2 (row
    width of2). The first one that holds sets res2[id] to 1 and ends this pass.
  - triples in [0, nx): a negative operand o is the constant -o, a non-negative
    operand o reads column o of the row in dop2. The first one that holds sets
    res[id] to 1.
Unlike bpreds, res2 is written without a NULL check, so it must be a valid
pointer whenever a predicate of the first group can hold. Triples whose
operator matches none of the SBG_* codes are skipped.*/
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, rowact1, op1, op2;
	bool holds;

	/*Block-strided copy: stages all numc entries even when numc is larger than
	the block size (the former `threadIdx.x < numc` test copied at most
	blockDim.x of them).*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	__syncthreads();

	if(id >= rows)
		return;

	rowact1 = id * of1;
	rowact = id * of2;

	for(x = nx; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? dop1[rowact1 - op1 - 1] : dop2[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? dop1[rowact1 - op2 - 1] : dop2[rowact + op2];

		holds = false;
		switch(shared[x] - BPOFFSET)
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(holds)
		{
			res2[id] = 1;
			break;	/*one satisfied predicate is enough for this group*/
		}
	}

	for(x = 0; x < nx; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop2[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop2[rowact + op2];

		holds = false;
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(holds)
		{
			res[id] = 1;
			return;
		}
	}
}
|
||||
|
||||
/*Mark all rows that comply with the comparison predicates using disjunctions
(i.e. a row is marked if it complies with at least one predicate).

Launch layout: 1D grid and 1D blocks, one thread per row. Dynamic shared memory
must hold the numc ints of cons.

cons is read as triples (operator, operand1, operand2). A negative operand o is
the constant -o; a non-negative operand o reads column o of the thread's row in
dop1 (row width of1). The first predicate that holds sets res[id] to 1; rows
that satisfy none are left untouched. Triples whose operator matches none of
the SBG_* codes are skipped.*/
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, op1, op2;
	bool holds;

	/*Block-strided copy: stages all numc entries even when numc is larger than
	the block size (the former `threadIdx.x < numc` test copied at most
	blockDim.x of them).*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	__syncthreads();

	if(id >= rows)
		return;

	rowact = id * of1;
	for(x = 0; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop1[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop1[rowact + op2];

		holds = false;
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(holds)
		{
			res[id] = 1;
			return;
		}
	}
}
|
||||
|
||||
/*Unmark all rows that do not comply with the comparison predicates using
disjunctions (i.e. a row is unmarked only if it complies with none of the
predicates).

Launch layout: 1D grid and 1D blocks, one thread per row. Dynamic shared memory
must hold the numc ints of cons.

Rows whose res[id] is already 0 are skipped. For the others, cons is read as
triples (operator, operand1, operand2): a negative operand o is the constant
-o, a non-negative operand o reads column o of the thread's row in dop1 (row
width of1). If any predicate holds the row is left as it is; otherwise res[id]
is set to 0. Triples whose operator matches none of the SBG_* codes are
skipped.*/
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, op1, op2;
	bool holds;

	/*Block-strided copy: stages all numc entries even when numc is larger than
	the block size (the former `threadIdx.x < numc` test copied at most
	blockDim.x of them).*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	__syncthreads();

	if(id >= rows)
		return;
	if(res[id] == 0)
		return;

	rowact = id * of1;
	for(x = 0; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop1[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop1[rowact + op2];

		holds = false;
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(holds)
			return;
	}

	res[id] = 0;
}
|
||||
|
||||
12
packages/cuda/old/bpreds.h
Executable file
12
packages/cuda/old/bpreds.h
Executable file
@@ -0,0 +1,12 @@
|
||||
#ifndef _BPREDS_H_
|
||||
#define _BPREDS_H_
|
||||
|
||||
/*Host helper: largest of the `count` int arguments (never less than 0).*/
int maximo(int count, ...);

/*Conjunctive predicate kernels: a row must satisfy every predicate.*/
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2);
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res);
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res);

/*Disjunctive predicate kernels: a row must satisfy at least one predicate.*/
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2);
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res);
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res);
|
||||
|
||||
#endif
|
||||
197
packages/cuda/old/creator2.c
Executable file
197
packages/cuda/old/creator2.c
Executable file
@@ -0,0 +1,197 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
/*Program used to generate union2.cu and union2.h. A new pointer and all its operations are generated for each set (pairs, triplets, etc.).
|
||||
Arguments are the cardinality of the biggest set and the name of the cuda file. For example, executing "creator2 20 union2" will generate
|
||||
all pointers and operations for all sets from 1 to 20 in the files union2.cu and union2.h.*/
|
||||
|
||||
/*Writes the upper-cased base name, as used inside the include guard.*/
static void write_guard_name(FILE *out, const char *base)
{
	size_t i, len = strlen(base);

	/*toupper requires an unsigned char value (or EOF); a plain char may be negative*/
	for(i = 0; i < len; i++)
		fprintf(out, "%c", toupper((unsigned char)base[i]));
}

/*Writes the generated .cu file: the unir() function with one switch case per set size from 1 to num.*/
static void write_cuda_source(FILE *cuda, const char *base, int num)
{
	int x;

	fprintf(cuda, "/*Computer generated file to remove duplicates. Since Thrust's unique and sort, unlike their std's counterparts, don't have a way to specify the size of each element in\n");
	fprintf(cuda, "the array, comparing pairs, triplets and other sets is not possible without defining a new pointer and all related operations for each set. If you have a better idea to do\n");
	fprintf(cuda, "this, please don't hesitate to email us.*/\n\n");
	fprintf(cuda, "#include <thrust/device_vector.h>\n");
	fprintf(cuda, "#include <thrust/unique.h>\n");
	fprintf(cuda, "#include <thrust/distance.h>\n");
	fprintf(cuda, "#include <thrust/sort.h>\n");
	fprintf(cuda, "#include <iostream>\n");
	fprintf(cuda, "#include \"memory.h\"\n");
	fprintf(cuda, "#include \"%s.h\"\n\n", base);
	fprintf(cuda, "int unir(int *res, int rows, int tipo, int **ret, int final)\n");
	fprintf(cuda, "{\n");
	fprintf(cuda, "\tthrust::device_ptr<int> pt, re;\n");
	for(x = 2; x <= num; x++)
		fprintf(cuda, "\tthrust::device_ptr<s%d> pt%d, re%d;\n", x, x ,x);
	for(x = 2; x <= num; x++)
		fprintf(cuda, "\ts%d *t%d;\n", x, x);
	fprintf(cuda, "\tint flag, nrows, *nres, size;\n\n");
	fprintf(cuda, "#if TIMER\n");
	fprintf(cuda, "\tcuda_stats.unions++;\n");
	fprintf(cuda, "#endif\n\n");
	fprintf(cuda, "\tswitch(tipo)\n");
	fprintf(cuda, "\t{\n");

	/*case 1: plain ints, no user-defined struct needed*/
	fprintf(cuda, "\t\tcase 1:\n");
	fprintf(cuda, "\t\t{\n");
	fprintf(cuda, "\t\t\tpt = thrust::device_pointer_cast(res);\n");
	fprintf(cuda, "\t\t\tflag = 0;\n");
	fprintf(cuda, "\t\t\twhile(flag != 1)\n");
	fprintf(cuda, "\t\t\t{\n");
	fprintf(cuda, "\t\t\t\ttry\n");
	fprintf(cuda, "\t\t\t\t{\n");
	fprintf(cuda, "\t\t\t\t\tthrust::sort(pt, pt + rows);\n");
	fprintf(cuda, "\t\t\t\t\tif(final)\n");
	fprintf(cuda, "\t\t\t\t\t{\n");
	fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, pt + rows, q1());\n");
	fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, re);\n");
	fprintf(cuda, "\t\t\t\t\t}\n");
	fprintf(cuda, "\t\t\t\t\telse\n");
	fprintf(cuda, "\t\t\t\t\t\tre = thrust::unique(pt, pt + rows);\n");
	fprintf(cuda, "\t\t\t\t\tflag = 1;\n");
	fprintf(cuda, "\t\t\t\t}\n");
	fprintf(cuda, "\t\t\t\tcatch(std::bad_alloc &e)\n");
	fprintf(cuda, "\t\t\t\t{\n");
	fprintf(cuda, "\t\t\t\t\tlimpiar(\"sort/unique in unir\", 0);\n");
	fprintf(cuda, "\t\t\t\t}\n");
	fprintf(cuda, "\t\t\t}\n");
	fprintf(cuda, "\t\t\tnrows = thrust::distance(pt, re);\n");
	fprintf(cuda, "\t\t\tif(nrows < rows / 2)\n");
	fprintf(cuda, "\t\t\t{\n");
	fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
	fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
	fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
	fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
	fprintf(cuda, "\t\t\t\t*ret = nres;\n");
	fprintf(cuda, "\t\t\t}\n");
	fprintf(cuda, "\t\t\treturn nrows;\n");
	fprintf(cuda, "\t\t}\n");

	/*cases 2..num: same steps on the struct type s<x> with its o/p/q functors*/
	for(x = 2; x <= num; x++)
	{
		fprintf(cuda, "\t\tcase %d:\n", x);
		fprintf(cuda, "\t\t{\n");
		fprintf(cuda, "\t\t\tt%d = (s%d*)res;\n", x, x);
		fprintf(cuda, "\t\t\tpt%d = thrust::device_pointer_cast(t%d);\n", x, x);
		fprintf(cuda, "\t\t\tflag = 0;\n");
		fprintf(cuda, "\t\t\twhile(flag != 1)\n");
		fprintf(cuda, "\t\t\t{\n");
		fprintf(cuda, "\t\t\t\ttry\n");
		fprintf(cuda, "\t\t\t\t{\n");
		fprintf(cuda, "\t\t\t\t\tthrust::sort(pt%d, pt%d + rows, o%d());\n", x, x, x);
		fprintf(cuda, "\t\t\t\t\tif(final)\n");
		fprintf(cuda, "\t\t\t\t\t{\n");
		fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, pt%d + rows, q%d());\n", x, x, x, x);
		fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, re%d, p%d());\n", x, x, x, x);
		fprintf(cuda, "\t\t\t\t\t}\n");
		fprintf(cuda, "\t\t\t\t\telse\n");
		fprintf(cuda, "\t\t\t\t\t\tre%d = thrust::unique(pt%d, pt%d + rows, p%d());\n", x, x, x, x);
		fprintf(cuda, "\t\t\t\t\tflag = 1;\n");
		fprintf(cuda, "\t\t\t\t}\n");
		fprintf(cuda, "\t\t\t\tcatch(std::bad_alloc &e)\n");
		fprintf(cuda, "\t\t\t\t{\n");
		fprintf(cuda, "\t\t\t\t\tlimpiar(\"sort/unique in unir\", 0);\n");
		fprintf(cuda, "\t\t\t\t}\n");
		fprintf(cuda, "\t\t\t}\n");
		fprintf(cuda, "\t\t\tnrows = thrust::distance(pt%d, re%d);\n", x, x);
		fprintf(cuda, "\t\t\tif(nrows < rows / 2)\n");
		fprintf(cuda, "\t\t\t{\n");
		fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
		fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
		fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
		fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
		fprintf(cuda, "\t\t\t\t*ret = nres;\n");
		fprintf(cuda, "\t\t\t}\n");
		fprintf(cuda, "\t\t\treturn nrows;\n");
		fprintf(cuda, "\t\t}\n");
	}
	fprintf(cuda, "\t}\n");
	fprintf(cuda, "\treturn 0;\n");
	fprintf(cuda, "}\n");
}

/*Writes the generated .h file: include guard, unir() prototype, the s<x> structs and the q1 / p<x> / q<x> / o<x> functors.*/
static void write_header(FILE *cuda, const char *base, int num)
{
	int x;

	fprintf(cuda, "#ifndef _");
	write_guard_name(cuda, base);
	fprintf(cuda, "_H_\n");
	fprintf(cuda, "#define _");
	write_guard_name(cuda, base);
	fprintf(cuda, "_H_\n\n");
	fprintf(cuda, "int unir(int *res, int rows, int tipo, int **ret, int final);\n\n");

	/*one fixed-size struct per set size*/
	for(x = 2; x <= num; x++)
	{
		fprintf(cuda, "typedef struct n%d\n", x);
		fprintf(cuda, "{\n");
		fprintf(cuda, "\tint v[%d];\n", x);
		fprintf(cuda, "}s%d;\n\n", x);
	}

	/*q1: "different" test for plain ints*/
	fprintf(cuda, "struct q1\n");
	fprintf(cuda, "{\n");
	fprintf(cuda, "\t__host__ __device__\n");
	fprintf(cuda, "\tbool operator()(const int &r1, const int &r2)\n");
	fprintf(cuda, "\t{\n");
	fprintf(cuda, "\t\tif(r1 != r2)\n");
	fprintf(cuda, "\t\t\treturn true;\n");
	fprintf(cuda, "\t\treturn false;\n");
	fprintf(cuda, "\t}\n");
	fprintf(cuda, "};\n\n");

	for(x = 2; x <= num; x++)
	{
		/*p<x>: true when all elements are equal*/
		fprintf(cuda, "struct p%d\n", x);
		fprintf(cuda, "{\n");
		fprintf(cuda, "\t__host__ __device__\n");
		fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
		fprintf(cuda, "\t{\n");
		fprintf(cuda, "\t\tint x;\n");
		fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
		fprintf(cuda, "\t\t{\n");
		fprintf(cuda, "\t\t\tif(r1.v[x] != r2.v[x])\n");
		fprintf(cuda, "\t\t\t\treturn false;\n");
		fprintf(cuda, "\t\t}\n");
		fprintf(cuda, "\t\treturn true;\n");
		fprintf(cuda, "\t}\n");
		fprintf(cuda, "};\n\n");

		/*q<x>: true when any element differs*/
		fprintf(cuda, "struct q%d\n", x);
		fprintf(cuda, "{\n");
		fprintf(cuda, "\t__host__ __device__\n");
		fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
		fprintf(cuda, "\t{\n");
		fprintf(cuda, "\t\tint x;\n");
		fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
		fprintf(cuda, "\t\t{\n");
		fprintf(cuda, "\t\t\tif(r1.v[x] != r2.v[x])\n");
		fprintf(cuda, "\t\t\t\treturn true;\n");
		fprintf(cuda, "\t\t}\n");
		fprintf(cuda, "\t\treturn false;\n");
		fprintf(cuda, "\t}\n");
		fprintf(cuda, "};\n\n");

		/*o<x>: element-by-element ordering, true when r1 is greater at the first difference*/
		fprintf(cuda, "struct o%d\n", x);
		fprintf(cuda, "{\n");
		fprintf(cuda, "\t__host__ __device__\n");
		fprintf(cuda, "\tbool operator()(const s%d &r1, const s%d &r2)\n", x, x);
		fprintf(cuda, "\t{\n");
		fprintf(cuda, "\t\tint x;\n");
		fprintf(cuda, "\t\tfor(x = 0; x < %d; x++)\n", x);
		fprintf(cuda, "\t\t{\n");
		fprintf(cuda, "\t\t\tif(r1.v[x] > r2.v[x])\n");
		fprintf(cuda, "\t\t\t\treturn true;\n");
		fprintf(cuda, "\t\t\tif(r1.v[x] < r2.v[x])\n");
		fprintf(cuda, "\t\t\t\treturn false;\n");
		fprintf(cuda, "\t\t}\n");
		fprintf(cuda, "\t\treturn false;\n");
		fprintf(cuda, "\t}\n");
		fprintf(cuda, "};\n\n");
	}
	fprintf(cuda, "#endif\n");
}

/*Entry point. argv[1] is the cardinality of the biggest set, argv[2] the base
name of the files to generate; "<name>.cu" and "<name>.h" are written to the
current directory. Returns EXIT_FAILURE when the arguments are missing, memory
cannot be allocated or a file cannot be opened, and 0 otherwise.*/
int main(int argc, char *argv[])
{
	int num;
	char *str;
	FILE *cuda;

	/*the original dereferenced argv[1] and argv[2] without checking argc*/
	if(argc < 3)
	{
		fprintf(stderr, "usage: %s <max set size> <output base name>\n", argc > 0 ? argv[0] : "creator2");
		return EXIT_FAILURE;
	}
	num = atoi(argv[1]);

	/*room for the base name plus the longest suffix (".cu") and the terminator*/
	str = (char *)malloc((strlen(argv[2]) + 4) * sizeof(char));
	if(str == NULL)
	{
		perror("malloc");
		return EXIT_FAILURE;
	}

	sprintf(str, "%s.cu", argv[2]);
	cuda = fopen(str, "w");
	if(cuda == NULL)
	{
		perror(str);
		free(str);
		return EXIT_FAILURE;
	}
	write_cuda_source(cuda, argv[2], num);
	fclose(cuda);

	sprintf(str, "%s.h", argv[2]);
	cuda = fopen(str, "w"); /*switch to the header file*/
	if(cuda == NULL)
	{
		perror(str);
		free(str);
		return EXIT_FAILURE;
	}
	write_header(cuda, argv[2], num);
	fclose(cuda);

	free(str);
	return 0;
}
|
||||
Reference in New Issue
Block a user