This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/packages/cuda/bpreds.cu

144 lines
3.3 KiB
Plaintext

__global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int x, rowact, op1, op2;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
__syncthreads();
if(id < rows)
{
rowact = id * cols;
for(x = 0; x < numc; x += 3)
{
op1 = shared[x+1];
if(op1 < 0)
op1 *= -1;
else
op1 = dop1[rowact + op1];
op2 = shared[x+2];
if(op2 < 0)
op2 *= -1;
else
op2 = dop1[rowact + op2];
switch(shared[x])
{
case SBG_EQ: if(op1 != op2)
return;
break;
case SBG_GT: if(op1 <= op2)
return;
break;
case SBG_LT: if(op1 >= op2)
return;
break;
case SBG_GE: if(op1 < op2)
return;
break;
case SBG_LE: if(op1 > op2)
return;
break;
case SBG_DF: if(op1 == op2)
return;
}
}
res[id] = 1;
}
}
int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
{
int *temp;
int tmplen = rows + 1;
int size = tmplen * sizeof(int);
reservar(&temp, size);
#ifdef DEBUG_MEM
cerr << "+ " << temp << " temp bpreds " << size << endl;
#endif
cudaMemset(temp, 0, size);
#if TIMER
cuda_stats.builtins++;
#endif
int *dhead;
int predn = numpreds.x * 3;
int spredn = predn * sizeof(int);
int sproj = numpreds.z * sizeof(int);
int hsize;
if(predn > numpreds.z)
hsize = spredn;
else
hsize = sproj;
reservar(&dhead, hsize);
#ifdef DEBUG_MEM
cerr << "+ " << dhead << " dhead " << hsize << endl;
#endif
cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
int blockllen = rows / 1024 + 1;
int numthreads = 1024;
/*int x;
cout << "arraypreds = ";
for(x = 0; x < predn; x++)
cout << bin[x] << " ";
cout << endl;
cout << "temptable = ";
for(x = 0; x < numpreds.z; x++)
cout << bin[x+predn] << " ";
cout << endl;
int y;
int *hop1 = (int *)malloc(numpreds.y * rows * sizeof(int));
cudaMemcpy(hop1, dop1, numpreds.y * rows * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < rows; x++)
{
for(y = 0; y < numpreds.y; y++)
cout << hop1[x * numpreds.y + y] << " ";
cout << endl;
}
free(hop1);*/
predicates<<<blockllen, numthreads, spredn>>>(dop1, rows, numpreds.y, dhead, predn, temp + 1);
/*int y;
int *hop1 = (int *)malloc((rows + 1) * sizeof(int));
cudaMemcpy(hop1, temp, (rows + 1) * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < (rows + 1); x++)
cout << hop1[x] << " ";
cout << endl;
free(hop1);*/
thrust::device_ptr<int> res;
res = thrust::device_pointer_cast(temp);
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
int num = res[rows];
if(num == 0)
return 0;
int *fres;
reservar(&fres, num * sproj);
#ifdef DEBUG_MEM
cerr << "+ " << fres << " fres " << num * sproj << endl;
#endif
cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
/*int y;
int *hop1 = (int *)malloc(numpreds.z * num * sizeof(int));
cudaMemcpy(hop1, fres, numpreds.z * num * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < num; x++)
{
for(y = 0; y < numpreds.z; y++)
cout << hop1[x * numpreds.z + y] << " ";
cout << endl;
}
free(hop1);*/
liberar(dhead, hsize);
liberar(temp, size);
liberar(dop1, rows * cols * sizeof(int));
*ret = fres;
return num;
}