python support

This commit is contained in:
Vitor Santos Costa 2016-06-28 23:47:09 +01:00
parent c2fb631106
commit 3f59ec40cd
30 changed files with 1684 additions and 386 deletions

View File

@ -240,7 +240,6 @@ timer_stop(Name,Duration) :-
-> ->
statistics(walltime,[StopTime,_]), statistics(walltime,[StopTime,_]),
Duration is StopTime-StartTime; Duration is StopTime-StartTime;
throw(timer_not_started(timer_stop(Name,Duration))) throw(timer_not_started(timer_stop(Name,Duration)))
). ).

View File

@ -1,4 +1,4 @@
1%%% -*- Mode: Prolog; -*- %%% -*- Mode: Prolog; -*-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
@ -567,6 +567,8 @@ init_learning :-
set_problog_flag(alpha,Alpha) set_problog_flag(alpha,Alpha)
) )
) )
;
true
), ),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

0
packages/cuda/CC_CSSTree.cu Executable file → Normal file
View File

0
packages/cuda/CC_CSSTree.h Executable file → Normal file
View File

0
packages/cuda/Makefile.in Executable file → Normal file
View File

37
packages/cuda/bpreds.cu Executable file → Normal file
View File

@ -1,3 +1,4 @@
#include "hip/hip_runtime.h"
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <thrust/scan.h> #include <thrust/scan.h>
#include <cstdarg> #include <cstdarg>
@ -25,10 +26,10 @@ int maximo(int count, ...)
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2) __global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, rowact1, op1, op2; int x, rowact, rowact1, op1, op2;
if(threadIdx.x < numc) if(hipThreadIdx_x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -110,10 +111,10 @@ __global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *co
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res) __global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, op1, op2; int x, rowact, op1, op2;
if(threadIdx.x < numc) if(hipThreadIdx_x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -159,10 +160,10 @@ __global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc,
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res) __global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, op1, op2; int x, rowact, op1, op2;
if(threadIdx.x < numc) if(hipThreadIdx_x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -226,10 +227,10 @@ __global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc,
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2) __global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, rowact1, op1, op2; int x, rowact, rowact1, op1, op2;
if(threadIdx.x < numc) if(hipThreadIdx_x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -344,10 +345,10 @@ __global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res) __global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, op1, op2; int x, rowact, op1, op2;
if(threadIdx.x < numc) if(hipThreadIdx_x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -411,10 +412,10 @@ __global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res) __global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, op1, op2; int x, rowact, op1, op2;
if(threadIdx.x < numc) if(hipThreadIdx_x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {

1
packages/cuda/bpreds.h Executable file → Normal file
View File

@ -1,3 +1,4 @@
#include "hip/hip_runtime.h"
#ifndef _BPREDS_H_ #ifndef _BPREDS_H_
#define _BPREDS_H_ #define _BPREDS_H_

0
packages/cuda/bpredscpu.cpp Executable file → Normal file
View File

4
packages/cuda/creator2.c Executable file → Normal file
View File

@ -66,7 +66,7 @@ int main(int argc, char *argv[])
fprintf(cuda, "\t\t\t{\n"); fprintf(cuda, "\t\t\t{\n");
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n"); fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n"); fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n"); fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n"); fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
fprintf(cuda, "\t\t\t\t*ret = nres;\n"); fprintf(cuda, "\t\t\t\t*ret = nres;\n");
fprintf(cuda, "\t\t\t}\n"); fprintf(cuda, "\t\t\t}\n");
@ -103,7 +103,7 @@ int main(int argc, char *argv[])
fprintf(cuda, "\t\t\t{\n"); fprintf(cuda, "\t\t\t{\n");
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n"); fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n"); fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n"); fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n"); fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
fprintf(cuda, "\t\t\t\t*ret = nres;\n"); fprintf(cuda, "\t\t\t\t*ret = nres;\n");
fprintf(cuda, "\t\t\t}\n"); fprintf(cuda, "\t\t\t}\n");

0
packages/cuda/cuda.c Executable file → Normal file
View File

0
packages/cuda/cuda.yap Executable file → Normal file
View File

View File

@ -27,8 +27,8 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
res_rows = unir(dop1, res_rows, cols1, &dop1, 0); res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
tipo = res_rows * cols1 * sizeof(int); tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo); hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost); hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
cudaFree(dop1); hipFree(dop1);
*result = hres; *result = hres;
} }
else else
@ -39,13 +39,13 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
int *dop2; int *dop2;
tipo = res_rows * cols1 * sizeof(int); tipo = res_rows * cols1 * sizeof(int);
reservar(&dop2, tipo); reservar(&dop2, tipo);
cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice); hipMemcpy(dop2, dop1, tipo, hipMemcpyHostToDevice);
free(dop1); free(dop1);
res_rows = unir(dop2, res_rows, cols1, &dop2, 0); res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
tipo = res_rows * cols1 * sizeof(int); tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo); hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost); hipMemcpy(hres, dop2, tipo, hipMemcpyDeviceToHost);
cudaFree(dop2); hipFree(dop2);
*result = hres; *result = hres;
} }
else else
@ -315,8 +315,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
tipo = res_rows * cols1 * sizeof(int); tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo); hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost); hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
cudaFree(dop1); hipFree(dop1);
w = z + 1; w = z + 1;
strtok(qposr->rulename, "_"); strtok(qposr->rulename, "_");
@ -353,8 +353,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
res_rows = abs(res_rows); res_rows = abs(res_rows);
tipo = res_rows * cols1 * sizeof(int); tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo); hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost); hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
cudaFree(dop1); hipFree(dop1);
char file[] = "/dev/shm/buffer.csv"; char file[] = "/dev/shm/buffer.csv";
FILE *fp; FILE *fp;
@ -554,7 +554,7 @@ void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator f
sign = tmpfact.predname; sign = tmpfact.predname;
tipo = res_rows * cols1 * sizeof(int); tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo); hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost); hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9') if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
sumar(tmpfact.name, dop1, cols1, res_rows); sumar(tmpfact.name, dop1, cols1, res_rows);
} }

62
packages/cuda/joincpu.cpp Executable file → Normal file
View File

@ -324,11 +324,11 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
} }
#ifdef TIMER #ifdef TIMER
cudaEvent_t start, stop; hipEvent_t start, stop;
float time; float time;
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
if(nsel1 > 0 || nsj1 > 0) if(nsel1 > 0 || nsj1 > 0)
@ -359,16 +359,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
} }
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
cuda_stats.select1_time += time; cuda_stats.select1_time += time;
cudaEventDestroy(start); hipEventDestroy(start);
cudaEventDestroy(stop); hipEventDestroy(stop);
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
if(nsel2 > 0 || nsj2 > 0) if(nsel2 > 0 || nsj2 > 0)
@ -381,16 +381,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
Snl = sLen; Snl = sLen;
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
cuda_stats.select2_time += time; cuda_stats.select2_time += time;
cudaEventDestroy(start); hipEventDestroy(start);
cudaEventDestroy(stop); hipEventDestroy(stop);
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
//cout << "antes" << endl; //cout << "antes" << endl;
@ -406,16 +406,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation); thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation);
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
cuda_stats.sort_time += time; cuda_stats.sort_time += time;
cudaEventDestroy(start); hipEventDestroy(start);
cudaEventDestroy(stop); hipEventDestroy(stop);
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
/*cout << "despues" << endl; /*cout << "despues" << endl;
@ -482,9 +482,9 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
*ret = fres; *ret = fres;
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
cuda_stats.join_time += time; cuda_stats.join_time += time;
#endif #endif

40
packages/cuda/lista.cu Executable file → Normal file
View File

@ -967,7 +967,7 @@ vector<gpunode> L;
extern "C" extern "C"
int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, int *inpquery, int **result, char *names, int finalDR) int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, int *inpquery, int **result, char *names, int finalDR)
{ {
cudaSetDevice(0); hipSetDevice(0);
vector<rulenode> rules; vector<rulenode> rules;
int x; int x;
@ -1029,11 +1029,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
vector<rulenode>::iterator qposr; vector<rulenode>::iterator qposr;
#if TIMER #if TIMER
cudaEvent_t start, stop; hipEvent_t start, stop;
float time; float time;
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
while(reglas.size()) /*Here's the main loop*/ while(reglas.size()) /*Here's the main loop*/
@ -1084,7 +1084,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
{ {
num_refs = rows1 * cols1 * sizeof(int); num_refs = rows1 * cols1 * sizeof(int);
reservar(&res, num_refs); reservar(&res, num_refs);
cudaMemcpyAsync(res, dop1, num_refs, cudaMemcpyDeviceToDevice); hipMemcpyAsync(res, dop1, num_refs, hipMemcpyDeviceToDevice);
registrar(rul_act->name, cols1, res, rows1, itr, 1); registrar(rul_act->name, cols1, res, rows1, itr, 1);
genflag = 1; genflag = 1;
rul_act->gen_ant = rul_act->gen_act; rul_act->gen_ant = rul_act->gen_act;
@ -1251,10 +1251,10 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
if(x == num_refs) if(x == num_refs)
{ {
#ifdef TIMER #ifdef TIMER
cudaEvent_t start2, stop2; hipEvent_t start2, stop2;
cudaEventCreate(&start2); hipEventCreate(&start2);
cudaEventCreate(&stop2); hipEventCreate(&stop2);
cudaEventRecord(start2, 0); hipEventRecord(start2, 0);
#endif #endif
//cout << rul_act->name << " res_rows = " << res_rows << endl; //cout << rul_act->name << " res_rows = " << res_rows << endl;
@ -1263,11 +1263,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
res_rows = unir(res, res_rows, rul_act->num_columns, &res, 0); res_rows = unir(res, res_rows, rul_act->num_columns, &res, 0);
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop2, 0); hipEventRecord(stop2, 0);
cudaEventSynchronize(stop2); hipEventSynchronize(stop2);
cudaEventElapsedTime(&time, start2, stop2); hipEventElapsedTime(&time, start2, stop2);
cudaEventDestroy(start2); hipEventDestroy(start2);
cudaEventDestroy(stop2); hipEventDestroy(stop2);
//cout << "Union = " << time << endl; //cout << "Union = " << time << endl;
cuda_stats.union_time += time; cuda_stats.union_time += time;
#endif #endif
@ -1319,16 +1319,16 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
#endif #endif
#if TIMER #if TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
cuda_stats.total_time += time; cuda_stats.total_time += time;
if (time > cuda_stats.max_time) if (time > cuda_stats.max_time)
cuda_stats.max_time = time; cuda_stats.max_time = time;
if (time < cuda_stats.min_time || cuda_stats.calls == 1) if (time < cuda_stats.min_time || cuda_stats.calls == 1)
cuda_stats.min_time = time; cuda_stats.min_time = time;
cudaEventDestroy(start); hipEventDestroy(start);
cudaEventDestroy(stop); hipEventDestroy(stop);
Cuda_Statistics(); Cuda_Statistics();
#endif #endif

0
packages/cuda/lista.h Executable file → Normal file
View File

44
packages/cuda/memory.cu Executable file → Normal file
View File

@ -144,7 +144,7 @@ void limpiar(const char s[], size_t sz)
if(GPUmem.size() == 0) if(GPUmem.size() == 0)
{ {
cudaMemGetInfo(&free,&total); hipMemGetInfo(&free,&total);
cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl; cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
exit(1); exit(1);
} }
@ -154,11 +154,11 @@ void limpiar(const char s[], size_t sz)
{ {
temp = *ini; temp = *ini;
temp.dev_address = (int *)malloc(ini->size); temp.dev_address = (int *)malloc(ini->size);
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost); hipMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, hipMemcpyDeviceToHost);
list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration); list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
CPUmem.insert(pos, temp); CPUmem.insert(pos, temp);
} }
cudaFree(ini->dev_address); hipFree(ini->dev_address);
GPUmem.erase(ini); GPUmem.erase(ini);
} }
@ -173,19 +173,19 @@ void reservar(int **ptr, size_t size)
return; return;
} }
cudaMemGetInfo(&free, &total); hipMemGetInfo(&free, &total);
while(free < size) while(free < size)
{ {
cout << "Se limpio memoria " << free << " " << total << endl; cout << "Se limpio memoria " << free << " " << total << endl;
limpiar("not enough memory", size); limpiar("not enough memory", size);
cudaMemGetInfo(&free, &total); hipMemGetInfo(&free, &total);
} }
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation) while(hipMalloc(ptr, size) == hipErrorMemoryAllocation)
limpiar("Error in memory allocation", size); limpiar("Error in memory allocation", size);
if (! *ptr ) { if (! *ptr ) {
size_t free, total; size_t free, total;
cudaMemGetInfo( &free, &total ); hipMemGetInfo( &free, &total );
cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl; cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;
cerr << "Exiting CUDA...." << endl; cerr << "Exiting CUDA...." << endl;
exit(1); exit(1);
@ -277,7 +277,7 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
} }
size = num_rows * num_columns * sizeof(int); size = num_rows * num_columns * sizeof(int);
reservar(&temp, size); reservar(&temp, size);
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice); hipMemcpyAsync(temp, address_host_table, size, hipMemcpyHostToDevice);
registrar(name, num_columns, temp, num_rows, itr, 0); registrar(name, num_columns, temp, num_rows, itr, 0);
*ptr = temp; *ptr = temp;
return num_rows; return num_rows;
@ -296,13 +296,13 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
reservar(&temp, size); reservar(&temp, size);
for(x = 0; x < numgpu; x++) for(x = 0; x < numgpu; x++)
{ {
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToDevice);
inc += temp_storage[x].size / sizeof(int); inc += temp_storage[x].size / sizeof(int);
cudaFree(temp_storage[x].dev_address); hipFree(temp_storage[x].dev_address);
} }
for(; x < numcpu; x++) for(; x < numcpu; x++)
{ {
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice); hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyHostToDevice);
inc += temp_storage[x].size / sizeof(int); inc += temp_storage[x].size / sizeof(int);
free(temp_storage[x].dev_address); free(temp_storage[x].dev_address);
} }
@ -340,9 +340,9 @@ int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address
temp = (int *)malloc(size); temp = (int *)malloc(size);
for(x = 0; x < numgpu; x++) for(x = 0; x < numgpu; x++)
{ {
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost); hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToHost);
inc += temp_storage[x].size / sizeof(int); inc += temp_storage[x].size / sizeof(int);
cudaFree(temp_storage[x].dev_address); hipFree(temp_storage[x].dev_address);
} }
for(; x < numcpu; x++) for(; x < numcpu; x++)
{ {
@ -404,7 +404,7 @@ int cargafinal(int name, int cols, int **ptr)
cont = pos->rows; cont = pos->rows;
#ifdef TUFFY #ifdef TUFFY
reservar(&temp, pos->size); reservar(&temp, pos->size);
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice); hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
*ptr = temp; *ptr = temp;
#else #else
*ptr = pos->dev_address; *ptr = pos->dev_address;
@ -418,14 +418,14 @@ int cargafinal(int name, int cols, int **ptr)
pos = gpu; pos = gpu;
while(pos != endg && pos->name == name) while(pos != endg && pos->name == name)
{ {
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice); hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyDeviceToDevice);
temp += pos->size / sizeof(int); temp += pos->size / sizeof(int);
pos++; pos++;
} }
pos = cpu; pos = cpu;
while(pos != endc && pos->name == name) while(pos != endc && pos->name == name)
{ {
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice); hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
temp += pos->size / sizeof(int); temp += pos->size / sizeof(int);
pos++; pos++;
} }
@ -493,7 +493,7 @@ void clear_memory()
{ {
if(ini->isrule) if(ini->isrule)
{ {
cudaFree(ini->dev_address); hipFree(ini->dev_address);
ini = GPUmem.erase(ini); ini = GPUmem.erase(ini);
} }
else else
@ -518,7 +518,7 @@ void clear_memory_all()
fin = GPUmem.end(); fin = GPUmem.end();
while(ini != fin) while(ini != fin)
{ {
cudaFree(ini->dev_address); hipFree(ini->dev_address);
ini++; ini++;
} }
GPUmem.clear(); GPUmem.clear();
@ -542,7 +542,7 @@ void liberar(int name)
{ {
fact = *i; fact = *i;
GPUmem.erase(i); GPUmem.erase(i);
cudaFree(fact.dev_address); hipFree(fact.dev_address);
} }
i = buscarhecho(CPUmem.begin(), CPUmem.end(), name); i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
if(i != CPUmem.end()) if(i != CPUmem.end())
@ -566,10 +566,10 @@ void sumar(int name, int *dop1, int cols, int rows)
newrows = rows + fact.rows; newrows = rows + fact.rows;
reservar(&res, newrows * cols * sizeof(int)); reservar(&res, newrows * cols * sizeof(int));
offset = fact.rows * cols; offset = fact.rows * cols;
cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice); hipMemcpyAsync(res, fact.dev_address, offset * sizeof(int), hipMemcpyDeviceToDevice);
GPUmem.erase(i); GPUmem.erase(i);
registrar(name, cols, res, newrows, 0, 0); registrar(name, cols, res, newrows, 0, 0);
cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice); hipMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), hipMemcpyDeviceToDevice);
cudaFree(fact.dev_address); hipFree(fact.dev_address);
} }
} }

0
packages/cuda/memory.h Executable file → Normal file
View File

0
packages/cuda/pred.h Executable file → Normal file
View File

103
packages/cuda/selectproyect.cu Executable file → Normal file
View File

@ -1,3 +1,4 @@
#include "hip/hip_runtime.h"
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <thrust/scan.h> #include <thrust/scan.h>
#include <stdlib.h> #include <stdlib.h>
@ -8,10 +9,10 @@
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res) __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, posact; int x, rowact, posact;
if(threadIdx.x < numc) if(hipThreadIdx_x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -30,10 +31,10 @@ we unmark any rows that do not comply with the selections*/
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res) __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, posact; int x, rowact, posact;
if(threadIdx.x < numc) if(hipThreadIdx_x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -56,10 +57,10 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res) __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int temp, temp2, pos, x, y; int temp, temp2, pos, x, y;
if(threadIdx.x < cont) if(hipThreadIdx_x < cont)
shared[threadIdx.x] = dhead[threadIdx.x]; shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -90,10 +91,10 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res) __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int temp, temp2, pos, x, y; int temp, temp2, pos, x, y;
if(threadIdx.x < cont) if(hipThreadIdx_x < cont)
shared[threadIdx.x] = dhead[threadIdx.x]; shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -120,10 +121,10 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res) __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int pos, posr, x; int pos, posr, x;
if(threadIdx.x < hsize) if(hipThreadIdx_x < hsize)
shared[threadIdx.x] = dhead[threadIdx.x]; shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -139,10 +140,10 @@ selections, selfjoins, etc.). The array 'temp' holds the result of the prefix su
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res) __global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int pos, posr, x; int pos, posr, x;
if(threadIdx.x < hsize) if(hipThreadIdx_x < hsize)
shared[threadIdx.x] = dhead[threadIdx.x]; shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
@ -184,27 +185,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
tmplen = rows + 1; tmplen = rows + 1;
size2 = tmplen * sizeof(int); size2 = tmplen * sizeof(int);
reservar(&temp, size2); reservar(&temp, size2);
cudaMemset(temp, 0, size2); hipMemset(temp, 0, size2);
size = numselect * sizeof(int); size = numselect * sizeof(int);
cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, select, size, hipMemcpyHostToDevice);
marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselect, temp + 1);
if(numselfj > 0) if(numselfj > 0)
{ {
size = numselfj * sizeof(int); size = numselfj * sizeof(int);
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
} }
if(numpreds > 0) if(numpreds > 0)
{ {
size = numpreds * sizeof(int); size = numpreds * sizeof(int);
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
if(ANDlogic) if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
else else
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
} }
res = thrust::device_pointer_cast(temp); res = thrust::device_pointer_cast(temp);
@ -215,10 +216,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
size = head_size * sizeof(int); size = head_size * sizeof(int);
reservar(&fres, num * size); reservar(&fres, num * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres); hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
cudaFree(dhead); hipFree(dhead);
cudaFree(temp); hipFree(temp);
*ret = fres; *ret = fres;
return num; return num;
} }
@ -229,19 +230,19 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
tmplen = rows + 1; tmplen = rows + 1;
size2 = tmplen * sizeof(int); size2 = tmplen * sizeof(int);
reservar(&temp, size2); reservar(&temp, size2);
cudaMemset(temp, 0, size2); hipMemset(temp, 0, size2);
size = numselfj * sizeof(int); size = numselfj * sizeof(int);
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
if(numpreds > 0) if(numpreds > 0)
{ {
size = numpreds * sizeof(int); size = numpreds * sizeof(int);
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
if(ANDlogic) if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
else else
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
} }
@ -253,10 +254,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
size = head_size * sizeof(int); size = head_size * sizeof(int);
reservar(&fres, num * size); reservar(&fres, num * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres); hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
cudaFree(dhead); hipFree(dhead);
cudaFree(temp); hipFree(temp);
*ret = fres; *ret = fres;
return num; return num;
} }
@ -267,14 +268,14 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
tmplen = rows + 1; tmplen = rows + 1;
size2 = tmplen * sizeof(int); size2 = tmplen * sizeof(int);
reservar(&temp, size2); reservar(&temp, size2);
cudaMemset(temp, 0, size2); hipMemset(temp, 0, size2);
size = numpreds * sizeof(int); size = numpreds * sizeof(int);
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
if(ANDlogic) if(ANDlogic)
bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
else else
bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
res = thrust::device_pointer_cast(temp); res = thrust::device_pointer_cast(temp);
thrust::inclusive_scan(res + 1, res + tmplen, res + 1); thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
num = res[rows]; num = res[rows];
@ -284,10 +285,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
size = head_size * sizeof(int); size = head_size * sizeof(int);
reservar(&fres, num * size); reservar(&fres, num * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres); hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
cudaFree(dhead); hipFree(dhead);
cudaFree(temp); hipFree(temp);
*ret = fres; *ret = fres;
return num; return num;
} }
@ -295,9 +296,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
{ {
size = head_size * sizeof(int); size = head_size * sizeof(int);
reservar(&fres, rows * size); reservar(&fres, rows * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres); hipLaunchKernel(HIP_KERNEL_NAME(proyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, head_size, fres);
cudaFree(dhead); hipFree(dhead);
*ret = fres; *ret = fres;
return rows; return rows;
} }

0
packages/cuda/selectproyectcpu.cpp Executable file → Normal file
View File

347
packages/cuda/treeb.cu Executable file → Normal file
View File

@ -1,3 +1,4 @@
#include "hip/hip_runtime.h"
#include <thrust/host_vector.h> #include <thrust/host_vector.h>
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <thrust/sequence.h> #include <thrust/sequence.h>
@ -160,11 +161,11 @@ __device__ int firstMatchingKeyInDataNode2(Record records[], IKeyType key)
__global__ void gCreateIndex(IDataNode data[], IDirectoryNode dir[], int dirSize, int tree_size, int bottom_start, int nNodesPerBlock) __global__ void gCreateIndex(IDataNode data[], IDirectoryNode dir[], int dirSize, int tree_size, int bottom_start, int nNodesPerBlock)
{ {
int startIdx = blockIdx.x * nNodesPerBlock; int startIdx = hipBlockIdx_x * nNodesPerBlock;
int endIdx = startIdx + nNodesPerBlock; int endIdx = startIdx + nNodesPerBlock;
if(endIdx > dirSize) if(endIdx > dirSize)
endIdx = dirSize; endIdx = dirSize;
int keyIdx = threadIdx.x; int keyIdx = hipThreadIdx_x;
// Proceed only when in internal nodes // Proceed only when in internal nodes
for(int nodeIdx = startIdx; nodeIdx < endIdx; nodeIdx++) for(int nodeIdx = startIdx; nodeIdx < endIdx; nodeIdx++)
@ -191,11 +192,11 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
{ {
// Bringing the root node (visited by every tuple) to the faster shared memory // Bringing the root node (visited by every tuple) to the faster shared memory
__shared__ IKeyType RootNodeKeys[TREE_NODE_SIZE]; __shared__ IKeyType RootNodeKeys[TREE_NODE_SIZE];
RootNodeKeys[threadIdx.x] = dir->keys[threadIdx.x]; RootNodeKeys[hipThreadIdx_x] = dir->keys[hipThreadIdx_x];
__syncthreads(); __syncthreads();
int OverallThreadIdx = blockIdx.x * THRD_PER_BLCK_search + threadIdx.x; int OverallThreadIdx = hipBlockIdx_x * THRD_PER_BLCK_search + hipThreadIdx_x;
for(int keyIdx = OverallThreadIdx; keyIdx < nSearchKeys; keyIdx += THRD_PER_GRID_search) for(int keyIdx = OverallThreadIdx; keyIdx < nSearchKeys; keyIdx += THRD_PER_GRID_search)
{ {
@ -219,7 +220,7 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
/*Counts the number of times a row in 'S' is to be joined to a row in 'R'.*/ /*Counts the number of times a row in 'S' is to be joined to a row in 'R'.*/
__global__ void gIndexJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[]) __global__ void gIndexJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[])
{ {
int s_cur = blockIdx.x * blockDim.x + threadIdx.x; int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(s_cur < sLen) if(s_cur < sLen)
{ {
@ -246,11 +247,11 @@ in 'g_locations' those rows that have equal values in the checked columns.*/
__global__ void gIndexMultiJoinNegative(int *R, int *S, int g_locations[], int rLen, int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj) __global__ void gIndexMultiJoinNegative(int *R, int *S, int g_locations[], int rLen, int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int r_cur = blockIdx.x * blockDim.x + threadIdx.x; int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int posr, poss, x; int posr, poss, x;
if(threadIdx.x < wj) if(hipThreadIdx_x < wj)
shared[threadIdx.x] = muljoin[threadIdx.x]; shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(r_cur < rLen) if(r_cur < rLen)
@ -287,11 +288,11 @@ times a row in 'S' is to be joined to its corresponding row in 'R', storing the
__global__ void gIndexMultiJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[], int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj) __global__ void gIndexMultiJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[], int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x; int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int posr, poss, x; int posr, poss, x;
if(threadIdx.x < wj) if(hipThreadIdx_x < wj)
shared[threadIdx.x] = muljoin[threadIdx.x]; shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(s_cur < sLen) if(s_cur < sLen)
@ -330,10 +331,10 @@ __global__ void multiJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int *extjoins = &shared[lenrul]; int *extjoins = &shared[lenrul];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x; int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < (lenrul + wj)) if(hipThreadIdx_x < (lenrul + wj))
shared[threadIdx.x] = rule[threadIdx.x]; shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(s_cur < sLen) if(s_cur < sLen)
@ -382,10 +383,10 @@ __global__ void multiJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSum
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int *extjoins = &shared[cols]; int *extjoins = &shared[cols];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x; int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < (cols + wj)) if(hipThreadIdx_x < (cols + wj))
shared[threadIdx.x] = rule[threadIdx.x]; shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(s_cur < sLen) if(s_cur < sLen)
@ -432,11 +433,11 @@ predicate are projected.*/
__global__ void gJoinWithWriteNegative(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int halfrul, int *mloc) __global__ void gJoinWithWriteNegative(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int halfrul, int *mloc)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int r_cur = blockIdx.x * blockDim.x + threadIdx.x; int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int posr; int posr;
if(threadIdx.x < halfrul) if(hipThreadIdx_x < halfrul)
shared[threadIdx.x] = rule[threadIdx.x]; shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(r_cur < rLen) if(r_cur < rLen)
@ -461,11 +462,11 @@ predicate are projected.*/
__global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int cols, int *mloc) __global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int cols, int *mloc)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int r_cur = blockIdx.x * blockDim.x + threadIdx.x; int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int posr; int posr;
if(threadIdx.x < cols) if(hipThreadIdx_x < cols)
shared[threadIdx.x] = rule[threadIdx.x]; shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(r_cur < rLen) if(r_cur < rLen)
@ -489,10 +490,10 @@ __global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinR
__global__ void gJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int halfrul, int lenrul, int *mloc, int *sloc) __global__ void gJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int halfrul, int lenrul, int *mloc, int *sloc)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x; int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < lenrul) if(hipThreadIdx_x < lenrul)
shared[threadIdx.x] = rule[threadIdx.x]; shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(s_cur < sLen) if(s_cur < sLen)
@ -525,10 +526,10 @@ projection, which is performed based on the variables in the head of the rule.*/
__global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int cols, int *mloc, int *sloc) __global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int cols, int *mloc, int *sloc)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x; int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < cols) if(hipThreadIdx_x < cols)
shared[threadIdx.x] = rule[threadIdx.x]; shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(s_cur < sLen) if(s_cur < sLen)
@ -563,7 +564,7 @@ __global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[],
/*Load part of column 'wj' of 'p' in 'R'. Which values are loaded is defined by the prefix sum results in 'pos'.*/ /*Load part of column 'wj' of 'p' in 'R'. Which values are loaded is defined by the prefix sum results in 'pos'.*/
__global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *ids) __global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *ids)
{ {
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int cond; int cond;
if(id < len) if(id < len)
{ {
@ -579,7 +580,7 @@ __global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *i
/*Load an entire column from 'p' into 'R'.*/ /*Load an entire column from 'p' into 'R'.*/
__global__ void llenarnosel(int *p, int *R, int len, int of, int wj) __global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
{ {
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(id < len) if(id < len)
R[id] = p[id * of + wj]; R[id] = p[id * of + wj];
} }
@ -587,10 +588,10 @@ __global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
__global__ void projectfinal(int *res, int rows, int cols, int *rule, int *out) __global__ void projectfinal(int *res, int rows, int cols, int *rule, int *out)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < cols) if(hipThreadIdx_x < cols)
shared[threadIdx.x] = rule[threadIdx.x]; shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
@ -614,26 +615,26 @@ void project(int *res, int resrows, int numcols1, int numcols2, int *proj, int *
int *pt = (int *)malloc(sizepro); int *pt = (int *)malloc(sizepro);
for(z = 0; z < numcols2; z++) for(z = 0; z < numcols2; z++)
pt[z] = proj[z] - 1; pt[z] = proj[z] - 1;
cudaMemcpy(dcons, pt, sizepro, cudaMemcpyHostToDevice); hipMemcpy(dcons, pt, sizepro, hipMemcpyHostToDevice);
//cudaDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied. //hipDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
free(pt); free(pt);
} }
else else
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice); hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
reservar(&d_Rout, resrows * sizepro); reservar(&d_Rout, resrows * sizepro);
projectfinal<<<blockllen, numthreads, sizepro>>>(res, resrows, numcols1, dcons, d_Rout); hipLaunchKernel(HIP_KERNEL_NAME(projectfinal), dim3(blockllen), dim3(numthreads), sizepro, 0, res, resrows, numcols1, dcons, d_Rout);
cudaFree(dcons); hipFree(dcons);
cudaFree(*ret); hipFree(*ret);
*ret = d_Rout; *ret = d_Rout;
} }
__global__ void projectadd(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, int *dhead, int hsize, int *res) __global__ void projectadd(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, int *dhead, int hsize, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int pos2, posr, x, y, cond; int pos2, posr, x, y, cond;
if(threadIdx.x < hsize) if(hipThreadIdx_x < hsize)
shared[threadIdx.x] = dhead[threadIdx.x]; shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads(); __syncthreads();
if(id < rows2) if(id < rows2)
{ {
@ -662,10 +663,10 @@ void juntar(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, in
int blockllen = rows2 / numthreads + 1; int blockllen = rows2 / numthreads + 1;
sizepro = pcols * sizeof(int); sizepro = pcols * sizeof(int);
reservar(&dcons, sizepro); reservar(&dcons, sizepro);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice); hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
reservar(&d_Rout, rows1 * rows2 * sizepro); reservar(&d_Rout, rows1 * rows2 * sizepro);
projectadd<<<blockllen, numthreads, sizepro>>>(dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout); hipLaunchKernel(HIP_KERNEL_NAME(projectadd), dim3(blockllen), dim3(numthreads), sizepro, 0, dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
cudaFree(dcons); hipFree(dcons);
*ret = d_Rout; *ret = d_Rout;
} }
@ -743,51 +744,51 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
#ifdef TIMER #ifdef TIMER
//cout << "INICIO" << endl; //cout << "INICIO" << endl;
cudaEvent_t start, stop; hipEvent_t start, stop;
float time; float time;
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
if(npred2.x > 0 || npred2.y > 0 || nsel2 > 0 || nsj2 > 0) if(npred2.x > 0 || npred2.y > 0 || nsel2 > 0 || nsj2 > 0)
{ {
newLen = sLen + 1; newLen = sLen + 1;
cudaMemsetAsync(temp, 0, newLen * sizeof(int)); hipMemsetAsync(temp, 0, newLen * sizeof(int));
} }
if(npred2.x > 0 || npred2.y > 0) if(npred2.x > 0 || npred2.y > 0)
{ {
size = npred2tot * sizeof(int); size = npred2tot * sizeof(int);
cudaMemcpy(dcons, pred2, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, pred2, size, hipMemcpyHostToDevice);
if(npred2.y > 0) /*Fix case when a(X,Y),b(Y,Z),Z > Y*/ if(npred2.y > 0) /*Fix case when a(X,Y),b(Y,Z),Z > Y*/
{ {
reservar(&temp2, sizet2); reservar(&temp2, sizet2);
cudaMemsetAsync(temp2, 0, newLen * sizeof(int)); hipMemsetAsync(temp2, 0, newLen * sizeof(int));
//res = thrust::device_pointer_cast(temp2); //res = thrust::device_pointer_cast(temp2);
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
} }
else else
{ {
if(negative) if(negative)
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL); hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
else else
bpredsOR<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL); hipLaunchKernel(HIP_KERNEL_NAME(bpredsOR), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
} }
if(nsel2 > 0) if(nsel2 > 0)
{ {
size = nsel2 * sizeof(int); size = nsel2 * sizeof(int);
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
marcar<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
} }
if(nsj2 > 0) if(nsj2 > 0)
{ {
size = nsj2 * sizeof(int); size = nsj2 * sizeof(int);
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
} }
} }
else else
@ -795,14 +796,14 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(nsel2 > 0) if(nsel2 > 0)
{ {
size = nsel2 * sizeof(int); size = nsel2 * sizeof(int);
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
marcar2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
if(nsj2 > 0) if(nsj2 > 0)
{ {
size = nsj2 * sizeof(int); size = nsj2 * sizeof(int);
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
} }
} }
else else
@ -810,15 +811,15 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(nsj2 > 0) if(nsj2 > 0)
{ {
size = nsj2 * sizeof(int); size = nsj2 * sizeof(int);
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
samejoin2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
} }
else else
{ {
sizem32S = m32sLen * sizeof(int); sizem32S = m32sLen * sizeof(int);
reservar(&d_S, sizem32S); reservar(&d_S, sizem32S);
cudaMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int)); hipMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
llenarnosel<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1]); hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1]);
} }
} }
} }
@ -842,8 +843,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(newLen == 0) // && !negative) ARREGLAR if(newLen == 0) // && !negative) ARREGLAR
{ {
cudaFree(temp); hipFree(temp);
cudaFree(dcons); hipFree(dcons);
return 0; return 0;
} }
@ -854,24 +855,24 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
reservar(&d_S, sizem32S); reservar(&d_S, sizem32S);
reservar(&posS, sizem32S); reservar(&posS, sizem32S);
cudaMemsetAsync(d_S + newLen, 0x7f, sizextra); hipMemsetAsync(d_S + newLen, 0x7f, sizextra);
cudaMemsetAsync(posS + newLen, 0x7f, sizextra); hipMemsetAsync(posS + newLen, 0x7f, sizextra);
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS); hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1], temp, posS);
sLen = newLen; sLen = newLen;
} }
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
//cout << "Select1 = " << time << endl; //cout << "Select1 = " << time << endl;
cuda_stats.select1_time += time; cuda_stats.select1_time += time;
cudaEventDestroy(start); hipEventDestroy(start);
cudaEventDestroy(stop); hipEventDestroy(stop);
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
blockllen = rLen / numthreads + 1; blockllen = rLen / numthreads + 1;
@ -880,30 +881,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
{ {
if(temp2 != NULL) if(temp2 != NULL)
{ {
cudaFree(temp); hipFree(temp);
temp = temp2; temp = temp2;
res = thrust::device_pointer_cast(temp); res = thrust::device_pointer_cast(temp);
newLen = rLen + 1; newLen = rLen + 1;
if(nsel1 > 0) if(nsel1 > 0)
{ {
size = nsel1 * sizeof(int); size = nsel1 * sizeof(int);
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
marcar<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
} }
if(nsj1 > 0) if(nsj1 > 0)
{ {
size = nsj1 * sizeof(int); size = nsj1 * sizeof(int);
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
} }
if(npred1.x > 0) if(npred1.x > 0)
{ {
size = npred1.x * sizeof(int); size = npred1.x * sizeof(int);
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
if(ANDlogic) if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
else else
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
} }
} }
else else
@ -911,30 +912,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(npred1.x > 0 || nsel1 > 0 || nsj1 > 0) if(npred1.x > 0 || nsel1 > 0 || nsj1 > 0)
{ {
newLen = rLen + 1; newLen = rLen + 1;
cudaMemsetAsync(temp, 0, newLen * sizeof(int)); hipMemsetAsync(temp, 0, newLen * sizeof(int));
} }
if(nsel1 > 0) if(nsel1 > 0)
{ {
size = nsel1 * sizeof(int); size = nsel1 * sizeof(int);
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
marcar2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
if(nsj1 > 0) if(nsj1 > 0)
{ {
size = nsj1 * sizeof(int); size = nsj1 * sizeof(int);
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
} }
if(npred1.x > 0) if(npred1.x > 0)
{ {
size = npred1.x * sizeof(int); size = npred1.x * sizeof(int);
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
if(ANDlogic) if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
else else
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
} }
} }
else else
@ -942,17 +943,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(nsj1 > 0) if(nsj1 > 0)
{ {
size = nsj1 * sizeof(int); size = nsj1 * sizeof(int);
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
samejoin2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
if(npred1.x > 0) if(npred1.x > 0)
{ {
size = npred1.x * sizeof(int); size = npred1.x * sizeof(int);
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
if(ANDlogic) if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
else else
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
} }
} }
else else
@ -960,11 +961,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(npred1.x > 0) if(npred1.x > 0)
{ {
size = npred1.x * sizeof(int); size = npred1.x * sizeof(int);
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice); hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
if(ANDlogic) if(ANDlogic)
bpredsnormal2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
else else
bpredsorlogic2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1); hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
} }
} }
} }
@ -976,11 +977,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
newLen = res[rLen]; newLen = res[rLen];
if(newLen == 0) if(newLen == 0)
{ {
cudaFree(temp); hipFree(temp);
cudaFree(dcons); hipFree(dcons);
cudaFree(d_S); hipFree(d_S);
if(posS != NULL) if(posS != NULL)
cudaFree(posS); hipFree(posS);
return 0; return 0;
} }
@ -991,41 +992,41 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
reservar(&d_R, sizem32); reservar(&d_R, sizem32);
reservar(&posR, sizem32); reservar(&posR, sizem32);
cudaMemsetAsync(d_R + newLen, 0x7f, sizextra); hipMemsetAsync(d_R + newLen, 0x7f, sizextra);
cudaMemsetAsync(posR + newLen, 0x7f, sizextra); hipMemsetAsync(posR + newLen, 0x7f, sizextra);
llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR); hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0], temp, posR);
rLen = newLen; rLen = newLen;
} }
else else
{ {
sizem32 = m32rLen * sizeof(int); sizem32 = m32rLen * sizeof(int);
reservar(&d_R, sizem32); reservar(&d_R, sizem32);
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int)); hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]); hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
} }
} }
else else
{ {
sizem32 = m32rLen * sizeof(int); sizem32 = m32rLen * sizeof(int);
reservar(&d_R, sizem32); reservar(&d_R, sizem32);
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int)); hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]); hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
} }
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
//cout << "Select2 = " << time << endl; //cout << "Select2 = " << time << endl;
cuda_stats.select2_time += time; cuda_stats.select2_time += time;
#endif #endif
#ifdef TIMER #ifdef TIMER
cudaEventDestroy(start); hipEventDestroy(start);
cudaEventDestroy(stop); hipEventDestroy(stop);
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
thrust::device_ptr<Record> dvp1; thrust::device_ptr<Record> dvp1;
@ -1084,17 +1085,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
} }
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
//cout << "Sort = " << time << endl; //cout << "Sort = " << time << endl;
cuda_stats.sort_time += time; cuda_stats.sort_time += time;
cudaEventDestroy(start); hipEventDestroy(start);
cudaEventDestroy(stop); hipEventDestroy(stop);
cudaEventCreate(&start); hipEventCreate(&start);
cudaEventCreate(&stop); hipEventCreate(&stop);
cudaEventRecord(start, 0); hipEventRecord(start, 0);
#endif #endif
IDataNode* d_data; IDataNode* d_data;
@ -1123,7 +1124,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
dim3 Dbc(THRD_PER_BLCK_create, 1, 1); dim3 Dbc(THRD_PER_BLCK_create, 1, 1);
dim3 Dgc(BLCK_PER_GRID_create, 1, 1); dim3 Dgc(BLCK_PER_GRID_create, 1, 1);
gCreateIndex <<<Dgc, Dbc>>> (d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock); hipLaunchKernel(HIP_KERNEL_NAME(gCreateIndex), dim3(Dgc), dim3(Dbc), 0, 0, d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);
int *d_locations; int *d_locations;
int memSizeR; int memSizeR;
@ -1132,7 +1133,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
{ {
memSizeR = (rLen + 1) * sizeof(int); memSizeR = (rLen + 1) * sizeof(int);
reservar(&d_locations, memSizeR); reservar(&d_locations, memSizeR);
cudaMemsetAsync(d_locations, 0, sizeof(int)); hipMemsetAsync(d_locations, 0, sizeof(int));
nSearchKeys = rLen; nSearchKeys = rLen;
} }
else else
@ -1146,13 +1147,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
unsigned int nKeysPerThread = uintCeilingDiv(nSearchKeys, THRD_PER_GRID_search); unsigned int nKeysPerThread = uintCeilingDiv(nSearchKeys, THRD_PER_GRID_search);
if(negative) if(negative)
{ {
gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start); hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
cudaMemsetAsync(temp, 0, memSizeR); hipMemsetAsync(temp, 0, memSizeR);
} }
else else
{ {
gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start); hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
cudaMemsetAsync(temp, 0, memSizeS); hipMemsetAsync(temp, 0, memSizeS);
} }
int muljoin = 0, muljoinsize = 0, sum; int muljoin = 0, muljoinsize = 0, sum;
@ -1165,8 +1166,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
{ {
muljoin = numj - 2; muljoin = numj - 2;
muljoinsize = muljoin * sizeof(int); muljoinsize = muljoin * sizeof(int);
cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice); hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
gIndexMultiJoinNegative<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin); hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoinNegative), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
} }
res = thrust::device_pointer_cast(d_locations); res = thrust::device_pointer_cast(d_locations);
@ -1177,21 +1178,21 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(pos == (rule->num_rows - 3)) if(pos == (rule->num_rows - 3))
{ {
sizepro = rule->num_columns * sizeof(int); sizepro = rule->num_columns * sizeof(int);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice); hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
resSize = sum * sizepro; resSize = sum * sizepro;
reservar(&d_Rout, resSize); reservar(&d_Rout, resSize);
gJoinWithWriteNegative2<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR); hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
} }
else else
{ {
sizepro = projp.x * sizeof(int); sizepro = projp.x * sizeof(int);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice); hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
resSize = sum * sizepro; resSize = sum * sizepro;
reservar(&d_Rout, resSize); reservar(&d_Rout, resSize);
gJoinWithWriteNegative<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR); hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
} }
cudaFree(d_R); hipFree(d_R);
cudaFree(d_S); hipFree(d_S);
} }
else else
{ {
@ -1200,26 +1201,26 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
{ {
muljoin = numj - 2; muljoin = numj - 2;
muljoinsize = muljoin * sizeof(int); muljoinsize = muljoin * sizeof(int);
cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice); hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
gIndexMultiJoin<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin); hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoin), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
} }
else else
gIndexJoin<<<blockllen, numthreads>>> (d_R, d_S, d_locations, sLen, temp); hipLaunchKernel(HIP_KERNEL_NAME(gIndexJoin), dim3(blockllen), dim3(numthreads), 0, 0, d_R, d_S, d_locations, sLen, temp);
cudaFree(d_R); hipFree(d_R);
cudaFree(d_S); hipFree(d_S);
sum = res[sLen-1]; sum = res[sLen-1];
thrust::exclusive_scan(res, res + sLen, res); thrust::exclusive_scan(res, res + sLen, res);
sum += res[sLen-1]; sum += res[sLen-1];
if(sum == 0) if(sum == 0)
{ {
cudaFree(dcons); hipFree(dcons);
cudaFree(d_locations); hipFree(d_locations);
cudaFree(temp); hipFree(temp);
if(posS != NULL) if(posS != NULL)
cudaFree(posS); hipFree(posS);
if(posR != NULL) if(posR != NULL)
cudaFree(posR); hipFree(posR);
return 0; return 0;
} }
res[sLen] = sum; res[sLen] = sum;
@ -1227,49 +1228,49 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(pos == (rule->num_rows - 3)) if(pos == (rule->num_rows - 3))
{ {
sizepro = rule->num_columns * sizeof(int); sizepro = rule->num_columns * sizeof(int);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice); hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
resSize = sum * sizepro; resSize = sum * sizepro;
reservar(&d_Rout, resSize); reservar(&d_Rout, resSize);
if(numj > 2) if(numj > 2)
{ {
cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice); hipMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
multiJoinWithWrite2<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin); hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
} }
else else
gJoinWithWrite2<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS); hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
} }
else else
{ {
sizepro = projp.y * sizeof(int); sizepro = projp.y * sizeof(int);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice); hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
resSize = sum * sizepro; resSize = sum * sizepro;
reservar(&d_Rout, resSize); reservar(&d_Rout, resSize);
if(numj > 2) if(numj > 2)
{ {
cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice); hipMemcpy(dcons + projp.y, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
multiJoinWithWrite<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin); hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
} }
else else
gJoinWithWrite<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS); hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
} }
} }
cudaFree(dcons); hipFree(dcons);
cudaFree(d_locations); hipFree(d_locations);
cudaFree(temp); hipFree(temp);
if(posS != NULL) if(posS != NULL)
cudaFree(posS); hipFree(posS);
if(posR != NULL) if(posR != NULL)
cudaFree(posR); hipFree(posR);
if(*ret != NULL) if(*ret != NULL)
cudaFree(*ret); hipFree(*ret);
*ret = d_Rout; *ret = d_Rout;
#ifdef TIMER #ifdef TIMER
cudaEventRecord(stop, 0); hipEventRecord(stop, 0);
cudaEventSynchronize(stop); hipEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop); hipEventElapsedTime(&time, start, stop);
//cout << "Join = " << time << endl; //cout << "Join = " << time << endl;
//cout << "FIN" << endl; //cout << "FIN" << endl;
cuda_stats.join_time += time; cuda_stats.join_time += time;

80
packages/cuda/union2.cu Executable file → Normal file
View File

@ -87,8 +87,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -122,8 +122,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -157,8 +157,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -192,8 +192,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -227,8 +227,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -262,8 +262,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -297,8 +297,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -332,8 +332,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -367,8 +367,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -402,8 +402,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -437,8 +437,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -472,8 +472,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -507,8 +507,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -542,8 +542,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -577,8 +577,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -612,8 +612,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -647,8 +647,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -682,8 +682,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -717,8 +717,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;
@ -752,8 +752,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{ {
size = nrows * tipo * sizeof(int); size = nrows * tipo * sizeof(int);
reservar(&nres, size); reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice); hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
cudaFree(*ret); hipFree(*ret);
*ret = nres; *ret = nres;
} }
return nrows; return nrows;

0
packages/cuda/union2.h Executable file → Normal file
View File

0
packages/cuda/unioncpu2.cpp Executable file → Normal file
View File

View File

@ -2,19 +2,21 @@
sqlite3 side: sqlite3 side:
create table test (id integer, x integer, y integer); create table test (id integer, x integer, y integer);
.separator "," .separator ","
.import /home/vsc/Yap/ILP/HH/DaysInHospital_Y3.csv test .import DaysInHospital_Y3.csv hh
.export hh
.save hh
myddas side: myddas side:
use_module(library(myddas)). use_module(library(myddas)).
db_open(sqlite3,con,'../hh',x,x). db_open(sqlite3,con,'.hh',x,x).
db_close(con). db_close(con).
test 2: test 2:
use_module(library(myddas)). use_module(library(myddas)).
assert(( assert((
t2 :- t2 :-
db_open(sqlite3,con,'../hh',x,x), db_open(sqlite3,con,'hh',x,x),
db_import(con,test,test), db_import(con,test,test),
test(A,B,C), test(A,B,C),
writeln(test(A,B,C)), writeln(test(A,B,C)),
@ -22,4 +24,3 @@ fail
)). )).
trace. trace.
t2. t2.

View File

@ -15,7 +15,12 @@
* * * *
*************************************************************************/ *************************************************************************/
#if USE_MYDDAS #if 1 //USE_MYDDAS
:- load_foreign_files([myddas], [], init_myddas).
/* Initialize MYDDAS GLOBAL STRUCTURES */
:- c_db_initialize_myddas.
#ifdef DEBUG #ifdef DEBUG
:- yap_flag(single_var_warnings,on). :- yap_flag(single_var_warnings,on).
@ -800,8 +805,6 @@
]). ]).
#ifdef MYDDAS_MYSQL #ifdef MYDDAS_MYSQL
:- load_foreign_files([], [], init_mysql).
:- use_module(myddas_mysql,[ :- use_module(myddas_mysql,[
db_my_result_set/1, db_my_result_set/1,
db_datalog_describe/1, db_datalog_describe/1,
@ -867,7 +870,7 @@
% db_open/4 % db_open/4
% %
#if MYDDAS_DECLARATIONS #if 1 // MYDDAS_DECLARATIONS
:- db_open(Protocol) extra_arguments :- db_open(Protocol) extra_arguments
db=Db, db=Db,
port=Port, port=Port,

View File

@ -23,12 +23,11 @@ Android/jni/sqlite/nativehelper/jni.h
#sqlite3 is now in the system #sqlite3 is now in the system
set (SQLITE3_FOUND ON PARENT_SCOPE) set (SQLITE3_FOUND ON PARENT_SCOPE)
macro_log_feature (SQLITE3_FOUND "Sqlite3" message (
"Sqlite3 Data-Base " " * Sqlite3 Data-Base (http://www.sqlite3ql.org) is distributed with
"http://www.sqlite3ql.org" FALSE) MYDDAS" )
include_directories ( .. . Android/jni/sqlite Android/jni/sqlite/nativehelper)
include_directories (${SQLITE3_INCLUDE_DIRECTORIES} .. . Android/jni/sqlite Android/jni/sqlite/nativehelper)
set (MYDDAS_FLAGS ${MYDDAS_FLAGS} -DMYDDAS_SQLITE3=1 PARENT_SCOPE) set (MYDDAS_FLAGS ${MYDDAS_FLAGS} -DMYDDAS_SQLITE3=1 PARENT_SCOPE)
set_property( DIRECTORY .. APPEND PROPERTY COMPILE_DEFINITIONS MYDDAS_SQLITE3=1 ) set_property( DIRECTORY .. APPEND PROPERTY COMPILE_DEFINITIONS MYDDAS_SQLITE3=1 )

View File

@ -0,0 +1,10 @@
from setuptools import setup, Extension
setup(
name = "yapex",
version = "0.1",
package_dir = {'': '${CMAKE_SOURCE_DIR}/packages/python' },
py_modules = ['yapex']
)

File diff suppressed because it is too large Load Diff

View File

@ -78,14 +78,11 @@ static lbfgsfloatval_t evaluate(
a1 = YAP_ArgOfTerm(1,call); a1 = YAP_ArgOfTerm(1,call);
if (YAP_IsFloatTerm(a1)) { if (YAP_IsFloatTerm(a1)) {
YAP_ShutdownGoal( TRUE );
return (lbfgsfloatval_t) YAP_FloatOfTerm(a1); return (lbfgsfloatval_t) YAP_FloatOfTerm(a1);
} else if (YAP_IsIntTerm(a1)) { } else if (YAP_IsIntTerm(a1)) {
YAP_ShutdownGoal( TRUE );
return (lbfgsfloatval_t) YAP_IntOfTerm(a1); return (lbfgsfloatval_t) YAP_IntOfTerm(a1);
} }
YAP_ShutdownGoal( TRUE );
fprintf(stderr, "ERROR: The evaluate call back function did not return a number as first argument.\n"); fprintf(stderr, "ERROR: The evaluate call back function did not return a number as first argument.\n");
return 0; return 0;
} }