python support
This commit is contained in:
parent
c2fb631106
commit
3f59ec40cd
@ -240,7 +240,6 @@ timer_stop(Name,Duration) :-
|
||||
->
|
||||
statistics(walltime,[StopTime,_]),
|
||||
Duration is StopTime-StartTime;
|
||||
|
||||
throw(timer_not_started(timer_stop(Name,Duration)))
|
||||
).
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
1%%% -*- Mode: Prolog; -*-
|
||||
%%% -*- Mode: Prolog; -*-
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%
|
||||
@ -567,6 +567,8 @@ init_learning :-
|
||||
set_problog_flag(alpha,Alpha)
|
||||
)
|
||||
)
|
||||
;
|
||||
true
|
||||
),
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
0
packages/cuda/CC_CSSTree.cu
Executable file → Normal file
0
packages/cuda/CC_CSSTree.cu
Executable file → Normal file
0
packages/cuda/CC_CSSTree.h
Executable file → Normal file
0
packages/cuda/CC_CSSTree.h
Executable file → Normal file
0
packages/cuda/Makefile.in
Executable file → Normal file
0
packages/cuda/Makefile.in
Executable file → Normal file
37
packages/cuda/bpreds.cu
Executable file → Normal file
37
packages/cuda/bpreds.cu
Executable file → Normal file
@ -1,3 +1,4 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <cstdarg>
|
||||
@ -25,10 +26,10 @@ int maximo(int count, ...)
|
||||
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, rowact1, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -110,10 +111,10 @@ __global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *co
|
||||
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -159,10 +160,10 @@ __global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc,
|
||||
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -226,10 +227,10 @@ __global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc,
|
||||
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, rowact1, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -344,10 +345,10 @@ __global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *
|
||||
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -411,10 +412,10 @@ __global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc
|
||||
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
|
1
packages/cuda/bpreds.h
Executable file → Normal file
1
packages/cuda/bpreds.h
Executable file → Normal file
@ -1,3 +1,4 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#ifndef _BPREDS_H_
|
||||
#define _BPREDS_H_
|
||||
|
||||
|
0
packages/cuda/bpredscpu.cpp
Executable file → Normal file
0
packages/cuda/bpredscpu.cpp
Executable file → Normal file
4
packages/cuda/creator2.c
Executable file → Normal file
4
packages/cuda/creator2.c
Executable file → Normal file
@ -66,7 +66,7 @@ int main(int argc, char *argv[])
|
||||
fprintf(cuda, "\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
|
||||
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
|
||||
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
|
||||
fprintf(cuda, "\t\t\t}\n");
|
||||
@ -103,7 +103,7 @@ int main(int argc, char *argv[])
|
||||
fprintf(cuda, "\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
|
||||
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
|
||||
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
|
||||
fprintf(cuda, "\t\t\t}\n");
|
||||
|
0
packages/cuda/cuda.c
Executable file → Normal file
0
packages/cuda/cuda.c
Executable file → Normal file
0
packages/cuda/cuda.yap
Executable file → Normal file
0
packages/cuda/cuda.yap
Executable file → Normal file
@ -27,8 +27,8 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
|
||||
res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
|
||||
hipFree(dop1);
|
||||
*result = hres;
|
||||
}
|
||||
else
|
||||
@ -39,13 +39,13 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
|
||||
int *dop2;
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
reservar(&dop2, tipo);
|
||||
cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dop2, dop1, tipo, hipMemcpyHostToDevice);
|
||||
free(dop1);
|
||||
res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop2);
|
||||
hipMemcpy(hres, dop2, tipo, hipMemcpyDeviceToHost);
|
||||
hipFree(dop2);
|
||||
*result = hres;
|
||||
}
|
||||
else
|
||||
@ -315,8 +315,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
|
||||
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
|
||||
hipFree(dop1);
|
||||
w = z + 1;
|
||||
|
||||
strtok(qposr->rulename, "_");
|
||||
@ -353,8 +353,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
|
||||
res_rows = abs(res_rows);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
|
||||
hipFree(dop1);
|
||||
|
||||
char file[] = "/dev/shm/buffer.csv";
|
||||
FILE *fp;
|
||||
@ -554,7 +554,7 @@ void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator f
|
||||
sign = tmpfact.predname;
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
|
||||
if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
|
||||
sumar(tmpfact.name, dop1, cols1, res_rows);
|
||||
}
|
||||
|
62
packages/cuda/joincpu.cpp
Executable file → Normal file
62
packages/cuda/joincpu.cpp
Executable file → Normal file
@ -324,11 +324,11 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEvent_t start, stop;
|
||||
hipEvent_t start, stop;
|
||||
float time;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
if(nsel1 > 0 || nsj1 > 0)
|
||||
@ -359,16 +359,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.select1_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
if(nsel2 > 0 || nsj2 > 0)
|
||||
@ -381,16 +381,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
Snl = sLen;
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.select2_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
//cout << "antes" << endl;
|
||||
@ -406,16 +406,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation);
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.sort_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
/*cout << "despues" << endl;
|
||||
@ -482,9 +482,9 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
*ret = fres;
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.join_time += time;
|
||||
#endif
|
||||
|
||||
|
40
packages/cuda/lista.cu
Executable file → Normal file
40
packages/cuda/lista.cu
Executable file → Normal file
@ -967,7 +967,7 @@ vector<gpunode> L;
|
||||
extern "C"
|
||||
int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, int *inpquery, int **result, char *names, int finalDR)
|
||||
{
|
||||
cudaSetDevice(0);
|
||||
hipSetDevice(0);
|
||||
vector<rulenode> rules;
|
||||
int x;
|
||||
|
||||
@ -1029,11 +1029,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
vector<rulenode>::iterator qposr;
|
||||
|
||||
#if TIMER
|
||||
cudaEvent_t start, stop;
|
||||
hipEvent_t start, stop;
|
||||
float time;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
while(reglas.size()) /*Here's the main loop*/
|
||||
@ -1084,7 +1084,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
{
|
||||
num_refs = rows1 * cols1 * sizeof(int);
|
||||
reservar(&res, num_refs);
|
||||
cudaMemcpyAsync(res, dop1, num_refs, cudaMemcpyDeviceToDevice);
|
||||
hipMemcpyAsync(res, dop1, num_refs, hipMemcpyDeviceToDevice);
|
||||
registrar(rul_act->name, cols1, res, rows1, itr, 1);
|
||||
genflag = 1;
|
||||
rul_act->gen_ant = rul_act->gen_act;
|
||||
@ -1251,10 +1251,10 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
if(x == num_refs)
|
||||
{
|
||||
#ifdef TIMER
|
||||
cudaEvent_t start2, stop2;
|
||||
cudaEventCreate(&start2);
|
||||
cudaEventCreate(&stop2);
|
||||
cudaEventRecord(start2, 0);
|
||||
hipEvent_t start2, stop2;
|
||||
hipEventCreate(&start2);
|
||||
hipEventCreate(&stop2);
|
||||
hipEventRecord(start2, 0);
|
||||
#endif
|
||||
|
||||
//cout << rul_act->name << " res_rows = " << res_rows << endl;
|
||||
@ -1263,11 +1263,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
res_rows = unir(res, res_rows, rul_act->num_columns, &res, 0);
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop2, 0);
|
||||
cudaEventSynchronize(stop2);
|
||||
cudaEventElapsedTime(&time, start2, stop2);
|
||||
cudaEventDestroy(start2);
|
||||
cudaEventDestroy(stop2);
|
||||
hipEventRecord(stop2, 0);
|
||||
hipEventSynchronize(stop2);
|
||||
hipEventElapsedTime(&time, start2, stop2);
|
||||
hipEventDestroy(start2);
|
||||
hipEventDestroy(stop2);
|
||||
//cout << "Union = " << time << endl;
|
||||
cuda_stats.union_time += time;
|
||||
#endif
|
||||
@ -1319,16 +1319,16 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
#endif
|
||||
|
||||
#if TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.total_time += time;
|
||||
if (time > cuda_stats.max_time)
|
||||
cuda_stats.max_time = time;
|
||||
if (time < cuda_stats.min_time || cuda_stats.calls == 1)
|
||||
cuda_stats.min_time = time;
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
Cuda_Statistics();
|
||||
#endif
|
||||
|
||||
|
0
packages/cuda/lista.h
Executable file → Normal file
0
packages/cuda/lista.h
Executable file → Normal file
44
packages/cuda/memory.cu
Executable file → Normal file
44
packages/cuda/memory.cu
Executable file → Normal file
@ -144,7 +144,7 @@ void limpiar(const char s[], size_t sz)
|
||||
|
||||
if(GPUmem.size() == 0)
|
||||
{
|
||||
cudaMemGetInfo(&free,&total);
|
||||
hipMemGetInfo(&free,&total);
|
||||
cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
|
||||
exit(1);
|
||||
}
|
||||
@ -154,11 +154,11 @@ void limpiar(const char s[], size_t sz)
|
||||
{
|
||||
temp = *ini;
|
||||
temp.dev_address = (int *)malloc(ini->size);
|
||||
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
||||
hipMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, hipMemcpyDeviceToHost);
|
||||
list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||
CPUmem.insert(pos, temp);
|
||||
}
|
||||
cudaFree(ini->dev_address);
|
||||
hipFree(ini->dev_address);
|
||||
GPUmem.erase(ini);
|
||||
}
|
||||
|
||||
@ -173,19 +173,19 @@ void reservar(int **ptr, size_t size)
|
||||
return;
|
||||
}
|
||||
|
||||
cudaMemGetInfo(&free, &total);
|
||||
hipMemGetInfo(&free, &total);
|
||||
while(free < size)
|
||||
{
|
||||
cout << "Se limpio memoria " << free << " " << total << endl;
|
||||
limpiar("not enough memory", size);
|
||||
cudaMemGetInfo(&free, &total);
|
||||
hipMemGetInfo(&free, &total);
|
||||
}
|
||||
|
||||
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
||||
while(hipMalloc(ptr, size) == hipErrorMemoryAllocation)
|
||||
limpiar("Error in memory allocation", size);
|
||||
if (! *ptr ) {
|
||||
size_t free, total;
|
||||
cudaMemGetInfo( &free, &total );
|
||||
hipMemGetInfo( &free, &total );
|
||||
cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;
|
||||
cerr << "Exiting CUDA...." << endl;
|
||||
exit(1);
|
||||
@ -277,7 +277,7 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
||||
}
|
||||
size = num_rows * num_columns * sizeof(int);
|
||||
reservar(&temp, size);
|
||||
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpyAsync(temp, address_host_table, size, hipMemcpyHostToDevice);
|
||||
registrar(name, num_columns, temp, num_rows, itr, 0);
|
||||
*ptr = temp;
|
||||
return num_rows;
|
||||
@ -296,13 +296,13 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
||||
reservar(&temp, size);
|
||||
for(x = 0; x < numgpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
|
||||
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToDevice);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
cudaFree(temp_storage[x].dev_address);
|
||||
hipFree(temp_storage[x].dev_address);
|
||||
}
|
||||
for(; x < numcpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
|
||||
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyHostToDevice);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
free(temp_storage[x].dev_address);
|
||||
}
|
||||
@ -340,9 +340,9 @@ int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address
|
||||
temp = (int *)malloc(size);
|
||||
for(x = 0; x < numgpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
|
||||
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToHost);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
cudaFree(temp_storage[x].dev_address);
|
||||
hipFree(temp_storage[x].dev_address);
|
||||
}
|
||||
for(; x < numcpu; x++)
|
||||
{
|
||||
@ -404,7 +404,7 @@ int cargafinal(int name, int cols, int **ptr)
|
||||
cont = pos->rows;
|
||||
#ifdef TUFFY
|
||||
reservar(&temp, pos->size);
|
||||
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
|
||||
*ptr = temp;
|
||||
#else
|
||||
*ptr = pos->dev_address;
|
||||
@ -418,14 +418,14 @@ int cargafinal(int name, int cols, int **ptr)
|
||||
pos = gpu;
|
||||
while(pos != endg && pos->name == name)
|
||||
{
|
||||
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
|
||||
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyDeviceToDevice);
|
||||
temp += pos->size / sizeof(int);
|
||||
pos++;
|
||||
}
|
||||
pos = cpu;
|
||||
while(pos != endc && pos->name == name)
|
||||
{
|
||||
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
|
||||
temp += pos->size / sizeof(int);
|
||||
pos++;
|
||||
}
|
||||
@ -493,7 +493,7 @@ void clear_memory()
|
||||
{
|
||||
if(ini->isrule)
|
||||
{
|
||||
cudaFree(ini->dev_address);
|
||||
hipFree(ini->dev_address);
|
||||
ini = GPUmem.erase(ini);
|
||||
}
|
||||
else
|
||||
@ -518,7 +518,7 @@ void clear_memory_all()
|
||||
fin = GPUmem.end();
|
||||
while(ini != fin)
|
||||
{
|
||||
cudaFree(ini->dev_address);
|
||||
hipFree(ini->dev_address);
|
||||
ini++;
|
||||
}
|
||||
GPUmem.clear();
|
||||
@ -542,7 +542,7 @@ void liberar(int name)
|
||||
{
|
||||
fact = *i;
|
||||
GPUmem.erase(i);
|
||||
cudaFree(fact.dev_address);
|
||||
hipFree(fact.dev_address);
|
||||
}
|
||||
i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
|
||||
if(i != CPUmem.end())
|
||||
@ -566,10 +566,10 @@ void sumar(int name, int *dop1, int cols, int rows)
|
||||
newrows = rows + fact.rows;
|
||||
reservar(&res, newrows * cols * sizeof(int));
|
||||
offset = fact.rows * cols;
|
||||
cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
|
||||
hipMemcpyAsync(res, fact.dev_address, offset * sizeof(int), hipMemcpyDeviceToDevice);
|
||||
GPUmem.erase(i);
|
||||
registrar(name, cols, res, newrows, 0, 0);
|
||||
cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
|
||||
cudaFree(fact.dev_address);
|
||||
hipMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), hipMemcpyDeviceToDevice);
|
||||
hipFree(fact.dev_address);
|
||||
}
|
||||
}
|
||||
|
0
packages/cuda/memory.h
Executable file → Normal file
0
packages/cuda/memory.h
Executable file → Normal file
0
packages/cuda/pred.h
Executable file → Normal file
0
packages/cuda/pred.h
Executable file → Normal file
103
packages/cuda/selectproyect.cu
Executable file → Normal file
103
packages/cuda/selectproyect.cu
Executable file → Normal file
@ -1,3 +1,4 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <stdlib.h>
|
||||
@ -8,10 +9,10 @@
|
||||
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, posact;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -30,10 +31,10 @@ we unmark any rows that do not comply with the selections*/
|
||||
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, posact;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -56,10 +57,10 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
|
||||
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int temp, temp2, pos, x, y;
|
||||
if(threadIdx.x < cont)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < cont)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -90,10 +91,10 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
|
||||
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int temp, temp2, pos, x, y;
|
||||
if(threadIdx.x < cont)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < cont)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -120,10 +121,10 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
|
||||
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int pos, posr, x;
|
||||
if(threadIdx.x < hsize)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < hsize)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -139,10 +140,10 @@ selections, selfjoins, etc.). The array 'temp' holds the result of the prefix su
|
||||
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int pos, posr, x;
|
||||
if(threadIdx.x < hsize)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < hsize)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -184,27 +185,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
cudaMemset(temp, 0, size2);
|
||||
hipMemset(temp, 0, size2);
|
||||
size = numselect * sizeof(int);
|
||||
cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dhead, select, size, hipMemcpyHostToDevice);
|
||||
|
||||
marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselect, temp + 1);
|
||||
|
||||
if(numselfj > 0)
|
||||
{
|
||||
size = numselfj * sizeof(int);
|
||||
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
}
|
||||
|
||||
if(numpreds > 0)
|
||||
{
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
}
|
||||
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
@ -215,10 +216,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
hipFree(dhead);
|
||||
hipFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
@ -229,19 +230,19 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
cudaMemset(temp, 0, size2);
|
||||
hipMemset(temp, 0, size2);
|
||||
size = numselfj * sizeof(int);
|
||||
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
|
||||
samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
|
||||
if(numpreds > 0)
|
||||
{
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
|
||||
}
|
||||
|
||||
@ -253,10 +254,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
hipFree(dhead);
|
||||
hipFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
@ -267,14 +268,14 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
cudaMemset(temp, 0, size2);
|
||||
hipMemset(temp, 0, size2);
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
|
||||
|
||||
if(ANDlogic)
|
||||
bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||
num = res[rows];
|
||||
@ -284,10 +285,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
hipFree(dhead);
|
||||
hipFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
@ -295,9 +296,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
{
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, rows * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(proyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, head_size, fres);
|
||||
hipFree(dhead);
|
||||
*ret = fres;
|
||||
return rows;
|
||||
}
|
||||
|
0
packages/cuda/selectproyectcpu.cpp
Executable file → Normal file
0
packages/cuda/selectproyectcpu.cpp
Executable file → Normal file
347
packages/cuda/treeb.cu
Executable file → Normal file
347
packages/cuda/treeb.cu
Executable file → Normal file
@ -1,3 +1,4 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <thrust/host_vector.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/sequence.h>
|
||||
@ -160,11 +161,11 @@ __device__ int firstMatchingKeyInDataNode2(Record records[], IKeyType key)
|
||||
|
||||
__global__ void gCreateIndex(IDataNode data[], IDirectoryNode dir[], int dirSize, int tree_size, int bottom_start, int nNodesPerBlock)
|
||||
{
|
||||
int startIdx = blockIdx.x * nNodesPerBlock;
|
||||
int startIdx = hipBlockIdx_x * nNodesPerBlock;
|
||||
int endIdx = startIdx + nNodesPerBlock;
|
||||
if(endIdx > dirSize)
|
||||
endIdx = dirSize;
|
||||
int keyIdx = threadIdx.x;
|
||||
int keyIdx = hipThreadIdx_x;
|
||||
|
||||
// Proceed only when in internal nodes
|
||||
for(int nodeIdx = startIdx; nodeIdx < endIdx; nodeIdx++)
|
||||
@ -191,11 +192,11 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
|
||||
{
|
||||
// Bringing the root node (visited by every tuple) to the faster shared memory
|
||||
__shared__ IKeyType RootNodeKeys[TREE_NODE_SIZE];
|
||||
RootNodeKeys[threadIdx.x] = dir->keys[threadIdx.x];
|
||||
RootNodeKeys[hipThreadIdx_x] = dir->keys[hipThreadIdx_x];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
int OverallThreadIdx = blockIdx.x * THRD_PER_BLCK_search + threadIdx.x;
|
||||
int OverallThreadIdx = hipBlockIdx_x * THRD_PER_BLCK_search + hipThreadIdx_x;
|
||||
|
||||
for(int keyIdx = OverallThreadIdx; keyIdx < nSearchKeys; keyIdx += THRD_PER_GRID_search)
|
||||
{
|
||||
@ -219,7 +220,7 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
|
||||
/*Counts the number of times a row in 'S' is to be joined to a row in 'R'.*/
|
||||
__global__ void gIndexJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[])
|
||||
{
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(s_cur < sLen)
|
||||
{
|
||||
@ -246,11 +247,11 @@ in 'g_locations' those rows that have equal values in the checked columns.*/
|
||||
__global__ void gIndexMultiJoinNegative(int *R, int *S, int g_locations[], int rLen, int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int posr, poss, x;
|
||||
|
||||
if(threadIdx.x < wj)
|
||||
shared[threadIdx.x] = muljoin[threadIdx.x];
|
||||
if(hipThreadIdx_x < wj)
|
||||
shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(r_cur < rLen)
|
||||
@ -287,11 +288,11 @@ times a row in 'S' is to be joined to its corresponding row in 'R', storing the
|
||||
__global__ void gIndexMultiJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[], int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int posr, poss, x;
|
||||
|
||||
if(threadIdx.x < wj)
|
||||
shared[threadIdx.x] = muljoin[threadIdx.x];
|
||||
if(hipThreadIdx_x < wj)
|
||||
shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -330,10 +331,10 @@ __global__ void multiJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int *extjoins = &shared[lenrul];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < (lenrul + wj))
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < (lenrul + wj))
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -382,10 +383,10 @@ __global__ void multiJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSum
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int *extjoins = &shared[cols];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < (cols + wj))
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < (cols + wj))
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -432,11 +433,11 @@ predicate are projected.*/
|
||||
__global__ void gJoinWithWriteNegative(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int halfrul, int *mloc)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int posr;
|
||||
|
||||
if(threadIdx.x < halfrul)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < halfrul)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(r_cur < rLen)
|
||||
@ -461,11 +462,11 @@ predicate are projected.*/
|
||||
__global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int cols, int *mloc)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int posr;
|
||||
|
||||
if(threadIdx.x < cols)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < cols)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(r_cur < rLen)
|
||||
@ -489,10 +490,10 @@ __global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinR
|
||||
__global__ void gJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int halfrul, int lenrul, int *mloc, int *sloc)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < lenrul)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < lenrul)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -525,10 +526,10 @@ projection, which is performed based on the variables in the head of the rule.*/
|
||||
__global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int cols, int *mloc, int *sloc)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < cols)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < cols)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -563,7 +564,7 @@ __global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[],
|
||||
/*Load part of column 'wj' of 'p' in 'R'. Which values are loaded is defined by the prefix sum results in 'pos'.*/
|
||||
__global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *ids)
|
||||
{
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int cond;
|
||||
if(id < len)
|
||||
{
|
||||
@ -579,7 +580,7 @@ __global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *i
|
||||
/*Load an entire column from 'p' into 'R'.*/
|
||||
__global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
|
||||
{
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
if(id < len)
|
||||
R[id] = p[id * of + wj];
|
||||
}
|
||||
@ -587,10 +588,10 @@ __global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
|
||||
__global__ void projectfinal(int *res, int rows, int cols, int *rule, int *out)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < cols)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < cols)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(id < rows)
|
||||
@ -614,26 +615,26 @@ void project(int *res, int resrows, int numcols1, int numcols2, int *proj, int *
|
||||
int *pt = (int *)malloc(sizepro);
|
||||
for(z = 0; z < numcols2; z++)
|
||||
pt[z] = proj[z] - 1;
|
||||
cudaMemcpy(dcons, pt, sizepro, cudaMemcpyHostToDevice);
|
||||
//cudaDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
|
||||
hipMemcpy(dcons, pt, sizepro, hipMemcpyHostToDevice);
|
||||
//hipDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
|
||||
free(pt);
|
||||
}
|
||||
else
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
reservar(&d_Rout, resrows * sizepro);
|
||||
projectfinal<<<blockllen, numthreads, sizepro>>>(res, resrows, numcols1, dcons, d_Rout);
|
||||
cudaFree(dcons);
|
||||
cudaFree(*ret);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(projectfinal), dim3(blockllen), dim3(numthreads), sizepro, 0, res, resrows, numcols1, dcons, d_Rout);
|
||||
hipFree(dcons);
|
||||
hipFree(*ret);
|
||||
*ret = d_Rout;
|
||||
}
|
||||
|
||||
__global__ void projectadd(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, int *dhead, int hsize, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int pos2, posr, x, y, cond;
|
||||
if(threadIdx.x < hsize)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < hsize)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows2)
|
||||
{
|
||||
@ -662,10 +663,10 @@ void juntar(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, in
|
||||
int blockllen = rows2 / numthreads + 1;
|
||||
sizepro = pcols * sizeof(int);
|
||||
reservar(&dcons, sizepro);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
reservar(&d_Rout, rows1 * rows2 * sizepro);
|
||||
projectadd<<<blockllen, numthreads, sizepro>>>(dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
|
||||
cudaFree(dcons);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(projectadd), dim3(blockllen), dim3(numthreads), sizepro, 0, dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
|
||||
hipFree(dcons);
|
||||
*ret = d_Rout;
|
||||
}
|
||||
|
||||
@ -743,51 +744,51 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
|
||||
#ifdef TIMER
|
||||
//cout << "INICIO" << endl;
|
||||
cudaEvent_t start, stop;
|
||||
hipEvent_t start, stop;
|
||||
float time;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
if(npred2.x > 0 || npred2.y > 0 || nsel2 > 0 || nsj2 > 0)
|
||||
{
|
||||
newLen = sLen + 1;
|
||||
cudaMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||
hipMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||
}
|
||||
|
||||
if(npred2.x > 0 || npred2.y > 0)
|
||||
{
|
||||
size = npred2tot * sizeof(int);
|
||||
cudaMemcpy(dcons, pred2, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred2, size, hipMemcpyHostToDevice);
|
||||
|
||||
if(npred2.y > 0) /*Fix case when a(X,Y),b(Y,Z),Z > Y*/
|
||||
{
|
||||
reservar(&temp2, sizet2);
|
||||
cudaMemsetAsync(temp2, 0, newLen * sizeof(int));
|
||||
hipMemsetAsync(temp2, 0, newLen * sizeof(int));
|
||||
//res = thrust::device_pointer_cast(temp2);
|
||||
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(negative)
|
||||
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||
else
|
||||
bpredsOR<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsOR), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||
}
|
||||
|
||||
if(nsel2 > 0)
|
||||
{
|
||||
size = nsel2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice);
|
||||
marcar<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||
hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||
}
|
||||
|
||||
if(nsj2 > 0)
|
||||
{
|
||||
size = nsj2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -795,14 +796,14 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(nsel2 > 0)
|
||||
{
|
||||
size = nsel2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice);
|
||||
marcar2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||
hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||
|
||||
if(nsj2 > 0)
|
||||
{
|
||||
size = nsj2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -810,15 +811,15 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(nsj2 > 0)
|
||||
{
|
||||
size = nsj2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
||||
samejoin2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
sizem32S = m32sLen * sizeof(int);
|
||||
reservar(&d_S, sizem32S);
|
||||
cudaMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
|
||||
llenarnosel<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1]);
|
||||
hipMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -842,8 +843,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
|
||||
if(newLen == 0) // && !negative) ARREGLAR
|
||||
{
|
||||
cudaFree(temp);
|
||||
cudaFree(dcons);
|
||||
hipFree(temp);
|
||||
hipFree(dcons);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -854,24 +855,24 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
|
||||
reservar(&d_S, sizem32S);
|
||||
reservar(&posS, sizem32S);
|
||||
cudaMemsetAsync(d_S + newLen, 0x7f, sizextra);
|
||||
cudaMemsetAsync(posS + newLen, 0x7f, sizextra);
|
||||
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
|
||||
hipMemsetAsync(d_S + newLen, 0x7f, sizextra);
|
||||
hipMemsetAsync(posS + newLen, 0x7f, sizextra);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1], temp, posS);
|
||||
sLen = newLen;
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
//cout << "Select1 = " << time << endl;
|
||||
cuda_stats.select1_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
blockllen = rLen / numthreads + 1;
|
||||
@ -880,30 +881,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
{
|
||||
if(temp2 != NULL)
|
||||
{
|
||||
cudaFree(temp);
|
||||
hipFree(temp);
|
||||
temp = temp2;
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
newLen = rLen + 1;
|
||||
if(nsel1 > 0)
|
||||
{
|
||||
size = nsel1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice);
|
||||
marcar<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||
hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||
}
|
||||
if(nsj1 > 0)
|
||||
{
|
||||
size = nsj1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
}
|
||||
if(npred1.x > 0)
|
||||
{
|
||||
size = npred1.x * sizeof(int);
|
||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -911,30 +912,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(npred1.x > 0 || nsel1 > 0 || nsj1 > 0)
|
||||
{
|
||||
newLen = rLen + 1;
|
||||
cudaMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||
hipMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||
}
|
||||
|
||||
if(nsel1 > 0)
|
||||
{
|
||||
size = nsel1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice);
|
||||
marcar2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||
hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||
|
||||
if(nsj1 > 0)
|
||||
{
|
||||
size = nsj1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
}
|
||||
|
||||
if(npred1.x > 0)
|
||||
{
|
||||
size = npred1.x * sizeof(int);
|
||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -942,17 +943,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(nsj1 > 0)
|
||||
{
|
||||
size = nsj1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
|
||||
samejoin2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
|
||||
if(npred1.x > 0)
|
||||
{
|
||||
size = npred1.x * sizeof(int);
|
||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -960,11 +961,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(npred1.x > 0)
|
||||
{
|
||||
size = npred1.x * sizeof(int);
|
||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
else
|
||||
bpredsorlogic2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -976,11 +977,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
newLen = res[rLen];
|
||||
if(newLen == 0)
|
||||
{
|
||||
cudaFree(temp);
|
||||
cudaFree(dcons);
|
||||
cudaFree(d_S);
|
||||
hipFree(temp);
|
||||
hipFree(dcons);
|
||||
hipFree(d_S);
|
||||
if(posS != NULL)
|
||||
cudaFree(posS);
|
||||
hipFree(posS);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -991,41 +992,41 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
|
||||
reservar(&d_R, sizem32);
|
||||
reservar(&posR, sizem32);
|
||||
cudaMemsetAsync(d_R + newLen, 0x7f, sizextra);
|
||||
cudaMemsetAsync(posR + newLen, 0x7f, sizextra);
|
||||
llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR);
|
||||
hipMemsetAsync(d_R + newLen, 0x7f, sizextra);
|
||||
hipMemsetAsync(posR + newLen, 0x7f, sizextra);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0], temp, posR);
|
||||
rLen = newLen;
|
||||
}
|
||||
else
|
||||
{
|
||||
sizem32 = m32rLen * sizeof(int);
|
||||
reservar(&d_R, sizem32);
|
||||
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
|
||||
hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
sizem32 = m32rLen * sizeof(int);
|
||||
reservar(&d_R, sizem32);
|
||||
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
|
||||
hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
//cout << "Select2 = " << time << endl;
|
||||
cuda_stats.select2_time += time;
|
||||
#endif
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
thrust::device_ptr<Record> dvp1;
|
||||
@ -1084,17 +1085,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
//cout << "Sort = " << time << endl;
|
||||
cuda_stats.sort_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
IDataNode* d_data;
|
||||
@ -1123,7 +1124,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
dim3 Dbc(THRD_PER_BLCK_create, 1, 1);
|
||||
dim3 Dgc(BLCK_PER_GRID_create, 1, 1);
|
||||
|
||||
gCreateIndex <<<Dgc, Dbc>>> (d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gCreateIndex), dim3(Dgc), dim3(Dbc), 0, 0, d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);
|
||||
|
||||
int *d_locations;
|
||||
int memSizeR;
|
||||
@ -1132,7 +1133,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
{
|
||||
memSizeR = (rLen + 1) * sizeof(int);
|
||||
reservar(&d_locations, memSizeR);
|
||||
cudaMemsetAsync(d_locations, 0, sizeof(int));
|
||||
hipMemsetAsync(d_locations, 0, sizeof(int));
|
||||
nSearchKeys = rLen;
|
||||
}
|
||||
else
|
||||
@ -1146,13 +1147,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
unsigned int nKeysPerThread = uintCeilingDiv(nSearchKeys, THRD_PER_GRID_search);
|
||||
if(negative)
|
||||
{
|
||||
gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
|
||||
cudaMemsetAsync(temp, 0, memSizeR);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
|
||||
hipMemsetAsync(temp, 0, memSizeR);
|
||||
}
|
||||
else
|
||||
{
|
||||
gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
|
||||
cudaMemsetAsync(temp, 0, memSizeS);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
|
||||
hipMemsetAsync(temp, 0, memSizeS);
|
||||
}
|
||||
|
||||
int muljoin = 0, muljoinsize = 0, sum;
|
||||
@ -1165,8 +1166,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
{
|
||||
muljoin = numj - 2;
|
||||
muljoinsize = muljoin * sizeof(int);
|
||||
cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||
gIndexMultiJoinNegative<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
|
||||
hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoinNegative), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
|
||||
}
|
||||
|
||||
res = thrust::device_pointer_cast(d_locations);
|
||||
@ -1177,21 +1178,21 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(pos == (rule->num_rows - 3))
|
||||
{
|
||||
sizepro = rule->num_columns * sizeof(int);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
resSize = sum * sizepro;
|
||||
reservar(&d_Rout, resSize);
|
||||
gJoinWithWriteNegative2<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
|
||||
}
|
||||
else
|
||||
{
|
||||
sizepro = projp.x * sizeof(int);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
resSize = sum * sizepro;
|
||||
reservar(&d_Rout, resSize);
|
||||
gJoinWithWriteNegative<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
|
||||
}
|
||||
cudaFree(d_R);
|
||||
cudaFree(d_S);
|
||||
hipFree(d_R);
|
||||
hipFree(d_S);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1200,26 +1201,26 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
{
|
||||
muljoin = numj - 2;
|
||||
muljoinsize = muljoin * sizeof(int);
|
||||
cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||
gIndexMultiJoin<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
|
||||
hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoin), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
|
||||
}
|
||||
else
|
||||
gIndexJoin<<<blockllen, numthreads>>> (d_R, d_S, d_locations, sLen, temp);
|
||||
cudaFree(d_R);
|
||||
cudaFree(d_S);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gIndexJoin), dim3(blockllen), dim3(numthreads), 0, 0, d_R, d_S, d_locations, sLen, temp);
|
||||
hipFree(d_R);
|
||||
hipFree(d_S);
|
||||
|
||||
sum = res[sLen-1];
|
||||
thrust::exclusive_scan(res, res + sLen, res);
|
||||
sum += res[sLen-1];
|
||||
if(sum == 0)
|
||||
{
|
||||
cudaFree(dcons);
|
||||
cudaFree(d_locations);
|
||||
cudaFree(temp);
|
||||
hipFree(dcons);
|
||||
hipFree(d_locations);
|
||||
hipFree(temp);
|
||||
if(posS != NULL)
|
||||
cudaFree(posS);
|
||||
hipFree(posS);
|
||||
if(posR != NULL)
|
||||
cudaFree(posR);
|
||||
hipFree(posR);
|
||||
return 0;
|
||||
}
|
||||
res[sLen] = sum;
|
||||
@ -1227,49 +1228,49 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(pos == (rule->num_rows - 3))
|
||||
{
|
||||
sizepro = rule->num_columns * sizeof(int);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
resSize = sum * sizepro;
|
||||
reservar(&d_Rout, resSize);
|
||||
if(numj > 2)
|
||||
{
|
||||
cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||
multiJoinWithWrite2<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
|
||||
hipMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
|
||||
}
|
||||
else
|
||||
gJoinWithWrite2<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
|
||||
}
|
||||
else
|
||||
{
|
||||
sizepro = projp.y * sizeof(int);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
resSize = sum * sizepro;
|
||||
reservar(&d_Rout, resSize);
|
||||
if(numj > 2)
|
||||
{
|
||||
cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||
multiJoinWithWrite<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
|
||||
hipMemcpy(dcons + projp.y, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
|
||||
}
|
||||
else
|
||||
gJoinWithWrite<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(dcons);
|
||||
cudaFree(d_locations);
|
||||
cudaFree(temp);
|
||||
hipFree(dcons);
|
||||
hipFree(d_locations);
|
||||
hipFree(temp);
|
||||
if(posS != NULL)
|
||||
cudaFree(posS);
|
||||
hipFree(posS);
|
||||
if(posR != NULL)
|
||||
cudaFree(posR);
|
||||
hipFree(posR);
|
||||
|
||||
if(*ret != NULL)
|
||||
cudaFree(*ret);
|
||||
hipFree(*ret);
|
||||
*ret = d_Rout;
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
//cout << "Join = " << time << endl;
|
||||
//cout << "FIN" << endl;
|
||||
cuda_stats.join_time += time;
|
||||
|
80
packages/cuda/union2.cu
Executable file → Normal file
80
packages/cuda/union2.cu
Executable file → Normal file
@ -87,8 +87,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -122,8 +122,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -157,8 +157,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -192,8 +192,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -227,8 +227,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -262,8 +262,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -297,8 +297,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -332,8 +332,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -367,8 +367,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -402,8 +402,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -437,8 +437,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -472,8 +472,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -507,8 +507,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -542,8 +542,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -577,8 +577,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -612,8 +612,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -647,8 +647,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -682,8 +682,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -717,8 +717,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -752,8 +752,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
|
0
packages/cuda/union2.h
Executable file → Normal file
0
packages/cuda/union2.h
Executable file → Normal file
0
packages/cuda/unioncpu2.cpp
Executable file → Normal file
0
packages/cuda/unioncpu2.cpp
Executable file → Normal file
@ -2,19 +2,21 @@
|
||||
sqlite3 side:
|
||||
create table test (id integer, x integer, y integer);
|
||||
.separator ","
|
||||
.import /home/vsc/Yap/ILP/HH/DaysInHospital_Y3.csv test
|
||||
.import DaysInHospital_Y3.csv hh
|
||||
.export hh
|
||||
.save hh
|
||||
|
||||
myddas side:
|
||||
use_module(library(myddas)).
|
||||
db_open(sqlite3,con,'../hh',x,x).
|
||||
db_open(sqlite3,con,'.hh',x,x).
|
||||
db_close(con).
|
||||
|
||||
test 2:
|
||||
|
||||
use_module(library(myddas)).
|
||||
assert((
|
||||
t2 :-
|
||||
db_open(sqlite3,con,'../hh',x,x),
|
||||
t2 :-
|
||||
db_open(sqlite3,con,'hh',x,x),
|
||||
db_import(con,test,test),
|
||||
test(A,B,C),
|
||||
writeln(test(A,B,C)),
|
||||
@ -22,4 +24,3 @@ fail
|
||||
)).
|
||||
trace.
|
||||
t2.
|
||||
|
||||
|
@ -15,7 +15,12 @@
|
||||
* *
|
||||
*************************************************************************/
|
||||
|
||||
#if USE_MYDDAS
|
||||
#if 1 //USE_MYDDAS
|
||||
|
||||
:- load_foreign_files([myddas], [], init_myddas).
|
||||
|
||||
/* Initialize MYDDAS GLOBAL STRUCTURES */
|
||||
:- c_db_initialize_myddas.
|
||||
|
||||
#ifdef DEBUG
|
||||
:- yap_flag(single_var_warnings,on).
|
||||
@ -800,8 +805,6 @@
|
||||
]).
|
||||
|
||||
#ifdef MYDDAS_MYSQL
|
||||
:- load_foreign_files([], [], init_mysql).
|
||||
|
||||
:- use_module(myddas_mysql,[
|
||||
db_my_result_set/1,
|
||||
db_datalog_describe/1,
|
||||
@ -867,7 +870,7 @@
|
||||
% db_open/4
|
||||
%
|
||||
|
||||
#if MYDDAS_DECLARATIONS
|
||||
#if 1 // MYDDAS_DECLARATIONS
|
||||
:- db_open(Protocol) extra_arguments
|
||||
db=Db,
|
||||
port=Port,
|
||||
|
@ -23,12 +23,11 @@ Android/jni/sqlite/nativehelper/jni.h
|
||||
#sqlite3 is now in the system
|
||||
|
||||
set (SQLITE3_FOUND ON PARENT_SCOPE)
|
||||
macro_log_feature (SQLITE3_FOUND "Sqlite3"
|
||||
"Sqlite3 Data-Base "
|
||||
"http://www.sqlite3ql.org" FALSE)
|
||||
message (
|
||||
" * Sqlite3 Data-Base (http://www.sqlite3ql.org) is distributed with
|
||||
MYDDAS" )
|
||||
|
||||
|
||||
include_directories (${SQLITE3_INCLUDE_DIRECTORIES} .. . Android/jni/sqlite Android/jni/sqlite/nativehelper)
|
||||
include_directories ( .. . Android/jni/sqlite Android/jni/sqlite/nativehelper)
|
||||
|
||||
set (MYDDAS_FLAGS ${MYDDAS_FLAGS} -DMYDDAS_SQLITE3=1 PARENT_SCOPE)
|
||||
set_property( DIRECTORY .. APPEND PROPERTY COMPILE_DEFINITIONS MYDDAS_SQLITE3=1 )
|
||||
|
10
packages/python/setup.py.cmake
Normal file
10
packages/python/setup.py.cmake
Normal file
@ -0,0 +1,10 @@
|
||||
from setuptools import setup, Extension
|
||||
|
||||
|
||||
setup(
|
||||
name = "yapex",
|
||||
version = "0.1",
|
||||
package_dir = {'': '${CMAKE_SOURCE_DIR}/packages/python' },
|
||||
py_modules = ['yapex']
|
||||
|
||||
)
|
1283
packages/python/yap_kernel/prolog.js
Normal file
1283
packages/python/yap_kernel/prolog.js
Normal file
File diff suppressed because it is too large
Load Diff
@ -78,14 +78,11 @@ static lbfgsfloatval_t evaluate(
|
||||
|
||||
a1 = YAP_ArgOfTerm(1,call);
|
||||
if (YAP_IsFloatTerm(a1)) {
|
||||
YAP_ShutdownGoal( TRUE );
|
||||
return (lbfgsfloatval_t) YAP_FloatOfTerm(a1);
|
||||
} else if (YAP_IsIntTerm(a1)) {
|
||||
YAP_ShutdownGoal( TRUE );
|
||||
return (lbfgsfloatval_t) YAP_IntOfTerm(a1);
|
||||
}
|
||||
|
||||
YAP_ShutdownGoal( TRUE );
|
||||
fprintf(stderr, "ERROR: The evaluate call back function did not return a number as first argument.\n");
|
||||
return 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user