python support

This commit is contained in:
Vitor Santos Costa 2016-06-28 23:47:09 +01:00
parent c2fb631106
commit 3f59ec40cd
30 changed files with 1684 additions and 386 deletions

View File

@ -240,7 +240,6 @@ timer_stop(Name,Duration) :-
->
statistics(walltime,[StopTime,_]),
Duration is StopTime-StartTime;
throw(timer_not_started(timer_stop(Name,Duration)))
).

View File

@ -1,4 +1,4 @@
1%%% -*- Mode: Prolog; -*-
%%% -*- Mode: Prolog; -*-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
@ -567,6 +567,8 @@ init_learning :-
set_problog_flag(alpha,Alpha)
)
)
;
true
),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

0
packages/cuda/CC_CSSTree.cu Executable file → Normal file
View File

0
packages/cuda/CC_CSSTree.h Executable file → Normal file
View File

0
packages/cuda/Makefile.in Executable file → Normal file
View File

37
packages/cuda/bpreds.cu Executable file → Normal file
View File

@ -1,3 +1,4 @@
#include "hip/hip_runtime.h"
#include <thrust/device_vector.h>
#include <thrust/scan.h>
#include <cstdarg>
@ -25,10 +26,10 @@ int maximo(int count, ...)
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, rowact1, op1, op2;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
if(hipThreadIdx_x < numc)
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -110,10 +111,10 @@ __global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *co
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, op1, op2;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
if(hipThreadIdx_x < numc)
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -159,10 +160,10 @@ __global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc,
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, op1, op2;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
if(hipThreadIdx_x < numc)
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -226,10 +227,10 @@ __global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc,
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, rowact1, op1, op2;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
if(hipThreadIdx_x < numc)
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -344,10 +345,10 @@ __global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, op1, op2;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
if(hipThreadIdx_x < numc)
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -411,10 +412,10 @@ __global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, op1, op2;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
if(hipThreadIdx_x < numc)
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{

1
packages/cuda/bpreds.h Executable file → Normal file
View File

@ -1,3 +1,4 @@
#include "hip/hip_runtime.h"
#ifndef _BPREDS_H_
#define _BPREDS_H_

0
packages/cuda/bpredscpu.cpp Executable file → Normal file
View File

4
packages/cuda/creator2.c Executable file → Normal file
View File

@ -66,7 +66,7 @@ int main(int argc, char *argv[])
fprintf(cuda, "\t\t\t{\n");
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
fprintf(cuda, "\t\t\t}\n");
@ -103,7 +103,7 @@ int main(int argc, char *argv[])
fprintf(cuda, "\t\t\t{\n");
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
fprintf(cuda, "\t\t\t}\n");

0
packages/cuda/cuda.c Executable file → Normal file
View File

0
packages/cuda/cuda.yap Executable file → Normal file
View File

View File

@ -27,8 +27,8 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
cudaFree(dop1);
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
hipFree(dop1);
*result = hres;
}
else
@ -39,13 +39,13 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
int *dop2;
tipo = res_rows * cols1 * sizeof(int);
reservar(&dop2, tipo);
cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice);
hipMemcpy(dop2, dop1, tipo, hipMemcpyHostToDevice);
free(dop1);
res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost);
cudaFree(dop2);
hipMemcpy(hres, dop2, tipo, hipMemcpyDeviceToHost);
hipFree(dop2);
*result = hres;
}
else
@ -315,8 +315,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
cudaFree(dop1);
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
hipFree(dop1);
w = z + 1;
strtok(qposr->rulename, "_");
@ -353,8 +353,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
res_rows = abs(res_rows);
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
cudaFree(dop1);
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
hipFree(dop1);
char file[] = "/dev/shm/buffer.csv";
FILE *fp;
@ -554,7 +554,7 @@ void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator f
sign = tmpfact.predname;
tipo = res_rows * cols1 * sizeof(int);
hres = (int *)malloc(tipo);
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
sumar(tmpfact.name, dop1, cols1, res_rows);
}

62
packages/cuda/joincpu.cpp Executable file → Normal file
View File

@ -324,11 +324,11 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
}
#ifdef TIMER
cudaEvent_t start, stop;
hipEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
if(nsel1 > 0 || nsj1 > 0)
@ -359,16 +359,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
}
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
cuda_stats.select1_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventDestroy(start);
hipEventDestroy(stop);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
if(nsel2 > 0 || nsj2 > 0)
@ -381,16 +381,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
Snl = sLen;
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
cuda_stats.select2_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventDestroy(start);
hipEventDestroy(stop);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
//cout << "antes" << endl;
@ -406,16 +406,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation);
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
cuda_stats.sort_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventDestroy(start);
hipEventDestroy(stop);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
/*cout << "despues" << endl;
@ -482,9 +482,9 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
*ret = fres;
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
cuda_stats.join_time += time;
#endif

40
packages/cuda/lista.cu Executable file → Normal file
View File

@ -967,7 +967,7 @@ vector<gpunode> L;
extern "C"
int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, int *inpquery, int **result, char *names, int finalDR)
{
cudaSetDevice(0);
hipSetDevice(0);
vector<rulenode> rules;
int x;
@ -1029,11 +1029,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
vector<rulenode>::iterator qposr;
#if TIMER
cudaEvent_t start, stop;
hipEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
while(reglas.size()) /*Here's the main loop*/
@ -1084,7 +1084,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
{
num_refs = rows1 * cols1 * sizeof(int);
reservar(&res, num_refs);
cudaMemcpyAsync(res, dop1, num_refs, cudaMemcpyDeviceToDevice);
hipMemcpyAsync(res, dop1, num_refs, hipMemcpyDeviceToDevice);
registrar(rul_act->name, cols1, res, rows1, itr, 1);
genflag = 1;
rul_act->gen_ant = rul_act->gen_act;
@ -1251,10 +1251,10 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
if(x == num_refs)
{
#ifdef TIMER
cudaEvent_t start2, stop2;
cudaEventCreate(&start2);
cudaEventCreate(&stop2);
cudaEventRecord(start2, 0);
hipEvent_t start2, stop2;
hipEventCreate(&start2);
hipEventCreate(&stop2);
hipEventRecord(start2, 0);
#endif
//cout << rul_act->name << " res_rows = " << res_rows << endl;
@ -1263,11 +1263,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
res_rows = unir(res, res_rows, rul_act->num_columns, &res, 0);
#ifdef TIMER
cudaEventRecord(stop2, 0);
cudaEventSynchronize(stop2);
cudaEventElapsedTime(&time, start2, stop2);
cudaEventDestroy(start2);
cudaEventDestroy(stop2);
hipEventRecord(stop2, 0);
hipEventSynchronize(stop2);
hipEventElapsedTime(&time, start2, stop2);
hipEventDestroy(start2);
hipEventDestroy(stop2);
//cout << "Union = " << time << endl;
cuda_stats.union_time += time;
#endif
@ -1319,16 +1319,16 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
#endif
#if TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
cuda_stats.total_time += time;
if (time > cuda_stats.max_time)
cuda_stats.max_time = time;
if (time < cuda_stats.min_time || cuda_stats.calls == 1)
cuda_stats.min_time = time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
hipEventDestroy(start);
hipEventDestroy(stop);
Cuda_Statistics();
#endif

0
packages/cuda/lista.h Executable file → Normal file
View File

44
packages/cuda/memory.cu Executable file → Normal file
View File

@ -144,7 +144,7 @@ void limpiar(const char s[], size_t sz)
if(GPUmem.size() == 0)
{
cudaMemGetInfo(&free,&total);
hipMemGetInfo(&free,&total);
cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
exit(1);
}
@ -154,11 +154,11 @@ void limpiar(const char s[], size_t sz)
{
temp = *ini;
temp.dev_address = (int *)malloc(ini->size);
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
hipMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, hipMemcpyDeviceToHost);
list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
CPUmem.insert(pos, temp);
}
cudaFree(ini->dev_address);
hipFree(ini->dev_address);
GPUmem.erase(ini);
}
@ -173,19 +173,19 @@ void reservar(int **ptr, size_t size)
return;
}
cudaMemGetInfo(&free, &total);
hipMemGetInfo(&free, &total);
while(free < size)
{
cout << "Se limpio memoria " << free << " " << total << endl;
limpiar("not enough memory", size);
cudaMemGetInfo(&free, &total);
hipMemGetInfo(&free, &total);
}
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
while(hipMalloc(ptr, size) == hipErrorMemoryAllocation)
limpiar("Error in memory allocation", size);
if (! *ptr ) {
size_t free, total;
cudaMemGetInfo( &free, &total );
hipMemGetInfo( &free, &total );
cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;
cerr << "Exiting CUDA...." << endl;
exit(1);
@ -277,7 +277,7 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
}
size = num_rows * num_columns * sizeof(int);
reservar(&temp, size);
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
hipMemcpyAsync(temp, address_host_table, size, hipMemcpyHostToDevice);
registrar(name, num_columns, temp, num_rows, itr, 0);
*ptr = temp;
return num_rows;
@ -296,13 +296,13 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
reservar(&temp, size);
for(x = 0; x < numgpu; x++)
{
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToDevice);
inc += temp_storage[x].size / sizeof(int);
cudaFree(temp_storage[x].dev_address);
hipFree(temp_storage[x].dev_address);
}
for(; x < numcpu; x++)
{
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyHostToDevice);
inc += temp_storage[x].size / sizeof(int);
free(temp_storage[x].dev_address);
}
@ -340,9 +340,9 @@ int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address
temp = (int *)malloc(size);
for(x = 0; x < numgpu; x++)
{
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToHost);
inc += temp_storage[x].size / sizeof(int);
cudaFree(temp_storage[x].dev_address);
hipFree(temp_storage[x].dev_address);
}
for(; x < numcpu; x++)
{
@ -404,7 +404,7 @@ int cargafinal(int name, int cols, int **ptr)
cont = pos->rows;
#ifdef TUFFY
reservar(&temp, pos->size);
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
*ptr = temp;
#else
*ptr = pos->dev_address;
@ -418,14 +418,14 @@ int cargafinal(int name, int cols, int **ptr)
pos = gpu;
while(pos != endg && pos->name == name)
{
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyDeviceToDevice);
temp += pos->size / sizeof(int);
pos++;
}
pos = cpu;
while(pos != endc && pos->name == name)
{
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
temp += pos->size / sizeof(int);
pos++;
}
@ -493,7 +493,7 @@ void clear_memory()
{
if(ini->isrule)
{
cudaFree(ini->dev_address);
hipFree(ini->dev_address);
ini = GPUmem.erase(ini);
}
else
@ -518,7 +518,7 @@ void clear_memory_all()
fin = GPUmem.end();
while(ini != fin)
{
cudaFree(ini->dev_address);
hipFree(ini->dev_address);
ini++;
}
GPUmem.clear();
@ -542,7 +542,7 @@ void liberar(int name)
{
fact = *i;
GPUmem.erase(i);
cudaFree(fact.dev_address);
hipFree(fact.dev_address);
}
i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
if(i != CPUmem.end())
@ -566,10 +566,10 @@ void sumar(int name, int *dop1, int cols, int rows)
newrows = rows + fact.rows;
reservar(&res, newrows * cols * sizeof(int));
offset = fact.rows * cols;
cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
hipMemcpyAsync(res, fact.dev_address, offset * sizeof(int), hipMemcpyDeviceToDevice);
GPUmem.erase(i);
registrar(name, cols, res, newrows, 0, 0);
cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
cudaFree(fact.dev_address);
hipMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), hipMemcpyDeviceToDevice);
hipFree(fact.dev_address);
}
}

0
packages/cuda/memory.h Executable file → Normal file
View File

0
packages/cuda/pred.h Executable file → Normal file
View File

103
packages/cuda/selectproyect.cu Executable file → Normal file
View File

@ -1,3 +1,4 @@
#include "hip/hip_runtime.h"
#include <thrust/device_vector.h>
#include <thrust/scan.h>
#include <stdlib.h>
@ -8,10 +9,10 @@
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, posact;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
if(hipThreadIdx_x < numc)
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -30,10 +31,10 @@ we unmark any rows that do not comply with the selections*/
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int x, rowact, posact;
if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x];
if(hipThreadIdx_x < numc)
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -56,10 +57,10 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int temp, temp2, pos, x, y;
if(threadIdx.x < cont)
shared[threadIdx.x] = dhead[threadIdx.x];
if(hipThreadIdx_x < cont)
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -90,10 +91,10 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int temp, temp2, pos, x, y;
if(threadIdx.x < cont)
shared[threadIdx.x] = dhead[threadIdx.x];
if(hipThreadIdx_x < cont)
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -120,10 +121,10 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int pos, posr, x;
if(threadIdx.x < hsize)
shared[threadIdx.x] = dhead[threadIdx.x];
if(hipThreadIdx_x < hsize)
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -139,10 +140,10 @@ selections, selfjoins, etc.). The array 'temp' holds the result of the prefix su
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int pos, posr, x;
if(threadIdx.x < hsize)
shared[threadIdx.x] = dhead[threadIdx.x];
if(hipThreadIdx_x < hsize)
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads();
if(id < rows)
{
@ -184,27 +185,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
tmplen = rows + 1;
size2 = tmplen * sizeof(int);
reservar(&temp, size2);
cudaMemset(temp, 0, size2);
hipMemset(temp, 0, size2);
size = numselect * sizeof(int);
cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
hipMemcpy(dhead, select, size, hipMemcpyHostToDevice);
marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselect, temp + 1);
if(numselfj > 0)
{
size = numselfj * sizeof(int);
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
}
if(numpreds > 0)
{
size = numpreds * sizeof(int);
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
else
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
}
res = thrust::device_pointer_cast(temp);
@ -215,10 +216,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
size = head_size * sizeof(int);
reservar(&fres, num * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
cudaFree(dhead);
cudaFree(temp);
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
hipFree(dhead);
hipFree(temp);
*ret = fres;
return num;
}
@ -229,19 +230,19 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
tmplen = rows + 1;
size2 = tmplen * sizeof(int);
reservar(&temp, size2);
cudaMemset(temp, 0, size2);
hipMemset(temp, 0, size2);
size = numselfj * sizeof(int);
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
if(numpreds > 0)
{
size = numpreds * sizeof(int);
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
else
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
}
@ -253,10 +254,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
size = head_size * sizeof(int);
reservar(&fres, num * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
cudaFree(dhead);
cudaFree(temp);
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
hipFree(dhead);
hipFree(temp);
*ret = fres;
return num;
}
@ -267,14 +268,14 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
tmplen = rows + 1;
size2 = tmplen * sizeof(int);
reservar(&temp, size2);
cudaMemset(temp, 0, size2);
hipMemset(temp, 0, size2);
size = numpreds * sizeof(int);
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
else
bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
res = thrust::device_pointer_cast(temp);
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
num = res[rows];
@ -284,10 +285,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
size = head_size * sizeof(int);
reservar(&fres, num * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
cudaFree(dhead);
cudaFree(temp);
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
hipFree(dhead);
hipFree(temp);
*ret = fres;
return num;
}
@ -295,9 +296,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
{
size = head_size * sizeof(int);
reservar(&fres, rows * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
cudaFree(dhead);
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(proyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, head_size, fres);
hipFree(dhead);
*ret = fres;
return rows;
}

0
packages/cuda/selectproyectcpu.cpp Executable file → Normal file
View File

347
packages/cuda/treeb.cu Executable file → Normal file
View File

@ -1,3 +1,4 @@
#include "hip/hip_runtime.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/sequence.h>
@ -160,11 +161,11 @@ __device__ int firstMatchingKeyInDataNode2(Record records[], IKeyType key)
__global__ void gCreateIndex(IDataNode data[], IDirectoryNode dir[], int dirSize, int tree_size, int bottom_start, int nNodesPerBlock)
{
int startIdx = blockIdx.x * nNodesPerBlock;
int startIdx = hipBlockIdx_x * nNodesPerBlock;
int endIdx = startIdx + nNodesPerBlock;
if(endIdx > dirSize)
endIdx = dirSize;
int keyIdx = threadIdx.x;
int keyIdx = hipThreadIdx_x;
// Proceed only when in internal nodes
for(int nodeIdx = startIdx; nodeIdx < endIdx; nodeIdx++)
@ -191,11 +192,11 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
{
// Bringing the root node (visited by every tuple) to the faster shared memory
__shared__ IKeyType RootNodeKeys[TREE_NODE_SIZE];
RootNodeKeys[threadIdx.x] = dir->keys[threadIdx.x];
RootNodeKeys[hipThreadIdx_x] = dir->keys[hipThreadIdx_x];
__syncthreads();
int OverallThreadIdx = blockIdx.x * THRD_PER_BLCK_search + threadIdx.x;
int OverallThreadIdx = hipBlockIdx_x * THRD_PER_BLCK_search + hipThreadIdx_x;
for(int keyIdx = OverallThreadIdx; keyIdx < nSearchKeys; keyIdx += THRD_PER_GRID_search)
{
@ -219,7 +220,7 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
/*Counts the number of times a row in 'S' is to be joined to a row in 'R'.*/
__global__ void gIndexJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[])
{
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(s_cur < sLen)
{
@ -246,11 +247,11 @@ in 'g_locations' those rows that have equal values in the checked columns.*/
__global__ void gIndexMultiJoinNegative(int *R, int *S, int g_locations[], int rLen, int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
{
extern __shared__ int shared[];
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int posr, poss, x;
if(threadIdx.x < wj)
shared[threadIdx.x] = muljoin[threadIdx.x];
if(hipThreadIdx_x < wj)
shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
__syncthreads();
if(r_cur < rLen)
@ -287,11 +288,11 @@ times a row in 'S' is to be joined to its corresponding row in 'R', storing the
__global__ void gIndexMultiJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[], int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
{
extern __shared__ int shared[];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int posr, poss, x;
if(threadIdx.x < wj)
shared[threadIdx.x] = muljoin[threadIdx.x];
if(hipThreadIdx_x < wj)
shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
__syncthreads();
if(s_cur < sLen)
@ -330,10 +331,10 @@ __global__ void multiJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums
{
extern __shared__ int shared[];
int *extjoins = &shared[lenrul];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < (lenrul + wj))
shared[threadIdx.x] = rule[threadIdx.x];
if(hipThreadIdx_x < (lenrul + wj))
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads();
if(s_cur < sLen)
@ -382,10 +383,10 @@ __global__ void multiJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSum
{
extern __shared__ int shared[];
int *extjoins = &shared[cols];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < (cols + wj))
shared[threadIdx.x] = rule[threadIdx.x];
if(hipThreadIdx_x < (cols + wj))
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads();
if(s_cur < sLen)
@ -432,11 +433,11 @@ predicate are projected.*/
__global__ void gJoinWithWriteNegative(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int halfrul, int *mloc)
{
extern __shared__ int shared[];
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int posr;
if(threadIdx.x < halfrul)
shared[threadIdx.x] = rule[threadIdx.x];
if(hipThreadIdx_x < halfrul)
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads();
if(r_cur < rLen)
@ -461,11 +462,11 @@ predicate are projected.*/
__global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int cols, int *mloc)
{
extern __shared__ int shared[];
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int posr;
if(threadIdx.x < cols)
shared[threadIdx.x] = rule[threadIdx.x];
if(hipThreadIdx_x < cols)
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads();
if(r_cur < rLen)
@ -489,10 +490,10 @@ __global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinR
__global__ void gJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int halfrul, int lenrul, int *mloc, int *sloc)
{
extern __shared__ int shared[];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < lenrul)
shared[threadIdx.x] = rule[threadIdx.x];
if(hipThreadIdx_x < lenrul)
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads();
if(s_cur < sLen)
@ -525,10 +526,10 @@ projection, which is performed based on the variables in the head of the rule.*/
__global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int cols, int *mloc, int *sloc)
{
extern __shared__ int shared[];
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < cols)
shared[threadIdx.x] = rule[threadIdx.x];
if(hipThreadIdx_x < cols)
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads();
if(s_cur < sLen)
@ -563,7 +564,7 @@ __global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[],
/*Load part of column 'wj' of 'p' in 'R'. Which values are loaded is defined by the prefix sum results in 'pos'.*/
__global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *ids)
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int cond;
if(id < len)
{
@ -579,7 +580,7 @@ __global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *i
/*Load an entire column from 'p' into 'R'.
  Each thread derives one flat index 'id' from its block and thread coordinates and,
  when id < len, copies p[id * of + wj] into R[id]. Threads with id >= len write nothing.
  Parameters:
    p   - source array, read at offset id * of + wj (presumably row-major with 'of' ints
          per row and 'wj' the column being extracted - TODO confirm against callers).
    R   - destination array; entries 0 .. len-1 are written.
    len - number of entries to copy.
  NOTE(review): this listing appears to be a rendered diff of the CUDA-to-HIP port, so the
  two 'int id' declarations below look like the removed (CUDA built-ins) and the added
  (HIP built-ins) form of the same statement. Presumably only one of them exists in the
  real file - confirm against the repository, since both together would not compile.*/
__global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(id < len) /*skip threads whose index falls past the last entry*/
R[id] = p[id * of + wj];
}
@ -587,10 +588,10 @@ __global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
__global__ void projectfinal(int *res, int rows, int cols, int *rule, int *out)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if(threadIdx.x < cols)
shared[threadIdx.x] = rule[threadIdx.x];
if(hipThreadIdx_x < cols)
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
__syncthreads();
if(id < rows)
@ -614,26 +615,26 @@ void project(int *res, int resrows, int numcols1, int numcols2, int *proj, int *
int *pt = (int *)malloc(sizepro);
for(z = 0; z < numcols2; z++)
pt[z] = proj[z] - 1;
cudaMemcpy(dcons, pt, sizepro, cudaMemcpyHostToDevice);
//cudaDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
hipMemcpy(dcons, pt, sizepro, hipMemcpyHostToDevice);
//hipDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
free(pt);
}
else
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
reservar(&d_Rout, resrows * sizepro);
projectfinal<<<blockllen, numthreads, sizepro>>>(res, resrows, numcols1, dcons, d_Rout);
cudaFree(dcons);
cudaFree(*ret);
hipLaunchKernel(HIP_KERNEL_NAME(projectfinal), dim3(blockllen), dim3(numthreads), sizepro, 0, res, resrows, numcols1, dcons, d_Rout);
hipFree(dcons);
hipFree(*ret);
*ret = d_Rout;
}
__global__ void projectadd(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, int *dhead, int hsize, int *res)
{
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
int pos2, posr, x, y, cond;
if(threadIdx.x < hsize)
shared[threadIdx.x] = dhead[threadIdx.x];
if(hipThreadIdx_x < hsize)
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
__syncthreads();
if(id < rows2)
{
@ -662,10 +663,10 @@ void juntar(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, in
int blockllen = rows2 / numthreads + 1;
sizepro = pcols * sizeof(int);
reservar(&dcons, sizepro);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
reservar(&d_Rout, rows1 * rows2 * sizepro);
projectadd<<<blockllen, numthreads, sizepro>>>(dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
cudaFree(dcons);
hipLaunchKernel(HIP_KERNEL_NAME(projectadd), dim3(blockllen), dim3(numthreads), sizepro, 0, dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
hipFree(dcons);
*ret = d_Rout;
}
@ -743,51 +744,51 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
#ifdef TIMER
//cout << "INICIO" << endl;
cudaEvent_t start, stop;
hipEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
if(npred2.x > 0 || npred2.y > 0 || nsel2 > 0 || nsj2 > 0)
{
newLen = sLen + 1;
cudaMemsetAsync(temp, 0, newLen * sizeof(int));
hipMemsetAsync(temp, 0, newLen * sizeof(int));
}
if(npred2.x > 0 || npred2.y > 0)
{
size = npred2tot * sizeof(int);
cudaMemcpy(dcons, pred2, size, cudaMemcpyHostToDevice);
hipMemcpy(dcons, pred2, size, hipMemcpyHostToDevice);
if(npred2.y > 0) /*Fix case when a(X,Y),b(Y,Z),Z > Y*/
{
reservar(&temp2, sizet2);
cudaMemsetAsync(temp2, 0, newLen * sizeof(int));
hipMemsetAsync(temp2, 0, newLen * sizeof(int));
//res = thrust::device_pointer_cast(temp2);
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
}
else
{
if(negative)
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
else
bpredsOR<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsOR), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
}
if(nsel2 > 0)
{
size = nsel2 * sizeof(int);
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice);
marcar<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1);
hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
}
if(nsj2 > 0)
{
size = nsj2 * sizeof(int);
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
}
}
else
@ -795,14 +796,14 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(nsel2 > 0)
{
size = nsel2 * sizeof(int);
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice);
marcar2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1);
hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
if(nsj2 > 0)
{
size = nsj2 * sizeof(int);
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
}
}
else
@ -810,15 +811,15 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(nsj2 > 0)
{
size = nsj2 * sizeof(int);
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
samejoin2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
}
else
{
sizem32S = m32sLen * sizeof(int);
reservar(&d_S, sizem32S);
cudaMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
llenarnosel<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1]);
hipMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1]);
}
}
}
@ -842,8 +843,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(newLen == 0) // && !negative) ARREGLAR
{
cudaFree(temp);
cudaFree(dcons);
hipFree(temp);
hipFree(dcons);
return 0;
}
@ -854,24 +855,24 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
reservar(&d_S, sizem32S);
reservar(&posS, sizem32S);
cudaMemsetAsync(d_S + newLen, 0x7f, sizextra);
cudaMemsetAsync(posS + newLen, 0x7f, sizextra);
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
hipMemsetAsync(d_S + newLen, 0x7f, sizextra);
hipMemsetAsync(posS + newLen, 0x7f, sizextra);
hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1], temp, posS);
sLen = newLen;
}
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
//cout << "Select1 = " << time << endl;
cuda_stats.select1_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventDestroy(start);
hipEventDestroy(stop);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
blockllen = rLen / numthreads + 1;
@ -880,30 +881,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
{
if(temp2 != NULL)
{
cudaFree(temp);
hipFree(temp);
temp = temp2;
res = thrust::device_pointer_cast(temp);
newLen = rLen + 1;
if(nsel1 > 0)
{
size = nsel1 * sizeof(int);
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice);
marcar<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1);
hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
}
if(nsj1 > 0)
{
size = nsj1 * sizeof(int);
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
}
if(npred1.x > 0)
{
size = npred1.x * sizeof(int);
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
else
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
}
}
else
@ -911,30 +912,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(npred1.x > 0 || nsel1 > 0 || nsj1 > 0)
{
newLen = rLen + 1;
cudaMemsetAsync(temp, 0, newLen * sizeof(int));
hipMemsetAsync(temp, 0, newLen * sizeof(int));
}
if(nsel1 > 0)
{
size = nsel1 * sizeof(int);
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice);
marcar2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1);
hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
if(nsj1 > 0)
{
size = nsj1 * sizeof(int);
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
}
if(npred1.x > 0)
{
size = npred1.x * sizeof(int);
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
else
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
}
}
else
@ -942,17 +943,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(nsj1 > 0)
{
size = nsj1 * sizeof(int);
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
samejoin2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
if(npred1.x > 0)
{
size = npred1.x * sizeof(int);
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
else
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
}
}
else
@ -960,11 +961,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(npred1.x > 0)
{
size = npred1.x * sizeof(int);
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
else
bpredsorlogic2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
}
}
}
@ -976,11 +977,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
newLen = res[rLen];
if(newLen == 0)
{
cudaFree(temp);
cudaFree(dcons);
cudaFree(d_S);
hipFree(temp);
hipFree(dcons);
hipFree(d_S);
if(posS != NULL)
cudaFree(posS);
hipFree(posS);
return 0;
}
@ -991,41 +992,41 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
reservar(&d_R, sizem32);
reservar(&posR, sizem32);
cudaMemsetAsync(d_R + newLen, 0x7f, sizextra);
cudaMemsetAsync(posR + newLen, 0x7f, sizextra);
llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR);
hipMemsetAsync(d_R + newLen, 0x7f, sizextra);
hipMemsetAsync(posR + newLen, 0x7f, sizextra);
hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0], temp, posR);
rLen = newLen;
}
else
{
sizem32 = m32rLen * sizeof(int);
reservar(&d_R, sizem32);
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
}
}
else
{
sizem32 = m32rLen * sizeof(int);
reservar(&d_R, sizem32);
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
}
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
//cout << "Select2 = " << time << endl;
cuda_stats.select2_time += time;
#endif
#ifdef TIMER
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventDestroy(start);
hipEventDestroy(stop);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
thrust::device_ptr<Record> dvp1;
@ -1084,17 +1085,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
}
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
//cout << "Sort = " << time << endl;
cuda_stats.sort_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
hipEventDestroy(start);
hipEventDestroy(stop);
hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
#endif
IDataNode* d_data;
@ -1123,7 +1124,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
dim3 Dbc(THRD_PER_BLCK_create, 1, 1);
dim3 Dgc(BLCK_PER_GRID_create, 1, 1);
gCreateIndex <<<Dgc, Dbc>>> (d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);
hipLaunchKernel(HIP_KERNEL_NAME(gCreateIndex), dim3(Dgc), dim3(Dbc), 0, 0, d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);
int *d_locations;
int memSizeR;
@ -1132,7 +1133,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
{
memSizeR = (rLen + 1) * sizeof(int);
reservar(&d_locations, memSizeR);
cudaMemsetAsync(d_locations, 0, sizeof(int));
hipMemsetAsync(d_locations, 0, sizeof(int));
nSearchKeys = rLen;
}
else
@ -1146,13 +1147,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
unsigned int nKeysPerThread = uintCeilingDiv(nSearchKeys, THRD_PER_GRID_search);
if(negative)
{
gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
cudaMemsetAsync(temp, 0, memSizeR);
hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
hipMemsetAsync(temp, 0, memSizeR);
}
else
{
gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
cudaMemsetAsync(temp, 0, memSizeS);
hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
hipMemsetAsync(temp, 0, memSizeS);
}
int muljoin = 0, muljoinsize = 0, sum;
@ -1165,8 +1166,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
{
muljoin = numj - 2;
muljoinsize = muljoin * sizeof(int);
cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
gIndexMultiJoinNegative<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoinNegative), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
}
res = thrust::device_pointer_cast(d_locations);
@ -1177,21 +1178,21 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(pos == (rule->num_rows - 3))
{
sizepro = rule->num_columns * sizeof(int);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
resSize = sum * sizepro;
reservar(&d_Rout, resSize);
gJoinWithWriteNegative2<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
}
else
{
sizepro = projp.x * sizeof(int);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
resSize = sum * sizepro;
reservar(&d_Rout, resSize);
gJoinWithWriteNegative<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
}
cudaFree(d_R);
cudaFree(d_S);
hipFree(d_R);
hipFree(d_S);
}
else
{
@ -1200,26 +1201,26 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
{
muljoin = numj - 2;
muljoinsize = muljoin * sizeof(int);
cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
gIndexMultiJoin<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoin), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
}
else
gIndexJoin<<<blockllen, numthreads>>> (d_R, d_S, d_locations, sLen, temp);
cudaFree(d_R);
cudaFree(d_S);
hipLaunchKernel(HIP_KERNEL_NAME(gIndexJoin), dim3(blockllen), dim3(numthreads), 0, 0, d_R, d_S, d_locations, sLen, temp);
hipFree(d_R);
hipFree(d_S);
sum = res[sLen-1];
thrust::exclusive_scan(res, res + sLen, res);
sum += res[sLen-1];
if(sum == 0)
{
cudaFree(dcons);
cudaFree(d_locations);
cudaFree(temp);
hipFree(dcons);
hipFree(d_locations);
hipFree(temp);
if(posS != NULL)
cudaFree(posS);
hipFree(posS);
if(posR != NULL)
cudaFree(posR);
hipFree(posR);
return 0;
}
res[sLen] = sum;
@ -1227,49 +1228,49 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
if(pos == (rule->num_rows - 3))
{
sizepro = rule->num_columns * sizeof(int);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
resSize = sum * sizepro;
reservar(&d_Rout, resSize);
if(numj > 2)
{
cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
multiJoinWithWrite2<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
hipMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
}
else
gJoinWithWrite2<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
}
else
{
sizepro = projp.y * sizeof(int);
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
resSize = sum * sizepro;
reservar(&d_Rout, resSize);
if(numj > 2)
{
cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
multiJoinWithWrite<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
hipMemcpy(dcons + projp.y, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
}
else
gJoinWithWrite<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
}
}
cudaFree(dcons);
cudaFree(d_locations);
cudaFree(temp);
hipFree(dcons);
hipFree(d_locations);
hipFree(temp);
if(posS != NULL)
cudaFree(posS);
hipFree(posS);
if(posR != NULL)
cudaFree(posR);
hipFree(posR);
if(*ret != NULL)
cudaFree(*ret);
hipFree(*ret);
*ret = d_Rout;
#ifdef TIMER
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
//cout << "Join = " << time << endl;
//cout << "FIN" << endl;
cuda_stats.join_time += time;

80
packages/cuda/union2.cu Executable file → Normal file
View File

@ -87,8 +87,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -122,8 +122,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -157,8 +157,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -192,8 +192,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -227,8 +227,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -262,8 +262,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -297,8 +297,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -332,8 +332,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -367,8 +367,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -402,8 +402,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -437,8 +437,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -472,8 +472,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -507,8 +507,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -542,8 +542,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -577,8 +577,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -612,8 +612,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -647,8 +647,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -682,8 +682,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -717,8 +717,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;
@ -752,8 +752,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
{
size = nrows * tipo * sizeof(int);
reservar(&nres, size);
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
cudaFree(*ret);
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
hipFree(*ret);
*ret = nres;
}
return nrows;

0
packages/cuda/union2.h Executable file → Normal file
View File

0
packages/cuda/unioncpu2.cpp Executable file → Normal file
View File

View File

@ -2,11 +2,13 @@
sqlite3 side:
create table test (id integer, x integer, y integer);
.separator ","
.import /home/vsc/Yap/ILP/HH/DaysInHospital_Y3.csv test
.import DaysInHospital_Y3.csv hh
.export hh
.save hh
myddas side:
use_module(library(myddas)).
db_open(sqlite3,con,'../hh',x,x).
db_open(sqlite3,con,'.hh',x,x).
db_close(con).
test 2:
@ -14,7 +16,7 @@ test 2:
use_module(library(myddas)).
assert((
t2 :-
db_open(sqlite3,con,'../hh',x,x),
db_open(sqlite3,con,'hh',x,x),
db_import(con,test,test),
test(A,B,C),
writeln(test(A,B,C)),
@ -22,4 +24,3 @@ fail
)).
trace.
t2.

View File

@ -15,7 +15,12 @@
* *
*************************************************************************/
#if USE_MYDDAS
#if 1 //USE_MYDDAS
:- load_foreign_files([myddas], [], init_myddas).
/* Initialize MYDDAS GLOBAL STRUCTURES */
:- c_db_initialize_myddas.
#ifdef DEBUG
:- yap_flag(single_var_warnings,on).
@ -800,8 +805,6 @@
]).
#ifdef MYDDAS_MYSQL
:- load_foreign_files([], [], init_mysql).
:- use_module(myddas_mysql,[
db_my_result_set/1,
db_datalog_describe/1,
@ -867,7 +870,7 @@
% db_open/4
%
#if MYDDAS_DECLARATIONS
#if 1 // MYDDAS_DECLARATIONS
:- db_open(Protocol) extra_arguments
db=Db,
port=Port,

View File

@ -23,12 +23,11 @@ Android/jni/sqlite/nativehelper/jni.h
#sqlite3 is now in the system
set (SQLITE3_FOUND ON PARENT_SCOPE)
macro_log_feature (SQLITE3_FOUND "Sqlite3"
"Sqlite3 Data-Base "
"http://www.sqlite3ql.org" FALSE)
message (
" * Sqlite3 Data-Base (http://www.sqlite3ql.org) is distributed with
MYDDAS" )
include_directories (${SQLITE3_INCLUDE_DIRECTORIES} .. . Android/jni/sqlite Android/jni/sqlite/nativehelper)
include_directories ( .. . Android/jni/sqlite Android/jni/sqlite/nativehelper)
set (MYDDAS_FLAGS ${MYDDAS_FLAGS} -DMYDDAS_SQLITE3=1 PARENT_SCOPE)
set_property( DIRECTORY .. APPEND PROPERTY COMPILE_DEFINITIONS MYDDAS_SQLITE3=1 )

View File

@ -0,0 +1,10 @@
from setuptools import setup, Extension

# Packaging metadata for the single-module "yapex" distribution.
# NOTE(review): '${CMAKE_SOURCE_DIR}' looks like a CMake placeholder that is
# presumably expanded when this template is configured -- confirm against the
# build scripts before running this file directly.
_yapex_metadata = {
    'name': "yapex",
    'version': "0.1",
    # Map the root package namespace onto the Python package directory.
    'package_dir': {'': '${CMAKE_SOURCE_DIR}/packages/python'},
    # Only the top-level yapex module is shipped.
    'py_modules': ['yapex'],
}

setup(**_yapex_metadata)

File diff suppressed because it is too large Load Diff

View File

@ -78,14 +78,11 @@ static lbfgsfloatval_t evaluate(
a1 = YAP_ArgOfTerm(1,call);
if (YAP_IsFloatTerm(a1)) {
YAP_ShutdownGoal( TRUE );
return (lbfgsfloatval_t) YAP_FloatOfTerm(a1);
} else if (YAP_IsIntTerm(a1)) {
YAP_ShutdownGoal( TRUE );
return (lbfgsfloatval_t) YAP_IntOfTerm(a1);
}
YAP_ShutdownGoal( TRUE );
fprintf(stderr, "ERROR: The evaluate call back function did not return a number as first argument.\n");
return 0;
}