new version of cuda interface
This commit is contained in:
parent
c6d174841a
commit
d3599da6dc
2
.gitignore
vendored
2
.gitignore
vendored
@ -179,3 +179,5 @@ packages/myddas/hh
|
||||
packages/myddas/DaysInHospital_Y3.csv
|
||||
|
||||
packages/myddas/agile.csv
|
||||
|
||||
*.pyc
|
||||
|
0
packages/cuda/CC_CSSTree.cu
Executable file → Normal file
0
packages/cuda/CC_CSSTree.cu
Executable file → Normal file
0
packages/cuda/CC_CSSTree.h
Executable file → Normal file
0
packages/cuda/CC_CSSTree.h
Executable file → Normal file
0
packages/cuda/Makefile.in
Executable file → Normal file
0
packages/cuda/Makefile.in
Executable file → Normal file
37
packages/cuda/bpreds.cu
Executable file → Normal file
37
packages/cuda/bpreds.cu
Executable file → Normal file
@ -1,3 +1,4 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <cstdarg>
|
||||
@ -25,10 +26,10 @@ int maximo(int count, ...)
|
||||
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, rowact1, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -110,10 +111,10 @@ __global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *co
|
||||
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -159,10 +160,10 @@ __global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc,
|
||||
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -226,10 +227,10 @@ __global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc,
|
||||
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, rowact1, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -344,10 +345,10 @@ __global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *
|
||||
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -411,10 +412,10 @@ __global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc
|
||||
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
|
1
packages/cuda/bpreds.h
Executable file → Normal file
1
packages/cuda/bpreds.h
Executable file → Normal file
@ -1,3 +1,4 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#ifndef _BPREDS_H_
|
||||
#define _BPREDS_H_
|
||||
|
||||
|
0
packages/cuda/bpredscpu.cpp
Executable file → Normal file
0
packages/cuda/bpredscpu.cpp
Executable file → Normal file
52
packages/cuda/clamp.rb
Normal file
52
packages/cuda/clamp.rb
Normal file
@ -0,0 +1,52 @@
|
||||
require "formula"
|
||||
|
||||
# Documentation: https://github.com/Homebrew/homebrew/wiki/Formula-Cookbook
|
||||
# /usr/local/Library/Contributions/example-formula.rb
|
||||
# PLEASE REMOVE ALL GENERATED COMMENTS BEFORE SUBMITTING YOUR PULL REQUEST!
|
||||
|
||||
class Clamp < Formula
|
||||
homepage "https://bitbucket.org/multicoreware/cppamp-driver-ng/wiki/Home"
|
||||
version "0.0.1-3"
|
||||
url "https://bitbucket.org/multicoreware/cppamp-driver-ng/get/milestone3.tar.bz2"
|
||||
head "https://bitbucket.org/multicoreware/cppamp-driver-ng.git"
|
||||
sha1 "b8b88306561a60942f8ecbd8ff20554661c4e5f9"
|
||||
|
||||
depends_on "cmake" => :build
|
||||
depends_on "wget" => :build
|
||||
depends_on "git" => :build
|
||||
depends_on "hg" => :build
|
||||
depends_on "subversion" => :build
|
||||
# depends_on :x11 # if your formula requires any X11/XQuartz components
|
||||
|
||||
def install
|
||||
# ENV.deparallelize # if your formula fails when building in parallel
|
||||
|
||||
# Remove unrecognized options if warned by configure
|
||||
# system "./configure", "--disable-debug",
|
||||
# "--disable-dependency-tracking",
|
||||
# "--disable-silent-rules",
|
||||
# "--prefix=#{prefix}"
|
||||
mkdir "macbuild" do
|
||||
args = std_cmake_args
|
||||
args << "-DCLANG_URL=https://bitbucket.org/multicoreware/cppamp-ng.git"
|
||||
args << "-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=CBackend"
|
||||
args << "-DGMAC_URL=https://bitbucket.org/multicoreware/gmac"
|
||||
system 'cmake', "..", *args
|
||||
system "make", "world"
|
||||
system "cd libc++; make install"
|
||||
system "make", "install" # if this fails, try separate make/make install steps
|
||||
end
|
||||
end
|
||||
|
||||
test do
|
||||
# `test do` will create, run in and delete a temporary directory.
|
||||
#
|
||||
# This test will fail and we won't accept that! It's enough to just replace
|
||||
# "false" with the main program this formula installs, but it'd be nice if you
|
||||
# were more thorough. Run the test with `brew test milestone`.
|
||||
#
|
||||
# The installed folder is not in the path, so use the entire path to any
|
||||
# executables being tested: `system "#{bin}/program", "do", "something"`.
|
||||
system "make", "test"
|
||||
end
|
||||
end
|
4
packages/cuda/creator2.c
Executable file → Normal file
4
packages/cuda/creator2.c
Executable file → Normal file
@ -66,7 +66,7 @@ int main(int argc, char *argv[])
|
||||
fprintf(cuda, "\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
|
||||
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
|
||||
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
|
||||
fprintf(cuda, "\t\t\t}\n");
|
||||
@ -103,7 +103,7 @@ int main(int argc, char *argv[])
|
||||
fprintf(cuda, "\t\t\t{\n");
|
||||
fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
|
||||
fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
|
||||
fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
|
||||
fprintf(cuda, "\t\t\t\t*ret = nres;\n");
|
||||
fprintf(cuda, "\t\t\t}\n");
|
||||
|
0
packages/cuda/cuda.c
Executable file → Normal file
0
packages/cuda/cuda.c
Executable file → Normal file
0
packages/cuda/cuda.yap
Executable file → Normal file
0
packages/cuda/cuda.yap
Executable file → Normal file
@ -27,8 +27,8 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
|
||||
res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
|
||||
hipFree(dop1);
|
||||
*result = hres;
|
||||
}
|
||||
else
|
||||
@ -39,13 +39,13 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
|
||||
int *dop2;
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
reservar(&dop2, tipo);
|
||||
cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dop2, dop1, tipo, hipMemcpyHostToDevice);
|
||||
free(dop1);
|
||||
res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop2);
|
||||
hipMemcpy(hres, dop2, tipo, hipMemcpyDeviceToHost);
|
||||
hipFree(dop2);
|
||||
*result = hres;
|
||||
}
|
||||
else
|
||||
@ -315,8 +315,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
|
||||
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
|
||||
hipFree(dop1);
|
||||
w = z + 1;
|
||||
|
||||
strtok(qposr->rulename, "_");
|
||||
@ -353,8 +353,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
|
||||
res_rows = abs(res_rows);
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
cudaFree(dop1);
|
||||
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
|
||||
hipFree(dop1);
|
||||
|
||||
char file[] = "/dev/shm/buffer.csv";
|
||||
FILE *fp;
|
||||
@ -554,7 +554,7 @@ void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator f
|
||||
sign = tmpfact.predname;
|
||||
tipo = res_rows * cols1 * sizeof(int);
|
||||
hres = (int *)malloc(tipo);
|
||||
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||
hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
|
||||
if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
|
||||
sumar(tmpfact.name, dop1, cols1, res_rows);
|
||||
}
|
||||
|
0
packages/cuda/hippy/hippy
Normal file
0
packages/cuda/hippy/hippy
Normal file
62
packages/cuda/joincpu.cpp
Executable file → Normal file
62
packages/cuda/joincpu.cpp
Executable file → Normal file
@ -324,11 +324,11 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEvent_t start, stop;
|
||||
hipEvent_t start, stop;
|
||||
float time;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
if(nsel1 > 0 || nsj1 > 0)
|
||||
@ -359,16 +359,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.select1_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
if(nsel2 > 0 || nsj2 > 0)
|
||||
@ -381,16 +381,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
Snl = sLen;
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.select2_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
//cout << "antes" << endl;
|
||||
@ -406,16 +406,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation);
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.sort_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
/*cout << "despues" << endl;
|
||||
@ -482,9 +482,9 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
|
||||
*ret = fres;
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.join_time += time;
|
||||
#endif
|
||||
|
||||
|
40
packages/cuda/lista.cu
Executable file → Normal file
40
packages/cuda/lista.cu
Executable file → Normal file
@ -967,7 +967,7 @@ vector<gpunode> L;
|
||||
extern "C"
|
||||
int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, int *inpquery, int **result, char *names, int finalDR)
|
||||
{
|
||||
cudaSetDevice(0);
|
||||
hipSetDevice(0);
|
||||
vector<rulenode> rules;
|
||||
int x;
|
||||
|
||||
@ -1029,11 +1029,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
vector<rulenode>::iterator qposr;
|
||||
|
||||
#if TIMER
|
||||
cudaEvent_t start, stop;
|
||||
hipEvent_t start, stop;
|
||||
float time;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
while(reglas.size()) /*Here's the main loop*/
|
||||
@ -1084,7 +1084,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
{
|
||||
num_refs = rows1 * cols1 * sizeof(int);
|
||||
reservar(&res, num_refs);
|
||||
cudaMemcpyAsync(res, dop1, num_refs, cudaMemcpyDeviceToDevice);
|
||||
hipMemcpyAsync(res, dop1, num_refs, hipMemcpyDeviceToDevice);
|
||||
registrar(rul_act->name, cols1, res, rows1, itr, 1);
|
||||
genflag = 1;
|
||||
rul_act->gen_ant = rul_act->gen_act;
|
||||
@ -1251,10 +1251,10 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
if(x == num_refs)
|
||||
{
|
||||
#ifdef TIMER
|
||||
cudaEvent_t start2, stop2;
|
||||
cudaEventCreate(&start2);
|
||||
cudaEventCreate(&stop2);
|
||||
cudaEventRecord(start2, 0);
|
||||
hipEvent_t start2, stop2;
|
||||
hipEventCreate(&start2);
|
||||
hipEventCreate(&stop2);
|
||||
hipEventRecord(start2, 0);
|
||||
#endif
|
||||
|
||||
//cout << rul_act->name << " res_rows = " << res_rows << endl;
|
||||
@ -1263,11 +1263,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
res_rows = unir(res, res_rows, rul_act->num_columns, &res, 0);
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop2, 0);
|
||||
cudaEventSynchronize(stop2);
|
||||
cudaEventElapsedTime(&time, start2, stop2);
|
||||
cudaEventDestroy(start2);
|
||||
cudaEventDestroy(stop2);
|
||||
hipEventRecord(stop2, 0);
|
||||
hipEventSynchronize(stop2);
|
||||
hipEventElapsedTime(&time, start2, stop2);
|
||||
hipEventDestroy(start2);
|
||||
hipEventDestroy(stop2);
|
||||
//cout << "Union = " << time << endl;
|
||||
cuda_stats.union_time += time;
|
||||
#endif
|
||||
@ -1319,16 +1319,16 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
#endif
|
||||
|
||||
#if TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.total_time += time;
|
||||
if (time > cuda_stats.max_time)
|
||||
cuda_stats.max_time = time;
|
||||
if (time < cuda_stats.min_time || cuda_stats.calls == 1)
|
||||
cuda_stats.min_time = time;
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
Cuda_Statistics();
|
||||
#endif
|
||||
|
||||
|
0
packages/cuda/lista.h
Executable file → Normal file
0
packages/cuda/lista.h
Executable file → Normal file
44
packages/cuda/memory.cu
Executable file → Normal file
44
packages/cuda/memory.cu
Executable file → Normal file
@ -144,7 +144,7 @@ void limpiar(const char s[], size_t sz)
|
||||
|
||||
if(GPUmem.size() == 0)
|
||||
{
|
||||
cudaMemGetInfo(&free,&total);
|
||||
hipMemGetInfo(&free,&total);
|
||||
cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
|
||||
exit(1);
|
||||
}
|
||||
@ -154,11 +154,11 @@ void limpiar(const char s[], size_t sz)
|
||||
{
|
||||
temp = *ini;
|
||||
temp.dev_address = (int *)malloc(ini->size);
|
||||
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
||||
hipMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, hipMemcpyDeviceToHost);
|
||||
list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||
CPUmem.insert(pos, temp);
|
||||
}
|
||||
cudaFree(ini->dev_address);
|
||||
hipFree(ini->dev_address);
|
||||
GPUmem.erase(ini);
|
||||
}
|
||||
|
||||
@ -173,19 +173,19 @@ void reservar(int **ptr, size_t size)
|
||||
return;
|
||||
}
|
||||
|
||||
cudaMemGetInfo(&free, &total);
|
||||
hipMemGetInfo(&free, &total);
|
||||
while(free < size)
|
||||
{
|
||||
cout << "Se limpio memoria " << free << " " << total << endl;
|
||||
limpiar("not enough memory", size);
|
||||
cudaMemGetInfo(&free, &total);
|
||||
hipMemGetInfo(&free, &total);
|
||||
}
|
||||
|
||||
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
||||
while(hipMalloc(ptr, size) == hipErrorMemoryAllocation)
|
||||
limpiar("Error in memory allocation", size);
|
||||
if (! *ptr ) {
|
||||
size_t free, total;
|
||||
cudaMemGetInfo( &free, &total );
|
||||
hipMemGetInfo( &free, &total );
|
||||
cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;
|
||||
cerr << "Exiting CUDA...." << endl;
|
||||
exit(1);
|
||||
@ -277,7 +277,7 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
||||
}
|
||||
size = num_rows * num_columns * sizeof(int);
|
||||
reservar(&temp, size);
|
||||
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpyAsync(temp, address_host_table, size, hipMemcpyHostToDevice);
|
||||
registrar(name, num_columns, temp, num_rows, itr, 0);
|
||||
*ptr = temp;
|
||||
return num_rows;
|
||||
@ -296,13 +296,13 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
||||
reservar(&temp, size);
|
||||
for(x = 0; x < numgpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
|
||||
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToDevice);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
cudaFree(temp_storage[x].dev_address);
|
||||
hipFree(temp_storage[x].dev_address);
|
||||
}
|
||||
for(; x < numcpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
|
||||
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyHostToDevice);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
free(temp_storage[x].dev_address);
|
||||
}
|
||||
@ -340,9 +340,9 @@ int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address
|
||||
temp = (int *)malloc(size);
|
||||
for(x = 0; x < numgpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
|
||||
hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToHost);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
cudaFree(temp_storage[x].dev_address);
|
||||
hipFree(temp_storage[x].dev_address);
|
||||
}
|
||||
for(; x < numcpu; x++)
|
||||
{
|
||||
@ -404,7 +404,7 @@ int cargafinal(int name, int cols, int **ptr)
|
||||
cont = pos->rows;
|
||||
#ifdef TUFFY
|
||||
reservar(&temp, pos->size);
|
||||
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
|
||||
*ptr = temp;
|
||||
#else
|
||||
*ptr = pos->dev_address;
|
||||
@ -418,14 +418,14 @@ int cargafinal(int name, int cols, int **ptr)
|
||||
pos = gpu;
|
||||
while(pos != endg && pos->name == name)
|
||||
{
|
||||
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
|
||||
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyDeviceToDevice);
|
||||
temp += pos->size / sizeof(int);
|
||||
pos++;
|
||||
}
|
||||
pos = cpu;
|
||||
while(pos != endc && pos->name == name)
|
||||
{
|
||||
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
|
||||
temp += pos->size / sizeof(int);
|
||||
pos++;
|
||||
}
|
||||
@ -493,7 +493,7 @@ void clear_memory()
|
||||
{
|
||||
if(ini->isrule)
|
||||
{
|
||||
cudaFree(ini->dev_address);
|
||||
hipFree(ini->dev_address);
|
||||
ini = GPUmem.erase(ini);
|
||||
}
|
||||
else
|
||||
@ -518,7 +518,7 @@ void clear_memory_all()
|
||||
fin = GPUmem.end();
|
||||
while(ini != fin)
|
||||
{
|
||||
cudaFree(ini->dev_address);
|
||||
hipFree(ini->dev_address);
|
||||
ini++;
|
||||
}
|
||||
GPUmem.clear();
|
||||
@ -542,7 +542,7 @@ void liberar(int name)
|
||||
{
|
||||
fact = *i;
|
||||
GPUmem.erase(i);
|
||||
cudaFree(fact.dev_address);
|
||||
hipFree(fact.dev_address);
|
||||
}
|
||||
i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
|
||||
if(i != CPUmem.end())
|
||||
@ -566,10 +566,10 @@ void sumar(int name, int *dop1, int cols, int rows)
|
||||
newrows = rows + fact.rows;
|
||||
reservar(&res, newrows * cols * sizeof(int));
|
||||
offset = fact.rows * cols;
|
||||
cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
|
||||
hipMemcpyAsync(res, fact.dev_address, offset * sizeof(int), hipMemcpyDeviceToDevice);
|
||||
GPUmem.erase(i);
|
||||
registrar(name, cols, res, newrows, 0, 0);
|
||||
cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
|
||||
cudaFree(fact.dev_address);
|
||||
hipMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), hipMemcpyDeviceToDevice);
|
||||
hipFree(fact.dev_address);
|
||||
}
|
||||
}
|
||||
|
0
packages/cuda/memory.h
Executable file → Normal file
0
packages/cuda/memory.h
Executable file → Normal file
601
packages/cuda/old/cuda.c
Executable file
601
packages/cuda/old/cuda.c
Executable file
@ -0,0 +1,601 @@
|
||||
|
||||
// interface to CUDD Datalog evaluation
|
||||
#include "config.h"
|
||||
#include "YapInterface.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
#include "pred.h"
|
||||
|
||||
#define MAXARG 100
|
||||
|
||||
YAP_Atom AtomEq,
|
||||
AtomGt,
|
||||
AtomLt,
|
||||
AtomGe,
|
||||
AtomLe,
|
||||
AtomDf,
|
||||
AtomNt;
|
||||
|
||||
predicate *facts[MAXARG]; /*Temporary solution to maintain facts and rules*/
|
||||
predicate *rules[MAXARG];
|
||||
int32_t cf = 0, cr = 0;
|
||||
|
||||
char names[1024];
|
||||
|
||||
// initialize CUDA system
|
||||
void Cuda_Initialize( void );
|
||||
|
||||
// add/replace a set of facts for predicate pred
|
||||
int32_t Cuda_NewFacts(predicate *pred);
|
||||
|
||||
// add/replace a rule for predicate pred
|
||||
int32_t Cuda_NewRule(predicate *pred);
|
||||
|
||||
// erase predicate pred
|
||||
int32_t Cuda_Erase(predicate *pred);
|
||||
|
||||
// evaluate predicate pred, mat is bound to a vector of solutions, and
|
||||
// output the count
|
||||
//int32_t Cuda_Eval(predicate *pred, int32_t **mat); This functions arguments were changed, please see pred.h
|
||||
|
||||
void init_cuda( void );
|
||||
|
||||
//#define DEBUG_INTERFACE 1
|
||||
|
||||
#ifdef ROCKIT
|
||||
static int32_t query[100];
|
||||
static int32_t qcont = 0;
|
||||
static int cuda_init_query(void)
|
||||
{
|
||||
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG1));
|
||||
query[qcont] = pname;
|
||||
qcont++;
|
||||
query[qcont] = 0;
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if DEBUG_INTERFACE
|
||||
static void
|
||||
dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
|
||||
{
|
||||
return;
|
||||
int32_t i, j;
|
||||
for ( i=0; i< nrows; i++) {
|
||||
printf("%d", mat[i*ncols]);
|
||||
for (j=1; j < ncols; j++) {
|
||||
printf(", %d", mat[i*ncols+j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dump_vec(int32_t vec[], int32_t rows)
|
||||
{
|
||||
int32_t i = 1;
|
||||
int32_t j = 0;
|
||||
|
||||
for (j = 0; j < rows; j++) {
|
||||
for ( ; vec[i]; i++ ) {
|
||||
printf(", %d", vec[i]);
|
||||
}
|
||||
printf(", 0");
|
||||
i++;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif /* DEBUG_INTERFACE */
|
||||
|
||||
|
||||
// stubs, will point at Carlos code.
|
||||
|
||||
void Cuda_Initialize( void )
|
||||
{
|
||||
}
|
||||
|
||||
int32_t Cuda_NewFacts(predicate *pe)
|
||||
{
|
||||
#if DEBUG_INTERFACE
|
||||
dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
|
||||
#endif
|
||||
|
||||
#ifdef ROCKIT
|
||||
if(cf >= 0)
|
||||
{
|
||||
facts[cf] = pe;
|
||||
cf++;
|
||||
}
|
||||
#else
|
||||
facts[cf] = pe;
|
||||
cf++;
|
||||
#endif
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int32_t Cuda_NewRule(predicate *pe)
|
||||
{
|
||||
#if DEBUG_INTERFACE
|
||||
dump_vec( pe->address_host_table, pe->num_rows);
|
||||
#endif
|
||||
rules[cr] = pe;
|
||||
cr++;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int32_t Cuda_Erase(predicate *pe)
|
||||
{
|
||||
int i = 0;
|
||||
while ( rules[i] != pe )
|
||||
i++;
|
||||
while (i < cr-1) {
|
||||
rules[i] = rules[i+1];
|
||||
i++;
|
||||
}
|
||||
rules[i] = NULL;
|
||||
cr--;
|
||||
if (pe->address_host_table)
|
||||
free( pe->address_host_table );
|
||||
free( pe );
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static int
|
||||
load_facts( void ) {
|
||||
|
||||
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
|
||||
YAP_Term t3 = YAP_ARG3;
|
||||
int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
|
||||
int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
|
||||
predicate *pred;
|
||||
|
||||
while(YAP_IsPairTerm(t3)) {
|
||||
int32_t j = 0;
|
||||
YAP_Term th = YAP_HeadOfTerm(t3);
|
||||
|
||||
for (j = 0; j < ncols; j++) {
|
||||
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||
if (YAP_IsAtomTerm(ta)) {
|
||||
mat[i*ncols+j] = YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||
} else {
|
||||
mat[i*ncols+j] = YAP_IntOfTerm(ta);
|
||||
}
|
||||
}
|
||||
t3 = YAP_TailOfTerm( t3 );
|
||||
i++;
|
||||
}
|
||||
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||
// new
|
||||
pred = (predicate *)malloc(sizeof(predicate));
|
||||
} else {
|
||||
pred = (predicate *)YAP_IntOfTerm(YAP_ARG4);
|
||||
if (pred->address_host_table)
|
||||
free( pred->address_host_table );
|
||||
}
|
||||
pred->name = pname;
|
||||
pred->num_rows = nrows;
|
||||
pred->num_columns = ncols;
|
||||
pred->is_fact = TRUE;
|
||||
pred->address_host_table = mat;
|
||||
Cuda_NewFacts(pred);
|
||||
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
|
||||
} else {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
static int currentFact = 0;
|
||||
static predicate *currentPred = NULL;
|
||||
|
||||
static int
|
||||
cuda_init_facts( void ) {
|
||||
|
||||
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
|
||||
int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
|
||||
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
|
||||
predicate *pred;
|
||||
|
||||
strcat(names, YAP_AtomName(YAP_AtomOfTerm(YAP_ARG3)));
|
||||
strcat(names, " ");
|
||||
|
||||
if (!mat)
|
||||
return FALSE;
|
||||
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||
// new
|
||||
pred = (predicate *)malloc(sizeof(predicate));
|
||||
} else {
|
||||
pred = (predicate *)YAP_IntOfTerm(YAP_ARG4);
|
||||
if (pred->address_host_table)
|
||||
free( pred->address_host_table );
|
||||
}
|
||||
pred->name = pname;
|
||||
pred->num_rows = nrows;
|
||||
pred->num_columns = ncols;
|
||||
pred->is_fact = TRUE;
|
||||
pred->address_host_table = mat;
|
||||
currentPred = pred;
|
||||
currentFact = 0;
|
||||
|
||||
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
|
||||
} else {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
cuda_load_fact( void ) {
|
||||
|
||||
int i = currentFact;
|
||||
|
||||
#if defined(DATALOG) || defined(TUFFY)
|
||||
YAP_Term th = YAP_ARG1;
|
||||
int ncols = currentPred->num_columns;
|
||||
int j;
|
||||
int *mat = currentPred->address_host_table;
|
||||
for (j = 0; j < ncols; j++) {
|
||||
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||
if (YAP_IsAtomTerm(ta)) {
|
||||
mat[i*ncols+j] = YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||
} else {
|
||||
mat[i*ncols+j] = YAP_IntOfTerm(ta);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
i++;
|
||||
if (i == currentPred->num_rows) {
|
||||
Cuda_NewFacts(currentPred);
|
||||
currentPred = NULL;
|
||||
currentFact = 0;
|
||||
} else {
|
||||
currentFact = i;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static int
|
||||
load_rule( void ) {
|
||||
// maximum of 2K symbols per rule, should be enough for ILP
|
||||
int32_t vec[2048], *ptr = vec, *nvec, neg[2048];
|
||||
// qK different variables;
|
||||
YAP_Term vars[1024];
|
||||
int32_t nvars = 0, x;
|
||||
int32_t ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */
|
||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
|
||||
YAP_Term t3 = YAP_ARG3;
|
||||
YAP_Atom name = YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3)));
|
||||
int32_t pname = YAP_AtomToInt(name);
|
||||
|
||||
const char *strname = YAP_AtomName(name);
|
||||
predicate *pred;
|
||||
int32_t cont = 0;
|
||||
memset(neg, 0x0, 2048 * sizeof(int32_t));
|
||||
|
||||
while(YAP_IsPairTerm(t3)) {
|
||||
int32_t j = 0, m;
|
||||
YAP_Term th = YAP_HeadOfTerm(t3);
|
||||
YAP_Functor f = YAP_FunctorOfTerm( th );
|
||||
int32_t n = YAP_ArityOfFunctor( f );
|
||||
YAP_Atom at = YAP_NameOfFunctor( f );
|
||||
|
||||
if (at == AtomEq)
|
||||
*ptr++ = SBG_EQ;
|
||||
else if (at == AtomGt)
|
||||
*ptr++ = SBG_GT;
|
||||
else if (at == AtomLt)
|
||||
*ptr++ = SBG_LT;
|
||||
else if (at == AtomGe)
|
||||
*ptr++ = SBG_GE;
|
||||
else if (at == AtomLe)
|
||||
*ptr++ = SBG_LE;
|
||||
else if (at == AtomDf)
|
||||
*ptr++ = SBG_DF;
|
||||
else if (at == AtomNt)
|
||||
{
|
||||
neg[cont] = 1;
|
||||
cont++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*ptr++ = YAP_AtomToInt( at );
|
||||
cont++;
|
||||
}
|
||||
|
||||
for (j = 0; j < n; j++) {
|
||||
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||
|
||||
if (YAP_IsVarTerm(ta)) {
|
||||
int32_t k;
|
||||
for (k = 0; k< nvars; k++) {
|
||||
if (vars[k] == ta) {
|
||||
*ptr++ = k+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (k == nvars) {
|
||||
vars[k] = ta;
|
||||
*ptr++ = k+1;
|
||||
nvars++;
|
||||
}
|
||||
} else if (YAP_IsAtomTerm(ta)) {
|
||||
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||
} else if (YAP_IsApplTerm(ta)) {
|
||||
f = YAP_FunctorOfTerm( ta );
|
||||
at = YAP_NameOfFunctor( f );
|
||||
m = YAP_ArityOfFunctor( f );
|
||||
*ptr++ = YAP_AtomToInt( at );
|
||||
|
||||
for (x = 0; x < m; x++) {
|
||||
YAP_Term ta2 = YAP_ArgOfTerm(x+1, ta);
|
||||
|
||||
if (YAP_IsVarTerm(ta2)) {
|
||||
int32_t k;
|
||||
for (k = 0; k < nvars; k++) {
|
||||
if (vars[k] == ta2) {
|
||||
*ptr++ = k+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (k == nvars) {
|
||||
vars[k] = ta2;
|
||||
*ptr++ = k+1;
|
||||
nvars++;
|
||||
}
|
||||
} else if (YAP_IsAtomTerm(ta2)) {
|
||||
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||
} else {
|
||||
*ptr++ = -YAP_IntOfTerm(ta);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
*ptr++ = -YAP_IntOfTerm(ta);
|
||||
}
|
||||
}
|
||||
*ptr++ = 0;
|
||||
t3 = YAP_TailOfTerm( t3 );
|
||||
}
|
||||
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||
// new
|
||||
pred = (predicate *)malloc(sizeof(predicate));
|
||||
} else {
|
||||
pred = (predicate *)YAP_IntOfTerm(YAP_ARG4);
|
||||
if (pred->address_host_table)
|
||||
free( pred->address_host_table );
|
||||
}
|
||||
pred->name = pname;
|
||||
pred->num_rows = ngoals;
|
||||
pred->num_columns = ncols;
|
||||
pred->is_fact = FALSE;
|
||||
x = (strlen(strname) + 1) * sizeof(char);
|
||||
pred->predname = (char *)malloc(x);
|
||||
memcpy(pred->predname, strname, x);
|
||||
nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
|
||||
memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
|
||||
pred->address_host_table = nvec;
|
||||
pred->negatives = (int32_t *)malloc(sizeof(int32_t) * cont);
|
||||
memcpy(pred->negatives, neg, sizeof(int32_t) * cont);
|
||||
Cuda_NewRule( pred );
|
||||
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
|
||||
}
|
||||
|
||||
static int
|
||||
cuda_erase( void )
|
||||
{
|
||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||
return Cuda_Erase( ptr );
|
||||
}
|
||||
|
||||
void setQuery(YAP_Term t1, int32_t **res)
|
||||
{
|
||||
int32_t *query = (int32_t *)malloc(MAXARG * sizeof(int32_t));
|
||||
int32_t x, y = 0, *itr;
|
||||
predicate *ptr = NULL;
|
||||
if(YAP_IsPairTerm(t1))
|
||||
{
|
||||
while(YAP_IsPairTerm(t1))
|
||||
{
|
||||
ptr = (predicate *)YAP_IntOfTerm(YAP_HeadOfTerm(t1));
|
||||
query[y] = ptr->name;
|
||||
itr = ptr->address_host_table;
|
||||
x = 2;
|
||||
while(itr[x] != 0)
|
||||
x++;
|
||||
query[y+1] = itr[x+1];
|
||||
t1 = YAP_TailOfTerm(t1);
|
||||
y+=2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = (predicate *)YAP_IntOfTerm(t1);
|
||||
query[y] = ptr->name;
|
||||
itr = ptr->address_host_table;
|
||||
x = 2;
|
||||
while(itr[x] != 0)
|
||||
x++;
|
||||
query[y+1] = itr[x+1];
|
||||
y += 2;
|
||||
}
|
||||
query[y] = -1;
|
||||
query[y+1] = -1;
|
||||
*res = query;
|
||||
}
|
||||
|
||||
static int
cuda_eval( void )
{
  /* cuda_eval/3: evaluate the loaded program for the query in ARG1 and, in
     the plain-Datalog build, unify ARG2 with the list of answer tuples.
     ARG3 tells the engine whether to run the final duplicate-removal pass. */
  int32_t *mat;

#if defined(DATALOG) || defined(TUFFY)
  int32_t *query = NULL;
  setQuery(YAP_ARG1, &query);
#endif
  /* NOTE(review): `query` is declared only under DATALOG/TUFFY but used
     unconditionally below — confirm ROCKIT builds also define one of those
     macros. It is also never freed (setQuery mallocs it). */

  int32_t finalDR = YAP_IntOfTerm(YAP_ARG3);
  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, names, finalDR);

#ifdef TUFFY
  cf = 0;
#endif
#ifdef ROCKIT
  if(cf > 0)
    cf *= -1;
#endif
#if defined(TUFFY) || defined(ROCKIT)
  /* MLN back ends consume results through the database: reset per-run
     counters and report failure to Prolog. */
  cr = 0;
  names[0] = '\0';
  return FALSE;
#else
  int32_t i;
  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
  int32_t ncols = ptr->num_columns;
  YAP_Term out = YAP_TermNil();
  YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
  YAP_Term vec[256];   /* one cell per column; assumes ncols <= 256 */

  YAP_Atom at;

  if (n < 0)
    return FALSE;
  /* Build the answer list back-to-front (mat row (n-1)-i) so the Prolog list
     comes out in engine order, printing each tuple on stdout as we go. */
  for (i=0; i<n; i++) {
    int32_t ni = ((n-1)-i)*ncols, j;

    printf("%s(", YAP_AtomName(YAP_IntToAtom(ptr->name)));

    for (j=0; j<ncols; j++) {
      vec[j] = YAP_MkIntTerm(mat[ni+j]);

      /* values that map back to an atom are printed symbolically */
      at = YAP_IntToAtom(mat[ni+j]);
      if(at != NULL)
        printf("%s", YAP_AtomName(at));
      else
        printf("%d", mat[ni+j]);
      if(j < (ncols - 1))
        printf(",");

    }
    out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);

    printf(")\n");

  }
  /* mat is only valid when the engine produced rows */
  if (n > 0)
    free( mat );
  return YAP_Unify(YAP_ARG2, out);
#endif
}
|
||||
|
||||
static int
cuda_coverage( void )
{
  /* cuda_coverage/4: evaluate the query in ARG1, whose n answers are
     (type, value) pairs grouped by type, and unify ARG3/ARG4 with the number
     of tuples whose type matches / does not match the atom in ARG2
     (positive vs. negative examples, ILP-style). */
  int32_t *mat;

#if defined(DATALOG) || defined(TUFFY)
  int32_t *query = NULL;
  setQuery(YAP_ARG1, &query);
#endif

  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
  int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));  /* "positive" tag */
  int32_t i = n/2, min = 0, max = n-1;
  int32_t t0, t1;

  if (n < 0)
    return FALSE;
  if (n == 0) {
    /* no answers: zero coverage on both sides */
    return YAP_Unify(YAP_ARG4, YAP_MkIntTerm(0)) &&
      YAP_Unify(YAP_ARG3, YAP_MkIntTerm(0));
  }
  /* each tuple occupies two ints: mat[2k] = type, mat[2k+1] = value */
  t0 = mat[0], t1 = mat[(n-1)*2];
  if (t0 == t1) { /* all sametype */
    free( mat );
    /* all pos */
    if (t0 == post)
      return YAP_Unify(YAP_ARG3, YAP_MkIntTerm(n)) &&
        YAP_Unify(YAP_ARG4, YAP_MkIntTerm(0));
    /* all neg */
    return YAP_Unify(YAP_ARG4, YAP_MkIntTerm(n)) &&
      YAP_Unify(YAP_ARG3, YAP_MkIntTerm(0));
  }
  /* Binary search for the boundary index `max` where the type flips from t0
     to t1; assumes the pairs are sorted/grouped by type. */
  do {
    i = (min+max)/2;
    if (i == min) i++;
    if (mat[i*2] == t0) {
      min = i;
    } else {
      max = i;
    }
    if (min+1 == max) {
      free( mat );
      /* `max` tuples carry t0, the remaining n-max carry t1 */
      if (t0 == post)
        return YAP_Unify(YAP_ARG3, YAP_MkIntTerm(max)) &&
          YAP_Unify(YAP_ARG4, YAP_MkIntTerm(n-max));
      /* all neg */
      return YAP_Unify(YAP_ARG4, YAP_MkIntTerm(max)) &&
        YAP_Unify(YAP_ARG3, YAP_MkIntTerm(n-max));
    }
  } while ( TRUE );
}
|
||||
|
||||
static int cuda_count( void )
|
||||
{
|
||||
int32_t *mat;
|
||||
|
||||
#if defined(DATALOG) || defined(TUFFY)
|
||||
int32_t *query = NULL;
|
||||
setQuery(YAP_ARG1, &query);
|
||||
#endif
|
||||
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
|
||||
|
||||
if (n < 0)
|
||||
return FALSE;
|
||||
free( mat );
|
||||
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
|
||||
}
|
||||
|
||||
static int cuda_statistics( void )
{
  /* cuda_statistics/0: ask the engine to report its runtime statistics;
     always succeeds. */
  Cuda_Statistics();
  return TRUE;
}
|
||||
|
||||
static int first_time = TRUE;
|
||||
|
||||
void
|
||||
init_cuda(void)
|
||||
{
|
||||
if (first_time) Cuda_Initialize();
|
||||
first_time = FALSE;
|
||||
|
||||
AtomEq = YAP_LookupAtom("=");
|
||||
AtomGt = YAP_LookupAtom(">");
|
||||
AtomLt = YAP_LookupAtom("<");
|
||||
AtomGe = YAP_LookupAtom(">=");
|
||||
AtomLe = YAP_LookupAtom("=<");
|
||||
AtomDf = YAP_LookupAtom("\\=");
|
||||
AtomNt = YAP_LookupAtom("not");
|
||||
YAP_UserCPredicate("load_facts", load_facts, 4);
|
||||
YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
|
||||
YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
|
||||
YAP_UserCPredicate("load_rule", load_rule, 4);
|
||||
YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
|
||||
YAP_UserCPredicate("cuda_eval", cuda_eval, 3);
|
||||
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
|
||||
YAP_UserCPredicate("cuda_count", cuda_count, 2);
|
||||
YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
|
||||
|
||||
#ifdef ROCKIT
|
||||
YAP_UserCPredicate("cuda_init_query", cuda_init_query, 1);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
603
packages/cuda/old/dbio.cu
Normal file
603
packages/cuda/old/dbio.cu
Normal file
@ -0,0 +1,603 @@
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <stdio.h>
|
||||
#include "memory.h"
|
||||
#include "union2.h"
|
||||
#include "dbio.h"
|
||||
|
||||
#ifdef DATALOG
|
||||
//template<class InputIterator>
|
||||
//void datalogWrite(int query, InputIterator rul_str, InputIterator fin, int finalDR, int **result)
|
||||
void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, int finalDR, int **result)
{
	/* Fetch the final result table of `query` from the engine into host
	   memory at *result. cargafinal()'s row count is signed: positive means
	   dop1 is a device pointer (copied back with cudaMemcpy), negative means
	   the data already lives in host memory. When finalDR is set, a final
	   duplicate-elimination pass (unir) is applied first.
	   NOTE(review): *result and the caller-visible row count are untouched
	   when res_rows == 0; cudaMemcpy/cudaFree return codes are unchecked. */
	rulenode tmprule;
	vector<rulenode>::iterator qposr;
	int *dop1, *hres;
	int cols1, res_rows, tipo;   /* tipo = table size in bytes */
	tmprule.name = query;
	/* rules are kept sorted by name (comparer), so binary search works */
	qposr = lower_bound(rul_str, fin, tmprule, comparer);
	cols1 = qposr->num_columns;
	res_rows = cargafinal(query, cols1, &dop1);

	if(res_rows != 0)
	{
		if(res_rows > 0)
		{
			/* result on device: optionally dedupe, then copy to host */
			if(finalDR)
				res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
			tipo = res_rows * cols1 * sizeof(int);
			hres = (int *)malloc(tipo);
			cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
			cudaFree(dop1);
			*result = hres;
		}
		else
		{
			res_rows *= -1;
			if(finalDR)
			{
				/* result already on host, but unir() works on device memory:
				   round-trip it through a temporary device buffer */
				int *dop2;
				tipo = res_rows * cols1 * sizeof(int);
				reservar(&dop2, tipo);
				cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice);
				free(dop1);
				res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
				tipo = res_rows * cols1 * sizeof(int);
				hres = (int *)malloc(tipo);
				cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost);
				cudaFree(dop2);
				*result = hres;
			}
			else
				*result = dop1;
		}
	}
}
|
||||
#endif
|
||||
|
||||
#ifdef TUFFY
|
||||
void postgresRead(PGconn **ret, vector<gpunode> *L, int *inpquery, char *names, int finalDR)
{
	/* Tuffy back end: load predicate tables from PostgreSQL into the
	   host-side gpunode vector L. `names` is the space-separated table-name
	   list (consumed destructively by strtok). Tables whose name starts with
	   'c' are clause tables; others are atom tables, read together with the
	   following entry of L. Returns the open connection through *ret.
	   NOTE(review): sprintf(sel, "%s from %s", sel, tok) uses `sel` as both
	   source and destination — undefined behavior per the C standard; build
	   into a second buffer instead.
	   NOTE(review): `qrs` is allocated under finalDR but never returned or
	   freed; PQexec results are not checked for errors. */
	PGresult *pgr;
	int x, y;
	int *mat, *mat2;
	char *tok, sel[1024], **qrs;
	int w, z = 0, numt, numc, numc2, start = 0, start2, val;
	PGconn *conn = PQconnectdb("host=localhost port=5432 dbname = prueba user=tuffer password=root");
	if(PQstatus(conn) != CONNECTION_OK)
	{
		fprintf(stderr, "Connection to database failed: %s", PQerrorMessage(conn));
		exit(1);
	}

	/* switch to the most recently created schema */
	pgr = PQexec(conn, "Select nspname from pg_catalog.pg_namespace where oid = (select max(oid) from pg_catalog.pg_namespace)");
	sprintf(sel, "SET search_path = %s", PQgetvalue(pgr, 0, 0));
	PQclear(pgr);
	PQexec(conn, sel);
	tok = strtok(names, " ");
	if(finalDR)
	{
		/* final pass: reload tables and remember which table backs each
		   query predicate listed in inpquery */
		qrs = (char **)malloc(100 * sizeof(char *));
		while(tok != NULL)
		{
			/* empty select just to obtain the column names */
			sprintf(sel, "Select * from %s limit 0", tok);
			pgr = PQexec(conn, sel);
			numc = L->at(z).num_columns;
			if(tok[0] == 'c')
			{
				sprintf(sel, "Select ");
				numt = numc + 1;
				for(x = 1; x < numt; x++)
				{
					strcat(sel, PQfname(pgr, x));
					strcat(sel, ", ");
				}
				sel[strlen(sel)-2] = '\0';
				sprintf(sel, "%s from %s", sel, tok);
			}
			else
			{
				/* atom tables: skip the internal columns 2..7 */
				sprintf(sel, "Select id, Club, ");
				numt = numc + 6;
				for(x = 8; x < numt; x++)
				{
					strcat(sel, PQfname(pgr, x));
					strcat(sel, ", ");
				}
				sel[strlen(sel)-2] = '\0';
				sprintf(sel, "%s from %s", sel, tok);
			}
			PQclear(pgr);
			pgr = PQexec(conn, sel);
			numt = PQntuples(pgr);
			mat = (int *)malloc(numt * numc * sizeof(int));
			if(tok[0] == 'c')
			{
				/* clause table: straight row-major copy */
				for(x = 0; x < numt; x++)
				{
					start = x * numc;
					for(y = 0; y < numc; y++)
						mat[start + y] = atoi(PQgetvalue(pgr, x, y));
				}
			}
			else
			{
				/* atom table: split rows between this node (mat) and the
				   companion node (mat2) based on the Club value in col 1 */
				numc2 = numc - 2;
				mat2 = (int *)malloc(numt * numc2 * sizeof(int));
				start = 0;
				start2 = 0;
				for(x = 0; x < numt; x++)
				{
					w = atoi(PQgetvalue(pgr, x, 1));
					if(w < 2)
					{
						mat[start] = atoi(PQgetvalue(pgr, x, 0));
						start++;
						mat[start] = w;
						start++;
						if(w > 0)
						{
							/* goes to both tables */
							for(y = 2; y < numc; y++)
							{
								val = atoi(PQgetvalue(pgr, x, y));
								mat[start] = val;
								mat2[start2] = val;
								start++;
								start2++;
							}
						}
						else
						{
							for(y = 2; y < numc; y++)
							{
								val = atoi(PQgetvalue(pgr, x, y));
								mat[start] = val;
								start++;
							}
						}
					}
					else
					{
						/* companion table only */
						for(y = 2; y < numc; y++)
						{
							val = atoi(PQgetvalue(pgr, x, y));
							mat2[start2] = val;
							start2++;
						}
					}
				}
				L->at(z+1).address_host_table = mat2;
				L->at(z+1).num_rows = start2 / numc2;
			}
			L->at(z).address_host_table = mat;
			L->at(z).num_rows = start / numc;
			PQclear(pgr);

			/* remember the backing table name for each query predicate */
			x = 1;
			while(inpquery[x] != -1)
			{
				if(L->at(z).name == inpquery[x])
				{
					numt = (strlen(tok) + 1) * sizeof(char);
					qrs[x] = (char *)malloc(numt);
					memcpy(qrs[x], tok, numt);
				}
				x += 2;
			}
			if(tok[0] == 'c')
			{
				tok = strtok(NULL, " ");
				z++;
			}
			else
			{
				/* atom tables consume two entries of L but one name pair */
				strtok(NULL, " ");
				tok = strtok(NULL, " ");
				z += 2;
			}
		}
	}
	else
	{
		/* initial load: also read weights / truth values from column 0 */
		while(tok != NULL)
		{
			sprintf(sel, "Select * from %s limit 0", tok);
			pgr = PQexec(conn, sel);
			numc = L->at(z).num_columns;
			if(tok[0] == 'c')
			{
				sprintf(sel, "Select weight, myid, ");
				start = 1;
				numt = numc + 1;
			}
			else
			{
				sprintf(sel, "Select truth, Club, atomID, ");
				start = 8;
				numt = numc + 5;
			}
			for(x = start; x < numt; x++)
			{
				strcat(sel, PQfname(pgr, x));
				strcat(sel, ", ");
			}
			sel[strlen(sel)-2] = '\0';
			sprintf(sel, "%s from %s", sel, tok);
			PQclear(pgr);
			pgr = PQexec(conn, sel);
			numt = PQntuples(pgr);
			mat = (int *)malloc(numt * numc * sizeof(int));
			L->at(z).weight = (double *)malloc(numt * sizeof(double));
			L->at(z).num_rows = numt;

			/* columns 1..numc-1 copied verbatim; column 0 handled below */
			for(x = 0; x < numt; x++)
			{
				start = x * numc;
				for(y = 1; y < numc; y++)
					mat[start + y] = atoi(PQgetvalue(pgr, x, y));
			}

			numt *= numc;
			double flo;
			if(tok[0] == 'c')
			{
				/* clause table: store the weight and encode its sign in the
				   first cell as a 1-based signed row id */
				for(x = 0, y = 0; x < numt; x+=numc, y++)
				{
					flo = atof(PQgetvalue(pgr, y, 0));
					L->at(z).weight[y] = flo;
					if(flo > 0)
						mat[x] = y + 1;
					else
						mat[x] = -y - 1;
				}
			}
			else
			{
				/* atom table: truth value 't' -> 2, otherwise 1 */
				for(x = 0, y = 0; x < numt; x+=numc, y++)
				{
					if(PQgetvalue(pgr, y, 0)[0] == 't')
						mat[x] = 2;
					else
						mat[x] = 1;
				}
			}
			L->at(z).address_host_table = mat;
			numc = (strlen(tok) + 1) * sizeof(char);
			L->at(z).predname = (char *)malloc(numc);
			memcpy(L->at(z).predname, tok, numc);
			PQclear(pgr);
			tok = strtok(NULL, " ");
			z++;
		}
	}
	*ret = conn;
}
|
||||
|
||||
void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, PGconn *conn, int finalDR)
{
	/* Tuffy back end: write rule results back to PostgreSQL. With finalDR
	   set, dump the atoms of each query predicate in inpquery into
	   mln0_atoms via a CSV COPY; otherwise dump every rule's tuples into its
	   own table. Closes `conn`, and on the final pass releases all GPU
	   memory held by the memory manager.
	   NOTE(review): DB and CUDA call results are unchecked throughout. */
	char sel[1024];
	double *matw = NULL;
	int qname, cols1, res_rows, tipo, *dop1;
	int x, w, z, y, *hres;
	rulenode tmprule;
	vector<rulenode>::iterator qposr;
	if(finalDR)
	{
		char file[] = "/dev/shm/mln0_atoms.csv";
		z = 0;
		int seqid = 1;   /* running atom id across all query predicates */
		FILE *fp;
		fp = fopen(file, "w");
		if(fp == NULL)
		{
			/* /dev/shm unavailable: fall back to an on-disk temp file */
			cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
			sprintf(file, "./temp/mln0_atoms.csv");
			fp = fopen(file, "w");
			if(fp == NULL)
			{
				cerr << "Failed to create main memory temporary file" << endl;
				exit(1);
			}
		}
		/* inpquery holds (name, tag) pairs terminated by -1 */
		while((qname = inpquery[z]) != -1)
		{
			tmprule.name = qname;
			qposr = lower_bound(rul_str, fin, tmprule, comparer);
			cols1 = qposr->num_columns;
			res_rows = cargafinal(qname, cols1, &dop1);

			if(res_rows != 0)
			{
				if(res_rows < 0)
					res_rows = unir(dop1, -res_rows, cols1, &dop1, 0); /*duplicate elimination on result*/
				else
					res_rows = unir(dop1, res_rows, cols1, &dop1, finalDR);

				tipo = res_rows * cols1 * sizeof(int);
				hres = (int *)malloc(tipo);
				cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
				cudaFree(dop1);
				w = z + 1;   /* NOTE(review): dead store — overwritten by the loop init below */

				/* the predicate id is the third '_'-separated token of the rulename */
				strtok(qposr->rulename, "_");
				strtok(NULL, "_");
				int prid = atoi(strtok(NULL, "_"));

				/* each tuple is a (value, truth) pair */
				for(x = 0, w = 0; x < res_rows; x++, w+=2)
				{
					if(hres[w+1])
						fprintf(fp, "%d,%d,%d,true\n", seqid, hres[w], prid);
					else
						fprintf(fp, "%d,%d,%d,false\n", seqid, hres[w], prid);
					seqid++;
				}
				free(hres);
			}
			z += 2;
		}
		fclose(fp);
		sprintf(sel, "Copy mln0_atoms(atomid,tupleID,predID,isquery) from '%s' CSV", file);
		PQexec(conn, sel);
	}
	else
	{
		while(rul_str != fin)
		{
			cols1 = rul_str->num_columns;
			res_rows = cargafinal(rul_str->name, cols1, &dop1);
			if(res_rows == 0)
			{
				rul_str++;
				continue;
			}
			/* sign of res_rows encodes device vs. host memory elsewhere,
			   but here the data is always copied from the device */
			res_rows = abs(res_rows);
			tipo = res_rows * cols1 * sizeof(int);
			hres = (int *)malloc(tipo);
			cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
			cudaFree(dop1);

			char file[] = "/dev/shm/buffer.csv";
			FILE *fp;
			fp = fopen(file, "w");
			if(fp == NULL)
			{
				cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
				sprintf(file, "./temp/buffer.csv");
				fp = fopen(file, "w");
				if(fp == NULL)
				{
					cerr << "Failed to create main memory temporary file" << endl;
					exit(1);
				}
			}

			if(rul_str->rulename[0] == 'z')
			{
				/* 'z'-prefixed rules: last three columns are
				   (id, weight-index, extra); look the weight table up by the
				   predicate whose name follows the 'z' */
				char *name = rul_str->rulename + 1;
				for(x = 0; x < ninpf; x++)
				{
					if(strncmp(L->at(x).predname, name, strlen(name)) == 0)
					{
						matw = L->at(x).weight;
						break;
					}
				}

				cols1 -= 3;
				for(x = 0, z = 0; x < res_rows; x++, z+=3)
				{
					for(y = 0; y < cols1; y++, z++)
						fprintf(fp, "%d,", hres[z]);
					fprintf(fp, "%d,%lf,%d\n", hres[z], matw[abs(hres[z+1])-1], hres[z+2]);
				}
				fclose(fp);
				sprintf(sel, "Copy %s from '%s' CSV", name, file);
				PQexec(conn, sel);
			}
			else
			{
				cols1--;
				for(x = 0, z = 0; x < res_rows; x++, z++)
				{
					for(y = 0; y < cols1; y++, z++)
						fprintf(fp, "%d,", hres[z]);
					fprintf(fp, "%d\n", hres[z]);
				}
				fclose(fp);
				sprintf(sel, "Copy %s from '%s' CSV", rul_str->rulename, file);
				PQexec(conn, sel);
			}
			free(hres);
			rul_str++;
		}
	}
	PQfinish(conn);
	if(finalDR)
		clear_memory_all();
}
|
||||
#endif
|
||||
|
||||
#ifdef ROCKIT
|
||||
void mysqlRead(MYSQL **ret, int *qrs, vector<gpunode> *L, int ninpf, char *names, int finalDR)
{
	/* RockIt back end: (re)load predicate tables from MySQL into L.
	   With finalDR set, only refresh the query predicates (listed in `qrs`,
	   0-terminated) whose row count changed, dropping their GPU copy via
	   liberar(). Otherwise perform the initial load of every table named in
	   `names` (consumed destructively by strtok); tables named f<digit>...
	   skip their first column. Returns the open connection through *ret.
	   NOTE(review): mysql_real_connect/mysql_query results are unchecked and
	   credentials are hard-coded. */
	char *tok, sel[1024];
	int w, x, y, z = 0, numt, numc;
	int *mat;
	MYSQL *con = mysql_init(NULL);
	if(con == NULL)
	{
		fprintf(stderr, "mysql_init() failed\n");
		exit(1);
	}
	mysql_options(con, MYSQL_OPT_LOCAL_INFILE, NULL);
	mysql_real_connect(con, "localhost", "root", "root", "rockit", 0, NULL, 0);
	if(finalDR)
	{
		y = 0;
		while(qrs[y] != 0)
		{
			for(z = 0; z < ninpf; z++)
			{
				if(qrs[y] == L->at(z).name)
				{
					/* compare the current DB row count with the cached one */
					MYSQL_ROW row;
					sprintf(sel, "Select count(*) from %s", L->at(z).predname);
					mysql_query(con, sel);
					MYSQL_RES *result = mysql_store_result(con);
					row = mysql_fetch_row(result);
					numt = atoi(row[0]);
					mysql_free_result(result);

					if(numt != L->at(z).num_rows)
					{
						/* table changed: invalidate the GPU copy and reload */
						liberar(L->at(z).name);
						numc = L->at(z).num_columns;
						sprintf(sel, "Select * from %s", L->at(z).predname);
						mysql_query(con, sel);
						MYSQL_RES *result = mysql_store_result(con);
						mat = (int *)malloc(numt * numc * sizeof(int));
						w = 0;
						while ((row = mysql_fetch_row(result)))
						{
							for(x = 0; x < numc; x++, w++)
								mat[w] = atoi(row[x]);
						}

						mysql_free_result(result);
						if(L->at(z).address_host_table != NULL)
							free(L->at(z).address_host_table);
						L->at(z).address_host_table = mat;
						L->at(z).num_rows = numt;
					}
				}
			}
			y++;
		}
	}
	else
	{
		/* initial load of every table in `names` */
		tok = strtok(names, " ");
		while(tok != NULL)
		{
			numc = L->at(z).num_columns;
			sprintf(sel, "Select * from %s", tok);
			mysql_query(con, sel);
			MYSQL_RES *result = mysql_store_result(con);
			numt = mysql_num_rows(result);

			MYSQL_ROW row;
			mat = (int *)malloc(numt * numc * sizeof(int));
			w = 0;
			if(tok[0] == 'f' && tok[1] >= '0' && tok[1] <= '9')
			{
				/* f<digit> tables: skip column 0, take the next numc columns */
				while ((row = mysql_fetch_row(result)))
				{
					for(x = 1; x <= numc; x++, w++)
						mat[w] = atoi(row[x]);
				}
			}
			else
			{
				while ((row = mysql_fetch_row(result)))
				{
					for(x = 0; x < numc; x++, w++)
						mat[w] = atoi(row[x]);
				}
			}
			mysql_free_result(result);
			L->at(z).address_host_table = mat;
			L->at(z).num_rows = numt;

			/* cache the table name so the finalDR pass can requery it */
			numc = (strlen(tok) + 1) * sizeof(char);
			L->at(z).predname = (char *)malloc(numc);
			strcpy(L->at(z).predname, tok);
			tok = strtok(NULL, " ");
			z++;
		}
	}
	*ret = con;
}
|
||||
|
||||
void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, MYSQL *con)
{
	/* RockIt back end: dump every rule result into an in-memory MySQL table
	   named after the first '_'-separated token of the rulename, via a TSV
	   file and LOAD DATA LOCAL INFILE. The rulename also encodes the weight:
	   <id>_<sign>_<int>_<frac>. cargafinal()'s signed row count selects
	   device memory (positive: copied back, and for f<digit> fact inputs
	   accumulated on the GPU via sumar) versus host memory (negative).
	   NOTE(review): hres is never freed and dop1 is not cudaFree'd on the
	   positive path; if both fopen attempts fail, fp stays NULL and the
	   fprintf calls below crash. */
	int x, y, z, cols1, cols2, res_rows, tipo;
	int *hres, *dop1;
	char *id, *sign, *q1, *q2;
	char sel[1024], weight[1024];
	gpunode tmpfact;
	while(rul_str != fin)
	{
		cols1 = rul_str->num_columns;
		res_rows = cargafinal(rul_str->name, cols1, &dop1);
		/* create (or reuse) and empty the destination table */
		id = strtok(rul_str->rulename, "_");
		sprintf(sel, "create table if not exists %s(weight double, ", id);
		for(x = 0; x < cols1; x++)
		{
			sprintf(weight, "a%d char(10), ", x);
			strcat(sel, weight);
		}
		sel[strlen(sel)-2] = ')';
		strcat(sel, "ENGINE = MEMORY DEFAULT CHARSET=latin1");
		mysql_query(con, sel);
		sprintf(sel, "truncate %s", id);
		mysql_query(con, sel);

		if(res_rows == 0)
		{
			rul_str++;
			continue;
		}

		if(res_rows > 0)
		{
			/* device result: the last referenced predicate (negative entry in
			   referencias) identifies the backing fact node */
			tmpfact = L->at(-rul_str->referencias[rul_str->num_rows - 2] - 1);
			sign = tmpfact.predname;
			tipo = res_rows * cols1 * sizeof(int);
			hres = (int *)malloc(tipo);
			cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
			if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
				sumar(tmpfact.name, dop1, cols1, res_rows);
		}
		else
		{
			/* negative count: data already in host memory */
			hres = dop1;
			res_rows = -res_rows;
		}

		/* continue tokenizing rulename: sign flag + weight digits */
		sign = strtok(NULL, "_");
		q1 = strtok(NULL, "_");
		q2 = strtok(NULL, "_");
		if(sign[0] == '0')
			sprintf(weight, "%s.%s", q1, q2);
		else
			sprintf(weight, "-%s.%s", q1, q2);

		FILE *fp;
		char file[512];
		sprintf(file, "/dev/shm/%s.tsv", id);
		fp = fopen(file, "w");
		if(fp == NULL)
		{
			cerr << "Failed to create main memory temporary file, attempting to use hardrive" << endl;
			sprintf(file, "./temp/%s.tsv", id);
			fp = fopen(file, "w");
		}

		/* one TSV line per tuple: weight followed by cols1 values */
		cols2 = cols1 - 1;
		for(x = 0, z = 0; x < res_rows; x++, z++)
		{
			fprintf(fp, "%s\t", weight);
			for(y = 0; y < cols2; y++, z++)
				fprintf(fp, "%d\t", hres[z]);
			fprintf(fp, "%d\n", hres[z]);
		}
		fclose(fp);

		sprintf(sel, "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s", file, id);
		mysql_query(con, sel);
		rul_str++;
	}
	mysql_close(con);
}
|
||||
#endif
|
||||
|
28
packages/cuda/old/dbio.h
Normal file
28
packages/cuda/old/dbio.h
Normal file
@ -0,0 +1,28 @@
|
||||
#ifndef _DBIO_H_
|
||||
#define _DBIO_H_
|
||||
|
||||
#include "pred.h"
|
||||
#ifdef TUFFY
|
||||
#include <libpq-fe.h>
|
||||
#endif
|
||||
#ifdef ROCKIT
|
||||
#include <mysql/mysql.h>
|
||||
#endif
|
||||
#include <vector>
|
||||
#include "lista.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifdef TUFFY
|
||||
void postgresRead(PGconn **ret, vector<gpunode> *L, int *inpquery, char *names, int finalDR);
|
||||
void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, PGconn *conn, int finalDR);
|
||||
#endif
|
||||
#ifdef ROCKIT
|
||||
void mysqlRead(MYSQL **ret, int *qrs, vector<gpunode> *L, int ninpf, char *names, int finalDR);
|
||||
void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, MYSQL *con);
|
||||
#endif
|
||||
#ifdef DATALOG
|
||||
void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, int finalDR, int **result);
|
||||
#endif
|
||||
|
||||
#endif
|
1337
packages/cuda/old/lista.cu
Executable file
1337
packages/cuda/old/lista.cu
Executable file
File diff suppressed because it is too large
Load Diff
44
packages/cuda/old/lista.h
Executable file
44
packages/cuda/old/lista.h
Executable file
@ -0,0 +1,44 @@
|
||||
#ifndef _LISTA_H_
|
||||
#define _LISTA_H_
|
||||
|
||||
typedef struct Node{
	int name;         /* predicate identifier */
	int *dev_address; /* table base address: device pointer while listed in
	                     GPUmem, host pointer after offload to CPUmem
	                     (see limpiar()) */
	int rows;         /* number of tuples in the block */
	int size;         /* allocation size in bytes */
	int iteration;    /* fixpoint iteration that produced the block; both
	                     lists are kept sorted by it (compareiteration) */
	int isrule;       /* 1 = rule result, 0 = fact table */
}memnode;
|
||||
|
||||
/* Per-rule descriptor. Most fields are filled and consumed by the rule
   analyser/evaluator (lista.cu, not visible here); only the fields this
   chunk demonstrably touches are documented as fact, the rest are left for
   the engine's documentation. */
typedef struct auxiliar{
	int name;                /* head predicate identifier; rules are sorted by
	                            it descending (see comparer) */
	int num_rows;
	int num_columns;         /* arity of the rule's result table */
	int *address_host_table;
	int *rule_names;
	int *referencias;        /* referenced predicates; negative entries index
	                            fact nodes (see mysqlWrite) — confirm */
	int **select;
	int *numsel;
	int **project;
	int2 *projpos;
	int **selfjoin;
	int *numselfj;
	int **wherejoin;
	int *numjoin;
	int totalpreds;
	int **preds;
	int2 *numpreds;
	int *negatives;
	char *rulename;          /* textual rule name; the DB back ends parse
	                            metadata out of its '_'-separated tokens */
	int gen_act;
	int gen_ant;
}rulenode;
|
||||
|
||||
/* Bookkeeping for a predicate whose fixpoint evaluation completed.
   Field semantics are defined by the engine in lista.cu (not visible in
   this chunk) — verify there before relying on them. */
typedef struct completed{
	int name;      /* predicate identifier */
	int numrules;
	int reduce;
	int reset;
}compnode;
|
||||
|
||||
#endif
|
575
packages/cuda/old/memory.cu
Executable file
575
packages/cuda/old/memory.cu
Executable file
@ -0,0 +1,575 @@
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
#include <thrust/device_vector.h>
|
||||
#include "lista.h"
|
||||
#include "memory.h"
|
||||
#include "pred.h"
|
||||
|
||||
#define MAX_REC 200
|
||||
#define MAX_FIX_POINTS 100
|
||||
|
||||
memnode temp_storage[MAX_REC];
|
||||
/*List used to store information (address, size, etc.) about facts and rule results loaded in the GPU*/
|
||||
list<memnode> GPUmem;
|
||||
/*List used to store information about rule results offloaded from the GPU to the CPU*/
|
||||
list<memnode> CPUmem;
|
||||
|
||||
/*Auxiliary function to sort rule list*/
|
||||
bool comparer(const rulenode &r1, const rulenode &r2)
|
||||
{
|
||||
return (r1.name > r2.name);
|
||||
}
|
||||
|
||||
/*Used in search functions to compare iterations*/
|
||||
bool compareiteration(const memnode &r1, const memnode &r2)
|
||||
{
|
||||
return (r1.iteration < r2.iteration);
|
||||
}
|
||||
|
||||
/*Used in search functions to compare names*/
|
||||
bool comparename(const memnode &r1, const memnode &r2)
|
||||
{
|
||||
return (r1.name > r2.name);
|
||||
}
|
||||
|
||||
/*Linear search of 'name' fact*/
|
||||
template<class InputIterator>
InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
{
	/* Linear scan for the fact table (isrule == 0) named `name`;
	   returns `last` when no such entry exists. */
	for(; first != last; ++first)
	{
		if(first->isrule == 0 && first->name == name)
			return first;
	}
	return last;
}
|
||||
|
||||
/*Finds all results of rule 'name' in iteration 'itr' in both CPU and GPU memory. Every result found is removed from its respective list*/
|
||||
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
{
	/* Collects into the global temp_storage[] every result block of rule
	   `name` produced in iteration `itr`, removing them from GPUmem first
	   and then CPUmem. Inserts a placeholder entry in GPUmem (where the
	   merged result will presumably live — confirm against the caller in
	   lista.cu) and returns an iterator to it.
	   Outputs: *totalrows = summed row count of all collected blocks,
	   *gpunum = number of GPU blocks (temp_storage[0..*gpunum-1]),
	   *cpunum = index one past the last CPU block.
	   NOTE(review): no bound check of the block count against MAX_REC. */
	int x = 0, sum = 0;
	memnode temp;
	list<memnode>::iterator i;
	temp.iteration = itr;
	/* lists are sorted by iteration, so equal_range narrows the scan */
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name && rec.first->isrule == 1)
		{
			temp_storage[x] = *rec.first;
			rec.first = GPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	*gpunum = x;
	temp.name = name;
	temp.isrule = 1;
	/* placeholder: rows/size/dev_address are left unset here */
	i = GPUmem.insert(rec.first, temp);
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name && rec.first->isrule == 1)
		{
			temp_storage[x] = *rec.first;
			rec.first = CPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	*totalrows = sum;
	*cpunum = x;
	return i;
}
|
||||
|
||||
list<memnode>::iterator buscarpornombrecpu(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
{
	/* Variant of buscarpornombre() that inserts the placeholder entry into
	   CPUmem instead of GPUmem, for results that are to be merged on the
	   host. Unlike the GPU variant it matches blocks by name only, without
	   requiring isrule == 1 — intentional or not, confirm in lista.cu.
	   Outputs have the same meaning as in buscarpornombre().
	   NOTE(review): no bound check of the block count against MAX_REC. */
	int x = 0, sum = 0;
	memnode temp;
	list<memnode>::iterator i;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
		{
			temp_storage[x] = *rec.first;
			rec.first = GPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}

	*gpunum = x;
	temp.name = name;
	temp.isrule = 1;
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
		{
			temp_storage[x] = *rec.first;
			rec.first = CPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	/* placeholder goes into the CPU list here */
	i = CPUmem.insert(rec.first, temp);
	*totalrows = sum;
	*cpunum = x;
	return i;
}
|
||||
|
||||
/*Removes the least recently used memory block from GPU memory, sending it to CPU memory
  if it's a rule result. If there are no used memory blocks in the GPU and we still don't
  have enough memory, the program exits with error.
  's' names the caller for the error message; 'sz' is the allocation size that triggered
  the eviction (reported on failure).*/
void limpiar(const char s[], size_t sz)
{
	list<memnode>::iterator ini;
	memnode temp;
	size_t free, total;

	if(GPUmem.size() == 0)
	{
		cudaMemGetInfo(&free,&total);
		cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
		exit(1);
	}

	/*GPUmem is kept in LRU order, so the front is the eviction victim.*/
	ini = GPUmem.begin();
	if(ini->isrule)
	{
		temp = *ini;
		temp.dev_address = (int *)malloc(ini->size);
		/*Fix: check the host allocation; the original wrote through an unchecked
		  malloc result.*/
		if(temp.dev_address == NULL)
		{
			cerr << s << ": out of host memory while evicting " << ini->size << " bytes." << endl;
			exit(1);
		}
		/*Fix: use a synchronous copy. The original issued cudaMemcpyAsync into
		  pageable memory and immediately freed the device source below; a blocking
		  copy makes the ordering explicit and safe.*/
		cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
		/*Keep CPUmem sorted by iteration when inserting the evicted block.*/
		list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
		CPUmem.insert(pos, temp);
	}
	cudaFree(ini->dev_address);
	GPUmem.erase(ini);
}
|
||||
|
||||
/*Allocs 'size' amount of bytes in GPU memory. If not enough memory is available, removes
  least recently used memory blocks until enough space is available. A zero-byte request
  yields a NULL pointer. On unrecoverable allocation failure the process exits.*/
void reservar(int **ptr, size_t size)
{
	size_t free, total;

	if (size == 0) {
		*ptr = NULL;
		return;
	}

	/*Evict until the free-memory counter says the request could fit.*/
	cudaMemGetInfo(&free, &total);
	while(free < size)
	{
		cout << "Se limpio memoria " << free << " " << total << endl;
		limpiar("not enough memory", size);
		cudaMemGetInfo(&free, &total);
	}

	/*Fix: initialize *ptr so the failure check below never reads an uninitialized
	  pointer (the original left it untouched when cudaMalloc failed with an error
	  other than cudaErrorMemoryAllocation).*/
	*ptr = NULL;
	cudaError_t err;
	/*Free memory can be fragmented, so cudaMalloc may still fail; keep evicting.
	  limpiar exits the process if nothing is left to evict, so this terminates.*/
	while((err = cudaMalloc(ptr, size)) == cudaErrorMemoryAllocation)
		limpiar("Error in memory allocation", size);
	/*Fix: treat any remaining error (not just a NULL pointer) as fatal; the original
	  silently ignored non-allocation errors. Note the inner shadowed free/total
	  declarations were removed.*/
	if (err != cudaSuccess || ! *ptr ) {
		cudaMemGetInfo( &free, &total );
		cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;
		cerr << "Exiting CUDA...." << endl;
		exit(1);
	}
}
|
||||
|
||||
/*Creates a new entry in the GPU memory list, recording where rule/fact 'name' lives on
  the device, its dimensions and the iteration that produced it. 'rule' is nonzero for
  rule results (which may be evicted to the CPU later).*/
void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{
	memnode entry;
	entry.name = name;
	entry.iteration = itr;
	entry.isrule = rule;
	entry.dev_address = ptr;
	entry.rows = rows;
	entry.size = rows * num_columns * sizeof(int);
	/*Appending keeps the list in LRU / insertion order.*/
	GPUmem.push_back(entry);
}
|
||||
|
||||
/*Creates a new entry in the CPU memory list; the mirror of registrar() for host-resident
  blocks (dev_address holds a host pointer in this list).*/
void registrarcpu(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{
	memnode entry;
	entry.name = name;
	entry.iteration = itr;
	entry.isrule = rule;
	entry.dev_address = ptr;
	entry.rows = rows;
	entry.size = rows * num_columns * sizeof(int);
	CPUmem.push_back(entry);
}
|
||||
|
||||
/*Updates the information of an element in a list: new buffer address, row count and the
  derived byte size. Works on iterators from either GPUmem or CPUmem.*/
template<class InputIterator>
void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
{
	i->dev_address = ptr;
	i->rows = rows;
	i->size = rows * num_columns * sizeof(int);
}
|
||||
|
||||
/*Count the total number of rows generated by rule 'name' in iteration 'itr', summing over
  every matching block in both the GPU and the CPU memory lists.*/
int numrows(int name, int itr)
{
	int total = 0;
	memnode key;
	key.iteration = itr;

	/*Both lists are ordered by iteration, so equal_range brackets the candidates.*/
	pair<list<memnode>::iterator, list<memnode>::iterator> range =
		equal_range(GPUmem.begin(), GPUmem.end(), key, compareiteration);
	for(list<memnode>::iterator it = range.first; it != range.second; ++it)
	{
		if(it->name == name)
			total += it->rows;
	}

	range = equal_range(CPUmem.begin(), CPUmem.end(), key, compareiteration);
	for(list<memnode>::iterator it = range.first; it != range.second; ++it)
	{
		if(it->name == name)
			total += it->rows;
	}
	return total;
}
|
||||
|
||||
extern "C" void * YAP_IntToAtom(int);
|
||||
extern "C" char * YAP_AtomName(void *);
|
||||
|
||||
/*Loads facts or rule results in GPU memory. If a fact is already in GPU memory, its pointer is simply returned. Otherwise,
memory is reserved and the fact is loaded. Rule results are loaded based on the current iteration 'itr' and both GPU and
CPU memories are searched for all instances of said results. The instances are combined into a single one in GPU memory.
Returns the number of rows made available through *ptr (0 when itr == 0 and not a fact).*/
int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int size, itrant, inc = 0;
	list<memnode>::iterator i;
	memnode fact;

	if(is_fact)
	{
		i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
		if(i != GPUmem.end())
		{
			/*Cache hit: refresh the node's iteration and move it to the back of
			  the list (most recently used position).*/
			fact = *i;
			GPUmem.erase(i);
			fact.iteration = itr;
			*ptr = fact.dev_address;
			GPUmem.push_back(fact);
			return fact.rows;
		}
		/*Cache miss: upload the fact table from host memory and register it.*/
		size = num_rows * num_columns * sizeof(int);
		reservar(&temp, size);
		cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
		registrar(name, num_columns, temp, num_rows, itr, 0);
		*ptr = temp;
		return num_rows;
	}
	if(itr > 0)
	{
		/*Rule results come from the previous iteration. buscarpornombre moves all
		  matching blocks into temp_storage and returns an iterator to a fresh
		  placeholder node; numcpu is the total block count (GPU blocks first).*/
		itrant = itr - 1;
		i = buscarpornombre(name, itrant, &totalrows, &numgpu, &numcpu);
		if((numgpu == 1) && (numcpu == 1))
		{
			/*Exactly one block and it is already on the GPU: reuse it in place.*/
			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
			*ptr = temp_storage[0].dev_address;
			return temp_storage[0].rows;
		}
		/*Otherwise concatenate every block into one new device buffer.*/
		size = totalrows * num_columns * sizeof(int);
		reservar(&temp, size);
		for(x = 0; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
			inc += temp_storage[x].size / sizeof(int);
			/*Source buffer is no longer referenced by any list node.
			  NOTE(review): freed right after an async copy on the default stream —
			  cudaFree should order after the copy, but confirm.*/
			cudaFree(temp_storage[x].dev_address);
		}
		/*x continues from numgpu: the remaining entries are host-resident.*/
		for(; x < numcpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
			inc += temp_storage[x].size / sizeof(int);
			free(temp_storage[x].dev_address);
		}
		/*Point the placeholder node (inserted by buscarpornombre) at the merged buffer.*/
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}
|
||||
|
||||
/*CPU-side counterpart of cargar: materializes all instances of rule result 'name' from
  iteration itr-1 into a single host-memory buffer. Facts are returned as-is (the host
  table pointer). Returns the row count (0 when itr == 0 and not a fact).*/
int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int size, itrant, inc = 0;
	list<memnode>::iterator i;

	if(is_fact)
	{
		/*Facts already live in host memory; no copy needed.*/
		*ptr = address_host_table;
		return num_rows;
	}
	if(itr > 0)
	{
		itrant = itr - 1;
		/*Collect all blocks of the previous iteration into temp_storage; the
		  placeholder iterator 'i' points into CPUmem. numcpu counts GPU + CPU
		  blocks cumulatively.*/
		i = buscarpornombrecpu(name, itrant, &totalrows, &numgpu, &numcpu);

		if((numgpu == 0) && (numcpu == 1))
		{
			/*Single block, already host-resident: reuse it in place.*/
			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
			*ptr = temp_storage[0].dev_address;
			return temp_storage[0].rows;
		}
		/*Merge everything into one freshly malloc'ed host buffer.
		  NOTE(review): malloc result is not checked — confirm acceptable.*/
		size = totalrows * num_columns * sizeof(int);
		temp = (int *)malloc(size);
		for(x = 0; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
			inc += temp_storage[x].size / sizeof(int);
			cudaFree(temp_storage[x].dev_address);
		}
		/*Remaining entries (from index numgpu on) are host pointers.*/
		for(; x < numcpu; x++)
		{
			memcpy(temp + inc, temp_storage[x].dev_address, temp_storage[x].size);
			inc += temp_storage[x].size / sizeof(int);
			free(temp_storage[x].dev_address);
		}
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}
|
||||
|
||||
/*Loads all results of rule 'name' from both GPU and CPU memories into the GPU.
  Return convention: 0 if nothing found; a positive row count when *ptr is a device
  pointer the caller should treat normally; a negative row count (-rows) signals the
  single-CPU-block case (and, under TUFFY, flips which single-block case is negated).
  In the multi-block path the source list nodes are NOT erased or freed here —
  NOTE(review): confirm the caller (or a later clear) releases them.*/
int cargafinal(int name, int cols, int **ptr)
{
	int *temp, *ini, cont = 0, numg = 0, numc = 0;
	memnode bus;
	bus.name = name;
	/*Re-sort both lists by name so all blocks of this rule are contiguous.*/
	GPUmem.sort(comparename);
	CPUmem.sort(comparename);
	list<memnode>::iterator endg = GPUmem.end();
	list<memnode>::iterator endc = CPUmem.end();
	list<memnode>::iterator pos = lower_bound(GPUmem.begin(), endg, bus, comparename);
	list<memnode>::iterator gpu = pos;
	/*First pass: count blocks and total rows on the GPU side.*/
	while(pos != endg && pos->name == name)
	{
		cont += pos->rows;
		numg++;
		pos++;
	}
	pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
	list<memnode>::iterator cpu = pos;
	/*...and on the CPU side.*/
	while(pos != endc && pos->name == name)
	{
		cont += pos->rows;
		numc++;
		pos++;
	}

	if(numg == 0 && numc == 0)
		return 0;
	if(numg == 1 && numc == 0)
	{
		/*Single GPU block: hand it over directly and drop it from the list.*/
		pos = gpu;
		*ptr = pos->dev_address;
		cont = pos->rows;
		GPUmem.erase(pos);
#ifdef TUFFY
		return -cont;
#else
		return cont;
#endif
	}
	if(numg == 0 && numc == 1)
	{
		/*Single CPU block: under TUFFY it is copied up to the device first;
		  otherwise the host pointer itself is returned (hence the negative count).*/
		pos = cpu;
		cont = pos->rows;
#ifdef TUFFY
		reservar(&temp, pos->size);
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
		*ptr = temp;
#else
		*ptr = pos->dev_address;
#endif
		CPUmem.erase(pos);
		return -cont;
	}

	/*General case: concatenate all GPU blocks, then all CPU blocks, into one new
	  device buffer of cont rows.*/
	reservar(&temp, cont * cols * sizeof(int));
	ini = temp;
	pos = gpu;
	while(pos != endg && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}
	pos = cpu;
	while(pos != endc && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}
	*ptr = ini;
	return cont;
}
|
||||
|
||||
/*Compares the results of the current iteration against the results of older iterations.
Used to avoid infinite computations when the result is not a single fixed-point, but an
orbit of points. Looks back up to MAX_FIX_POINTS iterations; only iterations whose row
count matches the current one are compared element-wise on the device.*/
bool generadas(int name, int filas, int cols, int itr)
{
	int r1, r2, x, fin;
	int *dop1, *dop2;

	r2 = numrows(name, itr);
	/*Clamp the look-back window to the iterations that actually exist.*/
	if(itr < MAX_FIX_POINTS)
		fin = itr;
	else
		fin = MAX_FIX_POINTS;
	for(x = 1; x <= fin; x++)
	{
		r1 = numrows(name, itr - x);
		if(r1 == r2)
		{
			/*Row counts match: load both result sets into the GPU (cargar takes
			  the previous iteration, hence the +1 offsets) and compare.*/
			r2 = cargar(name, filas, cols, 0, NULL, &dop2, itr + 1);
			thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
			r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
			thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
			/*NOTE(review): compares r1 ints, not r1 * cols — if r1 is a row
			  count this only checks the first r1 elements; confirm intended.*/
			if(thrust::equal(pt1, pt1 + r1, pt2) == true)
				return true;
		}
	}
	return false;
}
|
||||
|
||||
/*Debug helper: dumps every entry of the GPU memory list (name, iteration, isrule, rows,
  size) to stdout, bracketed by begin/end markers.*/
void mostrar_memoria()
{
	cout << "Memoria inicio GPU" << endl;
	for(list<memnode>::iterator it = GPUmem.begin(); it != GPUmem.end(); ++it)
		cout << it->name << " " << it->iteration << " " << it->isrule << " " << it->rows << " " << it->size << endl;
	cout << "Memoria fin GPU" << endl;
}
|
||||
|
||||
/*Debug helper: dumps every entry of the CPU memory list (name and iteration) to stdout,
  bracketed by begin/end markers.*/
void mostrar_memcpu()
{
	cout << "Memoria inicio CPU" << endl;
	for(list<memnode>::iterator it = CPUmem.begin(); it != CPUmem.end(); ++it)
		cout << it->name << " " << it->iteration << endl;
	cout << "Memoria fin CPU" << endl;
}
|
||||
|
||||
/*Clear all rule results from both GPU and CPU memory. GPU-side facts (isrule == 0) are
  kept; the CPU list is released entirely (host blocks in CPUmem are rule results).*/
void clear_memory()
{
	list<memnode>::iterator it = GPUmem.begin();
	while(it != GPUmem.end())
	{
		if(it->isrule)
		{
			cudaFree(it->dev_address);
			it = GPUmem.erase(it);
		}
		else
			++it;
	}
	for(it = CPUmem.begin(); it != CPUmem.end(); ++it)
		free(it->dev_address);
	CPUmem.clear();
}
|
||||
|
||||
/*Clear everything from both GPU and CPU memory: frees every tracked buffer (device and
  host) and empties both lists, facts included.*/
void clear_memory_all()
{
	for(list<memnode>::iterator it = GPUmem.begin(); it != GPUmem.end(); ++it)
		cudaFree(it->dev_address);
	GPUmem.clear();

	for(list<memnode>::iterator it = CPUmem.begin(); it != CPUmem.end(); ++it)
		free(it->dev_address);
	CPUmem.clear();
}
|
||||
|
||||
/*Remove all instances of fact 'name' from both CPU and GPU memories, releasing the
  backing buffer with the allocator matching each list (cudaFree vs free).*/
void liberar(int name)
{
	list<memnode>::iterator pos = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
	if(pos != GPUmem.end())
	{
		int *devbuf = pos->dev_address;
		GPUmem.erase(pos);
		cudaFree(devbuf);
	}

	pos = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
	if(pos != CPUmem.end())
	{
		int *hostbuf = pos->dev_address;
		CPUmem.erase(pos);
		free(hostbuf);
	}
}
|
||||
|
||||
/*Add all rows in 'dop1' to the fact 'name' by creating a new array capable of holding both.
  'dop1' is a device pointer holding rows * cols ints. Silently does nothing if the fact
  is not currently in GPU memory — NOTE(review): confirm that is the intended behavior.*/
void sumar(int name, int *dop1, int cols, int rows)
{
	list<memnode>::iterator i;
	memnode fact;
	i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
	int *res, newrows, offset;
	if(i != GPUmem.end())
	{
		fact = *i;
		newrows = rows + fact.rows;
		/*New buffer sized for old + new rows; old rows are copied first.*/
		reservar(&res, newrows * cols * sizeof(int));
		offset = fact.rows * cols;
		cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
		/*Replace the old list entry with one describing the enlarged buffer.*/
		GPUmem.erase(i);
		registrar(name, cols, res, newrows, 0, 0);
		cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
		/*cudaFree orders after the pending default-stream copies before releasing
		  the old buffer — TODO confirm this is relied upon.*/
		cudaFree(fact.dev_address);
	}
}
|
27
packages/cuda/old/memory.h
Executable file
27
packages/cuda/old/memory.h
Executable file
@ -0,0 +1,27 @@
|
||||
#ifndef _MEMORY_H_
#define _MEMORY_H_

#include <list>
#include <vector>
#include "lista.h"

using namespace std;

/*Ordering predicate for rule nodes (definition elsewhere).*/
bool comparer(const rulenode&, const rulenode&);
/*Evict the least recently used GPU block (caller tag, requested size).*/
void limpiar(const char [], size_t);
void limpiartodo(int*, int*);
/*Load a fact or rule result into GPU memory; returns its row count.*/
int cargar(int, int, int, int, int*, int**, int);
/*Host-side counterpart of cargar; merges results into CPU memory.*/
int cargarcpu(int, int, int, int, int*, int**, int);
/*Gather every result of a rule into a single GPU buffer.*/
int cargafinal(int, int, int**);
/*Allocate device memory, evicting LRU blocks on pressure.*/
void reservar(int**, size_t);
/*Register a block in the GPU memory list.*/
void registrar(int, int, int*, int, int, int);
/*Register a block in the CPU memory list.*/
void registrarcpu(int, int, int*, int, int, int);
/*Fixed-point / orbit detection across iterations.*/
bool generadas(int, int, int, int);
/*Append device rows to an existing fact.*/
void sumar(int, int*, int, int);
/*Release all instances of a fact from both memories.*/
void liberar(int);
/*Debug dumps of the GPU / CPU memory lists.*/
void mostrar_memoria(void);
void mostrar_memcpu(void);
/*Free rule results only / free everything.*/
void clear_memory(void);
void clear_memory_all(void);

#endif
|
47
packages/cuda/old/pred.h
Executable file
47
packages/cuda/old/pred.h
Executable file
@ -0,0 +1,47 @@
|
||||
#ifndef _PRED_H_
#define _PRED_H_

// #define DEBUG_MEM 1

/*Description of one predicate (fact or rule) as handed over from the Prolog side.*/
typedef struct Nodo{
	int name;               /* numeric identifier (atom encoded as int) */
	int num_rows;
	int num_columns;
	int is_fact;            /* nonzero for facts, zero for rules */
	int *address_host_table; /* host-resident tuple data (row-major ints) */
	int *negatives;
	char *predname;
	double *weight;
}gpunode;

typedef gpunode predicate;

//#define TIMER 1
#define DATALOG 1
#define NUM_T 4
#define INISIZE 1000000

#if TIMER
/*Counters and timings accumulated across Cuda_Eval calls (see Cuda_Statistics).*/
typedef struct Stats{
	size_t joins, selects, unions, builtins;
	size_t calls;
	double total_time;
	float max_time, min_time;
	float select1_time, select2_time, join_time, sort_time, union_time, pred_time;
}statinfo;

extern statinfo cuda_stats;
#endif

/*Constants used to mark comparison predicates*/
#define BPOFFSET (-6)
#define SBG_EQ (-1)
#define SBG_GT (-2)
#define SBG_LT (-3)
#define SBG_GE (-4)
#define SBG_LE (-5)
#define SBG_DF (-6)

/*Main entry point: evaluate a Datalog program on the GPU.*/
int Cuda_Eval(predicate**, int, predicate**, int, int*, int**, char*, int);
void Cuda_Statistics( void );
#endif
|
306
packages/cuda/old/selectproyect.cu
Executable file
306
packages/cuda/old/selectproyect.cu
Executable file
@ -0,0 +1,306 @@
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <stdlib.h>
|
||||
#include "memory.h"
|
||||
#include "bpreds.h"
|
||||
|
||||
/*Mark all rows that comply with the selections. 'cons' holds numc ints as
  (column, constant) pairs; a row is marked (res[row] = 1) only if every selected column
  equals its constant. Requires numc <= blockDim.x and numc * sizeof(int) dynamic shared
  memory; res must be zero-initialized by the caller.*/
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int row = blockIdx.x * blockDim.x + threadIdx.x;

	/*Stage the selection pairs in shared memory (one int per thread).*/
	if(threadIdx.x < numc)
		shared[threadIdx.x] = cons[threadIdx.x];
	__syncthreads();

	if(row >= rows)
		return;

	int base = row * cols;
	for(int k = 0; k < numc; k += 2)
	{
		/*shared[k] = column index, shared[k+1] = required constant.*/
		if(dop1[base + shared[k]] != shared[k + 1])
			return;
	}
	res[row] = 1;
}
|
||||
/*If we already have an array of marks (perhaps because the selfjoin was applied first),
  we unmark any rows that do not comply with the selections. Same pair encoding and
  shared-memory requirements as marcar2; rows already unmarked are skipped.*/
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int row = blockIdx.x * blockDim.x + threadIdx.x;

	/*Stage the selection pairs in shared memory.*/
	if(threadIdx.x < numc)
		shared[threadIdx.x] = cons[threadIdx.x];
	__syncthreads();

	if(row >= rows)
		return;
	if(res[row] == 0)
		return;

	int base = row * cols;
	for(int k = 0; k < numc; k += 2)
	{
		if(dop1[base + shared[k]] != shared[k + 1])
		{
			res[row] = 0;
			return;
		}
	}
}
|
||||
|
||||
/*Unmark all rows that do not comply with the selfjoins. 'dhead' (cont ints, staged in
  dynamic shared memory; requires cont <= blockDim.x) encodes groups of column indices:
  a group starts at shared[x] and continues while the following values are > -1;
  presumably each group is terminated by a negative sentinel — TODO confirm encoding.
  Every column in a group must hold the same value for the row to stay marked.*/
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int temp, temp2, pos, x, y;
	if(threadIdx.x < cont)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		/*Skip rows a previous filter already rejected.*/
		if(res[id] == 0)
			return;
		pos = id * cols;
		for(x = 0; x < cont; x++)
		{
			/*Group leader: the value every other column in the group must match.*/
			temp = dop1[pos+shared[x]];
			y = x + 1;
			temp2 = shared[y];
			while(temp2 > -1)
			{
				if(temp != dop1[temp2+pos])
				{
					res[id] = 0;
					return;
				}
				y++;
				temp2 = shared[y];
			}
			/*Jump past this group's sentinel (the for's x++ lands on the next
			  group leader).*/
			x = y;
		}
	}
}
||||
|
||||
/*Mark all rows that comply with the selfjoins. Same sentinel-delimited group encoding of
  'dhead' as samejoin, but used as the FIRST filter: instead of clearing marks it sets
  res[id] = 1 only for rows where every group's columns agree. res must be
  zero-initialized by the caller. Requires cont <= blockDim.x and cont * sizeof(int)
  dynamic shared memory.*/
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int temp, temp2, pos, x, y;
	if(threadIdx.x < cont)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		pos = id * cols;
		for(x = 0; x < cont; x++)
		{
			/*Group leader value; all following columns (until the negative
			  sentinel) must match it, else the row stays unmarked.*/
			temp = dop1[pos+shared[x]];
			y = x + 1;
			temp2 = shared[y];
			while(temp2 > -1)
			{
				if(temp != dop1[temp2+pos])
					return;
				y++;
				temp2 = shared[y];
			}
			/*Skip the sentinel; x++ moves to the next group leader.*/
			x = y;
		}
		res[id] = 1;
	}
}
|
||||
|
||||
/*Project all columns found in 'dhead' to a new array 'res': for each input row, the
  hsize columns listed in dhead are copied (in order) into a packed hsize-wide output
  row. Requires hsize <= blockDim.x and hsize * sizeof(int) dynamic shared memory.*/
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
{
	extern __shared__ int shared[];
	int row = blockIdx.x * blockDim.x + threadIdx.x;

	/*Stage the projection column list in shared memory.*/
	if(threadIdx.x < hsize)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();

	if(row >= rows)
		return;

	int src = row * cols;
	int dst = row * hsize;
	for(int k = 0; k < hsize; k++)
		res[dst + k] = dop1[src + shared[k]];
}
|
||||
|
||||
/*Project all columns found in 'dhead' using only the rows marked as valid (i.e. those
  that complied with selections, selfjoins, etc.). 'temp' is the inclusive prefix sum of
  the marks with a leading zero, so temp[id] is this row's output slot and
  temp[id+1] != temp[id] means the row was marked. Requires hsize <= blockDim.x and
  hsize * sizeof(int) dynamic shared memory.*/
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
{
	extern __shared__ int shared[];
	int row = blockIdx.x * blockDim.x + threadIdx.x;

	/*Stage the projection column list in shared memory.*/
	if(threadIdx.x < hsize)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();

	if(row >= rows)
		return;

	int slot = temp[row];
	if(temp[row + 1] == slot)
		return; /* row was filtered out */

	int src = row * cols;
	int dst = slot * hsize;
	for(int k = 0; k < hsize; k++)
		res[dst + k] = dop1[src + shared[k]];
}
|
||||
|
||||
/*Performs selections, selfjoins and comparison predicates when the rule has a single
  normal predicate, then projects the surviving rows. Writes the resulting device buffer
  to *ret and returns its row count (0 if no row survives; *ret untouched in that case).
  The original three near-identical branches are unified: the first active filter uses
  the marking kernel variant (marcar2 / samejoin2 / bpreds*2), later filters use the
  unmarking variants. Fixes a device-memory leak: dhead and temp were never freed on the
  num == 0 early returns.*/
int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *preds, int numpreds, int *project, int **ret, int ANDlogic)
{
	int *fres = NULL, *temp = NULL;
	int *dhead = NULL, tmplen;
	int size, size2, num;
	thrust::device_ptr<int> res;

#if TIMER
	cuda_stats.selects++;
#endif

	/*One scratch buffer big enough for the largest column list we will upload.*/
	int head_bytes = maximo(4, numselect, numselfj, numpreds, head_size) * sizeof(int);
	reservar(&dhead, head_bytes);
	int numthreads = 1024;
	int blockllen = rows / numthreads + 1;

#ifdef ROCKIT
	ANDlogic = 1;
#endif

	/*Fast path: nothing to filter, project every row.*/
	if(numselect == 0 && numselfj == 0 && numpreds == 0)
	{
		size = head_size * sizeof(int);
		reservar(&fres, rows * size);
		cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
		proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
		cudaFree(dhead);
		*ret = fres;
		return rows;
	}

	/*Mark array with one extra leading zero so the inclusive scan below doubles as
	  an exclusive scan of the marks.*/
	tmplen = rows + 1;
	size2 = tmplen * sizeof(int);
	reservar(&temp, size2);
	cudaMemset(temp, 0, size2);

	int first = 1; /* first filter marks rows; subsequent filters unmark */
	if(numselect > 0)
	{
		size = numselect * sizeof(int);
		cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
		marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
		first = 0;
	}
	if(numselfj > 0)
	{
		size = numselfj * sizeof(int);
		cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
		if(first)
			samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
		else
			samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
		first = 0;
	}
	if(numpreds > 0)
	{
		size = numpreds * sizeof(int);
		cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
		if(first)
		{
			if(ANDlogic)
				bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
			else
				bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
		}
		else
		{
			if(ANDlogic)
				bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
			else
				bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
		}
	}

	/*Prefix-sum the marks; temp[rows] is then the surviving-row count and temp
	  doubles as the compaction index for llenarproyectar.*/
	res = thrust::device_pointer_cast(temp);
	thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
	num = res[rows];
	if(num == 0)
	{
		/*Fix: the original leaked both buffers here.*/
		cudaFree(dhead);
		cudaFree(temp);
		return 0;
	}

	size = head_size * sizeof(int);
	reservar(&fres, num * size);
	cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
	llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
	cudaFree(dhead);
	cudaFree(temp);
	*ret = fres;
	return num;
}
|
1279
packages/cuda/old/treeb.cu
Executable file
1279
packages/cuda/old/treeb.cu
Executable file
File diff suppressed because it is too large
Load Diff
763
packages/cuda/old/union2.cu
Executable file
763
packages/cuda/old/union2.cu
Executable file
@ -0,0 +1,763 @@
|
||||
/*Computer generated file to remove duplicates. Since Thrust's unique and sort, unlike their std's counterparts, don't have a way to specify the size of each element in
|
||||
the array, comparing pairs, triplets and other sets is not possible without defining a new pointer and all related operations for each set. If you have a better idea to do
|
||||
this, please don't hesitate to email us.*/
|
||||
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/unique.h>
|
||||
#include <thrust/distance.h>
|
||||
#include <thrust/sort.h>
|
||||
#include <iostream>
|
||||
#include "memory.h"
|
||||
#include "union2.h"
|
||||
|
||||
/* Sort-and-deduplicate 'rows' tuples of 'tipo' ints each, stored flat in the
   device buffer 'res'. Tuple widths 1..20 map to the generated comparator
   functors oN (ordering), pN (equality) and qN (equality used for the extra
   'final' dedup pass). When the result shrank below half the input, the data
   is compacted into a freshly reserved buffer and *ret is repointed to it.
   Returns the number of distinct tuples, or 0 for an unsupported 'tipo'.
   NOTE(review): on std::bad_alloc the sort/unique is retried after limpiar()
   releases memory — this loop spins until the operation succeeds, matching
   the original generated code. */
int unir(int *res, int rows, int tipo, int **ret, int final)
{
	int flag, nrows, *nres, size;

#if TIMER
	cuda_stats.unions++;
#endif

/* Each tuple width N >= 2 runs the identical sort/unique retry protocol,
   differing only in the element struct sN and its functors; the macro below
   replaces the 19 hand-expanded copies from the generated original. */
#define UNIR_CASE(N)								\
	case N:									\
	{									\
		thrust::device_ptr<s##N> ini =					\
			thrust::device_pointer_cast((s##N*)res);		\
		thrust::device_ptr<s##N> fin;					\
		flag = 0;							\
		while(flag != 1)						\
		{								\
			try							\
			{							\
				thrust::sort(ini, ini + rows, o##N());		\
				if(final)					\
				{						\
					fin = thrust::unique(ini, ini + rows, q##N()); \
					fin = thrust::unique(ini, fin, p##N());	\
				}						\
				else						\
					fin = thrust::unique(ini, ini + rows, p##N()); \
				flag = 1;					\
			}							\
			catch(std::bad_alloc &e)				\
			{							\
				limpiar("sort/unique in unir", 0);		\
			}							\
		}								\
		nrows = thrust::distance(ini, fin);				\
		break;								\
	}

	switch(tipo)
	{
		case 1:
		{
			/* Single-int tuples: default ordering/equality, q1 only
			   for the extra 'final' pass. */
			thrust::device_ptr<int> ini = thrust::device_pointer_cast(res);
			thrust::device_ptr<int> fin;
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(ini, ini + rows);
					if(final)
					{
						fin = thrust::unique(ini, ini + rows, q1());
						fin = thrust::unique(ini, fin);
					}
					else
						fin = thrust::unique(ini, ini + rows);
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(ini, fin);
			break;
		}
		UNIR_CASE(2)
		UNIR_CASE(3)
		UNIR_CASE(4)
		UNIR_CASE(5)
		UNIR_CASE(6)
		UNIR_CASE(7)
		UNIR_CASE(8)
		UNIR_CASE(9)
		UNIR_CASE(10)
		UNIR_CASE(11)
		UNIR_CASE(12)
		UNIR_CASE(13)
		UNIR_CASE(14)
		UNIR_CASE(15)
		UNIR_CASE(16)
		UNIR_CASE(17)
		UNIR_CASE(18)
		UNIR_CASE(19)
		UNIR_CASE(20)
		default:
			return 0;
	}
#undef UNIR_CASE

	/* Compact only when at least half the rows were duplicates, trading a
	   device-to-device copy for the reclaimed memory. */
	if(nrows < rows / 2)
	{
		size = nrows * tipo * sizeof(int);
		reservar(&nres, size);
		cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
		cudaFree(*ret);
		*ret = nres;
	}
	return nrows;
}
|
1005
packages/cuda/old/union2.h
Executable file
1005
packages/cuda/old/union2.h
Executable file
File diff suppressed because it is too large
Load Diff
0
packages/cuda/pred.h
Executable file → Normal file
0
packages/cuda/pred.h
Executable file → Normal file
103
packages/cuda/selectproyect.cu
Executable file → Normal file
103
packages/cuda/selectproyect.cu
Executable file → Normal file
@ -1,3 +1,4 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <stdlib.h>
|
||||
@ -8,10 +9,10 @@
|
||||
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, posact;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -30,10 +31,10 @@ we unmark any rows that do not comply with the selections*/
|
||||
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int x, rowact, posact;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
if(hipThreadIdx_x < numc)
|
||||
shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -56,10 +57,10 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
|
||||
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int temp, temp2, pos, x, y;
|
||||
if(threadIdx.x < cont)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < cont)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -90,10 +91,10 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
|
||||
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int temp, temp2, pos, x, y;
|
||||
if(threadIdx.x < cont)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < cont)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -120,10 +121,10 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
|
||||
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int pos, posr, x;
|
||||
if(threadIdx.x < hsize)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < hsize)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -139,10 +140,10 @@ selections, selfjoins, etc.). The array 'temp' holds the result of the prefix su
|
||||
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int pos, posr, x;
|
||||
if(threadIdx.x < hsize)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < hsize)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
@ -184,27 +185,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
cudaMemset(temp, 0, size2);
|
||||
hipMemset(temp, 0, size2);
|
||||
size = numselect * sizeof(int);
|
||||
cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dhead, select, size, hipMemcpyHostToDevice);
|
||||
|
||||
marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselect, temp + 1);
|
||||
|
||||
if(numselfj > 0)
|
||||
{
|
||||
size = numselfj * sizeof(int);
|
||||
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
}
|
||||
|
||||
if(numpreds > 0)
|
||||
{
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
}
|
||||
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
@ -215,10 +216,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
hipFree(dhead);
|
||||
hipFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
@ -229,19 +230,19 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
cudaMemset(temp, 0, size2);
|
||||
hipMemset(temp, 0, size2);
|
||||
size = numselfj * sizeof(int);
|
||||
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
|
||||
samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
|
||||
if(numpreds > 0)
|
||||
{
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
|
||||
}
|
||||
|
||||
@ -253,10 +254,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
hipFree(dhead);
|
||||
hipFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
@ -267,14 +268,14 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
cudaMemset(temp, 0, size2);
|
||||
hipMemset(temp, 0, size2);
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
|
||||
|
||||
if(ANDlogic)
|
||||
bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||
num = res[rows];
|
||||
@ -284,10 +285,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
hipFree(dhead);
|
||||
hipFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
@ -295,9 +296,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
{
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, rows * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(proyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, head_size, fres);
|
||||
hipFree(dhead);
|
||||
*ret = fres;
|
||||
return rows;
|
||||
}
|
||||
|
0
packages/cuda/selectproyectcpu.cpp
Executable file → Normal file
0
packages/cuda/selectproyectcpu.cpp
Executable file → Normal file
347
packages/cuda/treeb.cu
Executable file → Normal file
347
packages/cuda/treeb.cu
Executable file → Normal file
@ -1,3 +1,4 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <thrust/host_vector.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/sequence.h>
|
||||
@ -160,11 +161,11 @@ __device__ int firstMatchingKeyInDataNode2(Record records[], IKeyType key)
|
||||
|
||||
__global__ void gCreateIndex(IDataNode data[], IDirectoryNode dir[], int dirSize, int tree_size, int bottom_start, int nNodesPerBlock)
|
||||
{
|
||||
int startIdx = blockIdx.x * nNodesPerBlock;
|
||||
int startIdx = hipBlockIdx_x * nNodesPerBlock;
|
||||
int endIdx = startIdx + nNodesPerBlock;
|
||||
if(endIdx > dirSize)
|
||||
endIdx = dirSize;
|
||||
int keyIdx = threadIdx.x;
|
||||
int keyIdx = hipThreadIdx_x;
|
||||
|
||||
// Proceed only when in internal nodes
|
||||
for(int nodeIdx = startIdx; nodeIdx < endIdx; nodeIdx++)
|
||||
@ -191,11 +192,11 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
|
||||
{
|
||||
// Bringing the root node (visited by every tuple) to the faster shared memory
|
||||
__shared__ IKeyType RootNodeKeys[TREE_NODE_SIZE];
|
||||
RootNodeKeys[threadIdx.x] = dir->keys[threadIdx.x];
|
||||
RootNodeKeys[hipThreadIdx_x] = dir->keys[hipThreadIdx_x];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
int OverallThreadIdx = blockIdx.x * THRD_PER_BLCK_search + threadIdx.x;
|
||||
int OverallThreadIdx = hipBlockIdx_x * THRD_PER_BLCK_search + hipThreadIdx_x;
|
||||
|
||||
for(int keyIdx = OverallThreadIdx; keyIdx < nSearchKeys; keyIdx += THRD_PER_GRID_search)
|
||||
{
|
||||
@ -219,7 +220,7 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
|
||||
/*Counts the number of times a row in 'S' is to be joined to a row in 'R'.*/
|
||||
__global__ void gIndexJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[])
|
||||
{
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(s_cur < sLen)
|
||||
{
|
||||
@ -246,11 +247,11 @@ in 'g_locations' those rows that have equal values in the checked columns.*/
|
||||
__global__ void gIndexMultiJoinNegative(int *R, int *S, int g_locations[], int rLen, int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int posr, poss, x;
|
||||
|
||||
if(threadIdx.x < wj)
|
||||
shared[threadIdx.x] = muljoin[threadIdx.x];
|
||||
if(hipThreadIdx_x < wj)
|
||||
shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(r_cur < rLen)
|
||||
@ -287,11 +288,11 @@ times a row in 'S' is to be joined to its corresponding row in 'R', storing the
|
||||
__global__ void gIndexMultiJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[], int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int posr, poss, x;
|
||||
|
||||
if(threadIdx.x < wj)
|
||||
shared[threadIdx.x] = muljoin[threadIdx.x];
|
||||
if(hipThreadIdx_x < wj)
|
||||
shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -330,10 +331,10 @@ __global__ void multiJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int *extjoins = &shared[lenrul];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < (lenrul + wj))
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < (lenrul + wj))
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -382,10 +383,10 @@ __global__ void multiJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSum
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int *extjoins = &shared[cols];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < (cols + wj))
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < (cols + wj))
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -432,11 +433,11 @@ predicate are projected.*/
|
||||
__global__ void gJoinWithWriteNegative(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int halfrul, int *mloc)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int posr;
|
||||
|
||||
if(threadIdx.x < halfrul)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < halfrul)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(r_cur < rLen)
|
||||
@ -461,11 +462,11 @@ predicate are projected.*/
|
||||
__global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int cols, int *mloc)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int posr;
|
||||
|
||||
if(threadIdx.x < cols)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < cols)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(r_cur < rLen)
|
||||
@ -489,10 +490,10 @@ __global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinR
|
||||
__global__ void gJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int halfrul, int lenrul, int *mloc, int *sloc)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < lenrul)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < lenrul)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -525,10 +526,10 @@ projection, which is performed based on the variables in the head of the rule.*/
|
||||
__global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int cols, int *mloc, int *sloc)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < cols)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < cols)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(s_cur < sLen)
|
||||
@ -563,7 +564,7 @@ __global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[],
|
||||
/*Load part of column 'wj' of 'p' in 'R'. Which values are loaded is defined by the prefix sum results in 'pos'.*/
|
||||
__global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *ids)
|
||||
{
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int cond;
|
||||
if(id < len)
|
||||
{
|
||||
@ -579,7 +580,7 @@ __global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *i
|
||||
/*Load an entire column from 'p' into 'R'.*/
|
||||
__global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
|
||||
{
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
if(id < len)
|
||||
R[id] = p[id * of + wj];
|
||||
}
|
||||
@ -587,10 +588,10 @@ __global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
|
||||
__global__ void projectfinal(int *res, int rows, int cols, int *rule, int *out)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
|
||||
if(threadIdx.x < cols)
|
||||
shared[threadIdx.x] = rule[threadIdx.x];
|
||||
if(hipThreadIdx_x < cols)
|
||||
shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
|
||||
if(id < rows)
|
||||
@ -614,26 +615,26 @@ void project(int *res, int resrows, int numcols1, int numcols2, int *proj, int *
|
||||
int *pt = (int *)malloc(sizepro);
|
||||
for(z = 0; z < numcols2; z++)
|
||||
pt[z] = proj[z] - 1;
|
||||
cudaMemcpy(dcons, pt, sizepro, cudaMemcpyHostToDevice);
|
||||
//cudaDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
|
||||
hipMemcpy(dcons, pt, sizepro, hipMemcpyHostToDevice);
|
||||
//hipDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
|
||||
free(pt);
|
||||
}
|
||||
else
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
reservar(&d_Rout, resrows * sizepro);
|
||||
projectfinal<<<blockllen, numthreads, sizepro>>>(res, resrows, numcols1, dcons, d_Rout);
|
||||
cudaFree(dcons);
|
||||
cudaFree(*ret);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(projectfinal), dim3(blockllen), dim3(numthreads), sizepro, 0, res, resrows, numcols1, dcons, d_Rout);
|
||||
hipFree(dcons);
|
||||
hipFree(*ret);
|
||||
*ret = d_Rout;
|
||||
}
|
||||
|
||||
__global__ void projectadd(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, int *dhead, int hsize, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
|
||||
int pos2, posr, x, y, cond;
|
||||
if(threadIdx.x < hsize)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(hipThreadIdx_x < hsize)
|
||||
shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
|
||||
__syncthreads();
|
||||
if(id < rows2)
|
||||
{
|
||||
@ -662,10 +663,10 @@ void juntar(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, in
|
||||
int blockllen = rows2 / numthreads + 1;
|
||||
sizepro = pcols * sizeof(int);
|
||||
reservar(&dcons, sizepro);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
reservar(&d_Rout, rows1 * rows2 * sizepro);
|
||||
projectadd<<<blockllen, numthreads, sizepro>>>(dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
|
||||
cudaFree(dcons);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(projectadd), dim3(blockllen), dim3(numthreads), sizepro, 0, dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
|
||||
hipFree(dcons);
|
||||
*ret = d_Rout;
|
||||
}
|
||||
|
||||
@ -743,51 +744,51 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
|
||||
#ifdef TIMER
|
||||
//cout << "INICIO" << endl;
|
||||
cudaEvent_t start, stop;
|
||||
hipEvent_t start, stop;
|
||||
float time;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
if(npred2.x > 0 || npred2.y > 0 || nsel2 > 0 || nsj2 > 0)
|
||||
{
|
||||
newLen = sLen + 1;
|
||||
cudaMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||
hipMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||
}
|
||||
|
||||
if(npred2.x > 0 || npred2.y > 0)
|
||||
{
|
||||
size = npred2tot * sizeof(int);
|
||||
cudaMemcpy(dcons, pred2, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred2, size, hipMemcpyHostToDevice);
|
||||
|
||||
if(npred2.y > 0) /*Fix case when a(X,Y),b(Y,Z),Z > Y*/
|
||||
{
|
||||
reservar(&temp2, sizet2);
|
||||
cudaMemsetAsync(temp2, 0, newLen * sizeof(int));
|
||||
hipMemsetAsync(temp2, 0, newLen * sizeof(int));
|
||||
//res = thrust::device_pointer_cast(temp2);
|
||||
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(negative)
|
||||
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||
else
|
||||
bpredsOR<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsOR), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||
}
|
||||
|
||||
if(nsel2 > 0)
|
||||
{
|
||||
size = nsel2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice);
|
||||
marcar<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||
hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||
}
|
||||
|
||||
if(nsj2 > 0)
|
||||
{
|
||||
size = nsj2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -795,14 +796,14 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(nsel2 > 0)
|
||||
{
|
||||
size = nsel2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice);
|
||||
marcar2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||
hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||
|
||||
if(nsj2 > 0)
|
||||
{
|
||||
size = nsj2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -810,15 +811,15 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(nsj2 > 0)
|
||||
{
|
||||
size = nsj2 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
||||
samejoin2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
sizem32S = m32sLen * sizeof(int);
|
||||
reservar(&d_S, sizem32S);
|
||||
cudaMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
|
||||
llenarnosel<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1]);
|
||||
hipMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -842,8 +843,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
|
||||
if(newLen == 0) // && !negative) ARREGLAR
|
||||
{
|
||||
cudaFree(temp);
|
||||
cudaFree(dcons);
|
||||
hipFree(temp);
|
||||
hipFree(dcons);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -854,24 +855,24 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
|
||||
reservar(&d_S, sizem32S);
|
||||
reservar(&posS, sizem32S);
|
||||
cudaMemsetAsync(d_S + newLen, 0x7f, sizextra);
|
||||
cudaMemsetAsync(posS + newLen, 0x7f, sizextra);
|
||||
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
|
||||
hipMemsetAsync(d_S + newLen, 0x7f, sizextra);
|
||||
hipMemsetAsync(posS + newLen, 0x7f, sizextra);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1], temp, posS);
|
||||
sLen = newLen;
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
//cout << "Select1 = " << time << endl;
|
||||
cuda_stats.select1_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
blockllen = rLen / numthreads + 1;
|
||||
@ -880,30 +881,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
{
|
||||
if(temp2 != NULL)
|
||||
{
|
||||
cudaFree(temp);
|
||||
hipFree(temp);
|
||||
temp = temp2;
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
newLen = rLen + 1;
|
||||
if(nsel1 > 0)
|
||||
{
|
||||
size = nsel1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice);
|
||||
marcar<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||
hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||
}
|
||||
if(nsj1 > 0)
|
||||
{
|
||||
size = nsj1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
}
|
||||
if(npred1.x > 0)
|
||||
{
|
||||
size = npred1.x * sizeof(int);
|
||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -911,30 +912,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(npred1.x > 0 || nsel1 > 0 || nsj1 > 0)
|
||||
{
|
||||
newLen = rLen + 1;
|
||||
cudaMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||
hipMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||
}
|
||||
|
||||
if(nsel1 > 0)
|
||||
{
|
||||
size = nsel1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice);
|
||||
marcar2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||
hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||
|
||||
if(nsj1 > 0)
|
||||
{
|
||||
size = nsj1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
|
||||
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
}
|
||||
|
||||
if(npred1.x > 0)
|
||||
{
|
||||
size = npred1.x * sizeof(int);
|
||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -942,17 +943,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(nsj1 > 0)
|
||||
{
|
||||
size = nsj1 * sizeof(int);
|
||||
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
|
||||
samejoin2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||
|
||||
if(npred1.x > 0)
|
||||
{
|
||||
size = npred1.x * sizeof(int);
|
||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -960,11 +961,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(npred1.x > 0)
|
||||
{
|
||||
size = npred1.x * sizeof(int);
|
||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
else
|
||||
bpredsorlogic2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -976,11 +977,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
newLen = res[rLen];
|
||||
if(newLen == 0)
|
||||
{
|
||||
cudaFree(temp);
|
||||
cudaFree(dcons);
|
||||
cudaFree(d_S);
|
||||
hipFree(temp);
|
||||
hipFree(dcons);
|
||||
hipFree(d_S);
|
||||
if(posS != NULL)
|
||||
cudaFree(posS);
|
||||
hipFree(posS);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -991,41 +992,41 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
|
||||
reservar(&d_R, sizem32);
|
||||
reservar(&posR, sizem32);
|
||||
cudaMemsetAsync(d_R + newLen, 0x7f, sizextra);
|
||||
cudaMemsetAsync(posR + newLen, 0x7f, sizextra);
|
||||
llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR);
|
||||
hipMemsetAsync(d_R + newLen, 0x7f, sizextra);
|
||||
hipMemsetAsync(posR + newLen, 0x7f, sizextra);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0], temp, posR);
|
||||
rLen = newLen;
|
||||
}
|
||||
else
|
||||
{
|
||||
sizem32 = m32rLen * sizeof(int);
|
||||
reservar(&d_R, sizem32);
|
||||
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
|
||||
hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
sizem32 = m32rLen * sizeof(int);
|
||||
reservar(&d_R, sizem32);
|
||||
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
|
||||
hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
//cout << "Select2 = " << time << endl;
|
||||
cuda_stats.select2_time += time;
|
||||
#endif
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
thrust::device_ptr<Record> dvp1;
|
||||
@ -1084,17 +1085,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
}
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
//cout << "Sort = " << time << endl;
|
||||
cuda_stats.sort_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start, 0);
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
hipEventRecord(start, 0);
|
||||
#endif
|
||||
|
||||
IDataNode* d_data;
|
||||
@ -1123,7 +1124,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
dim3 Dbc(THRD_PER_BLCK_create, 1, 1);
|
||||
dim3 Dgc(BLCK_PER_GRID_create, 1, 1);
|
||||
|
||||
gCreateIndex <<<Dgc, Dbc>>> (d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gCreateIndex), dim3(Dgc), dim3(Dbc), 0, 0, d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);
|
||||
|
||||
int *d_locations;
|
||||
int memSizeR;
|
||||
@ -1132,7 +1133,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
{
|
||||
memSizeR = (rLen + 1) * sizeof(int);
|
||||
reservar(&d_locations, memSizeR);
|
||||
cudaMemsetAsync(d_locations, 0, sizeof(int));
|
||||
hipMemsetAsync(d_locations, 0, sizeof(int));
|
||||
nSearchKeys = rLen;
|
||||
}
|
||||
else
|
||||
@ -1146,13 +1147,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
unsigned int nKeysPerThread = uintCeilingDiv(nSearchKeys, THRD_PER_GRID_search);
|
||||
if(negative)
|
||||
{
|
||||
gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
|
||||
cudaMemsetAsync(temp, 0, memSizeR);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
|
||||
hipMemsetAsync(temp, 0, memSizeR);
|
||||
}
|
||||
else
|
||||
{
|
||||
gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
|
||||
cudaMemsetAsync(temp, 0, memSizeS);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
|
||||
hipMemsetAsync(temp, 0, memSizeS);
|
||||
}
|
||||
|
||||
int muljoin = 0, muljoinsize = 0, sum;
|
||||
@ -1165,8 +1166,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
{
|
||||
muljoin = numj - 2;
|
||||
muljoinsize = muljoin * sizeof(int);
|
||||
cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||
gIndexMultiJoinNegative<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
|
||||
hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoinNegative), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
|
||||
}
|
||||
|
||||
res = thrust::device_pointer_cast(d_locations);
|
||||
@ -1177,21 +1178,21 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(pos == (rule->num_rows - 3))
|
||||
{
|
||||
sizepro = rule->num_columns * sizeof(int);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
resSize = sum * sizepro;
|
||||
reservar(&d_Rout, resSize);
|
||||
gJoinWithWriteNegative2<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
|
||||
}
|
||||
else
|
||||
{
|
||||
sizepro = projp.x * sizeof(int);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
resSize = sum * sizepro;
|
||||
reservar(&d_Rout, resSize);
|
||||
gJoinWithWriteNegative<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
|
||||
}
|
||||
cudaFree(d_R);
|
||||
cudaFree(d_S);
|
||||
hipFree(d_R);
|
||||
hipFree(d_S);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1200,26 +1201,26 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
{
|
||||
muljoin = numj - 2;
|
||||
muljoinsize = muljoin * sizeof(int);
|
||||
cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||
gIndexMultiJoin<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
|
||||
hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoin), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
|
||||
}
|
||||
else
|
||||
gIndexJoin<<<blockllen, numthreads>>> (d_R, d_S, d_locations, sLen, temp);
|
||||
cudaFree(d_R);
|
||||
cudaFree(d_S);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gIndexJoin), dim3(blockllen), dim3(numthreads), 0, 0, d_R, d_S, d_locations, sLen, temp);
|
||||
hipFree(d_R);
|
||||
hipFree(d_S);
|
||||
|
||||
sum = res[sLen-1];
|
||||
thrust::exclusive_scan(res, res + sLen, res);
|
||||
sum += res[sLen-1];
|
||||
if(sum == 0)
|
||||
{
|
||||
cudaFree(dcons);
|
||||
cudaFree(d_locations);
|
||||
cudaFree(temp);
|
||||
hipFree(dcons);
|
||||
hipFree(d_locations);
|
||||
hipFree(temp);
|
||||
if(posS != NULL)
|
||||
cudaFree(posS);
|
||||
hipFree(posS);
|
||||
if(posR != NULL)
|
||||
cudaFree(posR);
|
||||
hipFree(posR);
|
||||
return 0;
|
||||
}
|
||||
res[sLen] = sum;
|
||||
@ -1227,49 +1228,49 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
if(pos == (rule->num_rows - 3))
|
||||
{
|
||||
sizepro = rule->num_columns * sizeof(int);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
resSize = sum * sizepro;
|
||||
reservar(&d_Rout, resSize);
|
||||
if(numj > 2)
|
||||
{
|
||||
cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||
multiJoinWithWrite2<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
|
||||
hipMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
|
||||
}
|
||||
else
|
||||
gJoinWithWrite2<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
|
||||
}
|
||||
else
|
||||
{
|
||||
sizepro = projp.y * sizeof(int);
|
||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||
hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
|
||||
resSize = sum * sizepro;
|
||||
reservar(&d_Rout, resSize);
|
||||
if(numj > 2)
|
||||
{
|
||||
cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||
multiJoinWithWrite<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
|
||||
hipMemcpy(dcons + projp.y, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
|
||||
}
|
||||
else
|
||||
gJoinWithWrite<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(dcons);
|
||||
cudaFree(d_locations);
|
||||
cudaFree(temp);
|
||||
hipFree(dcons);
|
||||
hipFree(d_locations);
|
||||
hipFree(temp);
|
||||
if(posS != NULL)
|
||||
cudaFree(posS);
|
||||
hipFree(posS);
|
||||
if(posR != NULL)
|
||||
cudaFree(posR);
|
||||
hipFree(posR);
|
||||
|
||||
if(*ret != NULL)
|
||||
cudaFree(*ret);
|
||||
hipFree(*ret);
|
||||
*ret = d_Rout;
|
||||
|
||||
#ifdef TIMER
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&time, start, stop);
|
||||
//cout << "Join = " << time << endl;
|
||||
//cout << "FIN" << endl;
|
||||
cuda_stats.join_time += time;
|
||||
|
80
packages/cuda/union2.cu
Executable file → Normal file
80
packages/cuda/union2.cu
Executable file → Normal file
@ -87,8 +87,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -122,8 +122,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -157,8 +157,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -192,8 +192,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -227,8 +227,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -262,8 +262,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -297,8 +297,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -332,8 +332,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -367,8 +367,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -402,8 +402,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -437,8 +437,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -472,8 +472,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -507,8 +507,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -542,8 +542,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -577,8 +577,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -612,8 +612,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -647,8 +647,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -682,8 +682,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -717,8 +717,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
@ -752,8 +752,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
|
||||
{
|
||||
size = nrows * tipo * sizeof(int);
|
||||
reservar(&nres, size);
|
||||
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||
cudaFree(*ret);
|
||||
hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
|
||||
hipFree(*ret);
|
||||
*ret = nres;
|
||||
}
|
||||
return nrows;
|
||||
|
0
packages/cuda/union2.h
Executable file → Normal file
0
packages/cuda/union2.h
Executable file → Normal file
0
packages/cuda/unioncpu2.cpp
Executable file → Normal file
0
packages/cuda/unioncpu2.cpp
Executable file → Normal file
Reference in New Issue
Block a user