new version of cuda interface

commit d3599da6dc
parent c6d174841a
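This commit ports the CUDA Datalog engine to AMD's HIP runtime: the diff below is almost entirely mechanical one-for-one renames of CUDA runtime calls. As orientation only (not part of the commit), a minimal sketch of the mapping, restricted to calls that actually appear in the diff:

    #include "hip/hip_runtime.h"
    #include <cstddef>

    // The renames this commit applies, e.g.:
    //   cudaMemcpy / cudaMemcpyAsync  -> hipMemcpy / hipMemcpyAsync
    //   cudaMalloc / cudaFree         -> hipMalloc / hipFree
    //   cudaMemGetInfo                -> hipMemGetInfo
    //   cudaSetDevice                 -> hipSetDevice
    //   cudaEvent* functions          -> hipEvent* functions
    //   cudaMemcpyDeviceToHost etc.   -> hipMemcpyDeviceToHost etc.
    int copy_back(int *host, const int *dev, size_t bytes)
    {
        // Same signature shape as the CUDA call it replaces.
        return hipMemcpy(host, dev, bytes, hipMemcpyDeviceToHost) == hipSuccess ? 0 : -1;
    }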
.gitignore (vendored; 2 changes)

@@ -179,3 +179,5 @@ packages/myddas/hh
 packages/myddas/DaysInHospital_Y3.csv

 packages/myddas/agile.csv
+
+*.pyc
packages/cuda/CC_CSSTree.cu (0 changes; Executable file → Normal file)
packages/cuda/CC_CSSTree.h (0 changes; Executable file → Normal file)
packages/cuda/Makefile.in (0 changes; Executable file → Normal file)
packages/cuda/bpreds.cu (37 changes; Executable file → Normal file)

@@ -1,3 +1,4 @@
+#include "hip/hip_runtime.h"
 #include <thrust/device_vector.h>
 #include <thrust/scan.h>
 #include <cstdarg>
@@ -25,10 +26,10 @@ int maximo(int count, ...)
 __global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int x, rowact, rowact1, op1, op2;
-    if(threadIdx.x < numc)
-        shared[threadIdx.x] = cons[threadIdx.x];
+    if(hipThreadIdx_x < numc)
+        shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -110,10 +111,10 @@ __global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *co
 __global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int x, rowact, op1, op2;
-    if(threadIdx.x < numc)
-        shared[threadIdx.x] = cons[threadIdx.x];
+    if(hipThreadIdx_x < numc)
+        shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -159,10 +160,10 @@ __global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc,
 __global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int x, rowact, op1, op2;
-    if(threadIdx.x < numc)
-        shared[threadIdx.x] = cons[threadIdx.x];
+    if(hipThreadIdx_x < numc)
+        shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -226,10 +227,10 @@ __global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc,
 __global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int x, rowact, rowact1, op1, op2;
-    if(threadIdx.x < numc)
-        shared[threadIdx.x] = cons[threadIdx.x];
+    if(hipThreadIdx_x < numc)
+        shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -344,10 +345,10 @@ __global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *
 __global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int x, rowact, op1, op2;
-    if(threadIdx.x < numc)
-        shared[threadIdx.x] = cons[threadIdx.x];
+    if(hipThreadIdx_x < numc)
+        shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -411,10 +412,10 @@ __global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc
 __global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int x, rowact, op1, op2;
-    if(threadIdx.x < numc)
-        shared[threadIdx.x] = cons[threadIdx.x];
+    if(hipThreadIdx_x < numc)
+        shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
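A note on the kernel hunks above: at the time of this port, HIP exposed thread and block coordinates through hipThreadIdx_x-style built-ins rather than CUDA's threadIdx.x (newer HIP also accepts the CUDA spellings). A minimal self-contained sketch of the indexing and shared-memory idiom shared by all six kernels; the kernel name and body are hypothetical:

    #include "hip/hip_runtime.h"

    // Same idiom as bpreds/bpredsnormal/...: stage the comparison constants
    // in shared memory, then process one row per thread.
    __global__ void stage_constants(int *cons, int numc, int rows, int *out)
    {
        extern __shared__ int shared[];
        int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
        if (hipThreadIdx_x < numc)
            shared[hipThreadIdx_x] = cons[hipThreadIdx_x]; // one constant per thread
        __syncthreads();                                   // constants visible to whole block
        if (id < rows)                                     // last block may overshoot
            out[id] = shared[id % numc];
    }

Because the shared array is declared extern, its size must be supplied at launch time as the dynamic shared-memory argument (numc * sizeof(int) here).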
packages/cuda/bpreds.h (1 change; Executable file → Normal file)

@@ -1,3 +1,4 @@
+#include "hip/hip_runtime.h"
 #ifndef _BPREDS_H_
 #define _BPREDS_H_

packages/cuda/bpredscpu.cpp (0 changes; Executable file → Normal file)
packages/cuda/clamp.rb (new file, 52 lines)

require "formula"

# Documentation: https://github.com/Homebrew/homebrew/wiki/Formula-Cookbook
# /usr/local/Library/Contributions/example-formula.rb
# PLEASE REMOVE ALL GENERATED COMMENTS BEFORE SUBMITTING YOUR PULL REQUEST!

class Clamp < Formula
  homepage "https://bitbucket.org/multicoreware/cppamp-driver-ng/wiki/Home"
  version "0.0.1-3"
  url "https://bitbucket.org/multicoreware/cppamp-driver-ng/get/milestone3.tar.bz2"
  head "https://bitbucket.org/multicoreware/cppamp-driver-ng.git"
  sha1 "b8b88306561a60942f8ecbd8ff20554661c4e5f9"

  depends_on "cmake" => :build
  depends_on "wget" => :build
  depends_on "git" => :build
  depends_on "hg" => :build
  depends_on "subversion" => :build
  # depends_on :x11 # if your formula requires any X11/XQuartz components

  def install
    # ENV.deparallelize # if your formula fails when building in parallel

    # Remove unrecognized options if warned by configure
    # system "./configure", "--disable-debug",
    #                       "--disable-dependency-tracking",
    #                       "--disable-silent-rules",
    #                       "--prefix=#{prefix}"
    mkdir "macbuild" do
      args = std_cmake_args
      args << "-DCLANG_URL=https://bitbucket.org/multicoreware/cppamp-ng.git"
      args << "-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=CBackend"
      args << "-DGMAC_URL=https://bitbucket.org/multicoreware/gmac"
      system 'cmake', "..", *args
      system "make", "world"
      system "cd libc++; make install"
      system "make", "install" # if this fails, try separate make/make install steps
    end
  end

  test do
    # `test do` will create, run in and delete a temporary directory.
    #
    # This test will fail and we won't accept that! It's enough to just replace
    # "false" with the main program this formula installs, but it'd be nice if you
    # were more thorough. Run the test with `brew test milestone`.
    #
    # The installed folder is not in the path, so use the entire path to any
    # executables being tested: `system "#{bin}/program", "do", "something"`.
    system "make", "test"
  end
end
packages/cuda/creator2.c (4 changes; Executable file → Normal file)

@@ -66,7 +66,7 @@ int main(int argc, char *argv[])
     fprintf(cuda, "\t\t\t{\n");
     fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
     fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
-    fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
+    fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
     fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
     fprintf(cuda, "\t\t\t\t*ret = nres;\n");
     fprintf(cuda, "\t\t\t}\n");
@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
     fprintf(cuda, "\t\t\t{\n");
     fprintf(cuda, "\t\t\t\tsize = nrows * tipo * sizeof(int);\n");
     fprintf(cuda, "\t\t\t\treservar(&nres, size);\n");
-    fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);\n");
+    fprintf(cuda, "\t\t\t\tcudaMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);\n");
     fprintf(cuda, "\t\t\t\tcudaFree(*ret);\n");
     fprintf(cuda, "\t\t\t\t*ret = nres;\n");
     fprintf(cuda, "\t\t\t}\n");
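creator2.c emits CUDA source as fprintf strings, and the two hunks above swap only the copy-kind constant inside the generated text, leaving the generated cudaMemcpyAsync call name untouched. For reference, a hedged sketch of the equivalent direct HIP call; nres, res and size are hypothetical names mirroring the generated strings:

    #include "hip/hip_runtime.h"

    // Device-to-device copy of a freshly reserved result buffer, as the
    // generated code performs it. Illustration only, not the generator itself.
    void copy_result(int *nres, const int *res, size_t size)
    {
        // Asynchronous with respect to the host; ordered on the null stream.
        hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice, 0);
    }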
packages/cuda/cuda.c (0 changes; Executable file → Normal file)
packages/cuda/cuda.yap (0 changes; Executable file → Normal file)

packages/cuda/dbio.cu (file name inferred from the datalogWrite/postgresWrite/mysqlWrite context; the rendered page dropped this header)

@@ -27,8 +27,8 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
             res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
             tipo = res_rows * cols1 * sizeof(int);
             hres = (int *)malloc(tipo);
-            cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
-            cudaFree(dop1);
+            hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
+            hipFree(dop1);
             *result = hres;
         }
         else
@@ -39,13 +39,13 @@ void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode
                 int *dop2;
                 tipo = res_rows * cols1 * sizeof(int);
                 reservar(&dop2, tipo);
-                cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice);
+                hipMemcpy(dop2, dop1, tipo, hipMemcpyHostToDevice);
                 free(dop1);
                 res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
                 tipo = res_rows * cols1 * sizeof(int);
                 hres = (int *)malloc(tipo);
-                cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost);
-                cudaFree(dop2);
+                hipMemcpy(hres, dop2, tipo, hipMemcpyDeviceToHost);
+                hipFree(dop2);
                 *result = hres;
             }
             else
@@ -315,8 +315,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,

             tipo = res_rows * cols1 * sizeof(int);
             hres = (int *)malloc(tipo);
-            cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
-            cudaFree(dop1);
+            hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
+            hipFree(dop1);
             w = z + 1;

             strtok(qposr->rulename, "_");
@@ -353,8 +353,8 @@ void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str,
             res_rows = abs(res_rows);
             tipo = res_rows * cols1 * sizeof(int);
             hres = (int *)malloc(tipo);
-            cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
-            cudaFree(dop1);
+            hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
+            hipFree(dop1);

             char file[] = "/dev/shm/buffer.csv";
             FILE *fp;
@@ -554,7 +554,7 @@ void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator f
         sign = tmpfact.predname;
         tipo = res_rows * cols1 * sizeof(int);
         hres = (int *)malloc(tipo);
-        cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
+        hipMemcpy(hres, dop1, tipo, hipMemcpyDeviceToHost);
         if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
             sumar(tmpfact.name, dop1, cols1, res_rows);
     }
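All five hunks above apply the same read-back idiom: copy the device-resident result into a freshly malloc'd host buffer, then release the device copy. A self-contained sketch of that pattern with error checking added (the code in the diff calls hipMemcpy and hipFree unchecked):

    #include "hip/hip_runtime.h"
    #include <cstdlib>

    // Read back a rows*cols int matrix from the device and free the device copy.
    // Returns NULL on failure; the error checks are an illustration, not in the diff.
    int *read_back(int *dev, int rows, int cols)
    {
        size_t bytes = (size_t)rows * cols * sizeof(int);
        int *host = (int *)malloc(bytes);
        if (host == NULL)
            return NULL;
        if (hipMemcpy(host, dev, bytes, hipMemcpyDeviceToHost) != hipSuccess) {
            free(host);
            return NULL;
        }
        hipFree(dev);
        return host;
    }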
packages/cuda/hippy/hippy (new file, empty)
packages/cuda/joincpu.cpp (62 changes; Executable file → Normal file)

@@ -324,11 +324,11 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
     }

     #ifdef TIMER
-    cudaEvent_t start, stop;
+    hipEvent_t start, stop;
     float time;
-    cudaEventCreate(&start);
-    cudaEventCreate(&stop);
-    cudaEventRecord(start, 0);
+    hipEventCreate(&start);
+    hipEventCreate(&stop);
+    hipEventRecord(start, 0);
     #endif

     if(nsel1 > 0 || nsj1 > 0)
@@ -359,16 +359,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
     }

     #ifdef TIMER
-    cudaEventRecord(stop, 0);
-    cudaEventSynchronize(stop);
-    cudaEventElapsedTime(&time, start, stop);
+    hipEventRecord(stop, 0);
+    hipEventSynchronize(stop);
+    hipEventElapsedTime(&time, start, stop);
     cuda_stats.select1_time += time;

-    cudaEventDestroy(start);
-    cudaEventDestroy(stop);
-    cudaEventCreate(&start);
-    cudaEventCreate(&stop);
-    cudaEventRecord(start, 0);
+    hipEventDestroy(start);
+    hipEventDestroy(stop);
+    hipEventCreate(&start);
+    hipEventCreate(&stop);
+    hipEventRecord(start, 0);
     #endif

     if(nsel2 > 0 || nsj2 > 0)
@@ -381,16 +381,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
         Snl = sLen;

     #ifdef TIMER
-    cudaEventRecord(stop, 0);
-    cudaEventSynchronize(stop);
-    cudaEventElapsedTime(&time, start, stop);
+    hipEventRecord(stop, 0);
+    hipEventSynchronize(stop);
+    hipEventElapsedTime(&time, start, stop);
     cuda_stats.select2_time += time;

-    cudaEventDestroy(start);
-    cudaEventDestroy(stop);
-    cudaEventCreate(&start);
-    cudaEventCreate(&stop);
-    cudaEventRecord(start, 0);
+    hipEventDestroy(start);
+    hipEventDestroy(stop);
+    hipEventCreate(&start);
+    hipEventCreate(&stop);
+    hipEventRecord(start, 0);
     #endif

     //cout << "antes" << endl;
@@ -406,16 +406,16 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
     thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation);

     #ifdef TIMER
-    cudaEventRecord(stop, 0);
-    cudaEventSynchronize(stop);
-    cudaEventElapsedTime(&time, start, stop);
+    hipEventRecord(stop, 0);
+    hipEventSynchronize(stop);
+    hipEventElapsedTime(&time, start, stop);
     cuda_stats.sort_time += time;

-    cudaEventDestroy(start);
-    cudaEventDestroy(stop);
-    cudaEventCreate(&start);
-    cudaEventCreate(&stop);
-    cudaEventRecord(start, 0);
+    hipEventDestroy(start);
+    hipEventDestroy(stop);
+    hipEventCreate(&start);
+    hipEventCreate(&stop);
+    hipEventRecord(start, 0);
     #endif

     /*cout << "despues" << endl;
@@ -482,9 +482,9 @@ int joincpu(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenod
     *ret = fres;

     #ifdef TIMER
-    cudaEventRecord(stop, 0);
-    cudaEventSynchronize(stop);
-    cudaEventElapsedTime(&time, start, stop);
+    hipEventRecord(stop, 0);
+    hipEventSynchronize(stop);
+    hipEventElapsedTime(&time, start, stop);
     cuda_stats.join_time += time;
     #endif

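The TIMER blocks above port CUDA's event-based timing one for one. A minimal self-contained sketch of the full lifecycle of that pattern, outside the join code:

    #include "hip/hip_runtime.h"
    #include <cstdio>

    // Time a section of GPU work with HIP events, mirroring the #ifdef TIMER
    // blocks in joincpu.cpp and lista.cu.
    int main()
    {
        hipEvent_t start, stop;
        float ms = 0.0f;
        hipEventCreate(&start);
        hipEventCreate(&stop);

        hipEventRecord(start, 0);      // mark the beginning on the null stream
        /* ... kernel launches / memcpys to be timed ... */
        hipEventRecord(stop, 0);       // mark the end
        hipEventSynchronize(stop);     // wait until the stop event has completed
        hipEventElapsedTime(&ms, start, stop);

        printf("elapsed: %f ms\n", ms);
        hipEventDestroy(start);
        hipEventDestroy(stop);
        return 0;
    }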
packages/cuda/lista.cu (40 changes; Executable file → Normal file)

@@ -967,7 +967,7 @@ vector<gpunode> L;
 extern "C"
 int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, int *inpquery, int **result, char *names, int finalDR)
 {
-    cudaSetDevice(0);
+    hipSetDevice(0);
     vector<rulenode> rules;
     int x;

@@ -1029,11 +1029,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
     vector<rulenode>::iterator qposr;

     #if TIMER
-    cudaEvent_t start, stop;
+    hipEvent_t start, stop;
     float time;
-    cudaEventCreate(&start);
-    cudaEventCreate(&stop);
-    cudaEventRecord(start, 0);
+    hipEventCreate(&start);
+    hipEventCreate(&stop);
+    hipEventRecord(start, 0);
     #endif

     while(reglas.size()) /*Here's the main loop*/
@@ -1084,7 +1084,7 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
             {
                 num_refs = rows1 * cols1 * sizeof(int);
                 reservar(&res, num_refs);
-                cudaMemcpyAsync(res, dop1, num_refs, cudaMemcpyDeviceToDevice);
+                hipMemcpyAsync(res, dop1, num_refs, hipMemcpyDeviceToDevice);
                 registrar(rul_act->name, cols1, res, rows1, itr, 1);
                 genflag = 1;
                 rul_act->gen_ant = rul_act->gen_act;
@@ -1251,10 +1251,10 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
             if(x == num_refs)
             {
                 #ifdef TIMER
-                cudaEvent_t start2, stop2;
-                cudaEventCreate(&start2);
-                cudaEventCreate(&stop2);
-                cudaEventRecord(start2, 0);
+                hipEvent_t start2, stop2;
+                hipEventCreate(&start2);
+                hipEventCreate(&stop2);
+                hipEventRecord(start2, 0);
                 #endif

                 //cout << rul_act->name << " res_rows = " << res_rows << endl;
@@ -1263,11 +1263,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
                 res_rows = unir(res, res_rows, rul_act->num_columns, &res, 0);

                 #ifdef TIMER
-                cudaEventRecord(stop2, 0);
-                cudaEventSynchronize(stop2);
-                cudaEventElapsedTime(&time, start2, stop2);
-                cudaEventDestroy(start2);
-                cudaEventDestroy(stop2);
+                hipEventRecord(stop2, 0);
+                hipEventSynchronize(stop2);
+                hipEventElapsedTime(&time, start2, stop2);
+                hipEventDestroy(start2);
+                hipEventDestroy(stop2);
                 //cout << "Union = " << time << endl;
                 cuda_stats.union_time += time;
                 #endif
@@ -1319,16 +1319,16 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
     #endif

     #if TIMER
-    cudaEventRecord(stop, 0);
-    cudaEventSynchronize(stop);
-    cudaEventElapsedTime(&time, start, stop);
+    hipEventRecord(stop, 0);
+    hipEventSynchronize(stop);
+    hipEventElapsedTime(&time, start, stop);
     cuda_stats.total_time += time;
     if (time > cuda_stats.max_time)
         cuda_stats.max_time = time;
     if (time < cuda_stats.min_time || cuda_stats.calls == 1)
         cuda_stats.min_time = time;
-    cudaEventDestroy(start);
-    cudaEventDestroy(stop);
+    hipEventDestroy(start);
+    hipEventDestroy(stop);
     Cuda_Statistics();
     #endif

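Cuda_Eval now opens by pinning all work to device 0 with hipSetDevice(0), unconditionally. A small sketch of a more defensive variant; the device-count check is an addition for illustration, not in the diff:

    #include "hip/hip_runtime.h"
    #include <cstdio>

    int select_device(void)
    {
        int count = 0;
        // Fail cleanly when no HIP device is present instead of erroring later.
        if (hipGetDeviceCount(&count) != hipSuccess || count == 0) {
            fprintf(stderr, "no HIP-capable device found\n");
            return -1;
        }
        return hipSetDevice(0) == hipSuccess ? 0 : -1;
    }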
packages/cuda/lista.h (0 changes; Executable file → Normal file)
packages/cuda/memory.cu (44 changes; Executable file → Normal file)

@@ -144,7 +144,7 @@ void limpiar(const char s[], size_t sz)

     if(GPUmem.size() == 0)
     {
-        cudaMemGetInfo(&free,&total);
+        hipMemGetInfo(&free,&total);
         cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
         exit(1);
     }
@@ -154,11 +154,11 @@ void limpiar(const char s[], size_t sz)
     {
         temp = *ini;
         temp.dev_address = (int *)malloc(ini->size);
-        cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
+        hipMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, hipMemcpyDeviceToHost);
         list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
         CPUmem.insert(pos, temp);
     }
-    cudaFree(ini->dev_address);
+    hipFree(ini->dev_address);
     GPUmem.erase(ini);
 }

@@ -173,19 +173,19 @@ void reservar(int **ptr, size_t size)
         return;
     }

-    cudaMemGetInfo(&free, &total);
+    hipMemGetInfo(&free, &total);
     while(free < size)
     {
         cout << "Se limpio memoria " << free << " " << total << endl;
         limpiar("not enough memory", size);
-        cudaMemGetInfo(&free, &total);
+        hipMemGetInfo(&free, &total);
     }

-    while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
+    while(hipMalloc(ptr, size) == hipErrorMemoryAllocation)
         limpiar("Error in memory allocation", size);
     if (! *ptr ) {
         size_t free, total;
-        cudaMemGetInfo( &free, &total );
+        hipMemGetInfo( &free, &total );
         cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;
         cerr << "Exiting CUDA...." << endl;
         exit(1);
@@ -277,7 +277,7 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
     }
     size = num_rows * num_columns * sizeof(int);
     reservar(&temp, size);
-    cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
+    hipMemcpyAsync(temp, address_host_table, size, hipMemcpyHostToDevice);
     registrar(name, num_columns, temp, num_rows, itr, 0);
     *ptr = temp;
     return num_rows;
@@ -296,13 +296,13 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
     reservar(&temp, size);
     for(x = 0; x < numgpu; x++)
     {
-        cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
+        hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToDevice);
         inc += temp_storage[x].size / sizeof(int);
-        cudaFree(temp_storage[x].dev_address);
+        hipFree(temp_storage[x].dev_address);
     }
     for(; x < numcpu; x++)
     {
-        cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
+        hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyHostToDevice);
         inc += temp_storage[x].size / sizeof(int);
         free(temp_storage[x].dev_address);
     }
@@ -340,9 +340,9 @@ int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address
     temp = (int *)malloc(size);
     for(x = 0; x < numgpu; x++)
     {
-        cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
+        hipMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, hipMemcpyDeviceToHost);
         inc += temp_storage[x].size / sizeof(int);
-        cudaFree(temp_storage[x].dev_address);
+        hipFree(temp_storage[x].dev_address);
     }
     for(; x < numcpu; x++)
     {
@@ -404,7 +404,7 @@ int cargafinal(int name, int cols, int **ptr)
     cont = pos->rows;
     #ifdef TUFFY
     reservar(&temp, pos->size);
-    cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
+    hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
     *ptr = temp;
     #else
     *ptr = pos->dev_address;
@@ -418,14 +418,14 @@ int cargafinal(int name, int cols, int **ptr)
     pos = gpu;
     while(pos != endg && pos->name == name)
     {
-        cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
+        hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyDeviceToDevice);
         temp += pos->size / sizeof(int);
         pos++;
     }
     pos = cpu;
     while(pos != endc && pos->name == name)
     {
-        cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
+        hipMemcpy(temp, pos->dev_address, pos->size, hipMemcpyHostToDevice);
         temp += pos->size / sizeof(int);
         pos++;
     }
@@ -493,7 +493,7 @@ void clear_memory()
     {
         if(ini->isrule)
         {
-            cudaFree(ini->dev_address);
+            hipFree(ini->dev_address);
             ini = GPUmem.erase(ini);
         }
         else
@@ -518,7 +518,7 @@ void clear_memory_all()
     fin = GPUmem.end();
     while(ini != fin)
     {
-        cudaFree(ini->dev_address);
+        hipFree(ini->dev_address);
         ini++;
     }
     GPUmem.clear();
@@ -542,7 +542,7 @@ void liberar(int name)
     {
         fact = *i;
         GPUmem.erase(i);
-        cudaFree(fact.dev_address);
+        hipFree(fact.dev_address);
     }
     i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
     if(i != CPUmem.end())
@@ -566,10 +566,10 @@ void sumar(int name, int *dop1, int cols, int rows)
         newrows = rows + fact.rows;
         reservar(&res, newrows * cols * sizeof(int));
         offset = fact.rows * cols;
-        cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
+        hipMemcpyAsync(res, fact.dev_address, offset * sizeof(int), hipMemcpyDeviceToDevice);
         GPUmem.erase(i);
         registrar(name, cols, res, newrows, 0, 0);
-        cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
-        cudaFree(fact.dev_address);
+        hipMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), hipMemcpyDeviceToDevice);
+        hipFree(fact.dev_address);
     }
 }
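reservar() above implements allocate-or-evict: query free memory with hipMemGetInfo, spill cached tables to the host via limpiar(), and retry hipMalloc while it returns hipErrorMemoryAllocation. A compact standalone sketch of that strategy; evict() is a hypothetical stand-in for limpiar():

    #include "hip/hip_runtime.h"
    #include <cstdlib>

    // Placeholder for limpiar(): spill one GPU-resident table to the host and
    // free its device buffer. Returns false when nothing is left to evict.
    static bool evict(void) { return false; }

    // Allocate `size` bytes on the device, evicting cached tables on pressure,
    // in the spirit of reservar() in memory.cu.
    int *alloc_with_eviction(size_t size)
    {
        size_t free_mem = 0, total = 0;
        hipMemGetInfo(&free_mem, &total);
        while (free_mem < size && evict())
            hipMemGetInfo(&free_mem, &total);

        int *ptr = NULL;
        while (hipMalloc(&ptr, size) == hipErrorMemoryAllocation) {
            if (!evict())
                return NULL;   // out of memory and nothing left to spill
        }
        return ptr;
    }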
packages/cuda/memory.h (0 changes; Executable file → Normal file)
packages/cuda/old/cuda.c (new executable file, 601 lines)

// interface to CUDD Datalog evaluation
#include "config.h"
#include "YapInterface.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <inttypes.h>
#include "pred.h"

#define MAXARG 100

YAP_Atom AtomEq,
  AtomGt,
  AtomLt,
  AtomGe,
  AtomLe,
  AtomDf,
  AtomNt;

predicate *facts[MAXARG]; /*Temporary solution to maintain facts and rules*/
predicate *rules[MAXARG];
int32_t cf = 0, cr = 0;

char names[1024];

// initialize CUDA system
void Cuda_Initialize( void );

// add/replace a set of facts for predicate pred
int32_t Cuda_NewFacts(predicate *pred);

// add/replace a rule for predicate pred
int32_t Cuda_NewRule(predicate *pred);

// erase predicate pred
int32_t Cuda_Erase(predicate *pred);

// evaluate predicate pred, mat is bound to a vector of solutions, and
// output the count
//int32_t Cuda_Eval(predicate *pred, int32_t **mat); This functions arguments were changed, please see pred.h

void init_cuda( void );

//#define DEBUG_INTERFACE 1

#ifdef ROCKIT
static int32_t query[100];
static int32_t qcont = 0;
static int cuda_init_query(void)
{
  int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG1));
  query[qcont] = pname;
  qcont++;
  query[qcont] = 0;
  return TRUE;
}
#endif

#if DEBUG_INTERFACE
static void
dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
{
  return;
  int32_t i, j;
  for ( i=0; i< nrows; i++) {
    printf("%d", mat[i*ncols]);
    for (j=1; j < ncols; j++) {
      printf(", %d", mat[i*ncols+j]);
    }
    printf("\n");
  }
}

static void
dump_vec(int32_t vec[], int32_t rows)
{
  int32_t i = 1;
  int32_t j = 0;

  for (j = 0; j < rows; j++) {
    for ( ; vec[i]; i++ ) {
      printf(", %d", vec[i]);
    }
    printf(", 0");
    i++;
  }
  printf("\n");
}
#endif /* DEBUG_INTERFACE */


// stubs, will point at Carlos code.

void Cuda_Initialize( void )
{
}

int32_t Cuda_NewFacts(predicate *pe)
{
#if DEBUG_INTERFACE
  dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
#endif

#ifdef ROCKIT
  if(cf >= 0)
  {
    facts[cf] = pe;
    cf++;
  }
#else
  facts[cf] = pe;
  cf++;
#endif

  return TRUE;
}

int32_t Cuda_NewRule(predicate *pe)
{
#if DEBUG_INTERFACE
  dump_vec( pe->address_host_table, pe->num_rows);
#endif
  rules[cr] = pe;
  cr++;
  return TRUE;
}

int32_t Cuda_Erase(predicate *pe)
{
  int i = 0;
  while ( rules[i] != pe )
    i++;
  while (i < cr-1) {
    rules[i] = rules[i+1];
    i++;
  }
  rules[i] = NULL;
  cr--;
  if (pe->address_host_table)
    free( pe->address_host_table );
  free( pe );
  return TRUE;
}

static int
load_facts( void ) {

  int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
  int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
  YAP_Term t3 = YAP_ARG3;
  int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
  int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
  predicate *pred;

  while(YAP_IsPairTerm(t3)) {
    int32_t j = 0;
    YAP_Term th = YAP_HeadOfTerm(t3);

    for (j = 0; j < ncols; j++) {
      YAP_Term ta = YAP_ArgOfTerm(j+1, th);
      if (YAP_IsAtomTerm(ta)) {
        mat[i*ncols+j] = YAP_AtomToInt(YAP_AtomOfTerm(ta));
      } else {
        mat[i*ncols+j] = YAP_IntOfTerm(ta);
      }
    }
    t3 = YAP_TailOfTerm( t3 );
    i++;
  }
  if (YAP_IsVarTerm( YAP_ARG4)) {
    // new
    pred = (predicate *)malloc(sizeof(predicate));
  } else {
    pred = (predicate *)YAP_IntOfTerm(YAP_ARG4);
    if (pred->address_host_table)
      free( pred->address_host_table );
  }
  pred->name = pname;
  pred->num_rows = nrows;
  pred->num_columns = ncols;
  pred->is_fact = TRUE;
  pred->address_host_table = mat;
  Cuda_NewFacts(pred);
  if (YAP_IsVarTerm( YAP_ARG4)) {
    return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
  } else {
    return TRUE;
  }
}

static int currentFact = 0;
static predicate *currentPred = NULL;

static int
cuda_init_facts( void ) {

  int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
  int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
  int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
  int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
  predicate *pred;

  strcat(names, YAP_AtomName(YAP_AtomOfTerm(YAP_ARG3)));
  strcat(names, " ");

  if (!mat)
    return FALSE;
  if (YAP_IsVarTerm( YAP_ARG4)) {
    // new
    pred = (predicate *)malloc(sizeof(predicate));
  } else {
    pred = (predicate *)YAP_IntOfTerm(YAP_ARG4);
    if (pred->address_host_table)
      free( pred->address_host_table );
  }
  pred->name = pname;
  pred->num_rows = nrows;
  pred->num_columns = ncols;
  pred->is_fact = TRUE;
  pred->address_host_table = mat;
  currentPred = pred;
  currentFact = 0;

  if (YAP_IsVarTerm( YAP_ARG4)) {
    return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
  } else {
    return TRUE;
  }
}

static int
cuda_load_fact( void ) {

  int i = currentFact;

#if defined(DATALOG) || defined(TUFFY)
  YAP_Term th = YAP_ARG1;
  int ncols = currentPred->num_columns;
  int j;
  int *mat = currentPred->address_host_table;
  for (j = 0; j < ncols; j++) {
    YAP_Term ta = YAP_ArgOfTerm(j+1, th);
    if (YAP_IsAtomTerm(ta)) {
      mat[i*ncols+j] = YAP_AtomToInt(YAP_AtomOfTerm(ta));
    } else {
      mat[i*ncols+j] = YAP_IntOfTerm(ta);
    }
  }
#endif

  i++;
  if (i == currentPred->num_rows) {
    Cuda_NewFacts(currentPred);
    currentPred = NULL;
    currentFact = 0;
  } else {
    currentFact = i;
  }
  return TRUE;
}

static int
load_rule( void ) {
  // maximum of 2K symbols per rule, should be enough for ILP
  int32_t vec[2048], *ptr = vec, *nvec, neg[2048];
  // qK different variables;
  YAP_Term vars[1024];
  int32_t nvars = 0, x;
  int32_t ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */
  int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
  YAP_Term t3 = YAP_ARG3;
  YAP_Atom name = YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3)));
  int32_t pname = YAP_AtomToInt(name);

  const char *strname = YAP_AtomName(name);
  predicate *pred;
  int32_t cont = 0;
  memset(neg, 0x0, 2048 * sizeof(int32_t));

  while(YAP_IsPairTerm(t3)) {
    int32_t j = 0, m;
    YAP_Term th = YAP_HeadOfTerm(t3);
    YAP_Functor f = YAP_FunctorOfTerm( th );
    int32_t n = YAP_ArityOfFunctor( f );
    YAP_Atom at = YAP_NameOfFunctor( f );

    if (at == AtomEq)
      *ptr++ = SBG_EQ;
    else if (at == AtomGt)
      *ptr++ = SBG_GT;
    else if (at == AtomLt)
      *ptr++ = SBG_LT;
    else if (at == AtomGe)
      *ptr++ = SBG_GE;
    else if (at == AtomLe)
      *ptr++ = SBG_LE;
    else if (at == AtomDf)
      *ptr++ = SBG_DF;
    else if (at == AtomNt)
    {
      neg[cont] = 1;
      cont++;
    }
    else
    {
      *ptr++ = YAP_AtomToInt( at );
      cont++;
    }

    for (j = 0; j < n; j++) {
      YAP_Term ta = YAP_ArgOfTerm(j+1, th);

      if (YAP_IsVarTerm(ta)) {
        int32_t k;
        for (k = 0; k< nvars; k++) {
          if (vars[k] == ta) {
            *ptr++ = k+1;
            break;
          }
        }
        if (k == nvars) {
          vars[k] = ta;
          *ptr++ = k+1;
          nvars++;
        }
      } else if (YAP_IsAtomTerm(ta)) {
        *ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
      } else if (YAP_IsApplTerm(ta)) {
        f = YAP_FunctorOfTerm( ta );
        at = YAP_NameOfFunctor( f );
        m = YAP_ArityOfFunctor( f );
        *ptr++ = YAP_AtomToInt( at );

        for (x = 0; x < m; x++) {
          YAP_Term ta2 = YAP_ArgOfTerm(x+1, ta);

          if (YAP_IsVarTerm(ta2)) {
            int32_t k;
            for (k = 0; k < nvars; k++) {
              if (vars[k] == ta2) {
                *ptr++ = k+1;
                break;
              }
            }
            if (k == nvars) {
              vars[k] = ta2;
              *ptr++ = k+1;
              nvars++;
            }
          } else if (YAP_IsAtomTerm(ta2)) {
            *ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
          } else {
            *ptr++ = -YAP_IntOfTerm(ta);
          }
        }
      } else {
        *ptr++ = -YAP_IntOfTerm(ta);
      }
    }
    *ptr++ = 0;
    t3 = YAP_TailOfTerm( t3 );
  }
  if (YAP_IsVarTerm( YAP_ARG4)) {
    // new
    pred = (predicate *)malloc(sizeof(predicate));
  } else {
    pred = (predicate *)YAP_IntOfTerm(YAP_ARG4);
    if (pred->address_host_table)
      free( pred->address_host_table );
  }
  pred->name = pname;
  pred->num_rows = ngoals;
  pred->num_columns = ncols;
  pred->is_fact = FALSE;
  x = (strlen(strname) + 1) * sizeof(char);
  pred->predname = (char *)malloc(x);
  memcpy(pred->predname, strname, x);
  nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
  memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
  pred->address_host_table = nvec;
  pred->negatives = (int32_t *)malloc(sizeof(int32_t) * cont);
  memcpy(pred->negatives, neg, sizeof(int32_t) * cont);
  Cuda_NewRule( pred );
  return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
}

static int
cuda_erase( void )
{
  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
  return Cuda_Erase( ptr );
}

void setQuery(YAP_Term t1, int32_t **res)
{
  int32_t *query = (int32_t *)malloc(MAXARG * sizeof(int32_t));
  int32_t x, y = 0, *itr;
  predicate *ptr = NULL;
  if(YAP_IsPairTerm(t1))
  {
    while(YAP_IsPairTerm(t1))
    {
      ptr = (predicate *)YAP_IntOfTerm(YAP_HeadOfTerm(t1));
      query[y] = ptr->name;
      itr = ptr->address_host_table;
      x = 2;
      while(itr[x] != 0)
        x++;
      query[y+1] = itr[x+1];
      t1 = YAP_TailOfTerm(t1);
      y+=2;
    }
  }
  else
  {
    ptr = (predicate *)YAP_IntOfTerm(t1);
    query[y] = ptr->name;
    itr = ptr->address_host_table;
    x = 2;
    while(itr[x] != 0)
      x++;
    query[y+1] = itr[x+1];
    y += 2;
  }
  query[y] = -1;
  query[y+1] = -1;
  *res = query;
}

static int
cuda_eval( void )
{
  int32_t *mat;

#if defined(DATALOG) || defined(TUFFY)
  int32_t *query = NULL;
  setQuery(YAP_ARG1, &query);
#endif

  int32_t finalDR = YAP_IntOfTerm(YAP_ARG3);
  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, names, finalDR);

#ifdef TUFFY
  cf = 0;
#endif
#ifdef ROCKIT
  if(cf > 0)
    cf *= -1;
#endif
#if defined(TUFFY) || defined(ROCKIT)
  cr = 0;
  names[0] = '\0';
  return FALSE;
#else
  int32_t i;
  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
  int32_t ncols = ptr->num_columns;
  YAP_Term out = YAP_TermNil();
  YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
  YAP_Term vec[256];

  YAP_Atom at;

  if (n < 0)
    return FALSE;
  for (i=0; i<n; i++) {
    int32_t ni = ((n-1)-i)*ncols, j;

    printf("%s(", YAP_AtomName(YAP_IntToAtom(ptr->name)));

    for (j=0; j<ncols; j++) {
      vec[j] = YAP_MkIntTerm(mat[ni+j]);

      at = YAP_IntToAtom(mat[ni+j]);
      if(at != NULL)
        printf("%s", YAP_AtomName(at));
      else
        printf("%d", mat[ni+j]);
      if(j < (ncols - 1))
        printf(",");

    }
    out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);

    printf(")\n");

  }
  if (n > 0)
    free( mat );
  return YAP_Unify(YAP_ARG2, out);
#endif
}

static int
cuda_coverage( void )
{
  int32_t *mat;

#if defined(DATALOG) || defined(TUFFY)
  int32_t *query = NULL;
  setQuery(YAP_ARG1, &query);
#endif

  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
  int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
  int32_t i = n/2, min = 0, max = n-1;
  int32_t t0, t1;

  if (n < 0)
    return FALSE;
  if (n == 0) {
    return YAP_Unify(YAP_ARG4, YAP_MkIntTerm(0)) &&
      YAP_Unify(YAP_ARG3, YAP_MkIntTerm(0));
  }
  t0 = mat[0], t1 = mat[(n-1)*2];
  if (t0 == t1) { /* all sametype */
    free( mat );
    /* all pos */
    if (t0 == post)
      return YAP_Unify(YAP_ARG3, YAP_MkIntTerm(n)) &&
        YAP_Unify(YAP_ARG4, YAP_MkIntTerm(0));
    /* all neg */
    return YAP_Unify(YAP_ARG4, YAP_MkIntTerm(n)) &&
      YAP_Unify(YAP_ARG3, YAP_MkIntTerm(0));
  }
  do {
    i = (min+max)/2;
    if (i == min) i++;
    if (mat[i*2] == t0) {
      min = i;
    } else {
      max = i;
    }
    if (min+1 == max) {
      free( mat );
      if (t0 == post)
        return YAP_Unify(YAP_ARG3, YAP_MkIntTerm(max)) &&
          YAP_Unify(YAP_ARG4, YAP_MkIntTerm(n-max));
      /* all neg */
      return YAP_Unify(YAP_ARG4, YAP_MkIntTerm(max)) &&
        YAP_Unify(YAP_ARG3, YAP_MkIntTerm(n-max));
    }
  } while ( TRUE );
}

static int cuda_count( void )
{
  int32_t *mat;

#if defined(DATALOG) || defined(TUFFY)
  int32_t *query = NULL;
  setQuery(YAP_ARG1, &query);
#endif

  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);

  if (n < 0)
    return FALSE;
  free( mat );
  return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
}

static int cuda_statistics( void )
{
  Cuda_Statistics();
  return TRUE;
}

static int first_time = TRUE;

void
init_cuda(void)
{
  if (first_time) Cuda_Initialize();
  first_time = FALSE;

  AtomEq = YAP_LookupAtom("=");
  AtomGt = YAP_LookupAtom(">");
  AtomLt = YAP_LookupAtom("<");
  AtomGe = YAP_LookupAtom(">=");
  AtomLe = YAP_LookupAtom("=<");
  AtomDf = YAP_LookupAtom("\\=");
  AtomNt = YAP_LookupAtom("not");
  YAP_UserCPredicate("load_facts", load_facts, 4);
  YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
  YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
  YAP_UserCPredicate("load_rule", load_rule, 4);
  YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
  YAP_UserCPredicate("cuda_eval", cuda_eval, 3);
  YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
  YAP_UserCPredicate("cuda_count", cuda_count, 2);
  YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);

#ifdef ROCKIT
  YAP_UserCPredicate("cuda_init_query", cuda_init_query, 1);
#endif

}
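old/cuda.c preserves the original YAP glue: plain C functions exported to Prolog through YAP_UserCPredicate. A minimal sketch of that registration pattern, reduced to a single hypothetical predicate and using only YAP calls that appear above:

    #include "YapInterface.h"

    // Hypothetical example: expose a C counter to Prolog as fact_count/1.
    // Mirrors the registration idiom of init_cuda() above; not part of the commit.
    static int32_t nfacts = 0;

    static int fact_count(void)
    {
        // Unify the predicate's single argument with the current counter.
        return YAP_Unify(YAP_ARG1, YAP_MkIntTerm(nfacts));
    }

    void init_example(void)
    {
        YAP_UserCPredicate("fact_count", fact_count, 1);
    }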
603
packages/cuda/old/dbio.cu
Normal file
603
packages/cuda/old/dbio.cu
Normal file
@ -0,0 +1,603 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "memory.h"
|
||||||
|
#include "union2.h"
|
||||||
|
#include "dbio.h"
|
||||||
|
|
||||||
|
#ifdef DATALOG
|
||||||
|
//template<class InputIterator>
|
||||||
|
//void datalogWrite(int query, InputIterator rul_str, InputIterator fin, int finalDR, int **result)
|
||||||
|
void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, int finalDR, int **result)
|
||||||
|
{
|
||||||
|
rulenode tmprule;
|
||||||
|
vector<rulenode>::iterator qposr;
|
||||||
|
int *dop1, *hres;
|
||||||
|
int cols1, res_rows, tipo;
|
||||||
|
tmprule.name = query;
|
||||||
|
qposr = lower_bound(rul_str, fin, tmprule, comparer);
|
||||||
|
cols1 = qposr->num_columns;
|
||||||
|
res_rows = cargafinal(query, cols1, &dop1);
|
||||||
|
|
||||||
|
if(res_rows != 0)
|
||||||
|
{
|
||||||
|
if(res_rows > 0)
|
||||||
|
{
|
||||||
|
if(finalDR)
|
||||||
|
res_rows = unir(dop1, res_rows, cols1, &dop1, 0);
|
||||||
|
tipo = res_rows * cols1 * sizeof(int);
|
||||||
|
hres = (int *)malloc(tipo);
|
||||||
|
cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
|
||||||
|
cudaFree(dop1);
|
||||||
|
*result = hres;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
res_rows *= -1;
|
||||||
|
if(finalDR)
|
||||||
|
{
|
||||||
|
int *dop2;
|
||||||
|
tipo = res_rows * cols1 * sizeof(int);
|
||||||
|
reservar(&dop2, tipo);
|
||||||
|
cudaMemcpy(dop2, dop1, tipo, cudaMemcpyHostToDevice);
|
||||||
|
free(dop1);
|
||||||
|
res_rows = unir(dop2, res_rows, cols1, &dop2, 0);
|
||||||
|
tipo = res_rows * cols1 * sizeof(int);
|
||||||
|
hres = (int *)malloc(tipo);
|
||||||
|
cudaMemcpy(hres, dop2, tipo, cudaMemcpyDeviceToHost);
|
||||||
|
cudaFree(dop2);
|
||||||
|
*result = hres;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
*result = dop1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef TUFFY
|
||||||
|
void postgresRead(PGconn **ret, vector<gpunode> *L, int *inpquery, char *names, int finalDR)
|
||||||
|
{
|
||||||
|
PGresult *pgr;
|
||||||
|
int x, y;
|
||||||
|
int *mat, *mat2;
|
||||||
|
char *tok, sel[1024], **qrs;
|
||||||
|
int w, z = 0, numt, numc, numc2, start = 0, start2, val;
|
||||||
|
PGconn *conn = PQconnectdb("host=localhost port=5432 dbname = prueba user=tuffer password=root");
|
||||||
|
if(PQstatus(conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Connection to database failed: %s", PQerrorMessage(conn));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
pgr = PQexec(conn, "Select nspname from pg_catalog.pg_namespace where oid = (select max(oid) from pg_catalog.pg_namespace)");
|
||||||
|
sprintf(sel, "SET search_path = %s", PQgetvalue(pgr, 0, 0));
|
||||||
|
PQclear(pgr);
|
||||||
|
PQexec(conn, sel);
|
||||||
|
tok = strtok(names, " ");
|
||||||
|
if(finalDR)
|
||||||
|
{
|
||||||
|
qrs = (char **)malloc(100 * sizeof(char *));
|
||||||
|
while(tok != NULL)
|
||||||
|
{
|
||||||
|
sprintf(sel, "Select * from %s limit 0", tok);
|
||||||
|
pgr = PQexec(conn, sel);
|
||||||
|
numc = L->at(z).num_columns;
|
||||||
|
if(tok[0] == 'c')
|
||||||
|
{
|
||||||
|
sprintf(sel, "Select ");
|
||||||
|
numt = numc + 1;
|
||||||
|
for(x = 1; x < numt; x++)
|
||||||
|
{
|
||||||
|
strcat(sel, PQfname(pgr, x));
|
||||||
|
strcat(sel, ", ");
|
||||||
|
}
|
||||||
|
sel[strlen(sel)-2] = '\0';
|
||||||
|
sprintf(sel, "%s from %s", sel, tok);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sprintf(sel, "Select id, Club, ");
|
||||||
|
numt = numc + 6;
|
||||||
|
for(x = 8; x < numt; x++)
|
||||||
|
{
|
||||||
|
strcat(sel, PQfname(pgr, x));
|
||||||
|
strcat(sel, ", ");
|
||||||
|
}
|
||||||
|
sel[strlen(sel)-2] = '\0';
|
||||||
|
sprintf(sel, "%s from %s", sel, tok);
|
||||||
|
}
|
||||||
|
PQclear(pgr);
|
||||||
|
pgr = PQexec(conn, sel);
|
||||||
|
numt = PQntuples(pgr);
|
||||||
|
mat = (int *)malloc(numt * numc * sizeof(int));
|
||||||
|
if(tok[0] == 'c')
|
||||||
|
{
|
||||||
|
for(x = 0; x < numt; x++)
|
||||||
|
{
|
||||||
|
start = x * numc;
|
||||||
|
for(y = 0; y < numc; y++)
|
||||||
|
mat[start + y] = atoi(PQgetvalue(pgr, x, y));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
numc2 = numc - 2;
|
||||||
|
mat2 = (int *)malloc(numt * numc2 * sizeof(int));
|
||||||
|
start = 0;
|
||||||
|
start2 = 0;
|
||||||
|
for(x = 0; x < numt; x++)
|
||||||
|
{
|
||||||
|
w = atoi(PQgetvalue(pgr, x, 1));
|
||||||
|
if(w < 2)
|
||||||
|
{
|
||||||
|
mat[start] = atoi(PQgetvalue(pgr, x, 0));
|
||||||
|
start++;
|
||||||
|
mat[start] = w;
|
||||||
|
start++;
|
||||||
|
if(w > 0)
|
||||||
|
{
|
||||||
|
for(y = 2; y < numc; y++)
|
||||||
|
{
|
||||||
|
val = atoi(PQgetvalue(pgr, x, y));
|
||||||
|
mat[start] = val;
|
||||||
|
mat2[start2] = val;
|
||||||
|
start++;
|
||||||
|
start2++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(y = 2; y < numc; y++)
|
||||||
|
{
|
||||||
|
val = atoi(PQgetvalue(pgr, x, y));
|
||||||
|
mat[start] = val;
|
||||||
|
start++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(y = 2; y < numc; y++)
|
||||||
|
{
|
||||||
|
val = atoi(PQgetvalue(pgr, x, y));
|
||||||
|
mat2[start2] = val;
|
||||||
|
start2++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
L->at(z+1).address_host_table = mat2;
|
||||||
|
L->at(z+1).num_rows = start2 / numc2;
|
||||||
|
}
|
||||||
|
L->at(z).address_host_table = mat;
|
||||||
|
L->at(z).num_rows = start / numc;
|
||||||
|
PQclear(pgr);
|
||||||
|
|
||||||
|
x = 1;
|
||||||
|
while(inpquery[x] != -1)
|
||||||
|
{
|
||||||
|
if(L->at(z).name == inpquery[x])
|
||||||
|
{
|
||||||
|
numt = (strlen(tok) + 1) * sizeof(char);
|
||||||
|
qrs[x] = (char *)malloc(numt);
|
||||||
|
memcpy(qrs[x], tok, numt);
|
||||||
|
}
|
||||||
|
x += 2;
|
||||||
|
}
|
||||||
|
if(tok[0] == 'c')
|
||||||
|
{
|
||||||
|
tok = strtok(NULL, " ");
|
||||||
|
z++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strtok(NULL, " ");
|
||||||
|
tok = strtok(NULL, " ");
|
||||||
|
z += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while(tok != NULL)
|
||||||
|
{
|
||||||
|
sprintf(sel, "Select * from %s limit 0", tok);
|
||||||
|
pgr = PQexec(conn, sel);
|
||||||
|
numc = L->at(z).num_columns;
|
||||||
|
if(tok[0] == 'c')
|
||||||
|
{
|
||||||
|
sprintf(sel, "Select weight, myid, ");
|
||||||
|
start = 1;
|
||||||
|
numt = numc + 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sprintf(sel, "Select truth, Club, atomID, ");
|
||||||
|
start = 8;
|
||||||
|
numt = numc + 5;
|
||||||
|
}
|
||||||
|
for(x = start; x < numt; x++)
|
||||||
|
{
|
||||||
|
strcat(sel, PQfname(pgr, x));
|
||||||
|
strcat(sel, ", ");
|
||||||
|
}
|
||||||
|
sel[strlen(sel)-2] = '\0';
|
||||||
|
sprintf(sel, "%s from %s", sel, tok);
|
||||||
|
PQclear(pgr);
|
||||||
|
pgr = PQexec(conn, sel);
|
||||||
|
numt = PQntuples(pgr);
|
||||||
|
mat = (int *)malloc(numt * numc * sizeof(int));
|
||||||
|
L->at(z).weight = (double *)malloc(numt * sizeof(double));
|
||||||
|
L->at(z).num_rows = numt;
|
||||||
|
|
||||||
|
for(x = 0; x < numt; x++)
|
||||||
|
{
|
||||||
|
start = x * numc;
|
||||||
|
for(y = 1; y < numc; y++)
|
||||||
|
mat[start + y] = atoi(PQgetvalue(pgr, x, y));
|
||||||
|
}
|
||||||
|
|
||||||
|
numt *= numc;
|
||||||
|
double flo;
|
||||||
|
if(tok[0] == 'c')
|
||||||
|
{
|
||||||
|
for(x = 0, y = 0; x < numt; x+=numc, y++)
|
||||||
|
{
|
||||||
|
flo = atof(PQgetvalue(pgr, y, 0));
|
||||||
|
L->at(z).weight[y] = flo;
|
||||||
|
if(flo > 0)
|
||||||
|
mat[x] = y + 1;
|
||||||
|
else
|
||||||
|
mat[x] = -y - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(x = 0, y = 0; x < numt; x+=numc, y++)
|
||||||
|
{
|
||||||
|
if(PQgetvalue(pgr, y, 0)[0] == 't')
|
||||||
|
mat[x] = 2;
|
||||||
|
else
|
||||||
|
mat[x] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
L->at(z).address_host_table = mat;
|
||||||
|
numc = (strlen(tok) + 1) * sizeof(char);
|
||||||
|
L->at(z).predname = (char *)malloc(numc);
|
||||||
|
memcpy(L->at(z).predname, tok, numc);
|
||||||
|
PQclear(pgr);
|
||||||
|
tok = strtok(NULL, " ");
|
||||||
|
z++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*ret = conn;
|
||||||
|
}
|
||||||
|
|
||||||
|
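/*Illustrative sketch, not part of the original sources: the column-list
  construction used above, assuming a live libpq connection `conn`. The
  helper name `build_select` and buffer sizes are hypothetical. The
  "limit 0" probe returns only metadata, so PQnfields()/PQfname() can
  enumerate column names without fetching rows.*/
static void build_select(PGconn *conn, const char *tbl, char *out, size_t outsz)
{
	char probe[256];
	PGresult *r;
	int i, nf;
	size_t len;

	snprintf(probe, sizeof(probe), "Select * from %s limit 0", tbl);
	r = PQexec(conn, probe);			/* metadata-only result set */
	nf = PQnfields(r);
	len = snprintf(out, outsz, "Select ");
	for(i = 0; i < nf && len < outsz; i++)
		len += snprintf(out + len, outsz - len, i + 1 < nf ? "%s, " : "%s", PQfname(r, i));
	if(len < outsz)
		snprintf(out + len, outsz - len, " from %s", tbl);
	PQclear(r);
}
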
void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, PGconn *conn, int finalDR)
{
	char sel[1024];
	double *matw = NULL;
	int qname, cols1, res_rows, tipo, *dop1;
	int x, w, z, y, *hres;
	rulenode tmprule;
	vector<rulenode>::iterator qposr;
	if(finalDR)
	{
		char file[] = "/dev/shm/mln0_atoms.csv";
		z = 0;
		int seqid = 1;
		FILE *fp;
		fp = fopen(file, "w");
		if(fp == NULL)
		{
			cerr << "Failed to create main memory temporary file, attempting to use hard drive" << endl;
			sprintf(file, "./temp/mln0_atoms.csv");
			fp = fopen(file, "w");
			if(fp == NULL)
			{
				cerr << "Failed to create temporary file on disk" << endl;
				exit(1);
			}
		}
		while((qname = inpquery[z]) != -1)
		{
			tmprule.name = qname;
			qposr = lower_bound(rul_str, fin, tmprule, comparer);
			cols1 = qposr->num_columns;
			res_rows = cargafinal(qname, cols1, &dop1);

			if(res_rows != 0)
			{
				if(res_rows < 0)
					res_rows = unir(dop1, -res_rows, cols1, &dop1, 0); /*duplicate elimination on result*/
				else
					res_rows = unir(dop1, res_rows, cols1, &dop1, finalDR);

				tipo = res_rows * cols1 * sizeof(int);
				hres = (int *)malloc(tipo);
				cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
				cudaFree(dop1);
				w = z + 1;

				strtok(qposr->rulename, "_");
				strtok(NULL, "_");
				int prid = atoi(strtok(NULL, "_"));

				for(x = 0, w = 0; x < res_rows; x++, w+=2)
				{
					if(hres[w+1])
						fprintf(fp, "%d,%d,%d,true\n", seqid, hres[w], prid);
					else
						fprintf(fp, "%d,%d,%d,false\n", seqid, hres[w], prid);
					seqid++;
				}
				free(hres);
			}
			z += 2;
		}
		fclose(fp);
		sprintf(sel, "Copy mln0_atoms(atomid,tupleID,predID,isquery) from '%s' CSV", file);
		PQexec(conn, sel);
	}
	else
	{
		while(rul_str != fin)
		{
			cols1 = rul_str->num_columns;
			res_rows = cargafinal(rul_str->name, cols1, &dop1);
			if(res_rows == 0)
			{
				rul_str++;
				continue;
			}
			res_rows = abs(res_rows);
			tipo = res_rows * cols1 * sizeof(int);
			hres = (int *)malloc(tipo);
			cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
			cudaFree(dop1);

			char file[] = "/dev/shm/buffer.csv";
			FILE *fp;
			fp = fopen(file, "w");
			if(fp == NULL)
			{
				cerr << "Failed to create main memory temporary file, attempting to use hard drive" << endl;
				sprintf(file, "./temp/buffer.csv");
				fp = fopen(file, "w");
				if(fp == NULL)
				{
					cerr << "Failed to create temporary file on disk" << endl;
					exit(1);
				}
			}

			if(rul_str->rulename[0] == 'z')
			{
				char *name = rul_str->rulename + 1;
				for(x = 0; x < ninpf; x++)
				{
					if(strncmp(L->at(x).predname, name, strlen(name)) == 0)
					{
						matw = L->at(x).weight;
						break;
					}
				}

				cols1 -= 3;
				for(x = 0, z = 0; x < res_rows; x++, z+=3)
				{
					for(y = 0; y < cols1; y++, z++)
						fprintf(fp, "%d,", hres[z]);
					fprintf(fp, "%d,%lf,%d\n", hres[z], matw[abs(hres[z+1])-1], hres[z+2]);
				}
				fclose(fp);
				sprintf(sel, "Copy %s from '%s' CSV", name, file);
				PQexec(conn, sel);
			}
			else
			{
				cols1--;
				for(x = 0, z = 0; x < res_rows; x++, z++)
				{
					for(y = 0; y < cols1; y++, z++)
						fprintf(fp, "%d,", hres[z]);
					fprintf(fp, "%d\n", hres[z]);
				}
				fclose(fp);
				sprintf(sel, "Copy %s from '%s' CSV", rul_str->rulename, file);
				PQexec(conn, sel);
			}
			free(hres);
			rul_str++;
		}
	}
	PQfinish(conn);
	if(finalDR)
		clear_memory_all();
}
#endif

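/*Illustrative sketch, not part of the original sources: the write path
  above stages tuples in a CSV under /dev/shm (falling back to ./temp)
  and bulk-loads them with COPY, which is far faster than row-at-a-time
  INSERTs. The table name `facts`, file name and row data below are
  hypothetical; COPY FROM 'file' requires the file to be readable by the
  database server, as the original code also assumes.*/
#include <stdio.h>
#include <libpq-fe.h>

static void copy_csv_example(PGconn *conn)
{
	const char file[] = "/dev/shm/facts.csv";
	FILE *fp = fopen(file, "w");
	if(fp == NULL)
		return;
	for(int i = 0; i < 3; i++)
		fprintf(fp, "%d,%d\n", i, i * i);	/* stage rows as CSV */
	fclose(fp);
	char sel[256];
	snprintf(sel, sizeof(sel), "Copy facts from '%s' CSV", file);
	PGresult *r = PQexec(conn, sel);	/* server-side bulk load */
	PQclear(r);
}
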
#ifdef ROCKIT
void mysqlRead(MYSQL **ret, int *qrs, vector<gpunode> *L, int ninpf, char *names, int finalDR)
{
	char *tok, sel[1024];
	int w, x, y, z = 0, numt, numc;
	int *mat;
	MYSQL *con = mysql_init(NULL);
	if(con == NULL)
	{
		fprintf(stderr, "mysql_init() failed\n");
		exit(1);
	}
	mysql_options(con, MYSQL_OPT_LOCAL_INFILE, NULL);
	mysql_real_connect(con, "localhost", "root", "root", "rockit", 0, NULL, 0);
	if(finalDR)
	{
		y = 0;
		while(qrs[y] != 0)
		{
			for(z = 0; z < ninpf; z++)
			{
				if(qrs[y] == L->at(z).name)
				{
					MYSQL_ROW row;
					sprintf(sel, "Select count(*) from %s", L->at(z).predname);
					mysql_query(con, sel);
					MYSQL_RES *result = mysql_store_result(con);
					row = mysql_fetch_row(result);
					numt = atoi(row[0]);
					mysql_free_result(result);

					if(numt != L->at(z).num_rows)
					{
						liberar(L->at(z).name);
						numc = L->at(z).num_columns;
						sprintf(sel, "Select * from %s", L->at(z).predname);
						mysql_query(con, sel);
						MYSQL_RES *result = mysql_store_result(con);
						mat = (int *)malloc(numt * numc * sizeof(int));
						w = 0;
						while ((row = mysql_fetch_row(result)))
						{
							for(x = 0; x < numc; x++, w++)
								mat[w] = atoi(row[x]);
						}

						mysql_free_result(result);
						if(L->at(z).address_host_table != NULL)
							free(L->at(z).address_host_table);
						L->at(z).address_host_table = mat;
						L->at(z).num_rows = numt;
					}
				}
			}
			y++;
		}
	}
	else
	{
		tok = strtok(names, " ");
		while(tok != NULL)
		{
			numc = L->at(z).num_columns;
			sprintf(sel, "Select * from %s", tok);
			mysql_query(con, sel);
			MYSQL_RES *result = mysql_store_result(con);
			numt = mysql_num_rows(result);

			MYSQL_ROW row;
			mat = (int *)malloc(numt * numc * sizeof(int));
			w = 0;
			if(tok[0] == 'f' && tok[1] >= '0' && tok[1] <= '9')
			{
				while ((row = mysql_fetch_row(result)))
				{
					for(x = 1; x <= numc; x++, w++)
						mat[w] = atoi(row[x]);
				}
			}
			else
			{
				while ((row = mysql_fetch_row(result)))
				{
					for(x = 0; x < numc; x++, w++)
						mat[w] = atoi(row[x]);
				}
			}
			mysql_free_result(result);
			L->at(z).address_host_table = mat;
			L->at(z).num_rows = numt;

			numc = (strlen(tok) + 1) * sizeof(char);
			L->at(z).predname = (char *)malloc(numc);
			strcpy(L->at(z).predname, tok);
			tok = strtok(NULL, " ");
			z++;
		}
	}
	*ret = con;
}

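/*Illustrative sketch, not part of the original sources: mysqlRead stores
  each relation as one row-major int array of num_rows * num_columns
  entries, the flat layout the GPU kernels expect. The helper name
  `fetch_flat` is hypothetical and error handling is omitted.*/
#include <stdio.h>
#include <stdlib.h>
#include <mysql/mysql.h>

static int *fetch_flat(MYSQL *con, const char *table, int numc, int *rows_out)
{
	char sel[256];
	snprintf(sel, sizeof(sel), "Select * from %s", table);
	mysql_query(con, sel);
	MYSQL_RES *result = mysql_store_result(con);
	int numt = mysql_num_rows(result);
	int *mat = (int *)malloc(numt * numc * sizeof(int));
	MYSQL_ROW row;
	int w = 0;
	while((row = mysql_fetch_row(result)))
		for(int x = 0; x < numc; x++, w++)
			mat[w] = atoi(row[x]);	/* row i starts at mat[i * numc] */
	mysql_free_result(result);
	*rows_out = numt;
	return mat;
}
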
void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, MYSQL *con)
{
	int x, y, z, cols1, cols2, res_rows, tipo;
	int *hres, *dop1;
	char *id, *sign, *q1, *q2;
	char sel[1024], weight[1024];
	gpunode tmpfact;
	while(rul_str != fin)
	{
		cols1 = rul_str->num_columns;
		res_rows = cargafinal(rul_str->name, cols1, &dop1);
		id = strtok(rul_str->rulename, "_");
		sprintf(sel, "create table if not exists %s(weight double, ", id);
		for(x = 0; x < cols1; x++)
		{
			sprintf(weight, "a%d char(10), ", x);
			strcat(sel, weight);
		}
		sel[strlen(sel)-2] = ')';
		strcat(sel, "ENGINE = MEMORY DEFAULT CHARSET=latin1");
		mysql_query(con, sel);
		sprintf(sel, "truncate %s", id);
		mysql_query(con, sel);

		if(res_rows == 0)
		{
			rul_str++;
			continue;
		}

		if(res_rows > 0)
		{
			tmpfact = L->at(-rul_str->referencias[rul_str->num_rows - 2] - 1);
			sign = tmpfact.predname;
			tipo = res_rows * cols1 * sizeof(int);
			hres = (int *)malloc(tipo);
			cudaMemcpy(hres, dop1, tipo, cudaMemcpyDeviceToHost);
			if(sign[0] == 'f' && sign[1] >= '0' && sign[1] <= '9')
				sumar(tmpfact.name, dop1, cols1, res_rows);
		}
		else
		{
			hres = dop1;
			res_rows = -res_rows;
		}

		sign = strtok(NULL, "_");
		q1 = strtok(NULL, "_");
		q2 = strtok(NULL, "_");
		if(sign[0] == '0')
			sprintf(weight, "%s.%s", q1, q2);
		else
			sprintf(weight, "-%s.%s", q1, q2);

		FILE *fp;
		char file[512];
		sprintf(file, "/dev/shm/%s.tsv", id);
		fp = fopen(file, "w");
		if(fp == NULL)
		{
			cerr << "Failed to create main memory temporary file, attempting to use hard drive" << endl;
			sprintf(file, "./temp/%s.tsv", id);
			fp = fopen(file, "w");
			if(fp == NULL)	/* give up if the disk fallback also fails */
			{
				cerr << "Failed to create temporary file on disk" << endl;
				exit(1);
			}
		}

		cols2 = cols1 - 1;
		for(x = 0, z = 0; x < res_rows; x++, z++)
		{
			fprintf(fp, "%s\t", weight);
			for(y = 0; y < cols2; y++, z++)
				fprintf(fp, "%d\t", hres[z]);
			fprintf(fp, "%d\n", hres[z]);
		}
		fclose(fp);

		sprintf(sel, "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s", file, id);
		mysql_query(con, sel);
		rul_str++;
	}
	mysql_close(con);
}
#endif

28
packages/cuda/old/dbio.h
Normal file
@ -0,0 +1,28 @@
#ifndef _DBIO_H_
#define _DBIO_H_

#include "pred.h"
#ifdef TUFFY
#include <libpq-fe.h>
#endif
#ifdef ROCKIT
#include <mysql/mysql.h>
#endif
#include <vector>
#include "lista.h"

using namespace std;

#ifdef TUFFY
void postgresRead(PGconn **ret, vector<gpunode> *L, int *inpquery, char *names, int finalDR);
void postgresWrite(int *inpquery, int ninpf, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, PGconn *conn, int finalDR);
#endif
#ifdef ROCKIT
void mysqlRead(MYSQL **ret, int *qrs, vector<gpunode> *L, int ninpf, char *names, int finalDR);
void mysqlWrite(vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, vector<gpunode> *L, MYSQL *con);
#endif
#ifdef DATALOG
void datalogWrite(int query, vector<rulenode>::iterator rul_str, vector<rulenode>::iterator fin, int finalDR, int **result);
#endif

#endif
1337
packages/cuda/old/lista.cu
Executable file
File diff suppressed because it is too large
44
packages/cuda/old/lista.h
Executable file
@ -0,0 +1,44 @@
#ifndef _LISTA_H_
#define _LISTA_H_

/*Entry of the GPU/CPU memory lists: one block of rows belonging to a fact or rule result*/
typedef struct Node{
	int name;
	int *dev_address;
	int rows;
	int size;
	int iteration;
	int isrule;
}memnode;

/*Compiled form of a rule: per-predicate selections, projections, selfjoins and joins*/
typedef struct auxiliar{
	int name;
	int num_rows;
	int num_columns;
	int *address_host_table;
	int *rule_names;
	int *referencias;
	int **select;
	int *numsel;
	int **project;
	int2 *projpos;
	int **selfjoin;
	int *numselfj;
	int **wherejoin;
	int *numjoin;
	int totalpreds;
	int **preds;
	int2 *numpreds;
	int *negatives;
	char *rulename;
	int gen_act;
	int gen_ant;
}rulenode;

typedef struct completed{
	int name;
	int numrules;
	int reduce;
	int reset;
}compnode;

#endif
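/*Illustrative sketch, not part of the original sources: rulenode vectors
  are kept sorted by name (descending, via comparer() defined in
  memory.cu below), so individual rules can be located with
  std::lower_bound, as postgresWrite does above. The helper name
  `find_rule` and the vector contents are hypothetical.*/
#include <algorithm>
#include <vector>

static rulenode *find_rule(std::vector<rulenode> &rules, int name)
{
	rulenode key;
	key.name = name;
	/* rules must already be sorted with the same comparer */
	std::vector<rulenode>::iterator it = std::lower_bound(rules.begin(), rules.end(), key, comparer);
	if(it != rules.end() && it->name == name)
		return &(*it);
	return NULL;
}
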
575
packages/cuda/old/memory.cu
Executable file
@ -0,0 +1,575 @@
#include <list>
#include <iostream>
#include <stdlib.h>
#include <algorithm>
#include <thrust/device_vector.h>
#include "lista.h"
#include "memory.h"
#include "pred.h"

#define MAX_REC 200
#define MAX_FIX_POINTS 100

memnode temp_storage[MAX_REC];
/*List used to store information (address, size, etc.) about facts and rule results loaded in the GPU*/
list<memnode> GPUmem;
/*List used to store information about rule results offloaded from the GPU to the CPU*/
list<memnode> CPUmem;

/*Auxiliary function to sort the rule list*/
bool comparer(const rulenode &r1, const rulenode &r2)
{
	return (r1.name > r2.name);
}

/*Used in search functions to compare iterations*/
bool compareiteration(const memnode &r1, const memnode &r2)
{
	return (r1.iteration < r2.iteration);
}

/*Used in search functions to compare names*/
bool comparename(const memnode &r1, const memnode &r2)
{
	return (r1.name > r2.name);
}

/*Linear search for fact 'name'*/
template<class InputIterator>
InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
{
	while(first != last)
	{
		if(first->name == name && first->isrule == 0) return first;
		++first;
	}
	return last;
}

/*Finds all results of rule 'name' in iteration 'itr' in both CPU and GPU memory. Every result found is removed from its respective list*/
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
{
	int x = 0, sum = 0;
	memnode temp;
	list<memnode>::iterator i;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name && rec.first->isrule == 1)
		{
			temp_storage[x] = *rec.first;
			rec.first = GPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	*gpunum = x;
	temp.name = name;
	temp.isrule = 1;
	i = GPUmem.insert(rec.first, temp);
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name && rec.first->isrule == 1)
		{
			temp_storage[x] = *rec.first;
			rec.first = CPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	*totalrows = sum;
	*cpunum = x;
	return i;
}

list<memnode>::iterator buscarpornombrecpu(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
{
	int x = 0, sum = 0;
	memnode temp;
	list<memnode>::iterator i;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
		{
			temp_storage[x] = *rec.first;
			rec.first = GPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}

	*gpunum = x;
	temp.name = name;
	temp.isrule = 1;
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
		{
			temp_storage[x] = *rec.first;
			rec.first = CPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	i = CPUmem.insert(rec.first, temp);
	*totalrows = sum;
	*cpunum = x;
	return i;
}

/*Removes the least recently used memory block from GPU memory, sending it to CPU memory if it's a rule result.
If there are no used memory blocks in the GPU and we still don't have enough memory, the program exits with an error*/
void limpiar(const char s[], size_t sz)
{
	list<memnode>::iterator ini;
	memnode temp;
	size_t free, total;

	if(GPUmem.size() == 0)
	{
		cudaMemGetInfo(&free, &total);
		cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
		exit(1);
	}

	ini = GPUmem.begin();
	if(ini->isrule)
	{
		temp = *ini;
		temp.dev_address = (int *)malloc(ini->size);
		cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
		list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
		CPUmem.insert(pos, temp);
	}
	cudaFree(ini->dev_address);
	GPUmem.erase(ini);
}

/*Allocates 'size' bytes in GPU memory. If not enough memory is available, removes least recently used memory blocks until
enough space is available*/
void reservar(int **ptr, size_t size)
{
	size_t free, total;

	if (size == 0) {
		*ptr = NULL;
		return;
	}

	cudaMemGetInfo(&free, &total);
	while(free < size)
	{
		cout << "Memory freed " << free << " " << total << endl;
		limpiar("not enough memory", size);
		cudaMemGetInfo(&free, &total);
	}

	while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
		limpiar("Error in memory allocation", size);
	if (! *ptr ) {
		cudaMemGetInfo( &free, &total );
		cerr << "Could not allocate " << size << " bytes, only " << free << " available from total of " << total << " !!!" << endl;
		cerr << "Exiting CUDA...." << endl;
		exit(1);
	}
}

/*Creates a new entry in the GPU memory list*/
void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{
	memnode temp;
	temp.name = name;
	temp.dev_address = ptr;
	temp.rows = rows;
	temp.size = rows * num_columns * sizeof(int);
	temp.iteration = itr;
	temp.isrule = rule;
	GPUmem.push_back(temp);
}

void registrarcpu(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{
	memnode temp;
	temp.name = name;
	temp.dev_address = ptr;
	temp.rows = rows;
	temp.size = rows * num_columns * sizeof(int);
	temp.iteration = itr;
	temp.isrule = rule;
	CPUmem.push_back(temp);
}

/*Updates the information of an element in a list*/
template<class InputIterator>
void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
{
	i->dev_address = ptr;
	i->rows = rows;
	i->size = rows * num_columns * sizeof(int);
}

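/*Illustrative sketch, not part of the original sources: typical usage of
  the allocator above. reservar() evicts least recently used blocks until
  the request fits, and registrar() makes the new block visible to later
  cargar() calls. The sizes and the predicate name 42 are hypothetical.*/
void ejemplo_reservar(void)
{
	int rows = 1024, cols = 3;
	int *dbuf = NULL;
	reservar(&dbuf, rows * cols * sizeof(int));	/* may trigger limpiar() internally */
	registrar(42, cols, dbuf, rows, /*itr*/ 0, /*rule*/ 1);
}
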
/*Counts the total number of rows generated by rule 'name' in iteration 'itr'*/
int numrows(int name, int itr)
{
	int sum = 0;
	memnode temp;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
			sum += rec.first->rows;
		rec.first++;
	}
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
			sum += rec.first->rows;
		rec.first++;
	}
	return sum;
}

extern "C" void * YAP_IntToAtom(int);
extern "C" char * YAP_AtomName(void *);

/*Loads facts or rule results into GPU memory. If a fact is already in GPU memory, its pointer is simply returned. Otherwise,
memory is reserved and the fact is loaded. Rule results are loaded based on the current iteration 'itr' and both GPU and
CPU memories are searched for all instances of said results. The instances are combined into a single one in GPU memory.*/
int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int size, itrant, inc = 0;
	list<memnode>::iterator i;
	memnode fact;

	if(is_fact)
	{
		i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
		if(i != GPUmem.end())
		{
			fact = *i;
			GPUmem.erase(i);
			fact.iteration = itr;
			*ptr = fact.dev_address;
			GPUmem.push_back(fact);
			return fact.rows;
		}
		size = num_rows * num_columns * sizeof(int);
		reservar(&temp, size);
		cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
		registrar(name, num_columns, temp, num_rows, itr, 0);
		*ptr = temp;
		return num_rows;
	}
	if(itr > 0)
	{
		itrant = itr - 1;
		i = buscarpornombre(name, itrant, &totalrows, &numgpu, &numcpu);
		if((numgpu == 1) && (numcpu == 1))
		{
			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
			*ptr = temp_storage[0].dev_address;
			return temp_storage[0].rows;
		}
		size = totalrows * num_columns * sizeof(int);
		reservar(&temp, size);
		for(x = 0; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
			inc += temp_storage[x].size / sizeof(int);
			cudaFree(temp_storage[x].dev_address);
		}
		for(; x < numcpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
			inc += temp_storage[x].size / sizeof(int);
			free(temp_storage[x].dev_address);
		}
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}

int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int size, itrant, inc = 0;
	list<memnode>::iterator i;

	if(is_fact)
	{
		*ptr = address_host_table;
		return num_rows;
	}
	if(itr > 0)
	{
		itrant = itr - 1;
		i = buscarpornombrecpu(name, itrant, &totalrows, &numgpu, &numcpu);

		if((numgpu == 0) && (numcpu == 1))
		{
			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
			*ptr = temp_storage[0].dev_address;
			return temp_storage[0].rows;
		}
		size = totalrows * num_columns * sizeof(int);
		temp = (int *)malloc(size);
		for(x = 0; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
			inc += temp_storage[x].size / sizeof(int);
			cudaFree(temp_storage[x].dev_address);
		}
		for(; x < numcpu; x++)
		{
			memcpy(temp + inc, temp_storage[x].dev_address, temp_storage[x].size);
			inc += temp_storage[x].size / sizeof(int);
			free(temp_storage[x].dev_address);
		}
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}

/*Loads all results of rule 'name' from both GPU and CPU memories into the GPU*/
int cargafinal(int name, int cols, int **ptr)
{
	int *temp, *ini, cont = 0, numg = 0, numc = 0;
	memnode bus;
	bus.name = name;
	GPUmem.sort(comparename);
	CPUmem.sort(comparename);
	list<memnode>::iterator endg = GPUmem.end();
	list<memnode>::iterator endc = CPUmem.end();
	list<memnode>::iterator pos = lower_bound(GPUmem.begin(), endg, bus, comparename);
	list<memnode>::iterator gpu = pos;
	while(pos != endg && pos->name == name)
	{
		cont += pos->rows;
		numg++;
		pos++;
	}
	pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
	list<memnode>::iterator cpu = pos;
	while(pos != endc && pos->name == name)
	{
		cont += pos->rows;
		numc++;
		pos++;
	}

	if(numg == 0 && numc == 0)
		return 0;
	if(numg == 1 && numc == 0)
	{
		pos = gpu;
		*ptr = pos->dev_address;
		cont = pos->rows;
		GPUmem.erase(pos);
#ifdef TUFFY
		return -cont;
#else
		return cont;
#endif
	}
	if(numg == 0 && numc == 1)
	{
		pos = cpu;
		cont = pos->rows;
#ifdef TUFFY
		reservar(&temp, pos->size);
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
		*ptr = temp;
#else
		*ptr = pos->dev_address;
#endif
		CPUmem.erase(pos);
		return -cont;
	}

	reservar(&temp, cont * cols * sizeof(int));
	ini = temp;
	pos = gpu;
	while(pos != endg && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}
	pos = cpu;
	while(pos != endc && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}
	*ptr = ini;
	return cont;
}

/*Compares the results of the current iteration against the results of older iterations.
Used to avoid infinite computations when the result is not a single fixed point, but an
orbit of points.*/
bool generadas(int name, int filas, int cols, int itr)
{
	int r1, r2, x, fin;
	int *dop1, *dop2;

	r2 = numrows(name, itr);
	if(itr < MAX_FIX_POINTS)
		fin = itr;
	else
		fin = MAX_FIX_POINTS;
	for(x = 1; x <= fin; x++)
	{
		r1 = numrows(name, itr - x);
		if(r1 == r2)
		{
			r2 = cargar(name, filas, cols, 0, NULL, &dop2, itr + 1);
			thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
			r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
			thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
			if(thrust::equal(pt1, pt1 + r1, pt2) == true)
				return true;
		}
	}
	return false;
}

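/*Illustrative sketch, not part of the original sources: the fixed-point
  test above boils down to thrust::equal over two raw device arrays. A
  minimal standalone version of that comparison, with hypothetical data:*/
#include <thrust/device_vector.h>
#include <thrust/equal.h>

bool ejemplo_equal(void)
{
	int h[4] = {1, 2, 3, 4};
	thrust::device_vector<int> a(h, h + 4);
	thrust::device_vector<int> b(h, h + 4);
	/* true iff every element of a matches the corresponding element of b */
	return thrust::equal(a.begin(), a.end(), b.begin());
}
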
void mostrar_memoria()
{
	unsigned int x;
	list<memnode>::iterator i = GPUmem.begin();
	cout << "GPU memory list start" << endl;
	for(x = 0; x < GPUmem.size(); x++, i++)
		cout << i->name << " " << i->iteration << " " << i->isrule << " " << i->rows << " " << i->size << endl;
	cout << "GPU memory list end" << endl;
}

void mostrar_memcpu()
{
	unsigned int x;
	list<memnode>::iterator i = CPUmem.begin();
	cout << "CPU memory list start" << endl;
	for(x = 0; x < CPUmem.size(); x++, i++)
		cout << i->name << " " << i->iteration << endl;
	cout << "CPU memory list end" << endl;
}

/*Clears all rule results from both GPU and CPU memory*/
void clear_memory()
{
	list<memnode>::iterator ini;
	list<memnode>::iterator fin;
	ini = GPUmem.begin();
	fin = GPUmem.end();
	while(ini != fin)
	{
		if(ini->isrule)
		{
			cudaFree(ini->dev_address);
			ini = GPUmem.erase(ini);
		}
		else
			ini++;
	}
	ini = CPUmem.begin();
	fin = CPUmem.end();
	while(ini != fin)
	{
		free(ini->dev_address);
		ini++;
	}
	CPUmem.clear();
}

/*Clears everything from both GPU and CPU memory*/
void clear_memory_all()
{
	list<memnode>::iterator ini;
	list<memnode>::iterator fin;
	ini = GPUmem.begin();
	fin = GPUmem.end();
	while(ini != fin)
	{
		cudaFree(ini->dev_address);
		ini++;
	}
	GPUmem.clear();
	ini = CPUmem.begin();
	fin = CPUmem.end();
	while(ini != fin)
	{
		free(ini->dev_address);
		ini++;
	}
	CPUmem.clear();
}

/*Removes all instances of fact 'name' from both CPU and GPU memories*/
void liberar(int name)
{
	list<memnode>::iterator i;
	memnode fact;
	i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
	if(i != GPUmem.end())
	{
		fact = *i;
		GPUmem.erase(i);
		cudaFree(fact.dev_address);
	}
	i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
	if(i != CPUmem.end())
	{
		fact = *i;
		CPUmem.erase(i);
		free(fact.dev_address);
	}
}

/*Adds all rows in 'dop1' to the fact 'name' by creating a new array capable of holding both.*/
void sumar(int name, int *dop1, int cols, int rows)
{
	list<memnode>::iterator i;
	memnode fact;
	i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
	int *res, newrows, offset;
	if(i != GPUmem.end())
	{
		fact = *i;
		newrows = rows + fact.rows;
		reservar(&res, newrows * cols * sizeof(int));
		offset = fact.rows * cols;
		cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
		GPUmem.erase(i);
		registrar(name, cols, res, newrows, 0, 0);
		cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
		cudaFree(fact.dev_address);
	}
}

27
packages/cuda/old/memory.h
Executable file
@ -0,0 +1,27 @@
#ifndef _MEMORY_H_
#define _MEMORY_H_

#include <list>
#include <vector>
#include "lista.h"

using namespace std;

bool comparer(const rulenode&, const rulenode&);
void limpiar(const char [], size_t);
void limpiartodo(int*, int*);
int cargar(int, int, int, int, int*, int**, int);
int cargarcpu(int, int, int, int, int*, int**, int);
int cargafinal(int, int, int**);
void reservar(int**, size_t);
void registrar(int, int, int*, int, int, int);
void registrarcpu(int, int, int*, int, int, int);
bool generadas(int, int, int, int);
void sumar(int, int*, int, int);
void liberar(int);
void mostrar_memoria(void);
void mostrar_memcpu(void);
void clear_memory(void);
void clear_memory_all(void);

#endif
47
packages/cuda/old/pred.h
Executable file
@ -0,0 +1,47 @@
#ifndef _PRED_H_
#define _PRED_H_

// #define DEBUG_MEM 1

typedef struct Nodo{
	int name;
	int num_rows;
	int num_columns;
	int is_fact;
	int *address_host_table;
	int *negatives;
	char *predname;
	double *weight;
}gpunode;

typedef gpunode predicate;

//#define TIMER 1
#define DATALOG 1
#define NUM_T 4
#define INISIZE 1000000

#if TIMER
typedef struct Stats{
	size_t joins, selects, unions, builtins;
	size_t calls;
	double total_time;
	float max_time, min_time;
	float select1_time, select2_time, join_time, sort_time, union_time, pred_time;
}statinfo;

extern statinfo cuda_stats;
#endif

/*Constants used to mark comparison predicates*/
#define BPOFFSET (-6)
#define SBG_EQ (-1)
#define SBG_GT (-2)
#define SBG_LT (-3)
#define SBG_GE (-4)
#define SBG_LE (-5)
#define SBG_DF (-6)

int Cuda_Eval(predicate**, int, predicate**, int, int*, int**, char*, int);
void Cuda_Statistics( void );
#endif
306
packages/cuda/old/selectproyect.cu
Executable file
@ -0,0 +1,306 @@
#include <thrust/device_vector.h>
#include <thrust/scan.h>
#include <stdlib.h>
#include "memory.h"
#include "bpreds.h"

/*Marks all rows that comply with the selections*/
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, posact;
	if(threadIdx.x < numc)
		shared[threadIdx.x] = cons[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		rowact = id * cols;
		for(x = 0; x < numc; x += 2)
		{
			posact = rowact + shared[x];
			if(dop1[posact] != shared[x+1])
				return;
		}
		res[id] = 1;
	}
}

/*If we already have an array of marks (perhaps because the selfjoin was applied first),
we unmark any rows that do not comply with the selections*/
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, posact;
	if(threadIdx.x < numc)
		shared[threadIdx.x] = cons[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		if(res[id] == 0)
			return;
		rowact = id * cols;
		for(x = 0; x < numc; x += 2)
		{
			posact = rowact + shared[x];
			if(dop1[posact] != shared[x+1])
			{
				res[id] = 0;
				return;
			}
		}
	}
}

/*Unmarks all rows that do not comply with the selfjoins.*/
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int temp, temp2, pos, x, y;
	if(threadIdx.x < cont)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		if(res[id] == 0)
			return;
		pos = id * cols;
		for(x = 0; x < cont; x++)
		{
			temp = dop1[pos+shared[x]];
			y = x + 1;
			temp2 = shared[y];
			while(temp2 > -1)
			{
				if(temp != dop1[temp2+pos])
				{
					res[id] = 0;
					return;
				}
				y++;
				temp2 = shared[y];
			}
			x = y;
		}
	}
}

/*Marks all rows that comply with the selfjoins*/
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int temp, temp2, pos, x, y;
	if(threadIdx.x < cont)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		pos = id * cols;
		for(x = 0; x < cont; x++)
		{
			temp = dop1[pos+shared[x]];
			y = x + 1;
			temp2 = shared[y];
			while(temp2 > -1)
			{
				if(temp != dop1[temp2+pos])
					return;
				y++;
				temp2 = shared[y];
			}
			x = y;
		}
		res[id] = 1;
	}
}

/*Projects all columns found in 'dhead' to a new array 'res'*/
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int pos, posr, x;
	if(threadIdx.x < hsize)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		pos = id * cols;
		posr = id * hsize;
		for(x = 0; x < hsize; x++, posr++)
			res[posr] = dop1[pos+shared[x]];
	}
}

/*Projects all columns found in 'dhead' using only the rows marked as valid (i.e. those that complied with
selections, selfjoins, etc.). The array 'temp' holds the result of the prefix sum of said marks.*/
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int pos, posr, x;
	if(threadIdx.x < hsize)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		posr = temp[id];
		if(temp[id+1] != posr)
		{
			pos = id * cols;
			posr *= hsize;
			for(x = 0; x < hsize; x++, posr++)
				res[posr] = dop1[pos+shared[x]];
		}
	}
}

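/*Illustrative sketch, not part of the original sources: marcar2/samejoin2
  plus an inclusive scan plus llenarproyectar form a classic stream
  compaction pipeline: mark each surviving row with 1, prefix-sum the
  marks to get output slots, then scatter the projected rows. A minimal
  host-side version of the same idea using thrust only; the data here is
  hypothetical.*/
#include <thrust/device_vector.h>
#include <thrust/scan.h>

void ejemplo_compactar(void)
{
	/* marks[i] == 1 when row i passed all filters */
	int h_marks[6] = {1, 0, 1, 1, 0, 1};
	thrust::device_vector<int> marks(h_marks, h_marks + 6);
	thrust::device_vector<int> slots(7, 0);
	/* slots[i] holds the output position of row i; slots[6] is the survivor count */
	thrust::inclusive_scan(marks.begin(), marks.end(), slots.begin() + 1);
	/* a kernel like llenarproyectar would now write row i to slots[i] whenever slots[i+1] != slots[i] */
}
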
/*Performs selections, selfjoins and comparison predicates when the rule has a single normal predicate.*/
int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *preds, int numpreds, int *project, int **ret, int ANDlogic)
{
	int *fres = NULL, *temp = NULL;
	int *dhead = NULL, tmplen;
	int size, size2, num;
	thrust::device_ptr<int> res;

#if TIMER
	cuda_stats.selects++;
#endif

	int head_bytes = maximo(4, numselect, numselfj, numpreds, head_size) * sizeof(int);
	reservar(&dhead, head_bytes);
	int numthreads = 1024;
	//int numthreads = 32;
	int blockllen = rows / numthreads + 1;

#ifdef ROCKIT
	ANDlogic = 1;
#endif

	if(numselect > 0)
	{
		tmplen = rows + 1;
		size2 = tmplen * sizeof(int);
		reservar(&temp, size2);
		cudaMemset(temp, 0, size2);
		size = numselect * sizeof(int);
		cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);

		marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);

		if(numselfj > 0)
		{
			size = numselfj * sizeof(int);
			cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
			samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
		}

		if(numpreds > 0)
		{
			size = numpreds * sizeof(int);
			cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
			if(ANDlogic)
				bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
			else
				bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
		}

		res = thrust::device_pointer_cast(temp);
		thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
		num = res[rows];
		if(num == 0)
			return 0;

		size = head_size * sizeof(int);
		reservar(&fres, num * size);
		cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
		llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
		cudaFree(dhead);
		cudaFree(temp);
		*ret = fres;
		return num;
	}
	else
	{
		if(numselfj > 0)
		{
			tmplen = rows + 1;
			size2 = tmplen * sizeof(int);
			reservar(&temp, size2);
			cudaMemset(temp, 0, size2);
			size = numselfj * sizeof(int);
			cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
			samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);

			if(numpreds > 0)
			{
				size = numpreds * sizeof(int);
				cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
				if(ANDlogic)
					bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
				else
					bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
			}

			res = thrust::device_pointer_cast(temp);
			thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
			num = res[rows];
			if(num == 0)
				return 0;

			size = head_size * sizeof(int);
			reservar(&fres, num * size);
			cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
			llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
			cudaFree(dhead);
			cudaFree(temp);
			*ret = fres;
			return num;
		}
		else
		{
			if(numpreds > 0)
			{
				tmplen = rows + 1;
				size2 = tmplen * sizeof(int);
				reservar(&temp, size2);
				cudaMemset(temp, 0, size2);
				size = numpreds * sizeof(int);
				cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);

				if(ANDlogic)
					bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
				else
					bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
				res = thrust::device_pointer_cast(temp);
				thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
				num = res[rows];

				if(num == 0)
					return 0;

				size = head_size * sizeof(int);
				reservar(&fres, num * size);
				cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
				llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
				cudaFree(dhead);
				cudaFree(temp);
				*ret = fres;
				return num;
			}
			else
			{
				size = head_size * sizeof(int);
				reservar(&fres, rows * size);
				cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
				proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
				cudaFree(dhead);
				*ret = fres;
				return rows;
			}
		}
	}
}
1279
packages/cuda/old/treeb.cu
Executable file
File diff suppressed because it is too large
763
packages/cuda/old/union2.cu
Executable file
@ -0,0 +1,763 @@
/*Computer-generated file to remove duplicates. Since Thrust's unique and sort, unlike their std counterparts, don't have a way to specify the size of each element in
the array, comparing pairs, triplets and other sets is not possible without defining a new pointer and all related operations for each set. If you have a better idea to do
this, please don't hesitate to email us.*/

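/*Illustrative sketch, not part of the original sources: a reconstruction
  of what the generated code below instantiates for width 2. The real
  s2/o2/p2/q2 definitions live in union2.h; the names and layout here are
  assumptions made for the sketch. A fixed-size struct gives
  thrust::sort/unique a typed element of the right width, and the
  functors supply ordering and equality over its columns.*/
typedef struct ejemplo_s2 {
	int v[2];
} ejemplo_s2;

struct ejemplo_o2 {	/* lexicographic ordering for sort */
	__host__ __device__ bool operator()(const ejemplo_s2 &a, const ejemplo_s2 &b) const {
		if(a.v[0] != b.v[0]) return a.v[0] < b.v[0];
		return a.v[1] < b.v[1];
	}
};

struct ejemplo_p2 {	/* full-row equality for unique */
	__host__ __device__ bool operator()(const ejemplo_s2 &a, const ejemplo_s2 &b) const {
		return a.v[0] == b.v[0] && a.v[1] == b.v[1];
	}
};
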
#include <thrust/device_vector.h>
#include <thrust/unique.h>
#include <thrust/distance.h>
#include <thrust/sort.h>
#include <iostream>
#include "memory.h"
#include "union2.h"

int unir(int *res, int rows, int tipo, int **ret, int final)
{
	thrust::device_ptr<int> pt, re;
	thrust::device_ptr<s2> pt2, re2;
	thrust::device_ptr<s3> pt3, re3;
	thrust::device_ptr<s4> pt4, re4;
	thrust::device_ptr<s5> pt5, re5;
	thrust::device_ptr<s6> pt6, re6;
	thrust::device_ptr<s7> pt7, re7;
	thrust::device_ptr<s8> pt8, re8;
	thrust::device_ptr<s9> pt9, re9;
	thrust::device_ptr<s10> pt10, re10;
	thrust::device_ptr<s11> pt11, re11;
	thrust::device_ptr<s12> pt12, re12;
	thrust::device_ptr<s13> pt13, re13;
	thrust::device_ptr<s14> pt14, re14;
	thrust::device_ptr<s15> pt15, re15;
	thrust::device_ptr<s16> pt16, re16;
	thrust::device_ptr<s17> pt17, re17;
	thrust::device_ptr<s18> pt18, re18;
	thrust::device_ptr<s19> pt19, re19;
	thrust::device_ptr<s20> pt20, re20;
	s2 *t2;
	s3 *t3;
	s4 *t4;
	s5 *t5;
	s6 *t6;
	s7 *t7;
	s8 *t8;
	s9 *t9;
	s10 *t10;
	s11 *t11;
	s12 *t12;
	s13 *t13;
	s14 *t14;
	s15 *t15;
	s16 *t16;
	s17 *t17;
	s18 *t18;
	s19 *t19;
	s20 *t20;
	int flag, nrows, *nres, size;

#if TIMER
	cuda_stats.unions++;
#endif

	switch(tipo)
	{
		case 1:
		{
			pt = thrust::device_pointer_cast(res);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt, pt + rows);
					if(final)
					{
						re = thrust::unique(pt, pt + rows, q1());
						re = thrust::unique(pt, re);
					}
					else
						re = thrust::unique(pt, pt + rows);
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt, re);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
		case 2:
		{
			t2 = (s2*)res;
			pt2 = thrust::device_pointer_cast(t2);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt2, pt2 + rows, o2());
					if(final)
					{
						re2 = thrust::unique(pt2, pt2 + rows, q2());
						re2 = thrust::unique(pt2, re2, p2());
					}
					else
						re2 = thrust::unique(pt2, pt2 + rows, p2());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt2, re2);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
		case 3:
		{
			t3 = (s3*)res;
			pt3 = thrust::device_pointer_cast(t3);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt3, pt3 + rows, o3());
					if(final)
					{
						re3 = thrust::unique(pt3, pt3 + rows, q3());
						re3 = thrust::unique(pt3, re3, p3());
					}
					else
						re3 = thrust::unique(pt3, pt3 + rows, p3());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt3, re3);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
		case 4:
		{
			t4 = (s4*)res;
			pt4 = thrust::device_pointer_cast(t4);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt4, pt4 + rows, o4());
					if(final)
					{
						re4 = thrust::unique(pt4, pt4 + rows, q4());
						re4 = thrust::unique(pt4, re4, p4());
					}
					else
						re4 = thrust::unique(pt4, pt4 + rows, p4());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt4, re4);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
		case 5:
		{
			t5 = (s5*)res;
			pt5 = thrust::device_pointer_cast(t5);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt5, pt5 + rows, o5());
					if(final)
					{
						re5 = thrust::unique(pt5, pt5 + rows, q5());
						re5 = thrust::unique(pt5, re5, p5());
					}
					else
						re5 = thrust::unique(pt5, pt5 + rows, p5());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt5, re5);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
		case 6:
		{
			t6 = (s6*)res;
			pt6 = thrust::device_pointer_cast(t6);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt6, pt6 + rows, o6());
					if(final)
					{
						re6 = thrust::unique(pt6, pt6 + rows, q6());
						re6 = thrust::unique(pt6, re6, p6());
					}
					else
						re6 = thrust::unique(pt6, pt6 + rows, p6());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt6, re6);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
		case 7:
		{
			t7 = (s7*)res;
			pt7 = thrust::device_pointer_cast(t7);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt7, pt7 + rows, o7());
					if(final)
					{
						re7 = thrust::unique(pt7, pt7 + rows, q7());
						re7 = thrust::unique(pt7, re7, p7());
					}
					else
						re7 = thrust::unique(pt7, pt7 + rows, p7());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt7, re7);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
		case 8:
		{
			t8 = (s8*)res;
			pt8 = thrust::device_pointer_cast(t8);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt8, pt8 + rows, o8());
					if(final)
					{
						re8 = thrust::unique(pt8, pt8 + rows, q8());
						re8 = thrust::unique(pt8, re8, p8());
					}
					else
						re8 = thrust::unique(pt8, pt8 + rows, p8());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt8, re8);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
		case 9:
		{
			t9 = (s9*)res;
			pt9 = thrust::device_pointer_cast(t9);
			flag = 0;
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt9, pt9 + rows, o9());
					if(final)
					{
						re9 = thrust::unique(pt9, pt9 + rows, q9());
						re9 = thrust::unique(pt9, re9, p9());
					}
					else
						re9 = thrust::unique(pt9, pt9 + rows, p9());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar("sort/unique in unir", 0);
				}
			}
			nrows = thrust::distance(pt9, re9);
			if(nrows < rows / 2)
			{
				size = nrows * tipo * sizeof(int);
				reservar(&nres, size);
				cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
				cudaFree(*ret);
				*ret = nres;
			}
			return nrows;
		}
case 10:
|
||||||
|
{
|
||||||
|
t10 = (s10*)res;
|
||||||
|
pt10 = thrust::device_pointer_cast(t10);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt10, pt10 + rows, o10());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re10 = thrust::unique(pt10, pt10 + rows, q10());
|
||||||
|
re10 = thrust::unique(pt10, re10, p10());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re10 = thrust::unique(pt10, pt10 + rows, p10());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt10, re10);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 11:
|
||||||
|
{
|
||||||
|
t11 = (s11*)res;
|
||||||
|
pt11 = thrust::device_pointer_cast(t11);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt11, pt11 + rows, o11());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re11 = thrust::unique(pt11, pt11 + rows, q11());
|
||||||
|
re11 = thrust::unique(pt11, re11, p11());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re11 = thrust::unique(pt11, pt11 + rows, p11());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt11, re11);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 12:
|
||||||
|
{
|
||||||
|
t12 = (s12*)res;
|
||||||
|
pt12 = thrust::device_pointer_cast(t12);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt12, pt12 + rows, o12());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re12 = thrust::unique(pt12, pt12 + rows, q12());
|
||||||
|
re12 = thrust::unique(pt12, re12, p12());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re12 = thrust::unique(pt12, pt12 + rows, p12());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt12, re12);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 13:
|
||||||
|
{
|
||||||
|
t13 = (s13*)res;
|
||||||
|
pt13 = thrust::device_pointer_cast(t13);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt13, pt13 + rows, o13());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re13 = thrust::unique(pt13, pt13 + rows, q13());
|
||||||
|
re13 = thrust::unique(pt13, re13, p13());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re13 = thrust::unique(pt13, pt13 + rows, p13());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt13, re13);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 14:
|
||||||
|
{
|
||||||
|
t14 = (s14*)res;
|
||||||
|
pt14 = thrust::device_pointer_cast(t14);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt14, pt14 + rows, o14());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re14 = thrust::unique(pt14, pt14 + rows, q14());
|
||||||
|
re14 = thrust::unique(pt14, re14, p14());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re14 = thrust::unique(pt14, pt14 + rows, p14());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt14, re14);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 15:
|
||||||
|
{
|
||||||
|
t15 = (s15*)res;
|
||||||
|
pt15 = thrust::device_pointer_cast(t15);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt15, pt15 + rows, o15());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re15 = thrust::unique(pt15, pt15 + rows, q15());
|
||||||
|
re15 = thrust::unique(pt15, re15, p15());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re15 = thrust::unique(pt15, pt15 + rows, p15());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt15, re15);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 16:
|
||||||
|
{
|
||||||
|
t16 = (s16*)res;
|
||||||
|
pt16 = thrust::device_pointer_cast(t16);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt16, pt16 + rows, o16());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re16 = thrust::unique(pt16, pt16 + rows, q16());
|
||||||
|
re16 = thrust::unique(pt16, re16, p16());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re16 = thrust::unique(pt16, pt16 + rows, p16());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt16, re16);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 17:
|
||||||
|
{
|
||||||
|
t17 = (s17*)res;
|
||||||
|
pt17 = thrust::device_pointer_cast(t17);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt17, pt17 + rows, o17());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re17 = thrust::unique(pt17, pt17 + rows, q17());
|
||||||
|
re17 = thrust::unique(pt17, re17, p17());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re17 = thrust::unique(pt17, pt17 + rows, p17());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt17, re17);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 18:
|
||||||
|
{
|
||||||
|
t18 = (s18*)res;
|
||||||
|
pt18 = thrust::device_pointer_cast(t18);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt18, pt18 + rows, o18());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re18 = thrust::unique(pt18, pt18 + rows, q18());
|
||||||
|
re18 = thrust::unique(pt18, re18, p18());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re18 = thrust::unique(pt18, pt18 + rows, p18());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt18, re18);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 19:
|
||||||
|
{
|
||||||
|
t19 = (s19*)res;
|
||||||
|
pt19 = thrust::device_pointer_cast(t19);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt19, pt19 + rows, o19());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re19 = thrust::unique(pt19, pt19 + rows, q19());
|
||||||
|
re19 = thrust::unique(pt19, re19, p19());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re19 = thrust::unique(pt19, pt19 + rows, p19());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt19, re19);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
case 20:
|
||||||
|
{
|
||||||
|
t20 = (s20*)res;
|
||||||
|
pt20 = thrust::device_pointer_cast(t20);
|
||||||
|
flag = 0;
|
||||||
|
while(flag != 1)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
thrust::sort(pt20, pt20 + rows, o20());
|
||||||
|
if(final)
|
||||||
|
{
|
||||||
|
re20 = thrust::unique(pt20, pt20 + rows, q20());
|
||||||
|
re20 = thrust::unique(pt20, re20, p20());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
re20 = thrust::unique(pt20, pt20 + rows, p20());
|
||||||
|
flag = 1;
|
||||||
|
}
|
||||||
|
catch(std::bad_alloc &e)
|
||||||
|
{
|
||||||
|
limpiar("sort/unique in unir", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nrows = thrust::distance(pt20, re20);
|
||||||
|
if(nrows < rows / 2)
|
||||||
|
{
|
||||||
|
size = nrows * tipo * sizeof(int);
|
||||||
|
reservar(&nres, size);
|
||||||
|
cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(*ret);
|
||||||
|
*ret = nres;
|
||||||
|
}
|
||||||
|
return nrows;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
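Cases 2 through 20 above are mechanically identical: only the tuple struct (s2 ... s20) and its comparison functors (oN for sorting, qN and pN for deduplication) change with the arity. Each case sorts the relation, removes duplicates with thrust::unique (a second pass with a different equality functor when 'final' is set), retries after limpiar() frees memory on std::bad_alloc, and finally compacts the result buffer when at least half the rows were removed. The listing keeps the cuda* memory calls in this file; as a hedged sketch (not part of this commit), the compaction step under the hip* naming used in the rest of the patch would read:

    /* Sketch only: assumes union2.h is later hipified like the other files in
       this commit. 'reservar' is the project's device-allocation wrapper and
       'tipo' is the tuple width in ints. */
    nrows = thrust::distance(pt3, re3);
    if(nrows < rows / 2)    /* shrink only when at least half the rows went away */
    {
        size = nrows * tipo * sizeof(int);
        reservar(&nres, size);
        hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);    /* copy survivors into the smaller buffer */
        hipFree(*ret);
        *ret = nres;
    }
    return nrows;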
1005    packages/cuda/old/union2.h    Executable file
(File diff suppressed because it is too large.)
0    packages/cuda/pred.h    Executable file → Normal file
103    packages/cuda/selectproyect.cu    Executable file → Normal file
@@ -1,3 +1,4 @@
+#include "hip/hip_runtime.h"
 #include <thrust/device_vector.h>
 #include <thrust/scan.h>
 #include <stdlib.h>
@@ -8,10 +9,10 @@
 __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int x, rowact, posact;
-    if(threadIdx.x < numc)
-        shared[threadIdx.x] = cons[threadIdx.x];
+    if(hipThreadIdx_x < numc)
+        shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -30,10 +31,10 @@ we unmark any rows that do not comply with the selections*/
 __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int x, rowact, posact;
-    if(threadIdx.x < numc)
-        shared[threadIdx.x] = cons[threadIdx.x];
+    if(hipThreadIdx_x < numc)
+        shared[hipThreadIdx_x] = cons[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -56,10 +57,10 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
 __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int temp, temp2, pos, x, y;
-    if(threadIdx.x < cont)
-        shared[threadIdx.x] = dhead[threadIdx.x];
+    if(hipThreadIdx_x < cont)
+        shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -90,10 +91,10 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
 __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int temp, temp2, pos, x, y;
-    if(threadIdx.x < cont)
-        shared[threadIdx.x] = dhead[threadIdx.x];
+    if(hipThreadIdx_x < cont)
+        shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -120,10 +121,10 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
 __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int pos, posr, x;
-    if(threadIdx.x < hsize)
-        shared[threadIdx.x] = dhead[threadIdx.x];
+    if(hipThreadIdx_x < hsize)
+        shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -139,10 +140,10 @@ selections, selfjoins, etc.). The array 'temp' holds the result of the prefix su
 __global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int pos, posr, x;
-    if(threadIdx.x < hsize)
-        shared[threadIdx.x] = dhead[threadIdx.x];
+    if(hipThreadIdx_x < hsize)
+        shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
     __syncthreads();
     if(id < rows)
     {
@@ -184,27 +185,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
         tmplen = rows + 1;
         size2 = tmplen * sizeof(int);
         reservar(&temp, size2);
-        cudaMemset(temp, 0, size2);
+        hipMemset(temp, 0, size2);
         size = numselect * sizeof(int);
-        cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
+        hipMemcpy(dhead, select, size, hipMemcpyHostToDevice);
 
-        marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
+        hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselect, temp + 1);
 
         if(numselfj > 0)
         {
             size = numselfj * sizeof(int);
-            cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
-            samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
+            hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
+            hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
         }
 
         if(numpreds > 0)
         {
             size = numpreds * sizeof(int);
-            cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
+            hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
             if(ANDlogic)
-                bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+                hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
             else
-                bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+                hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
         }
 
         res = thrust::device_pointer_cast(temp);
@@ -215,10 +216,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 
         size = head_size * sizeof(int);
         reservar(&fres, num * size);
-        cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
-        llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
-        cudaFree(dhead);
-        cudaFree(temp);
+        hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
+        hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
+        hipFree(dhead);
+        hipFree(temp);
         *ret = fres;
         return num;
     }
@@ -229,19 +230,19 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
         tmplen = rows + 1;
         size2 = tmplen * sizeof(int);
         reservar(&temp, size2);
-        cudaMemset(temp, 0, size2);
+        hipMemset(temp, 0, size2);
         size = numselfj * sizeof(int);
-        cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
-        samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
+        hipMemcpy(dhead, selfjoin, size, hipMemcpyHostToDevice);
+        hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numselfj, temp + 1);
 
         if(numpreds > 0)
         {
             size = numpreds * sizeof(int);
-            cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
+            hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
             if(ANDlogic)
-                bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+                hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
             else
-                bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+                hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
 
         }
 
@@ -253,10 +254,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 
         size = head_size * sizeof(int);
         reservar(&fres, num * size);
-        cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
-        llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
-        cudaFree(dhead);
-        cudaFree(temp);
+        hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
+        hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
+        hipFree(dhead);
+        hipFree(temp);
         *ret = fres;
         return num;
     }
@@ -267,14 +268,14 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
         tmplen = rows + 1;
         size2 = tmplen * sizeof(int);
         reservar(&temp, size2);
-        cudaMemset(temp, 0, size2);
+        hipMemset(temp, 0, size2);
         size = numpreds * sizeof(int);
-        cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
+        hipMemcpy(dhead, preds, size, hipMemcpyHostToDevice);
 
         if(ANDlogic)
-            bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+            hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
         else
-            bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+            hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, numpreds, temp + 1);
         res = thrust::device_pointer_cast(temp);
         thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
         num = res[rows];
@@ -284,10 +285,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 
         size = head_size * sizeof(int);
         reservar(&fres, num * size);
-        cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
-        llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
-        cudaFree(dhead);
-        cudaFree(temp);
+        hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
+        hipLaunchKernel(HIP_KERNEL_NAME(llenarproyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, temp, dhead, head_size, fres);
+        hipFree(dhead);
+        hipFree(temp);
         *ret = fres;
         return num;
     }
@@ -295,9 +296,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
     {
         size = head_size * sizeof(int);
         reservar(&fres, rows * size);
-        cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
-        proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
-        cudaFree(dhead);
+        hipMemcpy(dhead, project, size, hipMemcpyHostToDevice);
+        hipLaunchKernel(HIP_KERNEL_NAME(proyectar), dim3(blockllen), dim3(numthreads), size, 0, dop1, rows, cols, dhead, head_size, fres);
+        hipFree(dhead);
         *ret = fres;
         return rows;
     }
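Every kernel launch in this file follows the same mechanical rewrite: the CUDA triple-chevron syntax becomes a call to the hipLaunchKernel macro, with the grid and block sizes wrapped in dim3, then the dynamic shared-memory size and the stream (0 here), and finally the original argument list. A minimal sketch of the pattern with a toy kernel (illustrative names, not from this file; note that later HIP releases renamed this macro hipLaunchKernelGGL):

    #include "hip/hip_runtime.h"

    __global__ void scale(int *v, int n, int k)
    {
        int i = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
        if(i < n)
            v[i] *= k;    /* multiply each element in place */
    }

    /* CUDA: scale<<<blocks, threads>>>(d_v, n, 2);
       HIP:  grid/block become explicit dim3, then shared-mem bytes and stream: */
    hipLaunchKernel(HIP_KERNEL_NAME(scale), dim3(blocks), dim3(threads), 0, 0, d_v, n, 2);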
0    packages/cuda/selectproyectcpu.cpp    Executable file → Normal file
347    packages/cuda/treeb.cu    Executable file → Normal file
@@ -1,3 +1,4 @@
+#include "hip/hip_runtime.h"
 #include <thrust/host_vector.h>
 #include <thrust/device_vector.h>
 #include <thrust/sequence.h>
@@ -160,11 +161,11 @@ __device__ int firstMatchingKeyInDataNode2(Record records[], IKeyType key)
 
 __global__ void gCreateIndex(IDataNode data[], IDirectoryNode dir[], int dirSize, int tree_size, int bottom_start, int nNodesPerBlock)
 {
-    int startIdx = blockIdx.x * nNodesPerBlock;
+    int startIdx = hipBlockIdx_x * nNodesPerBlock;
     int endIdx = startIdx + nNodesPerBlock;
     if(endIdx > dirSize)
         endIdx = dirSize;
-    int keyIdx = threadIdx.x;
+    int keyIdx = hipThreadIdx_x;
 
     // Proceed only when in internal nodes
     for(int nodeIdx = startIdx; nodeIdx < endIdx; nodeIdx++)
@@ -191,11 +192,11 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
 {
     // Bringing the root node (visited by every tuple) to the faster shared memory
     __shared__ IKeyType RootNodeKeys[TREE_NODE_SIZE];
-    RootNodeKeys[threadIdx.x] = dir->keys[threadIdx.x];
+    RootNodeKeys[hipThreadIdx_x] = dir->keys[hipThreadIdx_x];
 
     __syncthreads();
 
-    int OverallThreadIdx = blockIdx.x * THRD_PER_BLCK_search + threadIdx.x;
+    int OverallThreadIdx = hipBlockIdx_x * THRD_PER_BLCK_search + hipThreadIdx_x;
 
     for(int keyIdx = OverallThreadIdx; keyIdx < nSearchKeys; keyIdx += THRD_PER_GRID_search)
     {
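gSearchTree above combines two idioms worth noting: the root node's keys are staged once per block into __shared__ memory (every search visits the root, so the global-memory read is amortized across the block), and each thread then walks the search keys with a grid-wide stride of THRD_PER_GRID_search, so a fixed-size grid covers any number of keys. A minimal, self-contained sketch of the grid-stride part under the same HIP builtins (hypothetical kernel, not from treeb.cu):

    #include "hip/hip_runtime.h"

    /* Each thread handles keys i, i + stride, i + 2*stride, ... */
    __global__ void visit_keys(const int *keys, int nkeys, int *out)
    {
        int i = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
        int stride = hipGridDim_x * hipBlockDim_x;
        for(; i < nkeys; i += stride)
            out[i] = keys[i];    /* stand-in for the actual tree descent */
    }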
@@ -219,7 +220,7 @@ __global__ void gSearchTree(IDataNode* data, int nDataNodes, IDirectoryNode* dir
 /*Counts the number of times a row in 'S' is to be joined to a row in 'R'.*/
 __global__ void gIndexJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[])
 {
-    int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
 
     if(s_cur < sLen)
     {
@@ -246,11 +247,11 @@ in 'g_locations' those rows that have equal values in the checked columns.*/
 __global__ void gIndexMultiJoinNegative(int *R, int *S, int g_locations[], int rLen, int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
 {
     extern __shared__ int shared[];
-    int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int posr, poss, x;
 
-    if(threadIdx.x < wj)
-        shared[threadIdx.x] = muljoin[threadIdx.x];
+    if(hipThreadIdx_x < wj)
+        shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
     __syncthreads();
 
     if(r_cur < rLen)
@@ -287,11 +288,11 @@ times a row in 'S' is to be joined to its corresponding row in 'R', storing the
 __global__ void gIndexMultiJoin(int *R, int *S, int g_locations[], int sLen, int g_ResNums[], int *p1, int *p2, int of1, int of2, int *mloc, int *sloc, int *muljoin, int wj)
 {
     extern __shared__ int shared[];
-    int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int posr, poss, x;
 
-    if(threadIdx.x < wj)
-        shared[threadIdx.x] = muljoin[threadIdx.x];
+    if(hipThreadIdx_x < wj)
+        shared[hipThreadIdx_x] = muljoin[hipThreadIdx_x];
     __syncthreads();
 
     if(s_cur < sLen)
@@ -330,10 +331,10 @@ __global__ void multiJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums
 {
     extern __shared__ int shared[];
     int *extjoins = &shared[lenrul];
-    int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
 
-    if(threadIdx.x < (lenrul + wj))
-        shared[threadIdx.x] = rule[threadIdx.x];
+    if(hipThreadIdx_x < (lenrul + wj))
+        shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
     __syncthreads();
 
     if(s_cur < sLen)
@@ -382,10 +383,10 @@ __global__ void multiJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSum
 {
     extern __shared__ int shared[];
     int *extjoins = &shared[cols];
-    int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
 
-    if(threadIdx.x < (cols + wj))
-        shared[threadIdx.x] = rule[threadIdx.x];
+    if(hipThreadIdx_x < (cols + wj))
+        shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
     __syncthreads();
 
     if(s_cur < sLen)
@@ -432,11 +433,11 @@ predicate are projected.*/
 __global__ void gJoinWithWriteNegative(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int halfrul, int *mloc)
 {
     extern __shared__ int shared[];
-    int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int posr;
 
-    if(threadIdx.x < halfrul)
-        shared[threadIdx.x] = rule[threadIdx.x];
+    if(hipThreadIdx_x < halfrul)
+        shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
     __syncthreads();
 
     if(r_cur < rLen)
@@ -461,11 +462,11 @@ predicate are projected.*/
 __global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinResultBuffers[], int *p1, int of1, int *rule, int cols, int *mloc)
 {
     extern __shared__ int shared[];
-    int r_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int r_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int posr;
 
-    if(threadIdx.x < cols)
-        shared[threadIdx.x] = rule[threadIdx.x];
+    if(hipThreadIdx_x < cols)
+        shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
     __syncthreads();
 
     if(r_cur < rLen)
@@ -489,10 +490,10 @@ __global__ void gJoinWithWriteNegative2(int g_locations[], int rLen, int g_joinR
 __global__ void gJoinWithWrite(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int halfrul, int lenrul, int *mloc, int *sloc)
 {
     extern __shared__ int shared[];
-    int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
 
-    if(threadIdx.x < lenrul)
-        shared[threadIdx.x] = rule[threadIdx.x];
+    if(hipThreadIdx_x < lenrul)
+        shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
     __syncthreads();
 
     if(s_cur < sLen)
@@ -525,10 +526,10 @@ projection, which is performed based on the variables in the head of the rule.*/
 __global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[], int g_joinResultBuffers[], int *p1, int *p2, int of1, int of2, int *rule, int cols, int *mloc, int *sloc)
 {
     extern __shared__ int shared[];
-    int s_cur = blockIdx.x * blockDim.x + threadIdx.x;
+    int s_cur = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
 
-    if(threadIdx.x < cols)
-        shared[threadIdx.x] = rule[threadIdx.x];
+    if(hipThreadIdx_x < cols)
+        shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
     __syncthreads();
 
     if(s_cur < sLen)
@@ -563,7 +564,7 @@ __global__ void gJoinWithWrite2(int g_locations[], int sLen, int g_PrefixSums[],
 /*Load part of column 'wj' of 'p' in 'R'. Which values are loaded is defined by the prefix sum results in 'pos'.*/
 __global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *ids)
 {
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int cond;
     if(id < len)
     {
@@ -579,7 +580,7 @@ __global__ void llenar(int *p, int *R, int len, int of, int wj, int *pos, int *i
 /*Load an entire column from 'p' into 'R'.*/
 __global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
 {
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     if(id < len)
         R[id] = p[id * of + wj];
 }
@@ -587,10 +588,10 @@ __global__ void llenarnosel(int *p, int *R, int len, int of, int wj)
 __global__ void projectfinal(int *res, int rows, int cols, int *rule, int *out)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
 
-    if(threadIdx.x < cols)
-        shared[threadIdx.x] = rule[threadIdx.x];
+    if(hipThreadIdx_x < cols)
+        shared[hipThreadIdx_x] = rule[hipThreadIdx_x];
     __syncthreads();
 
     if(id < rows)
@@ -614,26 +615,26 @@ void project(int *res, int resrows, int numcols1, int numcols2, int *proj, int *
         int *pt = (int *)malloc(sizepro);
         for(z = 0; z < numcols2; z++)
             pt[z] = proj[z] - 1;
-        cudaMemcpy(dcons, pt, sizepro, cudaMemcpyHostToDevice);
-        //cudaDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
+        hipMemcpy(dcons, pt, sizepro, hipMemcpyHostToDevice);
+        //hipDeviceSynchronize(); //Small cudaMemcpys are asynchronous, uncomment this line if the pointer is being liberated before it is copied.
         free(pt);
     }
     else
-        cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
+        hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
     reservar(&d_Rout, resrows * sizepro);
-    projectfinal<<<blockllen, numthreads, sizepro>>>(res, resrows, numcols1, dcons, d_Rout);
-    cudaFree(dcons);
-    cudaFree(*ret);
+    hipLaunchKernel(HIP_KERNEL_NAME(projectfinal), dim3(blockllen), dim3(numthreads), sizepro, 0, res, resrows, numcols1, dcons, d_Rout);
+    hipFree(dcons);
+    hipFree(*ret);
     *ret = d_Rout;
 }
 
 __global__ void projectadd(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, int *dhead, int hsize, int *res)
 {
     extern __shared__ int shared[];
-    int id = blockIdx.x * blockDim.x + threadIdx.x;
+    int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
     int pos2, posr, x, y, cond;
-    if(threadIdx.x < hsize)
-        shared[threadIdx.x] = dhead[threadIdx.x];
+    if(hipThreadIdx_x < hsize)
+        shared[hipThreadIdx_x] = dhead[hipThreadIdx_x];
     __syncthreads();
     if(id < rows2)
     {
@@ -662,10 +663,10 @@ void juntar(int *dop1, int *dop2, int rows1, int rows2, int cols1, int cols2, in
     int blockllen = rows2 / numthreads + 1;
     sizepro = pcols * sizeof(int);
     reservar(&dcons, sizepro);
-    cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
+    hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
     reservar(&d_Rout, rows1 * rows2 * sizepro);
-    projectadd<<<blockllen, numthreads, sizepro>>>(dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
-    cudaFree(dcons);
+    hipLaunchKernel(HIP_KERNEL_NAME(projectadd), dim3(blockllen), dim3(numthreads), sizepro, 0, dop1, dop2, rows1, rows2, cols1, cols2, dcons, pcols, d_Rout);
+    hipFree(dcons);
     *ret = d_Rout;
 }
 
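All of the kernels above share one staging idiom: a short host-built descriptor array (the selection constants, self-join positions, or rule head) is copied into dynamic shared memory by the first few threads of each block before __syncthreads(), so the per-row loop reads it from on-chip memory rather than global memory. The launch must then pass the descriptor's byte size as the dynamic shared-memory argument, which is exactly the 'size' value threaded through the launches in this file. A reduced sketch of the idiom (hypothetical kernel, assuming blockDim >= numc just as the real kernels do):

    #include "hip/hip_runtime.h"

    __global__ void uses_small_consts(const int *in, int rows, const int *cons, int numc, int *out)
    {
        extern __shared__ int shared[];
        int id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
        if(hipThreadIdx_x < numc)
            shared[hipThreadIdx_x] = cons[hipThreadIdx_x];    /* stage the descriptor */
        __syncthreads();    /* every thread in the block now sees the staged copy */
        if(id < rows)
            out[id] = in[id] + shared[id % numc];    /* stand-in for the real predicate test */
    }

    /* Launch with numc * sizeof(int) bytes of dynamic shared memory:
       hipLaunchKernel(HIP_KERNEL_NAME(uses_small_consts), dim3(blocks), dim3(threads),
                       numc * sizeof(int), 0, in, rows, cons, numc, out); */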
@ -743,51 +744,51 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
//cout << "INICIO" << endl;
|
//cout << "INICIO" << endl;
|
||||||
cudaEvent_t start, stop;
|
hipEvent_t start, stop;
|
||||||
float time;
|
float time;
|
||||||
cudaEventCreate(&start);
|
hipEventCreate(&start);
|
||||||
cudaEventCreate(&stop);
|
hipEventCreate(&stop);
|
||||||
cudaEventRecord(start, 0);
|
hipEventRecord(start, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(npred2.x > 0 || npred2.y > 0 || nsel2 > 0 || nsj2 > 0)
|
if(npred2.x > 0 || npred2.y > 0 || nsel2 > 0 || nsj2 > 0)
|
||||||
{
|
{
|
||||||
newLen = sLen + 1;
|
newLen = sLen + 1;
|
||||||
cudaMemsetAsync(temp, 0, newLen * sizeof(int));
|
hipMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(npred2.x > 0 || npred2.y > 0)
|
if(npred2.x > 0 || npred2.y > 0)
|
||||||
{
|
{
|
||||||
size = npred2tot * sizeof(int);
|
size = npred2tot * sizeof(int);
|
||||||
cudaMemcpy(dcons, pred2, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, pred2, size, hipMemcpyHostToDevice);
|
||||||
|
|
||||||
if(npred2.y > 0) /*Fix case when a(X,Y),b(Y,Z),Z > Y*/
|
if(npred2.y > 0) /*Fix case when a(X,Y),b(Y,Z),Z > Y*/
|
||||||
{
|
{
|
||||||
reservar(&temp2, sizet2);
|
reservar(&temp2, sizet2);
|
||||||
cudaMemsetAsync(temp2, 0, newLen * sizeof(int));
|
hipMemsetAsync(temp2, 0, newLen * sizeof(int));
|
||||||
//res = thrust::device_pointer_cast(temp2);
|
//res = thrust::device_pointer_cast(temp2);
|
||||||
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, temp2 + 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if(negative)
|
if(negative)
|
||||||
bpreds<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
hipLaunchKernel(HIP_KERNEL_NAME(bpreds), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||||
else
|
else
|
||||||
bpredsOR<<<blockllen, numthreads, size>>>(p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
hipLaunchKernel(HIP_KERNEL_NAME(bpredsOR), dim3(blockllen), dim3(numthreads), size, 0, p1, p2, sLen, of1, of2, dcons, npred2tot, npred2.x, temp + 1, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(nsel2 > 0)
|
if(nsel2 > 0)
|
||||||
{
|
{
|
||||||
size = nsel2 * sizeof(int);
|
size = nsel2 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
|
||||||
marcar<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(nsj2 > 0)
|
if(nsj2 > 0)
|
||||||
{
|
{
|
||||||
size = nsj2 * sizeof(int);
|
size = nsj2 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||||
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -795,14 +796,14 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
if(nsel2 > 0)
|
if(nsel2 > 0)
|
||||||
{
|
{
|
||||||
size = nsel2 * sizeof(int);
|
size = nsel2 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sel2, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sel2, size, hipMemcpyHostToDevice);
|
||||||
marcar2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsel2, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsel2, temp + 1);
|
||||||
|
|
||||||
if(nsj2 > 0)
|
if(nsj2 > 0)
|
||||||
{
|
{
|
||||||
size = nsj2 * sizeof(int);
|
size = nsj2 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||||
samejoin<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -810,15 +811,15 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
if(nsj2 > 0)
|
if(nsj2 > 0)
|
||||||
{
|
{
|
||||||
size = nsj2 * sizeof(int);
|
size = nsj2 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sjoin2, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sjoin2, size, hipMemcpyHostToDevice);
|
||||||
samejoin2<<<blockllen, numthreads, size>>>(p2, sLen, of2, dcons, nsj2, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p2, sLen, of2, dcons, nsj2, temp + 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sizem32S = m32sLen * sizeof(int);
|
sizem32S = m32sLen * sizeof(int);
|
||||||
reservar(&d_S, sizem32S);
|
reservar(&d_S, sizem32S);
|
||||||
cudaMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
|
hipMemsetAsync(d_S + sLen, 0x7f, extraspaceS * sizeof(int));
|
||||||
llenarnosel<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1]);
|
hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -842,8 +843,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
|
|
||||||
if(newLen == 0) // && !negative) ARREGLAR
|
if(newLen == 0) // && !negative) ARREGLAR
|
||||||
{
|
{
|
||||||
cudaFree(temp);
|
hipFree(temp);
|
||||||
cudaFree(dcons);
|
hipFree(dcons);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -854,24 +855,24 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
|
|
||||||
reservar(&d_S, sizem32S);
|
reservar(&d_S, sizem32S);
|
||||||
reservar(&posS, sizem32S);
|
reservar(&posS, sizem32S);
|
||||||
cudaMemsetAsync(d_S + newLen, 0x7f, sizextra);
|
hipMemsetAsync(d_S + newLen, 0x7f, sizextra);
|
||||||
cudaMemsetAsync(posS + newLen, 0x7f, sizextra);
|
hipMemsetAsync(posS + newLen, 0x7f, sizextra);
|
||||||
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
|
hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p2, d_S, sLen, of2, wherej[1], temp, posS);
|
||||||
sLen = newLen;
|
sLen = newLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
cudaEventRecord(stop, 0);
|
hipEventRecord(stop, 0);
|
||||||
cudaEventSynchronize(stop);
|
hipEventSynchronize(stop);
|
||||||
cudaEventElapsedTime(&time, start, stop);
|
hipEventElapsedTime(&time, start, stop);
|
||||||
//cout << "Select1 = " << time << endl;
|
//cout << "Select1 = " << time << endl;
|
||||||
cuda_stats.select1_time += time;
|
cuda_stats.select1_time += time;
|
||||||
|
|
||||||
cudaEventDestroy(start);
|
hipEventDestroy(start);
|
||||||
cudaEventDestroy(stop);
|
hipEventDestroy(stop);
|
||||||
cudaEventCreate(&start);
|
hipEventCreate(&start);
|
||||||
cudaEventCreate(&stop);
|
hipEventCreate(&stop);
|
||||||
cudaEventRecord(start, 0);
|
hipEventRecord(start, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blockllen = rLen / numthreads + 1;
|
blockllen = rLen / numthreads + 1;
|
||||||
@ -880,30 +881,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
{
|
{
|
||||||
if(temp2 != NULL)
|
if(temp2 != NULL)
|
||||||
{
|
{
|
||||||
cudaFree(temp);
|
hipFree(temp);
|
||||||
temp = temp2;
|
temp = temp2;
|
||||||
res = thrust::device_pointer_cast(temp);
|
res = thrust::device_pointer_cast(temp);
|
||||||
newLen = rLen + 1;
|
newLen = rLen + 1;
|
||||||
if(nsel1 > 0)
|
if(nsel1 > 0)
|
||||||
{
|
{
|
||||||
size = nsel1 * sizeof(int);
|
size = nsel1 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
|
||||||
marcar<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(marcar), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||||
}
|
}
|
||||||
if(nsj1 > 0)
|
if(nsj1 > 0)
|
||||||
{
|
{
|
||||||
size = nsj1 * sizeof(int);
|
size = nsj1 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
|
||||||
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||||
}
|
}
|
||||||
if(npred1.x > 0)
|
if(npred1.x > 0)
|
||||||
{
|
{
|
||||||
size = npred1.x * sizeof(int);
|
size = npred1.x * sizeof(int);
|
||||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||||
if(ANDlogic)
|
if(ANDlogic)
|
||||||
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||||
else
|
else
|
||||||
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -911,30 +912,30 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
if(npred1.x > 0 || nsel1 > 0 || nsj1 > 0)
|
if(npred1.x > 0 || nsel1 > 0 || nsj1 > 0)
|
||||||
{
|
{
|
||||||
newLen = rLen + 1;
|
newLen = rLen + 1;
|
||||||
cudaMemsetAsync(temp, 0, newLen * sizeof(int));
|
hipMemsetAsync(temp, 0, newLen * sizeof(int));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(nsel1 > 0)
|
if(nsel1 > 0)
|
||||||
{
|
{
|
||||||
size = nsel1 * sizeof(int);
|
size = nsel1 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sel1, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sel1, size, hipMemcpyHostToDevice);
|
||||||
marcar2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsel1, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(marcar2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsel1, temp + 1);
|
||||||
|
|
||||||
if(nsj1 > 0)
|
if(nsj1 > 0)
|
||||||
{
|
{
|
||||||
size = nsj1 * sizeof(int);
|
size = nsj1 * sizeof(int);
|
||||||
cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
|
||||||
samejoin<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(samejoin), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(npred1.x > 0)
|
if(npred1.x > 0)
|
||||||
{
|
{
|
||||||
size = npred1.x * sizeof(int);
|
size = npred1.x * sizeof(int);
|
||||||
cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
|
hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
|
||||||
if(ANDlogic)
|
if(ANDlogic)
|
||||||
bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||||
else
|
else
|
||||||
bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
|
hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -942,17 +943,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 if(nsj1 > 0)
 {
 size = nsj1 * sizeof(int);
-cudaMemcpy(dcons, sjoin1, size, cudaMemcpyHostToDevice);
-samejoin2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, nsj1, temp + 1);
+hipMemcpy(dcons, sjoin1, size, hipMemcpyHostToDevice);
+hipLaunchKernel(HIP_KERNEL_NAME(samejoin2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, nsj1, temp + 1);

 if(npred1.x > 0)
 {
 size = npred1.x * sizeof(int);
-cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
+hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
 if(ANDlogic)
-bpredsnormal<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
+hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
 else
-bpredsorlogic<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
+hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
 }
 }
 else
@@ -960,11 +961,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 if(npred1.x > 0)
 {
 size = npred1.x * sizeof(int);
-cudaMemcpy(dcons, pred1, size, cudaMemcpyHostToDevice);
+hipMemcpy(dcons, pred1, size, hipMemcpyHostToDevice);
 if(ANDlogic)
-bpredsnormal2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
+hipLaunchKernel(HIP_KERNEL_NAME(bpredsnormal2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
 else
-bpredsorlogic2<<<blockllen, numthreads, size>>>(p1, rLen, of1, dcons, npred1.x, temp + 1);
+hipLaunchKernel(HIP_KERNEL_NAME(bpredsorlogic2), dim3(blockllen), dim3(numthreads), size, 0, p1, rLen, of1, dcons, npred1.x, temp + 1);
 }
 }
 }
@@ -976,11 +977,11 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 newLen = res[rLen];
 if(newLen == 0)
 {
-cudaFree(temp);
-cudaFree(dcons);
-cudaFree(d_S);
+hipFree(temp);
+hipFree(dcons);
+hipFree(d_S);
 if(posS != NULL)
-cudaFree(posS);
+hipFree(posS);
 return 0;
 }

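The runtime memory API renames the same way, one CUDA call to one HIP call: cudaMemcpy/cudaMemcpyAsync, cudaMemsetAsync and cudaFree become hipMemcpy/hipMemcpyAsync, hipMemsetAsync and hipFree, and direction flags such as cudaMemcpyHostToDevice become hipMemcpyHostToDevice. A minimal sketch of the correspondence (hypothetical buffer names; hipMalloc stands in for this package's reservar allocator, which is an assumption about that helper):

#include "hip/hip_runtime.h"

void device_buffer_demo(const int *host_in, int n)
{
	int *dbuf;
	size_t size = n * sizeof(int);
	hipMalloc((void **)&dbuf, size);                        /* CUDA: cudaMalloc */
	hipMemsetAsync(dbuf, 0, size);                          /* CUDA: cudaMemsetAsync */
	hipMemcpy(dbuf, host_in, size, hipMemcpyHostToDevice);  /* CUDA: cudaMemcpy(..., cudaMemcpyHostToDevice) */
	hipFree(dbuf);                                          /* CUDA: cudaFree */
}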
@@ -991,41 +992,41 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:

 reservar(&d_R, sizem32);
 reservar(&posR, sizem32);
-cudaMemsetAsync(d_R + newLen, 0x7f, sizextra);
-cudaMemsetAsync(posR + newLen, 0x7f, sizextra);
-llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR);
+hipMemsetAsync(d_R + newLen, 0x7f, sizextra);
+hipMemsetAsync(posR + newLen, 0x7f, sizextra);
+hipLaunchKernel(HIP_KERNEL_NAME(llenar), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0], temp, posR);
 rLen = newLen;
 }
 else
 {
 sizem32 = m32rLen * sizeof(int);
 reservar(&d_R, sizem32);
-cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
-llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
+hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
+hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
 }
 }
 else
 {
 sizem32 = m32rLen * sizeof(int);
 reservar(&d_R, sizem32);
-cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
-llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
+hipMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
+hipLaunchKernel(HIP_KERNEL_NAME(llenarnosel), dim3(blockllen), dim3(numthreads), 0, 0, p1, d_R, rLen, of1, wherej[0]);
 }

 #ifdef TIMER
-cudaEventRecord(stop, 0);
-cudaEventSynchronize(stop);
-cudaEventElapsedTime(&time, start, stop);
+hipEventRecord(stop, 0);
+hipEventSynchronize(stop);
+hipEventElapsedTime(&time, start, stop);
 //cout << "Select2 = " << time << endl;
 cuda_stats.select2_time += time;
 #endif

 #ifdef TIMER
-cudaEventDestroy(start);
-cudaEventDestroy(stop);
-cudaEventCreate(&start);
-cudaEventCreate(&stop);
-cudaEventRecord(start, 0);
+hipEventDestroy(start);
+hipEventDestroy(stop);
+hipEventCreate(&start);
+hipEventCreate(&stop);
+hipEventRecord(start, 0);
 #endif

 thrust::device_ptr<Record> dvp1;
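The TIMER instrumentation ports just as mechanically: each cudaEvent_t call becomes its hipEvent_t counterpart with an identical shape. A minimal sketch of the timing idiom wrapped around each phase here (assuming hipEvent_t start, stop and a float time, as in the surrounding code):

hipEventCreate(&start);
hipEventCreate(&stop);
hipEventRecord(start, 0);
/* ... the work being timed: kernel launches, copies ... */
hipEventRecord(stop, 0);
hipEventSynchronize(stop);                /* wait until the stop event has actually occurred */
hipEventElapsedTime(&time, start, stop);  /* elapsed time between the events, in milliseconds */
hipEventDestroy(start);
hipEventDestroy(stop);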
@@ -1084,17 +1085,17 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 }

 #ifdef TIMER
-cudaEventRecord(stop, 0);
-cudaEventSynchronize(stop);
-cudaEventElapsedTime(&time, start, stop);
+hipEventRecord(stop, 0);
+hipEventSynchronize(stop);
+hipEventElapsedTime(&time, start, stop);
 //cout << "Sort = " << time << endl;
 cuda_stats.sort_time += time;

-cudaEventDestroy(start);
-cudaEventDestroy(stop);
-cudaEventCreate(&start);
-cudaEventCreate(&stop);
-cudaEventRecord(start, 0);
+hipEventDestroy(start);
+hipEventDestroy(stop);
+hipEventCreate(&start);
+hipEventCreate(&stop);
+hipEventRecord(start, 0);
 #endif

 IDataNode* d_data;
@@ -1123,7 +1124,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 dim3 Dbc(THRD_PER_BLCK_create, 1, 1);
 dim3 Dgc(BLCK_PER_GRID_create, 1, 1);

-gCreateIndex <<<Dgc, Dbc>>> (d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);
+hipLaunchKernel(HIP_KERNEL_NAME(gCreateIndex), dim3(Dgc), dim3(Dbc), 0, 0, d_data, d_dir, nDirNodes, tree_size, bottom_start, nNodesPerBlock);

 int *d_locations;
 int memSizeR;
@@ -1132,7 +1133,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 {
 memSizeR = (rLen + 1) * sizeof(int);
 reservar(&d_locations, memSizeR);
-cudaMemsetAsync(d_locations, 0, sizeof(int));
+hipMemsetAsync(d_locations, 0, sizeof(int));
 nSearchKeys = rLen;
 }
 else
@@ -1146,13 +1147,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 unsigned int nKeysPerThread = uintCeilingDiv(nSearchKeys, THRD_PER_GRID_search);
 if(negative)
 {
-gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
-cudaMemsetAsync(temp, 0, memSizeR);
+hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_R, d_locations + 1, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
+hipMemsetAsync(temp, 0, memSizeR);
 }
 else
 {
-gSearchTree <<<Dgs, Dbs>>> (d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
-cudaMemsetAsync(temp, 0, memSizeS);
+hipLaunchKernel(HIP_KERNEL_NAME(gSearchTree), dim3(Dgs), dim3(Dbs), 0, 0, d_data, nDataNodes, d_dir, nDirNodes, lvlDir, d_S, d_locations, nSearchKeys, nKeysPerThread, tree_size, bottom_start);
+hipMemsetAsync(temp, 0, memSizeS);
 }

 int muljoin = 0, muljoinsize = 0, sum;
@@ -1165,8 +1166,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 {
 muljoin = numj - 2;
 muljoinsize = muljoin * sizeof(int);
-cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
-gIndexMultiJoinNegative<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
+hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
+hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoinNegative), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations + 1, rLen, p1, p2, of1, of2, posR, posS, dcons, muljoin);
 }

 res = thrust::device_pointer_cast(d_locations);
@@ -1177,21 +1178,21 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 if(pos == (rule->num_rows - 3))
 {
 sizepro = rule->num_columns * sizeof(int);
-cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
+hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
 resSize = sum * sizepro;
 reservar(&d_Rout, resSize);
-gJoinWithWriteNegative2<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
+hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, rule->num_columns, posR);
 }
 else
 {
 sizepro = projp.x * sizeof(int);
-cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
+hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
 resSize = sum * sizepro;
 reservar(&d_Rout, resSize);
-gJoinWithWriteNegative<<<blockllen, numthreads, sizepro>>> (d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
+hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWriteNegative), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, rLen, d_Rout, p1, of1, dcons, projp.x, posR);
 }
-cudaFree(d_R);
-cudaFree(d_S);
+hipFree(d_R);
+hipFree(d_S);
 }
 else
 {
@@ -1200,26 +1201,26 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 {
 muljoin = numj - 2;
 muljoinsize = muljoin * sizeof(int);
-cudaMemcpy(dcons, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
-gIndexMultiJoin<<<blockllen, numthreads, muljoinsize>>> (d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
+hipMemcpy(dcons, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
+hipLaunchKernel(HIP_KERNEL_NAME(gIndexMultiJoin), dim3(blockllen), dim3(numthreads), muljoinsize, 0, d_R, d_S, d_locations, sLen, temp, p1, p2, of1, of2, posR, posS, dcons, muljoin);
 }
 else
-gIndexJoin<<<blockllen, numthreads>>> (d_R, d_S, d_locations, sLen, temp);
-cudaFree(d_R);
-cudaFree(d_S);
+hipLaunchKernel(HIP_KERNEL_NAME(gIndexJoin), dim3(blockllen), dim3(numthreads), 0, 0, d_R, d_S, d_locations, sLen, temp);
+hipFree(d_R);
+hipFree(d_S);

 sum = res[sLen-1];
 thrust::exclusive_scan(res, res + sLen, res);
 sum += res[sLen-1];
 if(sum == 0)
 {
-cudaFree(dcons);
-cudaFree(d_locations);
-cudaFree(temp);
+hipFree(dcons);
+hipFree(d_locations);
+hipFree(temp);
 if(posS != NULL)
-cudaFree(posS);
+hipFree(posS);
 if(posR != NULL)
-cudaFree(posR);
+hipFree(posR);
 return 0;
 }
 res[sLen] = sum;
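Note that the Thrust calls (thrust::device_pointer_cast, thrust::exclusive_scan) pass through the port untouched; only the runtime API and the launch syntax change. The scan is the usual count-then-write join pattern: a counting pass stores per-row match counts, the in-place exclusive scan turns them into write offsets, and the saved last count plus the last offset gives the total output size. A self-contained sketch of that offset computation (illustrative values, not data from this commit):

#include <thrust/device_vector.h>
#include <thrust/scan.h>

void scan_offsets_demo()
{
	thrust::device_vector<int> counts(4);   /* per-row match counts from a counting pass */
	counts[0] = 2; counts[1] = 0; counts[2] = 3; counts[3] = 1;

	int last = counts[3];
	/* in-place exclusive scan: counts becomes {0, 2, 2, 5}, the write offsets */
	thrust::exclusive_scan(counts.begin(), counts.end(), counts.begin());
	int total = last + counts[3];           /* 1 + 5 = 6 rows in the join result */
	(void)total;
}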
@@ -1227,49 +1228,49 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
 if(pos == (rule->num_rows - 3))
 {
 sizepro = rule->num_columns * sizeof(int);
-cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
+hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
 resSize = sum * sizepro;
 reservar(&d_Rout, resSize);
 if(numj > 2)
 {
-cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
-multiJoinWithWrite2<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
+hipMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
+hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS, muljoin);
 }
 else
-gJoinWithWrite2<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
+hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite2), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, rule->num_columns, posR, posS);
 }
 else
 {
 sizepro = projp.y * sizeof(int);
-cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
+hipMemcpy(dcons, proj, sizepro, hipMemcpyHostToDevice);
 resSize = sum * sizepro;
 reservar(&d_Rout, resSize);
 if(numj > 2)
 {
-cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
-multiJoinWithWrite<<<blockllen, numthreads, sizepro + muljoinsize>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
+hipMemcpy(dcons + projp.y, wherej + 2, muljoinsize, hipMemcpyHostToDevice);
+hipLaunchKernel(HIP_KERNEL_NAME(multiJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro + muljoinsize, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS, muljoin);
 }
 else
-gJoinWithWrite<<<blockllen, numthreads, sizepro>>> (d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
+hipLaunchKernel(HIP_KERNEL_NAME(gJoinWithWrite), dim3(blockllen), dim3(numthreads), sizepro, 0, d_locations, sLen, temp, d_Rout, p1, p2, of1, of2, dcons, projp.x, projp.y, posR, posS);
 }
 }

-cudaFree(dcons);
-cudaFree(d_locations);
-cudaFree(temp);
+hipFree(dcons);
+hipFree(d_locations);
+hipFree(temp);
 if(posS != NULL)
-cudaFree(posS);
+hipFree(posS);
 if(posR != NULL)
-cudaFree(posR);
+hipFree(posR);

 if(*ret != NULL)
-cudaFree(*ret);
+hipFree(*ret);
 *ret = d_Rout;

 #ifdef TIMER
-cudaEventRecord(stop, 0);
-cudaEventSynchronize(stop);
-cudaEventElapsedTime(&time, start, stop);
+hipEventRecord(stop, 0);
+hipEventSynchronize(stop);
+hipEventElapsedTime(&time, start, stop);
 //cout << "Join = " << time << endl;
 //cout << "FIN" << endl;
 cuda_stats.join_time += time;
80
packages/cuda/union2.cu
Executable file → Normal file
@@ -87,8 +87,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -122,8 +122,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -157,8 +157,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -192,8 +192,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -227,8 +227,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -262,8 +262,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -297,8 +297,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -332,8 +332,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -367,8 +367,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -402,8 +402,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -437,8 +437,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -472,8 +472,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -507,8 +507,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -542,8 +542,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -577,8 +577,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -612,8 +612,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -647,8 +647,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -682,8 +682,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -717,8 +717,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
@@ -752,8 +752,8 @@ int unir(int *res, int rows, int tipo, int **ret, int final)
 {
 size = nrows * tipo * sizeof(int);
 reservar(&nres, size);
-cudaMemcpyAsync(nres, res, size, cudaMemcpyDeviceToDevice);
-cudaFree(*ret);
+hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice);
+hipFree(*ret);
 *ret = nres;
 }
 return nrows;
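The unir hunks above all make the same two-line substitution: the device-to-device copy that moves the deduplicated result into a freshly sized buffer switches from cudaMemcpyAsync with cudaMemcpyDeviceToDevice to hipMemcpyAsync with hipMemcpyDeviceToDevice, and the cudaFree of the superseded buffer becomes hipFree. A minimal sketch of that replace-the-result step (hipMalloc stands in for reservar, which is an assumption about that helper):

#include "hip/hip_runtime.h"

void replace_result(int **ret, int *res, int nrows, int tipo)
{
	int *nres;
	size_t size = nrows * tipo * sizeof(int);
	hipMalloc((void **)&nres, size);                          /* stands in for reservar(&nres, size) */
	hipMemcpyAsync(nres, res, size, hipMemcpyDeviceToDevice); /* was cudaMemcpyAsync(..., cudaMemcpyDeviceToDevice) */
	hipFree(*ret);                                            /* was cudaFree(*ret); frees the old result buffer */
	*ret = nres;                                              /* hand the compacted copy back to the caller */
}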
0
packages/cuda/union2.h
Executable file → Normal file
0
packages/cuda/unioncpu2.cpp
Executable file → Normal file