debug memory allocations
This commit is contained in:
parent
bd541bc57f
commit
2669240c4f
@ -52,7 +52,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
|
|||||||
int tmplen = rows + 1;
|
int tmplen = rows + 1;
|
||||||
int size = tmplen * sizeof(int);
|
int size = tmplen * sizeof(int);
|
||||||
reservar(&temp, size);
|
reservar(&temp, size);
|
||||||
// DEBUG_MEM cerr << "+ " << temp << " temp bpreds " << size << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << temp << " temp bpreds " << size << endl;
|
||||||
|
#endif
|
||||||
cudaMemset(temp, 0, size);
|
cudaMemset(temp, 0, size);
|
||||||
|
|
||||||
#if TIMER
|
#if TIMER
|
||||||
@ -68,7 +70,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
|
|||||||
else
|
else
|
||||||
hsize = sproj;
|
hsize = sproj;
|
||||||
reservar(&dhead, hsize);
|
reservar(&dhead, hsize);
|
||||||
// DEBUG_MEM cerr << "+ " << dhead << " dhead " << hsize << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << dhead << " dhead " << hsize << endl;
|
||||||
|
#endif
|
||||||
cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
int blockllen = rows / 1024 + 1;
|
int blockllen = rows / 1024 + 1;
|
||||||
@ -113,7 +117,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
|
|||||||
|
|
||||||
int *fres;
|
int *fres;
|
||||||
reservar(&fres, num * sproj);
|
reservar(&fres, num * sproj);
|
||||||
// DEBUG_MEM cerr << "+ " << fres << " fres " << num * sproj << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << fres << " fres " << num * sproj << endl;
|
||||||
|
#endif
|
||||||
cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
|
||||||
llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
|
llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
|
||||||
|
|
||||||
|
@ -1000,7 +1000,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
{
|
{
|
||||||
num_refs = rows1 * cols1 * sizeof(int);
|
num_refs = rows1 * cols1 * sizeof(int);
|
||||||
reservar(&res, num_refs);
|
reservar(&res, num_refs);
|
||||||
// DEBUG_MEM cerr << "+ " << res << " Res " << num_refs << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << res << " Res " << num_refs << endl;
|
||||||
|
#endif
|
||||||
cudaMemcpyAsync(res, dop1, num_refs, cudaMemcpyDeviceToDevice);
|
cudaMemcpyAsync(res, dop1, num_refs, cudaMemcpyDeviceToDevice);
|
||||||
registrar(rul_act->name, cols1, res, rows1, itr, 1);
|
registrar(rul_act->name, cols1, res, rows1, itr, 1);
|
||||||
rul_act->gen_ant = rul_act->gen_act;
|
rul_act->gen_ant = rul_act->gen_act;
|
||||||
@ -1289,7 +1291,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
res_rows = selectproyect(dop1, rows1, cols1, tmprule.num_columns, tmprule.select[0], tmprule.numsel[0], tmprule.selfjoin[0], tmprule.numselfj[0], tmprule.project[0], &res);
|
res_rows = selectproyect(dop1, rows1, cols1, tmprule.num_columns, tmprule.select[0], tmprule.numsel[0], tmprule.selfjoin[0], tmprule.numselfj[0], tmprule.project[0], &res);
|
||||||
if(qposr != fin && qposr->name == qname) {
|
if(qposr != fin && qposr->name == qname) {
|
||||||
cudaFree(dop1);
|
cudaFree(dop1);
|
||||||
// DEBUG_MEM cerr << "- " << dop1 << " dop1" << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "- " << dop1 << " dop1" << endl;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1299,7 +1303,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
cudaMemcpy(hres, res, tipo, cudaMemcpyDeviceToHost);
|
cudaMemcpy(hres, res, tipo, cudaMemcpyDeviceToHost);
|
||||||
if(res_rows > 0 /*&& tmprule.numsel[0] != 0 && tmprule.numselfj[0] != 0 */) {
|
if(res_rows > 0 /*&& tmprule.numsel[0] != 0 && tmprule.numselfj[0] != 0 */) {
|
||||||
cudaFree(res);
|
cudaFree(res);
|
||||||
// DEBUG_MEM cerr << "- " << res << " res" << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "- " << res << " res" << endl;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -105,14 +105,14 @@ int buscarpornombrecpu(int name, int itr, int *totalrows)
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
void limpiar(const char s[])
|
void limpiar(const char s[], size_t sz)
|
||||||
{
|
{
|
||||||
list<memnode>::iterator ini;
|
list<memnode>::iterator ini;
|
||||||
memnode temp;
|
memnode temp;
|
||||||
|
|
||||||
if(GPUmem.size() == 0)
|
if(GPUmem.size() == 0)
|
||||||
{
|
{
|
||||||
// DEBUG_MEM cerr << s << ": not enough GPU memory: have " << avmem << endl;
|
cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,7 +176,9 @@ void liberar(int *ptr, int size)
|
|||||||
//cout << "L " << avmem << " " << size;
|
//cout << "L " << avmem << " " << size;
|
||||||
|
|
||||||
cudaFree(ptr);
|
cudaFree(ptr);
|
||||||
// DEBUG_MEM cerr << "- " << ptr << " " << size << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "- " << ptr << " " << size << endl;
|
||||||
|
#endif
|
||||||
avmem += size;
|
avmem += size;
|
||||||
|
|
||||||
//cout << " " << avmem << endl;
|
//cout << " " << avmem << endl;
|
||||||
@ -193,9 +195,9 @@ void reservar(int **ptr, int size)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while(avmem < size)
|
while(avmem < size)
|
||||||
limpiar("not enough memory");
|
limpiar("not enough memory", size);
|
||||||
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
||||||
limpiar("error in memory allocation");
|
limpiar("Error in memory allocation", size);
|
||||||
if (! *ptr ) {
|
if (! *ptr ) {
|
||||||
size_t free, total;
|
size_t free, total;
|
||||||
cudaMemGetInfo( &free, &total );
|
cudaMemGetInfo( &free, &total );
|
||||||
@ -277,7 +279,9 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
|||||||
}
|
}
|
||||||
size = num_rows * num_columns * sizeof(int);
|
size = num_rows * num_columns * sizeof(int);
|
||||||
reservar(&temp, size);
|
reservar(&temp, size);
|
||||||
// DEBUG_MEM cerr << "+ " << temp << " temp " << size << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << temp << " temp " << size << endl;
|
||||||
|
#endif
|
||||||
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
|
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
|
||||||
registrar(name, num_columns, temp, num_rows, itr, 0);
|
registrar(name, num_columns, temp, num_rows, itr, 0);
|
||||||
*ptr = temp;
|
*ptr = temp;
|
||||||
@ -297,7 +301,9 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
|||||||
}
|
}
|
||||||
size = totalrows * num_columns * sizeof(int);
|
size = totalrows * num_columns * sizeof(int);
|
||||||
reservar(&temp, size);
|
reservar(&temp, size);
|
||||||
// DEBUG_MEM cerr << "+ " << temp << " temp 2 " << size << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << temp << " temp 2 " << size << endl;
|
||||||
|
#endif
|
||||||
for(x = 1; x < numgpu; x++)
|
for(x = 1; x < numgpu; x++)
|
||||||
{
|
{
|
||||||
cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
|
cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
|
||||||
@ -340,7 +346,9 @@ int cargafinal(int name, int cols, int **ptr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
reservar(&temp, cont * cols * sizeof(int));
|
reservar(&temp, cont * cols * sizeof(int));
|
||||||
// DEBUG_MEM cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;
|
||||||
|
#endif
|
||||||
ini = temp;
|
ini = temp;
|
||||||
|
|
||||||
pos = gpu;
|
pos = gpu;
|
||||||
@ -463,7 +471,9 @@ void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator las
|
|||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
cudaFree(gpu->dev_address);
|
cudaFree(gpu->dev_address);
|
||||||
// DEBUG_MEM cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;
|
||||||
|
#endif
|
||||||
free(temp);
|
free(temp);
|
||||||
gpu++;
|
gpu++;
|
||||||
}
|
}
|
||||||
@ -495,7 +505,9 @@ void clear_memory()
|
|||||||
{
|
{
|
||||||
if (ini->isrule) {
|
if (ini->isrule) {
|
||||||
cudaFree(ini->dev_address);
|
cudaFree(ini->dev_address);
|
||||||
// DEBUG_MEM cerr << "- " << ini->dev_address << " ini->dev_address" << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "- " << ini->dev_address << " ini->dev_address" << endl;
|
||||||
|
#endif
|
||||||
ini = GPUmem.erase(ini);
|
ini = GPUmem.erase(ini);
|
||||||
} else {
|
} else {
|
||||||
ini++;
|
ini++;
|
||||||
|
@ -11,7 +11,7 @@ using namespace std;
|
|||||||
|
|
||||||
void calcular_mem(int);
|
void calcular_mem(int);
|
||||||
void liberar(int*, int);
|
void liberar(int*, int);
|
||||||
void limpiar(const char []);
|
void limpiar(const char [], size_t);
|
||||||
void limpiartodo(int*, int*);
|
void limpiartodo(int*, int*);
|
||||||
int cargar(int, int, int, int, int*, int**, int);
|
int cargar(int, int, int, int, int*, int**, int);
|
||||||
int cargafinal(int, int, int**);
|
int cargafinal(int, int, int**);
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#ifndef _PRED_H_
|
#ifndef _PRED_H_
|
||||||
#define _PRED_H_
|
#define _PRED_H_
|
||||||
|
|
||||||
|
// #define DEBUG_MEM 1
|
||||||
|
|
||||||
typedef struct Nodo{
|
typedef struct Nodo{
|
||||||
int name;
|
int name;
|
||||||
int num_rows;
|
int num_rows;
|
||||||
|
@ -211,7 +211,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
#endif
|
#endif
|
||||||
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
|
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
|
||||||
reservar(&dhead, head_bytes);
|
reservar(&dhead, head_bytes);
|
||||||
// DEBUG_MEM cerr << "+ " << dhead << " dhead " << head_bytes << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << dhead << " dhead " << head_bytes << endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
int blockllen = rows / 1024 + 1;
|
int blockllen = rows / 1024 + 1;
|
||||||
int numthreads = 1024;
|
int numthreads = 1024;
|
||||||
@ -222,7 +224,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
tmplen = rows + 1;
|
tmplen = rows + 1;
|
||||||
size2 = tmplen * sizeof(int);
|
size2 = tmplen * sizeof(int);
|
||||||
reservar(&temp, size2);
|
reservar(&temp, size2);
|
||||||
// DEBUG_MEM cerr << "+ " << temp << " temp select " << size2 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << temp << " temp select " << size2 << endl;
|
||||||
|
#endif
|
||||||
cudaMemset(temp, 0, size2);
|
cudaMemset(temp, 0, size2);
|
||||||
|
|
||||||
size = numselect * sizeof(int);
|
size = numselect * sizeof(int);
|
||||||
@ -245,7 +249,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
|
|
||||||
size = head_size * sizeof(int);
|
size = head_size * sizeof(int);
|
||||||
reservar(&fres, num * size);
|
reservar(&fres, num * size);
|
||||||
// DEBUG_MEM cerr << "+ " << fres << " fres select " << num*size << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << fres << " fres select " << num*size << endl;
|
||||||
|
#endif
|
||||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||||
liberar(dhead, head_bytes);
|
liberar(dhead, head_bytes);
|
||||||
@ -260,7 +266,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
tmplen = rows + 1;
|
tmplen = rows + 1;
|
||||||
size2 = tmplen * sizeof(int);
|
size2 = tmplen * sizeof(int);
|
||||||
reservar(&temp, size2);
|
reservar(&temp, size2);
|
||||||
// DEBUG_MEM cerr << "+ " << temp << " temp select " << size2 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << temp << " temp select " << size2 << endl;
|
||||||
|
#endif
|
||||||
cudaMemset(temp, 0, size2);
|
cudaMemset(temp, 0, size2);
|
||||||
|
|
||||||
size = numselfj * sizeof(int);
|
size = numselfj * sizeof(int);
|
||||||
@ -275,7 +283,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
|
|
||||||
size = head_size * sizeof(int);
|
size = head_size * sizeof(int);
|
||||||
reservar(&fres, num * size);
|
reservar(&fres, num * size);
|
||||||
// DEBUG_MEM cerr << "+ " << fres << " fres select again " << num*size << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << fres << " fres select again " << num*size << endl;
|
||||||
|
#endif
|
||||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||||
liberar(dhead, head_bytes);
|
liberar(dhead, head_bytes);
|
||||||
@ -287,7 +297,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
{
|
{
|
||||||
size = head_size * sizeof(int);
|
size = head_size * sizeof(int);
|
||||||
reservar(&fres, rows * size);
|
reservar(&fres, rows * size);
|
||||||
// DEBUG_MEM cerr << "+ " << fres << " fres select third " << rows*size << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << fres << " fres select third " << rows*size << endl;
|
||||||
|
#endif
|
||||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||||
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
|
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
|
||||||
liberar(dhead, head_bytes);
|
liberar(dhead, head_bytes);
|
||||||
|
@ -780,9 +780,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
int dconsize = sizet * 2;*/
|
int dconsize = sizet * 2;*/
|
||||||
|
|
||||||
reservar(&dcons, sizet);
|
reservar(&dcons, sizet);
|
||||||
// DEBUG_MEM cerr << "+ " << dcons << " dcons tree " << sizet << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << dcons << " dcons tree " << sizet << endl;
|
||||||
|
#endif
|
||||||
reservar(&temp, sizet2);
|
reservar(&temp, sizet2);
|
||||||
// DEBUG_MEM cerr << "+ " << temp << " temp tree " << sizet2 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << temp << " temp tree " << sizet2 << endl;
|
||||||
|
#endif
|
||||||
thrust::device_ptr<int> res = thrust::device_pointer_cast(temp);
|
thrust::device_ptr<int> res = thrust::device_pointer_cast(temp);
|
||||||
|
|
||||||
numthreads = 1024;
|
numthreads = 1024;
|
||||||
@ -846,7 +850,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
}
|
}
|
||||||
catch(std::bad_alloc &e)
|
catch(std::bad_alloc &e)
|
||||||
{
|
{
|
||||||
limpiar("inclusive scan in join");
|
limpiar("inclusive scan in join", 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//thrust::inclusive_scan(res + 1, res + newLen, res + 1);
|
//thrust::inclusive_scan(res + 1, res + newLen, res + 1);
|
||||||
@ -857,9 +861,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
|
|
||||||
memSizeS = newLen * sizeof(int);
|
memSizeS = newLen * sizeof(int);
|
||||||
reservar(&d_S, memSizeS);
|
reservar(&d_S, memSizeS);
|
||||||
// DEBUG_MEM cerr << "+ " << d_S << " d_S " << memSizeS << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_S << " d_S " << memSizeS << endl;
|
||||||
|
#endif
|
||||||
reservar(&posS, memSizeS);
|
reservar(&posS, memSizeS);
|
||||||
// DEBUG_MEM cerr << "+ " << posS << " posS " << memSizeS << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << posS << " posS " << memSizeS << endl;
|
||||||
|
#endif
|
||||||
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
|
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
|
||||||
sLen = newLen;
|
sLen = newLen;
|
||||||
}
|
}
|
||||||
@ -880,9 +888,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
|
|
||||||
memSizeS = newLen * sizeof(int);
|
memSizeS = newLen * sizeof(int);
|
||||||
reservar(&d_S, memSizeS);
|
reservar(&d_S, memSizeS);
|
||||||
// DEBUG_MEM cerr << "+ " << d_S << " d_S m " << memSizeS << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_S << " d_S m " << memSizeS << endl;
|
||||||
|
#endif
|
||||||
reservar(&posS, memSizeS);
|
reservar(&posS, memSizeS);
|
||||||
// DEBUG_MEM cerr << "+ " << posS << " posS m " << memSizeS << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << posS << " posS m " << memSizeS << endl;
|
||||||
|
#endif
|
||||||
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
|
llenar<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1], temp, posS);
|
||||||
sLen = newLen;
|
sLen = newLen;
|
||||||
}
|
}
|
||||||
@ -890,7 +902,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
{
|
{
|
||||||
memSizeS = sLen * sizeof(int);
|
memSizeS = sLen * sizeof(int);
|
||||||
reservar(&d_S, memSizeS);
|
reservar(&d_S, memSizeS);
|
||||||
// DEBUG_MEM cerr << "+ " << d_S << " d_S n " << memSizeS << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_S << " d_S n " << memSizeS << endl;
|
||||||
|
#endif
|
||||||
llenarnosel<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1]);
|
llenarnosel<<<blockllen, numthreads>>>(p2, d_S, sLen, of2, wherej[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -938,9 +952,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
m32rLen = newLen + extraspace;
|
m32rLen = newLen + extraspace;
|
||||||
sizem32 = m32rLen * sizeof(int);
|
sizem32 = m32rLen * sizeof(int);
|
||||||
reservar(&d_R, sizem32);
|
reservar(&d_R, sizem32);
|
||||||
// DEBUG_MEM cerr << "+ " << d_R << " d_R m " << sizem32 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_R << " d_R m " << sizem32 << endl;
|
||||||
|
#endif
|
||||||
reservar(&posR, sizem32);
|
reservar(&posR, sizem32);
|
||||||
// DEBUG_MEM cerr << "+ " << posR << " posR m " << sizem32 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << posR << " posR m " << sizem32 << endl;
|
||||||
|
#endif
|
||||||
cudaMemsetAsync(d_R + newLen, 0x7f, sizextra);
|
cudaMemsetAsync(d_R + newLen, 0x7f, sizextra);
|
||||||
cudaMemsetAsync(posR + newLen, 0x7f, sizextra);
|
cudaMemsetAsync(posR + newLen, 0x7f, sizextra);
|
||||||
llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR);
|
llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR);
|
||||||
@ -966,9 +984,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
m32rLen = newLen + extraspace;
|
m32rLen = newLen + extraspace;
|
||||||
sizem32 = m32rLen * sizeof(int);
|
sizem32 = m32rLen * sizeof(int);
|
||||||
reservar(&d_R, sizem32);
|
reservar(&d_R, sizem32);
|
||||||
// DEBUG_MEM cerr << "+ " << d_R << " d_R n " << sizem32 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_R << " d_R n " << sizem32 << endl;
|
||||||
|
#endif
|
||||||
reservar(&posR, sizem32);
|
reservar(&posR, sizem32);
|
||||||
// DEBUG_MEM cerr << "+ " << posR << " posR n " << sizem32 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << posR << " posR n " << sizem32 << endl;
|
||||||
|
#endif
|
||||||
cudaMemsetAsync(d_R + newLen, 0x7f, sizextra);
|
cudaMemsetAsync(d_R + newLen, 0x7f, sizextra);
|
||||||
cudaMemsetAsync(posR + newLen, 0x7f, sizextra);
|
cudaMemsetAsync(posR + newLen, 0x7f, sizextra);
|
||||||
llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR);
|
llenar<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0], temp, posR);
|
||||||
@ -978,7 +1000,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
{
|
{
|
||||||
sizem32 = m32rLen * sizeof(int);
|
sizem32 = m32rLen * sizeof(int);
|
||||||
reservar(&d_R, sizem32);
|
reservar(&d_R, sizem32);
|
||||||
// DEBUG_MEM cerr << "+ " << d_R << " d_R sizem32 " << sizem32 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_R << " d_R sizem32 " << sizem32 << endl;
|
||||||
|
#endif
|
||||||
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||||
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
|
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
|
||||||
}
|
}
|
||||||
@ -989,7 +1013,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
{
|
{
|
||||||
sizem32 = m32rLen * sizeof(int);
|
sizem32 = m32rLen * sizeof(int);
|
||||||
reservar(&d_R, sizem32);
|
reservar(&d_R, sizem32);
|
||||||
// DEBUG_MEM cerr << "+ " << d_R << " d_R sz " << sizem32 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_R << " d_R sz " << sizem32 << endl;
|
||||||
|
#endif
|
||||||
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int));
|
||||||
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
|
llenarnosel<<<blockllen, numthreads>>>(p1, d_R, rLen, of1, wherej[0]);
|
||||||
}
|
}
|
||||||
@ -1025,7 +1051,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
if(posR == NULL)
|
if(posR == NULL)
|
||||||
{
|
{
|
||||||
reservar(&posR, sizem32);
|
reservar(&posR, sizem32);
|
||||||
// DEBUG_MEM cerr << "+ " << posR << " posR m32 " << sizem32 << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << posR << " posR m32 " << sizem32 << endl;
|
||||||
|
#endif
|
||||||
permutation = thrust::device_pointer_cast(posR);
|
permutation = thrust::device_pointer_cast(posR);
|
||||||
thrust::sequence(permutation, permutation + m32rLen);
|
thrust::sequence(permutation, permutation + m32rLen);
|
||||||
}
|
}
|
||||||
@ -1042,7 +1070,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
}
|
}
|
||||||
catch(std::bad_alloc &e)
|
catch(std::bad_alloc &e)
|
||||||
{
|
{
|
||||||
limpiar("inclusive scan in join");
|
limpiar("inclusive scan in join", 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1093,7 +1121,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
|
|
||||||
int *d_locations;
|
int *d_locations;
|
||||||
reservar(&d_locations, memSizeS);
|
reservar(&d_locations, memSizeS);
|
||||||
// DEBUG_MEM cerr << "+ " << d_locations << " d_locs n " << memSizeS << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_locations << " d_locs n " << memSizeS << endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
dim3 Dbs(THRD_PER_BLCK_search, 1, 1);
|
dim3 Dbs(THRD_PER_BLCK_search, 1, 1);
|
||||||
dim3 Dgs(BLCK_PER_GRID_search, 1, 1);
|
dim3 Dgs(BLCK_PER_GRID_search, 1, 1);
|
||||||
@ -1133,7 +1163,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||||
resSize = sum * sizepro;
|
resSize = sum * sizepro;
|
||||||
reservar(&d_Rout, resSize);
|
reservar(&d_Rout, resSize);
|
||||||
// DEBUG_MEM cerr << "+ " << d_Rout << " d_Rout n " << resSize << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_Rout << " d_Rout n " << resSize << endl;
|
||||||
|
#endif
|
||||||
if(numj > 2)
|
if(numj > 2)
|
||||||
{
|
{
|
||||||
cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||||
@ -1148,7 +1180,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice);
|
||||||
resSize = sum * sizepro;
|
resSize = sum * sizepro;
|
||||||
reservar(&d_Rout, resSize);
|
reservar(&d_Rout, resSize);
|
||||||
// DEBUG_MEM cerr << "+ " << d_Rout << " d_Rout 2 " << resSize << endl;
|
#ifdef DEBUG_MEM
|
||||||
|
cerr << "+ " << d_Rout << " d_Rout 2 " << resSize << endl;
|
||||||
|
#endif
|
||||||
if(numj > 2)
|
if(numj > 2)
|
||||||
{
|
{
|
||||||
cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice);
|
||||||
|
@ -105,7 +105,7 @@ int unir(int *res, int rows, int tipo)
|
|||||||
}
|
}
|
||||||
catch(std::bad_alloc &e)
|
catch(std::bad_alloc &e)
|
||||||
{
|
{
|
||||||
limpiar("sort/unique in unir");
|
limpiar("sort/unique in unir", 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nrows = thrust::distance(pt, re);
|
nrows = thrust::distance(pt, re);
|
||||||
@ -143,7 +143,7 @@ int unir(int *res, int rows, int tipo)
|
|||||||
}
|
}
|
||||||
catch(std::bad_alloc &e)
|
catch(std::bad_alloc &e)
|
||||||
{
|
{
|
||||||
limpiar("sort/unique in unir");
|
limpiar("sort/unique in unir", 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nrows = thrust::distance(pt2, re2);
|
nrows = thrust::distance(pt2, re2);
|
||||||
@ -182,7 +182,7 @@ int unir(int *res, int rows, int tipo)
|
|||||||
}
|
}
|
||||||
catch(std::bad_alloc &e)
|
catch(std::bad_alloc &e)
|
||||||
{
|
{
|
||||||
limpiar("sort/unique in unir");
|
limpiar("sort/unique in unir", 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nrows = thrust::distance(pt3, re3);
|
nrows = thrust::distance(pt3, re3);
|
||||||
|
Reference in New Issue
Block a user