From 2669240c4fcb8630a4d4c1f0493ce6e7e6619a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADtor=20Santos=20Costa?= Date: Thu, 17 Oct 2013 00:44:24 +0100 Subject: [PATCH] debug memory allocations --- packages/cuda/bpreds.cu | 12 ++++-- packages/cuda/lista.cu | 12 ++++-- packages/cuda/memory.cu | 32 ++++++++++----- packages/cuda/memory.h | 2 +- packages/cuda/pred.h | 2 + packages/cuda/selectproyect.cu | 24 +++++++++--- packages/cuda/treeb.cu | 72 +++++++++++++++++++++++++--------- packages/cuda/union2.cu | 6 +-- 8 files changed, 117 insertions(+), 45 deletions(-) diff --git a/packages/cuda/bpreds.cu b/packages/cuda/bpreds.cu index 8ef2143e9..4d8783e85 100644 --- a/packages/cuda/bpreds.cu +++ b/packages/cuda/bpreds.cu @@ -52,7 +52,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret) int tmplen = rows + 1; int size = tmplen * sizeof(int); reservar(&temp, size); - // DEBUG_MEM cerr << "+ " << temp << " temp bpreds " << size << endl; +#ifdef DEBUG_MEM + cerr << "+ " << temp << " temp bpreds " << size << endl; +#endif cudaMemset(temp, 0, size); #if TIMER @@ -68,7 +70,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret) else hsize = sproj; reservar(&dhead, hsize); - // DEBUG_MEM cerr << "+ " << dhead << " dhead " << hsize << endl; +#ifdef DEBUG_MEM + cerr << "+ " << dhead << " dhead " << hsize << endl; +#endif cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice); int blockllen = rows / 1024 + 1; @@ -113,7 +117,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret) int *fres; reservar(&fres, num * sproj); - // DEBUG_MEM cerr << "+ " << fres << " fres " << num * sproj << endl; +#ifdef DEBUG_MEM + cerr << "+ " << fres << " fres " << num * sproj << endl; +#endif cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice); llenarproyectar<<>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres); diff --git a/packages/cuda/lista.cu b/packages/cuda/lista.cu index 0efcb4945..dde438acc 100644 --- a/packages/cuda/lista.cu +++ b/packages/cuda/lista.cu @@ -1000,7 +1000,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, { num_refs = rows1 * cols1 * sizeof(int); reservar(&res, num_refs); - // DEBUG_MEM cerr << "+ " << res << " Res " << num_refs << endl; +#ifdef DEBUG_MEM + cerr << "+ " << res << " Res " << num_refs << endl; +#endif cudaMemcpyAsync(res, dop1, num_refs, cudaMemcpyDeviceToDevice); registrar(rul_act->name, cols1, res, rows1, itr, 1); rul_act->gen_ant = rul_act->gen_act; @@ -1289,7 +1291,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, res_rows = selectproyect(dop1, rows1, cols1, tmprule.num_columns, tmprule.select[0], tmprule.numsel[0], tmprule.selfjoin[0], tmprule.numselfj[0], tmprule.project[0], &res); if(qposr != fin && qposr->name == qname) { cudaFree(dop1); - // DEBUG_MEM cerr << "- " << dop1 << " dop1" << endl; +#ifdef DEBUG_MEM + cerr << "- " << dop1 << " dop1" << endl; +#endif } } @@ -1299,7 +1303,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, cudaMemcpy(hres, res, tipo, cudaMemcpyDeviceToHost); if(res_rows > 0 /*&& tmprule.numsel[0] != 0 && tmprule.numselfj[0] != 0 */) { cudaFree(res); - // DEBUG_MEM cerr << "- " << res << " res" << endl; +#ifdef DEBUG_MEM + cerr << "- " << res << " res" << endl; +#endif } } else diff --git a/packages/cuda/memory.cu b/packages/cuda/memory.cu index a1db3501d..53d813ea5 100644 --- a/packages/cuda/memory.cu +++ b/packages/cuda/memory.cu @@ -105,14 +105,14 @@ int buscarpornombrecpu(int name, int itr, int *totalrows) return x; } -void limpiar(const char s[]) +void limpiar(const char s[], size_t sz) { list::iterator ini; memnode temp; if(GPUmem.size() == 0) { - // DEBUG_MEM cerr << s << ": not enough GPU memory: have " << avmem << endl; + cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl; exit(1); } @@ -176,7 +176,9 @@ void liberar(int *ptr, int size) //cout << "L " << avmem << " " << size; cudaFree(ptr); - // DEBUG_MEM cerr << "- " << ptr << " " << size << endl; +#ifdef DEBUG_MEM + cerr << "- " << ptr << " " << size << endl; +#endif avmem += size; //cout << " " << avmem << endl; @@ -193,9 +195,9 @@ void reservar(int **ptr, int size) return; } while(avmem < size) - limpiar("not enough memory"); + limpiar("not enough memory", size); while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation) - limpiar("error in memory allocation"); + limpiar("Error in memory allocation", size); if (! *ptr ) { size_t free, total; cudaMemGetInfo( &free, &total ); @@ -277,7 +279,9 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho } size = num_rows * num_columns * sizeof(int); reservar(&temp, size); - // DEBUG_MEM cerr << "+ " << temp << " temp " << size << endl; +#ifdef DEBUG_MEM + cerr << "+ " << temp << " temp " << size << endl; +#endif cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice); registrar(name, num_columns, temp, num_rows, itr, 0); *ptr = temp; @@ -297,7 +301,9 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho } size = totalrows * num_columns * sizeof(int); reservar(&temp, size); - // DEBUG_MEM cerr << "+ " << temp << " temp 2 " << size << endl; +#ifdef DEBUG_MEM + cerr << "+ " << temp << " temp 2 " << size << endl; +#endif for(x = 1; x < numgpu; x++) { cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice); @@ -340,7 +346,9 @@ int cargafinal(int name, int cols, int **ptr) } reservar(&temp, cont * cols * sizeof(int)); - // DEBUG_MEM cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl; +#ifdef DEBUG_MEM + cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl; +#endif ini = temp; pos = gpu; @@ -463,7 +471,9 @@ void resultados(vector::iterator first, vector::iterator las cout << endl; } cudaFree(gpu->dev_address); - // DEBUG_MEM cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl; +#ifdef DEBUG_MEM + cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl; +#endif free(temp); gpu++; } @@ -495,7 +505,9 @@ void clear_memory() { if (ini->isrule) { cudaFree(ini->dev_address); - // DEBUG_MEM cerr << "- " << ini->dev_address << " ini->dev_address" << endl; +#ifdef DEBUG_MEM + cerr << "- " << ini->dev_address << " ini->dev_address" << endl; +#endif ini = GPUmem.erase(ini); } else { ini++; diff --git a/packages/cuda/memory.h b/packages/cuda/memory.h index 9cc08e331..5f8feb695 100644 --- a/packages/cuda/memory.h +++ b/packages/cuda/memory.h @@ -11,7 +11,7 @@ using namespace std; void calcular_mem(int); void liberar(int*, int); -void limpiar(const char []); +void limpiar(const char [], size_t); void limpiartodo(int*, int*); int cargar(int, int, int, int, int*, int**, int); int cargafinal(int, int, int**); diff --git a/packages/cuda/pred.h b/packages/cuda/pred.h index e4c2b8918..9e1dd2694 100644 --- a/packages/cuda/pred.h +++ b/packages/cuda/pred.h @@ -1,6 +1,8 @@ #ifndef _PRED_H_ #define _PRED_H_ +// #define DEBUG_MEM 1 + typedef struct Nodo{ int name; int num_rows; diff --git a/packages/cuda/selectproyect.cu b/packages/cuda/selectproyect.cu index 170107fd3..c4529e441 100644 --- a/packages/cuda/selectproyect.cu +++ b/packages/cuda/selectproyect.cu @@ -211,7 +211,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int #endif int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int); reservar(&dhead, head_bytes); - // DEBUG_MEM cerr << "+ " << dhead << " dhead " << head_bytes << endl; +#ifdef DEBUG_MEM + cerr << "+ " << dhead << " dhead " << head_bytes << endl; +#endif int blockllen = rows / 1024 + 1; int numthreads = 1024; @@ -222,7 +224,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int tmplen = rows + 1; size2 = tmplen * sizeof(int); reservar(&temp, size2); - // DEBUG_MEM cerr << "+ " << temp << " temp select " << size2 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << temp << " temp select " << size2 << endl; +#endif cudaMemset(temp, 0, size2); size = numselect * sizeof(int); @@ -245,7 +249,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int size = head_size * sizeof(int); reservar(&fres, num * size); - // DEBUG_MEM cerr << "+ " << fres << " fres select " << num*size << endl; +#ifdef DEBUG_MEM + cerr << "+ " << fres << " fres select " << num*size << endl; +#endif cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); llenarproyectar<<>>(dop1, rows, cols, temp, dhead, head_size, fres); liberar(dhead, head_bytes); @@ -260,7 +266,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int tmplen = rows + 1; size2 = tmplen * sizeof(int); reservar(&temp, size2); - // DEBUG_MEM cerr << "+ " << temp << " temp select " << size2 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << temp << " temp select " << size2 << endl; +#endif cudaMemset(temp, 0, size2); size = numselfj * sizeof(int); @@ -275,7 +283,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int size = head_size * sizeof(int); reservar(&fres, num * size); - // DEBUG_MEM cerr << "+ " << fres << " fres select again " << num*size << endl; +#ifdef DEBUG_MEM + cerr << "+ " << fres << " fres select again " << num*size << endl; +#endif cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); llenarproyectar<<>>(dop1, rows, cols, temp, dhead, head_size, fres); liberar(dhead, head_bytes); @@ -287,7 +297,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int { size = head_size * sizeof(int); reservar(&fres, rows * size); - // DEBUG_MEM cerr << "+ " << fres << " fres select third " << rows*size << endl; +#ifdef DEBUG_MEM + cerr << "+ " << fres << " fres select third " << rows*size << endl; +#endif cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); proyectar<<>>(dop1, rows, cols, dhead, head_size, fres); liberar(dhead, head_bytes); diff --git a/packages/cuda/treeb.cu b/packages/cuda/treeb.cu index 9f5884405..7cc7176ce 100755 --- a/packages/cuda/treeb.cu +++ b/packages/cuda/treeb.cu @@ -780,9 +780,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: int dconsize = sizet * 2;*/ reservar(&dcons, sizet); - // DEBUG_MEM cerr << "+ " << dcons << " dcons tree " << sizet << endl; +#ifdef DEBUG_MEM + cerr << "+ " << dcons << " dcons tree " << sizet << endl; +#endif reservar(&temp, sizet2); - // DEBUG_MEM cerr << "+ " << temp << " temp tree " << sizet2 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << temp << " temp tree " << sizet2 << endl; +#endif thrust::device_ptr res = thrust::device_pointer_cast(temp); numthreads = 1024; @@ -846,7 +850,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: } catch(std::bad_alloc &e) { - limpiar("inclusive scan in join"); + limpiar("inclusive scan in join", 0); } } //thrust::inclusive_scan(res + 1, res + newLen, res + 1); @@ -857,9 +861,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: memSizeS = newLen * sizeof(int); reservar(&d_S, memSizeS); - // DEBUG_MEM cerr << "+ " << d_S << " d_S " << memSizeS << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_S << " d_S " << memSizeS << endl; +#endif reservar(&posS, memSizeS); - // DEBUG_MEM cerr << "+ " << posS << " posS " << memSizeS << endl; +#ifdef DEBUG_MEM + cerr << "+ " << posS << " posS " << memSizeS << endl; +#endif llenar<<>>(p2, d_S, sLen, of2, wherej[1], temp, posS); sLen = newLen; } @@ -880,9 +888,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: memSizeS = newLen * sizeof(int); reservar(&d_S, memSizeS); - // DEBUG_MEM cerr << "+ " << d_S << " d_S m " << memSizeS << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_S << " d_S m " << memSizeS << endl; +#endif reservar(&posS, memSizeS); - // DEBUG_MEM cerr << "+ " << posS << " posS m " << memSizeS << endl; +#ifdef DEBUG_MEM + cerr << "+ " << posS << " posS m " << memSizeS << endl; +#endif llenar<<>>(p2, d_S, sLen, of2, wherej[1], temp, posS); sLen = newLen; } @@ -890,7 +902,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: { memSizeS = sLen * sizeof(int); reservar(&d_S, memSizeS); - // DEBUG_MEM cerr << "+ " << d_S << " d_S n " << memSizeS << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_S << " d_S n " << memSizeS << endl; +#endif llenarnosel<<>>(p2, d_S, sLen, of2, wherej[1]); } } @@ -938,9 +952,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: m32rLen = newLen + extraspace; sizem32 = m32rLen * sizeof(int); reservar(&d_R, sizem32); - // DEBUG_MEM cerr << "+ " << d_R << " d_R m " << sizem32 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_R << " d_R m " << sizem32 << endl; +#endif reservar(&posR, sizem32); - // DEBUG_MEM cerr << "+ " << posR << " posR m " << sizem32 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << posR << " posR m " << sizem32 << endl; +#endif cudaMemsetAsync(d_R + newLen, 0x7f, sizextra); cudaMemsetAsync(posR + newLen, 0x7f, sizextra); llenar<<>>(p1, d_R, rLen, of1, wherej[0], temp, posR); @@ -966,9 +984,13 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: m32rLen = newLen + extraspace; sizem32 = m32rLen * sizeof(int); reservar(&d_R, sizem32); - // DEBUG_MEM cerr << "+ " << d_R << " d_R n " << sizem32 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_R << " d_R n " << sizem32 << endl; +#endif reservar(&posR, sizem32); - // DEBUG_MEM cerr << "+ " << posR << " posR n " << sizem32 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << posR << " posR n " << sizem32 << endl; +#endif cudaMemsetAsync(d_R + newLen, 0x7f, sizextra); cudaMemsetAsync(posR + newLen, 0x7f, sizextra); llenar<<>>(p1, d_R, rLen, of1, wherej[0], temp, posR); @@ -978,7 +1000,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: { sizem32 = m32rLen * sizeof(int); reservar(&d_R, sizem32); - // DEBUG_MEM cerr << "+ " << d_R << " d_R sizem32 " << sizem32 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_R << " d_R sizem32 " << sizem32 << endl; +#endif cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int)); llenarnosel<<>>(p1, d_R, rLen, of1, wherej[0]); } @@ -989,7 +1013,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: { sizem32 = m32rLen * sizeof(int); reservar(&d_R, sizem32); - // DEBUG_MEM cerr << "+ " << d_R << " d_R sz " << sizem32 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_R << " d_R sz " << sizem32 << endl; +#endif cudaMemsetAsync(d_R + rLen, 0x7f, extraspace * sizeof(int)); llenarnosel<<>>(p1, d_R, rLen, of1, wherej[0]); } @@ -1025,7 +1051,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: if(posR == NULL) { reservar(&posR, sizem32); - // DEBUG_MEM cerr << "+ " << posR << " posR m32 " << sizem32 << endl; +#ifdef DEBUG_MEM + cerr << "+ " << posR << " posR m32 " << sizem32 << endl; +#endif permutation = thrust::device_pointer_cast(posR); thrust::sequence(permutation, permutation + m32rLen); } @@ -1042,7 +1070,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: } catch(std::bad_alloc &e) { - limpiar("inclusive scan in join"); + limpiar("inclusive scan in join", 0); } } @@ -1093,7 +1121,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: int *d_locations; reservar(&d_locations, memSizeS); - // DEBUG_MEM cerr << "+ " << d_locations << " d_locs n " << memSizeS << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_locations << " d_locs n " << memSizeS << endl; +#endif dim3 Dbs(THRD_PER_BLCK_search, 1, 1); dim3 Dgs(BLCK_PER_GRID_search, 1, 1); @@ -1133,7 +1163,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice); resSize = sum * sizepro; reservar(&d_Rout, resSize); - // DEBUG_MEM cerr << "+ " << d_Rout << " d_Rout n " << resSize << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_Rout << " d_Rout n " << resSize << endl; +#endif if(numj > 2) { cudaMemcpy(dcons + rule->num_columns, wherej + 2, muljoinsize, cudaMemcpyHostToDevice); @@ -1148,7 +1180,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list: cudaMemcpy(dcons, proj, sizepro, cudaMemcpyHostToDevice); resSize = sum * sizepro; reservar(&d_Rout, resSize); - // DEBUG_MEM cerr << "+ " << d_Rout << " d_Rout 2 " << resSize << endl; +#ifdef DEBUG_MEM + cerr << "+ " << d_Rout << " d_Rout 2 " << resSize << endl; +#endif if(numj > 2) { cudaMemcpy(dcons + projp.y, wherej + 2, muljoinsize, cudaMemcpyHostToDevice); diff --git a/packages/cuda/union2.cu b/packages/cuda/union2.cu index d4f9b5ae0..dd1b856c1 100644 --- a/packages/cuda/union2.cu +++ b/packages/cuda/union2.cu @@ -105,7 +105,7 @@ int unir(int *res, int rows, int tipo) } catch(std::bad_alloc &e) { - limpiar("sort/unique in unir"); + limpiar("sort/unique in unir", 0); } } nrows = thrust::distance(pt, re); @@ -143,7 +143,7 @@ int unir(int *res, int rows, int tipo) } catch(std::bad_alloc &e) { - limpiar("sort/unique in unir"); + limpiar("sort/unique in unir", 0); } } nrows = thrust::distance(pt2, re2); @@ -182,7 +182,7 @@ int unir(int *res, int rows, int tipo) } catch(std::bad_alloc &e) { - limpiar("sort/unique in unir"); + limpiar("sort/unique in unir", 0); } } nrows = thrust::distance(pt3, re3);