profiling

This commit is contained in:
Vítor Santos Costa 2013-10-16 16:19:03 +01:00
parent 94cb9b7563
commit 5ad10a1057
8 changed files with 83 additions and 9 deletions

View File

@ -55,6 +55,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
// cerr << "+ " << temp << " temp bpreds " << size << endl;
cudaMemset(temp, 0, size);
#if TIMER
cuda_stats.builtins++;
#endif
int *dhead;
int predn = numpreds.x * 3;
int spredn = predn * sizeof(int);

View File

@ -395,6 +395,12 @@ static int cuda_count( void )
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
}
/* Prolog-callable wrapper with no arguments: delegates to
   Cuda_Statistics() to emit the GPU profiling report and
   unconditionally reports success. */
static int
cuda_statistics(void)
{
  Cuda_Statistics();
  return TRUE;
}
static int first_time = TRUE;
void
@ -417,5 +423,6 @@ init_cuda(void)
YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
YAP_UserCPredicate("cuda_count", cuda_count, 2);
YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
}

View File

@ -4,6 +4,7 @@
cuda_erase/1,
cuda_eval/2,
cuda_coverage/4,
cuda_statistics/0,
cuda_count/2]).
tell_warning :-

View File

@ -13,6 +13,10 @@ extern "C" {
#define MAXVALS 200
#if TIMER
statinfo cuda_stats;
#endif
bool compare(const gpunode &r1, const gpunode &r2)
{
return (r1.name > r2.name);
@ -851,6 +855,28 @@ void mostrareglas(list<rulenode> aux)
cout << endl;
}
/*
 * Print the accumulated GPU profiling report to stderr.
 *
 * The counters are read from the global cuda_stats, which is only declared
 * when the TIMER macro is enabled. The detailed report is therefore compiled
 * under the same guard, so this entry point still builds (and stays callable)
 * when profiling is disabled; in that case only the header and a short note
 * are printed.
 *
 * All times are the values accumulated from cudaEventElapsedTime(), i.e.
 * milliseconds.
 */
extern "C"
void Cuda_Statistics(void)
{
	cerr << "GPU Statistics" << endl;
#if TIMER
	/* Per-call summary. */
	cerr << "Called " << cuda_stats.calls << " times." << endl;
	cerr << "GPU time " << cuda_stats.total_time << " msec." << endl;
	cerr << "Longest call " << cuda_stats.max_time << " msec." << endl;
	cerr << "Fastest call " << cuda_stats.min_time << " msec." << endl << endl;

	/* Time accumulated in each evaluation step. */
	cerr << "Steps" << endl;
	cerr << "    Select First: " << cuda_stats.select1_time << " msec." << endl;
	cerr << "    Select Second: " << cuda_stats.select2_time << " msec." << endl;
	cerr << "    Sort: " << cuda_stats.sort_time << " msec." << endl;
	cerr << "    Join: " << cuda_stats.join_time << " msec." << endl;
	cerr << "    Union: " << cuda_stats.union_time << " msec." << endl;
	cerr << "    Built-in: " << cuda_stats.pred_time << " msec." << endl << endl;

	/* Number of times each kind of operation was executed. */
	cerr << "Operations" << endl;
	cerr << "    Joins: " << cuda_stats.joins << "." << endl;
	cerr << "    Selects/Projects: " << cuda_stats.selects << "." << endl;
	cerr << "    Unions: " << cuda_stats.unions << "." << endl;
	cerr << "    Built-ins: " << cuda_stats.builtins << "." << endl << endl;
#else
	/* cuda_stats does not exist in this build, so there is nothing to report. */
	cerr << "Profiling disabled (TIMER not enabled at build time)." << endl << endl;
#endif
}
extern "C"
int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, predicate *inpquery, int **result)
{
@ -859,6 +885,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
int x, y;
int qsize, *query, qname;
#if TIMER
cuda_stats.calls++;
#endif
for(x = 0; x < ninpf; x++)
L.push_back(*inpfacts[x]);
for(x = 0; x < ninpr; x++)
@ -1104,7 +1133,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
cudaEventElapsedTime(&time, start3, stop3);
cudaEventDestroy(start3);
cudaEventDestroy(stop3);
cout << "Predicados = " << time << endl;
//cout << "Predicados = " << time << endl;
cuda_stats.pred_time += time;
#endif
}
@ -1125,7 +1155,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
cudaEventElapsedTime(&time, start2, stop2);
cudaEventDestroy(start2);
cudaEventDestroy(stop2);
cout << "Union = " << time << endl;
//cout << "Union = " << time << endl;
cuda_stats.union_time += time;
#endif
//cout << "despues de unir = " << res_rows << endl;
@ -1273,6 +1304,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cuda_stats.total_time += time;
if (time > cuda_stats.max_time)
cuda_stats.max_time = time;
if (time < cuda_stats.min_time || cuda_stats.calls == 1)
cuda_stats.min_time = time;
cudaEventDestroy(start);
cudaEventDestroy(stop);

View File

@ -11,6 +11,20 @@ typedef struct Nodo{
typedef gpunode predicate;
// #define TIMER 1
#if TIMER
/*
 * Profiling counters, available only when TIMER is enabled.
 * Times are accumulated from cudaEventElapsedTime() results (milliseconds).
 */
typedef struct Stats{
	/* Operation counts: each is incremented once per call of the
	   corresponding routine (join, selectproyect, unir, bpreds). */
	size_t joins;
	size_t selects;
	size_t unions;
	size_t builtins;

	/* Number of Cuda_Eval invocations. */
	size_t calls;

	/* Sum of the elapsed time of every Cuda_Eval call. */
	double total_time;

	/* Slowest and fastest single Cuda_Eval call seen so far. */
	float max_time;
	float min_time;

	/* Time accumulated per evaluation step. */
	float select1_time;
	float select2_time;
	float join_time;
	float sort_time;
	float union_time;
	float pred_time;
}statinfo;
extern statinfo cuda_stats;
#endif
#define SBG_EQ (-1)
#define SBG_GT (-2)
#define SBG_LT (-3)
@ -19,5 +33,5 @@ typedef gpunode predicate;
#define SBG_DF (-6)
int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**);
void Cuda_Statistics( void );
#endif

View File

@ -206,6 +206,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
int size, size2, num;
thrust::device_ptr<int> res;
#if TIMER
cuda_stats.selects++;
#endif
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
reservar(&dhead, head_bytes);
// cerr << "+ " << dhead << " dhead " << head_bytes << endl;

View File

@ -746,6 +746,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
int *wherej = rule->wherejoin[pos];
int numj = rule->numjoin[pos];
int flag;
#if TIMER
cuda_stats.joins++;
#endif
int porLiberar = rLen * of1 * sizeof(int);
int size, sizet, sizet2;
@ -788,7 +791,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
int *posR = NULL, *posS = NULL;
#ifdef TIMER
cout << "INICIO" << endl;
//cout << "INICIO" << endl;
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
@ -896,7 +899,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cout << "Select1 = " << time << endl;
//cout << "Select1 = " << time << endl;
cuda_stats.select1_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
@ -994,7 +998,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cout << "Select2 = " << time << endl;
//cout << "Select2 = " << time << endl;
cuda_stats.select2_time += time;
#endif
/*free(hcons);
@ -1045,7 +1050,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cout << "Sort = " << time << endl;
//cout << "Sort = " << time << endl;
cuda_stats.sort_time += time;
cudaEventDestroy(start);
cudaEventDestroy(stop);
@ -1181,8 +1187,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cout << "Join = " << time << endl;
cout << "FIN" << endl;
//cout << "Join = " << time << endl;
//cout << "FIN" << endl;
cuda_stats.join_time += time;
#endif
return sum;

View File

@ -86,6 +86,9 @@ int unir(int *res, int rows, int tipo)
s3 *t3;
int flag, nrows;
#if TIMER
cuda_stats.unions++;
#endif
switch(tipo)
{
case 1: