profiling
This commit is contained in:
parent
94cb9b7563
commit
5ad10a1057
@ -55,6 +55,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
|
||||
// cerr << "+ " << temp << " temp bpreds " << size << endl;
|
||||
cudaMemset(temp, 0, size);
|
||||
|
||||
#if TIMER
|
||||
cuda_stats.builtins++;
|
||||
#endif
|
||||
int *dhead;
|
||||
int predn = numpreds.x * 3;
|
||||
int spredn = predn * sizeof(int);
|
||||
|
@ -395,6 +395,12 @@ static int cuda_count( void )
|
||||
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
|
||||
}
|
||||
|
||||
static int cuda_statistics( void )
|
||||
{
|
||||
Cuda_Statistics();
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static int first_time = TRUE;
|
||||
|
||||
void
|
||||
@ -417,5 +423,6 @@ init_cuda(void)
|
||||
YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
|
||||
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
|
||||
YAP_UserCPredicate("cuda_count", cuda_count, 2);
|
||||
YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
cuda_erase/1,
|
||||
cuda_eval/2,
|
||||
cuda_coverage/4,
|
||||
cuda_statistics/0,
|
||||
cuda_count/2]).
|
||||
|
||||
tell_warning :-
|
||||
|
@ -13,6 +13,10 @@ extern "C" {
|
||||
|
||||
#define MAXVALS 200
|
||||
|
||||
#if TIMER
|
||||
statinfo cuda_stats;
|
||||
#endif
|
||||
|
||||
bool compare(const gpunode &r1, const gpunode &r2)
|
||||
{
|
||||
return (r1.name > r2.name);
|
||||
@ -851,6 +855,28 @@ void mostrareglas(list<rulenode> aux)
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
/*
 * Cuda_Statistics: write the accumulated GPU profiling counters to stderr.
 *
 * The counters live in the global `cuda_stats`, which is only declared and
 * defined when the build enables TIMER.  The report is therefore guarded by
 * the same `#if TIMER` test; without the guard a non-profiling build cannot
 * compile this function.  When profiling is disabled only a short notice is
 * printed.
 *
 * Times are printed in milliseconds, the unit of the values accumulated
 * from cudaEventElapsedTime().
 */
extern "C"
void Cuda_Statistics(void)
{
	std::cerr << "GPU Statistics" << std::endl;
#if TIMER
	/* Per-call summary. */
	std::cerr << "Called " << cuda_stats.calls << " times." << std::endl;
	std::cerr << "GPU time " << cuda_stats.total_time << " msec." << std::endl;
	std::cerr << "Longest call " << cuda_stats.max_time << " msec." << std::endl;
	std::cerr << "Fastest call " << cuda_stats.min_time << " msec." << std::endl << std::endl;

	/* Time accumulated in each evaluation step. */
	std::cerr << "Steps" << std::endl;
	std::cerr << "    Select First: " << cuda_stats.select1_time << " msec." << std::endl;
	std::cerr << "    Select Second: " << cuda_stats.select2_time << " msec." << std::endl;
	std::cerr << "    Sort: " << cuda_stats.sort_time << " msec." << std::endl;
	std::cerr << "    Join: " << cuda_stats.join_time << " msec." << std::endl;
	std::cerr << "    Union: " << cuda_stats.union_time << " msec." << std::endl;
	std::cerr << "    Built-in: " << cuda_stats.pred_time << " msec." << std::endl << std::endl;

	/* Number of times each operation was invoked. */
	std::cerr << "Operations" << std::endl;
	std::cerr << "    Joins: " << cuda_stats.joins << "." << std::endl;
	std::cerr << "    Selects/Projects: " << cuda_stats.selects << "." << std::endl;
	std::cerr << "    Unions: " << cuda_stats.unions << "." << std::endl;
	std::cerr << "    Built-ins: " << cuda_stats.builtins << "." << std::endl << std::endl;
#else
	/* No counters exist in this build; say so instead of failing to compile. */
	std::cerr << "Profiling is disabled; rebuild with TIMER enabled to collect statistics."
	          << std::endl << std::endl;
#endif
}
|
||||
|
||||
extern "C"
|
||||
int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, predicate *inpquery, int **result)
|
||||
{
|
||||
@ -859,6 +885,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
int x, y;
|
||||
int qsize, *query, qname;
|
||||
|
||||
#if TIMER
|
||||
cuda_stats.calls++;
|
||||
#endif
|
||||
for(x = 0; x < ninpf; x++)
|
||||
L.push_back(*inpfacts[x]);
|
||||
for(x = 0; x < ninpr; x++)
|
||||
@ -1104,7 +1133,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
cudaEventElapsedTime(&time, start3, stop3);
|
||||
cudaEventDestroy(start3);
|
||||
cudaEventDestroy(stop3);
|
||||
cout << "Predicados = " << time << endl;
|
||||
//cout << "Predicados = " << time << endl;
|
||||
cuda_stats.pred_time += time;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1125,7 +1155,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
cudaEventElapsedTime(&time, start2, stop2);
|
||||
cudaEventDestroy(start2);
|
||||
cudaEventDestroy(stop2);
|
||||
cout << "Union = " << time << endl;
|
||||
//cout << "Union = " << time << endl;
|
||||
cuda_stats.union_time += time;
|
||||
#endif
|
||||
|
||||
//cout << "despues de unir = " << res_rows << endl;
|
||||
@ -1273,6 +1304,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cuda_stats.total_time += time;
|
||||
if (time > cuda_stats.max_time)
|
||||
cuda_stats.max_time = time;
|
||||
if (time < cuda_stats.min_time || cuda_stats.calls == 1)
|
||||
cuda_stats.min_time = time;
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
|
||||
|
@ -11,6 +11,20 @@ typedef struct Nodo{
|
||||
|
||||
typedef gpunode predicate;
|
||||
|
||||
// #define TIMER 1
|
||||
|
||||
#if TIMER
|
||||
/*
 * Profiling counters shared by the GPU evaluation code (one global
 * instance, cuda_stats).  Field order is significant for layout and is
 * kept exactly as originally declared.
 */
typedef struct Stats {
  /* How many times each operation was invoked. */
  size_t joins;      /* bumped on entry to join() */
  size_t selects;    /* bumped on entry to selectproyect() */
  size_t unions;     /* bumped on entry to unir() */
  size_t builtins;   /* bumped in bpreds() */

  /* Number of Cuda_Eval() invocations. */
  size_t calls;

  /* Sum of the elapsed time of every Cuda_Eval() call (reported as msec). */
  double total_time;

  /* Longest and shortest single Cuda_Eval() call seen so far. */
  float max_time;
  float min_time;

  /* Elapsed time accumulated per evaluation step. */
  float select1_time;
  float select2_time;
  float join_time;
  float sort_time;
  float union_time;
  float pred_time;
} statinfo;
|
||||
|
||||
extern statinfo cuda_stats;
|
||||
#endif
|
||||
|
||||
#define SBG_EQ (-1)
|
||||
#define SBG_GT (-2)
|
||||
#define SBG_LT (-3)
|
||||
@ -19,5 +33,5 @@ typedef gpunode predicate;
|
||||
#define SBG_DF (-6)
|
||||
|
||||
int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**);
|
||||
|
||||
void Cuda_Statistics( void );
|
||||
#endif
|
||||
|
@ -206,6 +206,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
int size, size2, num;
|
||||
thrust::device_ptr<int> res;
|
||||
|
||||
#if TIMER
|
||||
cuda_stats.selects++;
|
||||
#endif
|
||||
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
|
||||
reservar(&dhead, head_bytes);
|
||||
// cerr << "+ " << dhead << " dhead " << head_bytes << endl;
|
||||
|
@ -746,6 +746,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
int *wherej = rule->wherejoin[pos];
|
||||
int numj = rule->numjoin[pos];
|
||||
int flag;
|
||||
#if TIMER
|
||||
cuda_stats.joins++;
|
||||
#endif
|
||||
|
||||
int porLiberar = rLen * of1 * sizeof(int);
|
||||
int size, sizet, sizet2;
|
||||
@ -788,7 +791,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
int *posR = NULL, *posS = NULL;
|
||||
|
||||
#ifdef TIMER
|
||||
cout << "INICIO" << endl;
|
||||
//cout << "INICIO" << endl;
|
||||
cudaEvent_t start, stop;
|
||||
float time;
|
||||
cudaEventCreate(&start);
|
||||
@ -896,7 +899,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cout << "Select1 = " << time << endl;
|
||||
//cout << "Select1 = " << time << endl;
|
||||
cuda_stats.select1_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
@ -994,7 +998,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cout << "Select2 = " << time << endl;
|
||||
//cout << "Select2 = " << time << endl;
|
||||
cuda_stats.select2_time += time;
|
||||
#endif
|
||||
|
||||
/*free(hcons);
|
||||
@ -1045,7 +1050,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cout << "Sort = " << time << endl;
|
||||
//cout << "Sort = " << time << endl;
|
||||
cuda_stats.sort_time += time;
|
||||
|
||||
cudaEventDestroy(start);
|
||||
cudaEventDestroy(stop);
|
||||
@ -1181,8 +1187,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
cout << "Join = " << time << endl;
|
||||
cout << "FIN" << endl;
|
||||
//cout << "Join = " << time << endl;
|
||||
//cout << "FIN" << endl;
|
||||
cuda_stats.join_time += time;
|
||||
#endif
|
||||
|
||||
return sum;
|
||||
|
@ -86,6 +86,9 @@ int unir(int *res, int rows, int tipo)
|
||||
s3 *t3;
|
||||
int flag, nrows;
|
||||
|
||||
#if TIMER
|
||||
cuda_stats.unions++;
|
||||
#endif
|
||||
switch(tipo)
|
||||
{
|
||||
case 1:
|
||||
|
Reference in New Issue
Block a user