profiling
This commit is contained in:
parent
94cb9b7563
commit
5ad10a1057
@ -55,6 +55,9 @@ int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
|
|||||||
// cerr << "+ " << temp << " temp bpreds " << size << endl;
|
// cerr << "+ " << temp << " temp bpreds " << size << endl;
|
||||||
cudaMemset(temp, 0, size);
|
cudaMemset(temp, 0, size);
|
||||||
|
|
||||||
|
#if TIMER
|
||||||
|
cuda_stats.builtins++;
|
||||||
|
#endif
|
||||||
int *dhead;
|
int *dhead;
|
||||||
int predn = numpreds.x * 3;
|
int predn = numpreds.x * 3;
|
||||||
int spredn = predn * sizeof(int);
|
int spredn = predn * sizeof(int);
|
||||||
|
@ -395,6 +395,12 @@ static int cuda_count( void )
|
|||||||
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
|
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int cuda_statistics( void )
|
||||||
|
{
|
||||||
|
Cuda_Statistics();
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
static int first_time = TRUE;
|
static int first_time = TRUE;
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -417,5 +423,6 @@ init_cuda(void)
|
|||||||
YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
|
YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
|
||||||
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
|
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
|
||||||
YAP_UserCPredicate("cuda_count", cuda_count, 2);
|
YAP_UserCPredicate("cuda_count", cuda_count, 2);
|
||||||
|
YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
cuda_erase/1,
|
cuda_erase/1,
|
||||||
cuda_eval/2,
|
cuda_eval/2,
|
||||||
cuda_coverage/4,
|
cuda_coverage/4,
|
||||||
|
cuda_statistics/0,
|
||||||
cuda_count/2]).
|
cuda_count/2]).
|
||||||
|
|
||||||
tell_warning :-
|
tell_warning :-
|
||||||
|
@ -13,6 +13,10 @@ extern "C" {
|
|||||||
|
|
||||||
#define MAXVALS 200
|
#define MAXVALS 200
|
||||||
|
|
||||||
|
#if TIMER
|
||||||
|
statinfo cuda_stats;
|
||||||
|
#endif
|
||||||
|
|
||||||
bool compare(const gpunode &r1, const gpunode &r2)
|
bool compare(const gpunode &r1, const gpunode &r2)
|
||||||
{
|
{
|
||||||
return (r1.name > r2.name);
|
return (r1.name > r2.name);
|
||||||
@ -851,6 +855,28 @@ void mostrareglas(list<rulenode> aux)
|
|||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Cuda_Statistics: print the accumulated GPU profiling counters to stderr.
 *
 * The counters are kept in the global `cuda_stats`, which is only declared
 * and defined when the build sets TIMER. The body is guarded with the same
 * condition so this entry point (declared unconditionally in the header and
 * registered as a predicate) still compiles and links when TIMER is off; in
 * that case it only reports that no statistics were collected.
 *
 * Times are the values accumulated from cudaEventElapsedTime and are
 * printed as milliseconds.
 */
extern "C"
void Cuda_Statistics(void)
{
#if TIMER
  cerr << "GPU Statistics" << endl;
  cerr << "Called " << cuda_stats.calls << " times." << endl;
  cerr << "GPU time " << cuda_stats.total_time << " msec." << endl;
  cerr << "Longest call " << cuda_stats.max_time << " msec." << endl;
  cerr << "Fastest call " << cuda_stats.min_time << " msec." << endl << endl;

  /* Time accumulated in each evaluation step. */
  cerr << "Steps" << endl;
  cerr << " Select First: " << cuda_stats.select1_time << " msec." << endl;
  cerr << " Select Second: " << cuda_stats.select2_time << " msec." << endl;
  cerr << " Sort: " << cuda_stats.sort_time << " msec." << endl;
  cerr << " Join: " << cuda_stats.join_time << " msec." << endl;
  cerr << " Union: " << cuda_stats.union_time << " msec." << endl;
  cerr << " Built-in: " << cuda_stats.pred_time << " msec." << endl << endl;

  /* Number of times each operation was invoked. */
  cerr << "Operations" << endl;
  cerr << " Joins: " << cuda_stats.joins << "." << endl;
  cerr << " Selects/Projects: " << cuda_stats.selects << "." << endl;
  cerr << " Unions: " << cuda_stats.unions << "." << endl;
  cerr << " Built-ins: " << cuda_stats.builtins << "." << endl << endl;
#else
  /* Built without TIMER: the counters do not exist in this build. */
  cerr << "GPU Statistics are not available (built without TIMER)." << endl;
#endif
}
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, predicate *inpquery, int **result)
|
int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr, predicate *inpquery, int **result)
|
||||||
{
|
{
|
||||||
@ -859,6 +885,9 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
int x, y;
|
int x, y;
|
||||||
int qsize, *query, qname;
|
int qsize, *query, qname;
|
||||||
|
|
||||||
|
#if TIMER
|
||||||
|
cuda_stats.calls++;
|
||||||
|
#endif
|
||||||
for(x = 0; x < ninpf; x++)
|
for(x = 0; x < ninpf; x++)
|
||||||
L.push_back(*inpfacts[x]);
|
L.push_back(*inpfacts[x]);
|
||||||
for(x = 0; x < ninpr; x++)
|
for(x = 0; x < ninpr; x++)
|
||||||
@ -1104,7 +1133,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
cudaEventElapsedTime(&time, start3, stop3);
|
cudaEventElapsedTime(&time, start3, stop3);
|
||||||
cudaEventDestroy(start3);
|
cudaEventDestroy(start3);
|
||||||
cudaEventDestroy(stop3);
|
cudaEventDestroy(stop3);
|
||||||
cout << "Predicados = " << time << endl;
|
//cout << "Predicados = " << time << endl;
|
||||||
|
cuda_stats.pred_time += time;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1125,7 +1155,8 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
cudaEventElapsedTime(&time, start2, stop2);
|
cudaEventElapsedTime(&time, start2, stop2);
|
||||||
cudaEventDestroy(start2);
|
cudaEventDestroy(start2);
|
||||||
cudaEventDestroy(stop2);
|
cudaEventDestroy(stop2);
|
||||||
cout << "Union = " << time << endl;
|
//cout << "Union = " << time << endl;
|
||||||
|
cuda_stats.union_time += time;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//cout << "despues de unir = " << res_rows << endl;
|
//cout << "despues de unir = " << res_rows << endl;
|
||||||
@ -1273,6 +1304,11 @@ int Cuda_Eval(predicate **inpfacts, int ninpf, predicate **inprules, int ninpr,
|
|||||||
cudaEventRecord(stop, 0);
|
cudaEventRecord(stop, 0);
|
||||||
cudaEventSynchronize(stop);
|
cudaEventSynchronize(stop);
|
||||||
cudaEventElapsedTime(&time, start, stop);
|
cudaEventElapsedTime(&time, start, stop);
|
||||||
|
cuda_stats.total_time += time;
|
||||||
|
if (time > cuda_stats.max_time)
|
||||||
|
cuda_stats.max_time = time;
|
||||||
|
if (time < cuda_stats.min_time || cuda_stats.calls == 1)
|
||||||
|
cuda_stats.min_time = time;
|
||||||
cudaEventDestroy(start);
|
cudaEventDestroy(start);
|
||||||
cudaEventDestroy(stop);
|
cudaEventDestroy(stop);
|
||||||
|
|
||||||
|
@ -11,6 +11,20 @@ typedef struct Nodo{
|
|||||||
|
|
||||||
typedef gpunode predicate;
|
typedef gpunode predicate;
|
||||||
|
|
||||||
|
// #define TIMER 1
|
||||||
|
|
||||||
|
#if TIMER
/*
 * Profiling counters for the GPU engine; only present in TIMER builds.
 * A single global instance, cuda_stats, is shared by all engine sources.
 */
typedef struct Stats{
  /* Operation counts, incremented on entry to the matching routine. */
  size_t joins;        /* join() */
  size_t selects;      /* selectproyect() */
  size_t unions;       /* unir() */
  size_t builtins;     /* bpreds() */
  /* Number of Cuda_Eval() invocations. */
  size_t calls;
  /* Sum of the elapsed time measured for every Cuda_Eval() call. */
  double total_time;
  /* Longest and shortest single Cuda_Eval() call seen so far. */
  float max_time;
  float min_time;
  /* Elapsed time accumulated per step (values from cudaEventElapsedTime). */
  float select1_time;
  float select2_time;
  float join_time;
  float sort_time;
  float union_time;
  float pred_time;
}statinfo;

/* Defined once in the engine's main source file. */
extern statinfo cuda_stats;
#endif
|
||||||
|
|
||||||
#define SBG_EQ (-1)
|
#define SBG_EQ (-1)
|
||||||
#define SBG_GT (-2)
|
#define SBG_GT (-2)
|
||||||
#define SBG_LT (-3)
|
#define SBG_LT (-3)
|
||||||
@ -19,5 +33,5 @@ typedef gpunode predicate;
|
|||||||
#define SBG_DF (-6)
|
#define SBG_DF (-6)
|
||||||
|
|
||||||
int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**);
|
int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**);
|
||||||
|
void Cuda_Statistics( void );
|
||||||
#endif
|
#endif
|
||||||
|
@ -206,6 +206,9 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
int size, size2, num;
|
int size, size2, num;
|
||||||
thrust::device_ptr<int> res;
|
thrust::device_ptr<int> res;
|
||||||
|
|
||||||
|
#if TIMER
|
||||||
|
cuda_stats.selects++;
|
||||||
|
#endif
|
||||||
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
|
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
|
||||||
reservar(&dhead, head_bytes);
|
reservar(&dhead, head_bytes);
|
||||||
// cerr << "+ " << dhead << " dhead " << head_bytes << endl;
|
// cerr << "+ " << dhead << " dhead " << head_bytes << endl;
|
||||||
|
@ -746,6 +746,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
int *wherej = rule->wherejoin[pos];
|
int *wherej = rule->wherejoin[pos];
|
||||||
int numj = rule->numjoin[pos];
|
int numj = rule->numjoin[pos];
|
||||||
int flag;
|
int flag;
|
||||||
|
#if TIMER
|
||||||
|
cuda_stats.joins++;
|
||||||
|
#endif
|
||||||
|
|
||||||
int porLiberar = rLen * of1 * sizeof(int);
|
int porLiberar = rLen * of1 * sizeof(int);
|
||||||
int size, sizet, sizet2;
|
int size, sizet, sizet2;
|
||||||
@ -788,7 +791,7 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
int *posR = NULL, *posS = NULL;
|
int *posR = NULL, *posS = NULL;
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
cout << "INICIO" << endl;
|
//cout << "INICIO" << endl;
|
||||||
cudaEvent_t start, stop;
|
cudaEvent_t start, stop;
|
||||||
float time;
|
float time;
|
||||||
cudaEventCreate(&start);
|
cudaEventCreate(&start);
|
||||||
@ -896,7 +899,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
cudaEventRecord(stop, 0);
|
cudaEventRecord(stop, 0);
|
||||||
cudaEventSynchronize(stop);
|
cudaEventSynchronize(stop);
|
||||||
cudaEventElapsedTime(&time, start, stop);
|
cudaEventElapsedTime(&time, start, stop);
|
||||||
cout << "Select1 = " << time << endl;
|
//cout << "Select1 = " << time << endl;
|
||||||
|
cuda_stats.select1_time += time;
|
||||||
|
|
||||||
cudaEventDestroy(start);
|
cudaEventDestroy(start);
|
||||||
cudaEventDestroy(stop);
|
cudaEventDestroy(stop);
|
||||||
@ -994,7 +998,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
cudaEventRecord(stop, 0);
|
cudaEventRecord(stop, 0);
|
||||||
cudaEventSynchronize(stop);
|
cudaEventSynchronize(stop);
|
||||||
cudaEventElapsedTime(&time, start, stop);
|
cudaEventElapsedTime(&time, start, stop);
|
||||||
cout << "Select2 = " << time << endl;
|
//cout << "Select2 = " << time << endl;
|
||||||
|
cuda_stats.select2_time += time;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*free(hcons);
|
/*free(hcons);
|
||||||
@ -1045,7 +1050,8 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
cudaEventRecord(stop, 0);
|
cudaEventRecord(stop, 0);
|
||||||
cudaEventSynchronize(stop);
|
cudaEventSynchronize(stop);
|
||||||
cudaEventElapsedTime(&time, start, stop);
|
cudaEventElapsedTime(&time, start, stop);
|
||||||
cout << "Sort = " << time << endl;
|
//cout << "Sort = " << time << endl;
|
||||||
|
cuda_stats.sort_time += time;
|
||||||
|
|
||||||
cudaEventDestroy(start);
|
cudaEventDestroy(start);
|
||||||
cudaEventDestroy(stop);
|
cudaEventDestroy(stop);
|
||||||
@ -1181,8 +1187,9 @@ int join(int *p1, int *p2, int rLen, int sLen, int of1, int of2, list<rulenode>:
|
|||||||
cudaEventRecord(stop, 0);
|
cudaEventRecord(stop, 0);
|
||||||
cudaEventSynchronize(stop);
|
cudaEventSynchronize(stop);
|
||||||
cudaEventElapsedTime(&time, start, stop);
|
cudaEventElapsedTime(&time, start, stop);
|
||||||
cout << "Join = " << time << endl;
|
//cout << "Join = " << time << endl;
|
||||||
cout << "FIN" << endl;
|
//cout << "FIN" << endl;
|
||||||
|
cuda_stats.join_time += time;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return sum;
|
return sum;
|
||||||
|
@ -86,6 +86,9 @@ int unir(int *res, int rows, int tipo)
|
|||||||
s3 *t3;
|
s3 *t3;
|
||||||
int flag, nrows;
|
int flag, nrows;
|
||||||
|
|
||||||
|
#if TIMER
|
||||||
|
cuda_stats.unions++;
|
||||||
|
#endif
|
||||||
switch(tipo)
|
switch(tipo)
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
|
Reference in New Issue
Block a user