// CPU-side index nested-loop join helpers and the joincpu() entry point.
//
// NOTE(review): this text appears to have lost its line breaks and the
// characters between a '<' and the following '>' (header names after
// #include, template arguments after vector/list, and code such as the loop
// headers in "for(k=startS; ksearch(keyForSearch);" and "for(i=0;i 2) ...").
// As stored, the first "//" on a physical line also comments out the rest of
// that line. Restore the file from version control before building; the notes
// below describe only what is still visible.
//
// partInlj / partInlj2
//   For every R[i] equal to keyForSearch, append one projected row to *res.
//   partInlj:  posR = perm[i] * of1; the first halfrul entries of proj index
//              p1 at posR, the remaining entries up to lenrul index p2 at posS.
//   partInlj2: posR = perm[i] * of1 - 1; for each of the cols entries of proj,
//              a value > 0 reads p1[posR + cond], otherwise p2[posS - cond - 1].
//   How posS and keyForSearch are computed is not visible here.
//
// multipartInlj / multipartInlj2
//   Same projection as above (multipartInlj2 uses posR = perm[i] * of1 and
//   p1[posR + cond - 1]), but first compares p1[posR + wj[y]] against
//   p2[posS + wj[y+1]] for y = 2, 4, ... while y < numj, and skips the
//   candidate (continue) on the first mismatch.
//
//   NOTE(review): the visible backward scan is "for(i=curIndex-1;i>0;i--)",
//   so R[0] is never examined by that loop - confirm whether i>=0 was meant.
//
// inlj_omp
//   Allocates startS/endS with new int[NUM_T], computes chunkSize = sLen/NUM_T
//   and calls one of the four helpers with &res[j] for chunk j: the multipart
//   variant when numj > 2, otherwise the part variant with wj[1]. The "2"
//   variants receive cols; the others receive projp.x and projp.y. The
//   condition selecting between the two families is not visible (presumably
//   tipo - confirm).
//   NOTE(review): startS/endS are released with "delete" although they were
//   allocated with "new[]"; this should be "delete []".
//
// joincpu
//   1. R side: if nsel1 > 0 || nsj1 > 0, selectproyectcpu2() produces Rres and
//      permutation; otherwise both are malloc'ed with Rnl = rLen entries,
//      permutation[x] = x and Rres[x] = p1[of1 * x + wherej[0]].
//      sel1/sjoin1 are only assigned when bothops is set; nsel1/nsj1 default
//      to 0, so the selection branch is not taken otherwise.
//   2. S side: if nsel2 > 0 || nsj2 > 0, selectproyectcpu2() produces Sres,
//      else Sres stays NULL and Snl = sLen.
//   3. Rres is sorted together with permutation by
//      thrust::stable_sort_by_key(thrust::omp::par, ...).
//   4. A CC_CSSTree is built over Rres (CSS_TREE_FANOUT), tipo is set to 1 when
//      pos == rule->num_rows - 3, and inlj_omp() fills NUM_T result vectors.
//   5. The vectors are concatenated with memcpy into a malloc'ed buffer fres;
//      a previous non-NULL *ret is freed, *ret is set to fres, and the function
//      returns size / projp.y (size = total number of ints produced).
//   With TIMER defined, cudaEvent timings are added to cuda_stats.select1_time,
//   select2_time, sort_time and join_time.
//   NOTE(review): no cudaEventDestroy is visible for the last start/stop pair
//   after join_time is updated, and the malloc results are used unchecked.
#include "CC_CSSTree.h" #include #include #include #include "pred.h" void partInlj(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int wj, int halfrul, int lenrul) { //set_thread_affinity(cpuid,NUM_T); int i=0; int k=0; int curIndex=0; int keyForSearch; int y, posS, posR; for(k=startS; ksearch(keyForSearch); for(i=curIndex-1;i>0;i--) { if(keyForSearch == R[i]) { //cout << keyForSearch << endl; posR = perm[i] * of1; for(y = 0; y < halfrul; y++) res->push_back(p1[posR + proj[y]]); for(; y < lenrul; y++) res->push_back(p2[posS + proj[y]]); } else if(R[i]push_back(p1[posR + proj[y]]); for(; y < lenrul; y++) res->push_back(p2[posS + proj[y]]); } else if(R[i]>keyForSearch) break; } } } void partInlj2(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int cols, int wj) { //set_thread_affinity(cpuid,NUM_T); int i=0; int k=0; int curIndex=0; int keyForSearch; int y, cond, posS, posR; for(k=startS; ksearch(keyForSearch); for(i=curIndex-1;i>0;i--) { if(keyForSearch == R[i]) { //cout << keyForSearch << endl; posR = perm[i] * of1 - 1; for(y = 0; y < cols; y++) { cond = proj[y]; if(cond > 0) res->push_back(p1[posR + cond]); else res->push_back(p2[posS - cond - 1]); } } else if(R[i] 0) res->push_back(p1[posR + cond]); else res->push_back(p2[posS - cond - 1]); } } else if(R[i]>keyForSearch) break; } } } void multipartInlj(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int *wj, int numj, int halfrul, int lenrul) { //set_thread_affinity(cpuid,NUM_T); int i=0; int k=0; int curIndex=0; int keyForSearch; int y, posS, posR; for(k=startS; ksearch(keyForSearch); for(i=curIndex-1;i>0;i--) { if(keyForSearch == R[i]) { posR = perm[i] * of1; for(y = 2; y < numj; y += 2) { if(p1[posR + wj[y]] != p2[posS + wj[y+1]]) 
break; } if(y < numj) continue; for(y = 0; y < halfrul; y++) res->push_back(p1[posR + proj[y]]); for(; y < lenrul; y++) res->push_back(p2[posS + proj[y]]); } else if(R[i]push_back(p1[posR + proj[y]]); for(; y < lenrul; y++) res->push_back(p2[posS + proj[y]]); } else if(R[i]>keyForSearch) break; } } } void multipartInlj2(Record *R, int rLen, CC_CSSTree *tree, Record *S, int startS, int endS, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int cols, int *wj, int numj) { //set_thread_affinity(cpuid,NUM_T); int i=0; int k=0; int curIndex=0; int keyForSearch; int y, cond, posS, posR; for(k=startS; ksearch(keyForSearch); for(i=curIndex-1;i>0;i--) { if(keyForSearch == R[i]) { posR = perm[i] * of1; for(y = 2; y < numj; y += 2) { if(p1[posR + wj[y]] != p2[posS + wj[y+1]]) break; } if(y < numj) continue; for(y = 0; y < cols; y++) { cond = proj[y]; if(cond > 0) res->push_back(p1[posR + cond - 1]); else res->push_back(p2[posS - cond - 1]); } } else if(R[i] 0) res->push_back(p1[posR + cond - 1]); else res->push_back(p2[posS - cond - 1]); } } else if(R[i]>keyForSearch) break; } } } void inlj_omp(Record *R, int rLen, CC_CSSTree *tree, Record *S, int sLen, int of1, int of2, vector *res, int *p1, int *p2, int *perm, int *proj, int2 projp, int cols, int* wj, int numj, int tipo) { int i=0; int j=0; int *startS=new int[NUM_T]; int *endS=new int[NUM_T]; int chunkSize=sLen/NUM_T; for(i=0;i 2) multipartInlj2(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, cols, wj, numj); else partInlj2(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, cols, wj[1]); } else { if(numj > 2) multipartInlj(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, wj, numj, projp.x, projp.y); else partInlj(R, rLen, tree, S, startS[j], endS[j], of1, of2, &res[j], p1, p2, perm, proj, wj[1], projp.x, projp.y); } } //cout << "fin" << endl; delete startS; delete endS; } int joincpu(int *p1, int *p2, int rLen, int sLen, 
int of1, int of2, list::iterator rule, int pos, int bothops, int **ret) { int pos2 = pos + 1; int *sel1, nsel1 = 0; int *sel2 = rule->select[pos2]; int nsel2 = rule->numsel[pos2]; int *proj = rule->project[pos]; int2 projp = rule->projpos[pos]; int *sjoin1, nsj1 = 0; int *sjoin2 = rule->selfjoin[pos2]; int nsj2 = rule->numselfj[pos2]; int *wherej = rule->wherejoin[pos]; int numj = rule->numjoin[pos]; int size, *fres, ini[NUM_T], *temp; int x, tipo = 0; int *Sres = NULL, *Rres, Snl, Rnl, *permutation; if(bothops) { sel1 = rule->select[pos]; nsel1 = rule->numsel[pos]; sjoin1 = rule->selfjoin[pos]; nsj1 = rule->numselfj[pos]; } #ifdef TIMER cudaEvent_t start, stop; float time; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); #endif if(nsel1 > 0 || nsj1 > 0) Rnl = selectproyectcpu2(p1, rLen, of1, sel1, nsel1, sjoin1, nsj1, wherej[0], &Rres, &permutation); else { /*cout << "sin sel" << endl; cout << "valores = " << rLen << " " << of1 << " " << wherej[0] << endl; for(x = 0; x < 100; x++) cout << p1[x] << " "; cout << endl; cout << "ultimo = " << p1[of1 * rLen - 1] << endl;*/ Rnl = rLen; size = Rnl * sizeof(int); permutation = (int *)malloc(size); Rres = (int *)malloc(size); #pragma omp parallel for firstprivate(of1) for(x = 0; x < Rnl; x++) { permutation[x] = x; Rres[x] = p1[of1 * x + wherej[0]]; } //cout << "sin sel fin" << endl; } #ifdef TIMER cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&time, start, stop); cuda_stats.select1_time += time; cudaEventDestroy(start); cudaEventDestroy(stop); cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); #endif if(nsel2 > 0 || nsj2 > 0) { //cout << "con sel S" << endl; Snl = selectproyectcpu2(p2, sLen, of2, sel2, nsel2, sjoin2, nsj2, wherej[1], &Sres, NULL); } else Snl = sLen; #ifdef TIMER cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&time, start, stop); cuda_stats.select2_time += time; cudaEventDestroy(start); 
cudaEventDestroy(stop); cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); #endif //cout << "antes" << endl; /*cout << "antes" << endl; for(x = 0; x < Rnl; x++) cout << permutation[x] << " "; cout << endl; for(x = 0; x < 100; x++) cout << Rres[x] << " "; cout << endl;*/ thrust::stable_sort_by_key(thrust::omp::par, Rres, Rres + Rnl, permutation); #ifdef TIMER cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&time, start, stop); cuda_stats.sort_time += time; cudaEventDestroy(start); cudaEventDestroy(stop); cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); #endif /*cout << "despues" << endl; for(x = 0; x < Rnl; x++) cout << permutation[x] << " "; cout << endl; for(x = 0; x < Rnl; x++) cout << Rres[x] << " "; cout << endl;*/ //cout << "despues sort" << endl; vector *res = new vector[NUM_T]; for(x = 0; x < NUM_T; x++) res[x].reserve(INISIZE); CC_CSSTree *tree = new CC_CSSTree(Rres, Rnl, CSS_TREE_FANOUT); if(pos == (rule->num_rows - 3)) // && rule->num_bpreds.x == 0) tipo = 1; inlj_omp(Rres, Rnl, tree, Sres, Snl, of1, of2, res, p1, p2, permutation, proj, projp, rule->num_columns, wherej, numj, tipo); /*cout << "proj = "; for(x = 0; x < rule->num_columns; x++) cout << proj[x] << " "; cout << endl; int y,z; for(x = 0; x < NUM_T; x++) { cout << "Thread " << x << endl; for(y = 0; y < res[x].size() / projp.y; y++) { for(z = 0; z < projp.y; z++) cout << res[x][y * projp.y + z] << " "; cout << endl; } } cout << "Tamanios" << endl;*/ size = 0; for(x = 0; x < NUM_T; x++) { ini[x] = res[x].size(); size += ini[x]; //cout << ini[x] << " " << size << endl; } fres = (int *)malloc(size * sizeof(int)); temp = fres; for(x = 0; x < NUM_T; x++) { memcpy(temp, res[x].data(), ini[x] * sizeof(int)); temp += ini[x]; } if(*ret != NULL) free(*ret); free(Rres); free(permutation); if(Sres != NULL) free(Sres); delete tree; delete [] res; *ret = fres; #ifdef TIMER cudaEventRecord(stop, 0); cudaEventSynchronize(stop); 
cudaEventElapsedTime(&time, start, stop); cuda_stats.join_time += time; #endif //cout << "Projp.x = " << projp.x << " projp.y = " << projp.y << endl; /*if(numj > 2) { cout << "total = " << rLen << " " << size / projp.y << " " << projp.y << " " << rule->num_columns << endl; exit(1); }*/ return size / projp.y; }