yap-6.3/packages/cuda/selectproyectcpu.cpp

#include <stdlib.h>
#include <string.h>
#include <omp.h>
#include <vector>
#include "pred.h"

using namespace std;

/*
 * CPU (OpenMP) selection + projection over a table of ints stored row by
 * row (row x starts at dop1[x * cols]).
 *
 * dop1      : input table, rows * cols ints.
 * rows,cols : table dimensions.
 * head_size : number of ints written per result row (length of project).
 * select    : numselect ints read as (column, value) pairs; a row passes
 *             when the given column equals the given value for every pair.
 * selfjoin  : numselfj ints forming groups of column indices, each group
 *             closed by -1; a row passes when all columns of every group
 *             hold the same value.
 * project   : head_size column indices copied into each result row.
 * ret       : receives a malloc'ed buffer holding the result rows, packed
 *             head_size ints per row. It is not freed here.
 *
 * Returns the number of result rows. When neither select nor selfjoin
 * filters are given, every input row is projected and rows is returned.
 *
 * Preconditions visible from the code: head_size must be non-zero when
 * filters are present (the row count is size / head_size), and the malloc
 * results are not checked.
 */
int selectproyectcpu(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
{
	int size = 0;
	int i, x, y;
	int *fres, *ptr;
	int div, ini[NUM_T + 1];
	std::vector<int> vec[NUM_T];

	if(numselect <= 0 && numselfj <= 0)
	{
		/* Pure projection. Result rows are packed with stride head_size,
		   the same layout the filtered paths below produce (the buffer
		   used to be indexed with stride cols, which left gaps when
		   head_size < cols and overflowed when head_size > cols). */
		fres = (int *)malloc((size_t)rows * head_size * sizeof(int));
		#pragma omp parallel for private(y)
		for(x = 0; x < rows; x++)
		{
			const int *src = dop1 + (size_t)x * cols;
			int *dst = fres + (size_t)x * head_size;
			for(y = 0; y < head_size; y++)
				dst[y] = src[project[y]];
		}
		*ret = fres;
		return rows;
	}

	for(x = 0; x < NUM_T; x++)
		vec[x].reserve(INISIZE);

	/* Split the rows into NUM_T contiguous chunks; the last chunk also
	   takes the remainder of rows / NUM_T. */
	//omp_set_num_threads(NUM_T);
	div = rows / NUM_T;
	ini[0] = 0;
	for(x = 1; x < NUM_T; x++)
		ini[x] = div * x;
	ini[NUM_T] = rows;

	/* Each iteration i owns chunk i and appends only to vec[i], so the
	   per-chunk vectors need no locking. */
	#pragma omp parallel for private(x,y)
	for(i = 0; i < NUM_T; i++)
	{
		const int fin = ini[i+1];
		for(x = ini[i]; x < fin; x++)
		{
			const int *row = dop1 + (size_t)x * cols;
			int z;

			/* Constant selections: only applied when numselect > 0. */
			if(numselect > 0)
			{
				for(y = 0; y < numselect; y += 2)
				{
					if(row[select[y]] != select[y+1])
						break;
				}
				if(y != numselect)
					continue;	/* row rejected, skip the self-join test */
			}

			/* Self joins: walk each -1 terminated group and compare every
			   member against the first column of the group. */
			for(z = 0; z < numselfj; z++)
			{
				const int temp = row[selfjoin[z]];
				int w = z + 1;
				while(selfjoin[w] > -1 && row[selfjoin[w]] == temp)
					w++;
				z = w;
				if(selfjoin[w] != -1)
					break;	/* stopped before the terminator: mismatch */
			}
			if(z != numselfj)
				continue;

			for(y = 0; y < head_size; y++)
				vec[i].push_back(row[project[y]]);
		}
	}

	/* Concatenate the per-chunk results in chunk order. ini[] is reused
	   to hold the number of ints produced by each chunk. */
	for(x = 0; x < NUM_T; x++)
	{
		ini[x] = vec[x].size();
		size += ini[x];
	}
	fres = (int *)malloc((size_t)size * sizeof(int));
	ptr = fres;
	for(x = 0; x < NUM_T; x++)
	{
		if(ini[x] > 0)	/* never hand memcpy an empty vector / malloc(0) result */
			memcpy(ptr, vec[x].data(), (size_t)ini[x] * sizeof(int));
		ptr += ini[x];
	}
	*ret = fres;
	return size / head_size;
}

/*
 * CPU (OpenMP) selection that reports which rows of a table pass the
 * filters, using fixed-size per-chunk scratch buffers.
 *
 * dop1      : input table, rows * cols ints, row x starting at dop1[x * cols].
 * select    : numselect ints read as (column, value) pairs; a row passes
 *             when the given column equals the given value for every pair.
 * selfjoin  : numselfj ints forming groups of column indices, each group
 *             closed by -1; a row passes when all columns of every group
 *             hold the same value.
 * wj        : column whose value is collected for every passing row
 *             (only read when list != NULL).
 * ret, list : when list != NULL, *ret receives the wj values and *list the
 *             indices of the passing rows; when list == NULL, *ret receives
 *             the row indices. Both buffers are malloc'ed and not freed here.
 *
 * Returns the number of passing rows. When neither select nor selfjoin
 * filters are given no row is examined and 0 is returned.
 * The malloc results are not checked.
 */
int selectproyectcpu2(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list)
{
	int size = 0;
	int i, x, y;
	int *fres, *lres, *ptr;
	int div, chunk, ini[NUM_T + 1];
	int *vec[NUM_T];
	int *lis[NUM_T];
	int cont[NUM_T];

	/* Split the rows into NUM_T contiguous chunks; the last chunk also
	   takes the remainder of rows / NUM_T. */
	//omp_set_num_threads(NUM_T);
	div = rows / NUM_T;
	ini[0] = 0;
	for(x = 1; x < NUM_T; x++)
		ini[x] = div * x;
	ini[NUM_T] = rows;

	/* The largest chunk holds div + rows % NUM_T rows, which is below
	   div + NUM_T, so one buffer of that size per chunk always fits. */
	chunk = div + NUM_T;
	for(x = 0; x < NUM_T; x++)
	{
		vec[x] = (int *)malloc((size_t)chunk * sizeof(int));
		lis[x] = (int *)malloc((size_t)chunk * sizeof(int));
		cont[x] = 0;
	}

	if(numselect > 0 || numselfj > 0)
	{
		/* cont[] must stay shared: the counts are read after the region.
		   (A firstprivate(cont) clause on the self-join-only loop used to
		   discard them, making that path always report 0 rows.) */
		#pragma omp parallel for private(x,y)
		for(i = 0; i < NUM_T; i++)
		{
			const int fin = ini[i+1];
			int n = 0;	/* rows kept so far in this chunk */
			for(x = ini[i]; x < fin; x++)
			{
				const int *row = dop1 + (size_t)x * cols;
				int z;

				/* Constant selections: only applied when numselect > 0. */
				if(numselect > 0)
				{
					for(y = 0; y < numselect; y += 2)
					{
						if(row[select[y]] != select[y+1])
							break;
					}
					if(y != numselect)
						continue;	/* row rejected, skip the self-join test */
				}

				/* Self joins: walk each -1 terminated group and compare
				   every member against the first column of the group. */
				for(z = 0; z < numselfj; z++)
				{
					const int temp = row[selfjoin[z]];
					int w = z + 1;
					while(selfjoin[w] > -1 && row[selfjoin[w]] == temp)
						w++;
					z = w;
					if(selfjoin[w] != -1)
						break;	/* stopped before the terminator: mismatch */
				}
				if(z != numselfj)
					continue;

				lis[i][n] = x;
				if(list != NULL)
					vec[i][n] = row[wj];
				n++;
			}
			cont[i] = n;	/* publish the chunk's count to the shared array */
		}
	}

	/* Concatenate the per-chunk results in chunk order. */
	for(x = 0; x < NUM_T; x++)
		size += cont[x];
	lres = (int *)malloc((size_t)size * sizeof(int));
	ptr = lres;
	for(x = 0; x < NUM_T; x++)
	{
		if(cont[x] > 0)	/* never hand memcpy a malloc(0) result */
			memcpy(ptr, lis[x], (size_t)cont[x] * sizeof(int));
		ptr += cont[x];
	}
	if(list != NULL)
	{
		fres = (int *)malloc((size_t)size * sizeof(int));
		ptr = fres;
		for(x = 0; x < NUM_T; x++)
		{
			if(cont[x] > 0)
				memcpy(ptr, vec[x], (size_t)cont[x] * sizeof(int));
			ptr += cont[x];
		}
		*ret = fres;
		*list = lres;
	}
	else
		*ret = lres;
	for(x = 0; x < NUM_T; x++)
	{
		free(lis[x]);
		free(vec[x]);
	}
	return size;
}

int selectproyectcpu3(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list)
{
	int size = 0, pos, temp;
	int i, x, y, z, w;
	int *fres, *lres, *ptr;
	int div, fin, ini[NUM_T + 1];
	vector<int> vec[NUM_T];
	vector<int> lis[NUM_T];

	for(x = 0; x < NUM_T; x++)
	{
		vec[x].reserve(INISIZE);
		lis[x].reserve(INISIZE);
	}
	
	//omp_set_num_threads(NUM_T);
	div = rows / NUM_T;
	ini[0] = 0;
	for(x = 1; x < NUM_T; x++)
		ini[x] = div * x;
	ini[NUM_T] = rows;

	/*cout << "numselect = " << numselect << endl;
	for(x = 0; x < numselect; x++)
		cout << select[x] << " ";
	cout << endl;*/

	if(numselect > 0)
	{	
		#pragma omp parallel for private(x,pos,y,z,fin,temp,w)
		for(i = 0; i < NUM_T; i++)
		{
			fin = ini[i+1];
			for(x = ini[i]; x < fin; x++)
			{
				pos = x * cols;
				for(y = 0; y < numselect; y += 2)
				{
					if(dop1[pos+select[y]] != select[y+1])
						break;
				}
				for(z = 0; z < numselfj; z++)
				{
					temp = dop1[pos+selfjoin[z]];
					w = z + 1;
					while(selfjoin[w] > -1)
					{
						if(temp != dop1[pos+selfjoin[w]])
							break;
						w++;
					}
					z = w;
					if(selfjoin[w] != -1)
						break;
				}
				if(y == numselect && z == numselfj)
				{
					lis[i].push_back(x);
					if(list != NULL)
						vec[i].push_back(dop1[pos+wj]);
				}
			}
		}
	}
	else
	{
		if(numselfj > 0)
		{
			#pragma omp parallel for private(x,pos,y,z,fin,w,temp)
			for(i = 0; i < NUM_T; i++)
			{
				fin = ini[i+1];
				for(x = ini[i]; x < fin; x++)
				{
					pos = x * cols;
					for(z = 0; z < numselfj; z++)
					{
						temp = dop1[pos+selfjoin[z]];
						w = z + 1;
						while(selfjoin[w] > -1)
						{
							if(temp != dop1[pos+selfjoin[w]])
								break;
							w++;
						}
						z = w;
						if(selfjoin[w] != -1)
							break;
					}
					if(z == numselfj)
					{
						lis[i].push_back(x);
						if(list != NULL)
							vec[i].push_back(dop1[pos+wj]);
					}
				}
			}
		}
	}

	//cout << "despues sel" << endl;

	for(x = 0; x < NUM_T; x++)
	{
		ini[x] = lis[x].size();
		size += ini[x];
	}
	lres = (int *)malloc(size * sizeof(int));
	ptr = lres;
	for(x = 0; x < NUM_T; x++)
	{
		memcpy(ptr, lis[x].data(), ini[x] * sizeof(int));
		ptr += ini[x];
	}
	if(list != NULL)
	{
		fres = (int *)malloc(size * sizeof(int));
		ptr = fres;
		for(x = 0; x < NUM_T; x++)
		{
			memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));
			ptr += ini[x];
		}
		*ret = fres;	
		*list = lres;
	}
	else
		*ret = lres;
	return size;
}
CUDA: the missing files 2016-04-22 18:21:05 +01:00
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <omp.h>`
			`#include <vector>`
			`#include "pred.h"`

			`using namespace std;`

			`int selectproyectcpu(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)`
			`{`
			`int size = 0, pos, temp;`
			`int i, x, y, z, w;`
			`int *fres, *ptr;`
			`int div, fin, ini[NUM_T + 1];`
			`vector<int> vec[NUM_T];`

			`for(x = 0; x < NUM_T; x++)`
			`vec[x].reserve(INISIZE);`

			`//omp_set_num_threads(NUM_T);`
			`div = rows / NUM_T;`
			`ini[0] = 0;`
			`for(x = 1; x < NUM_T; x++)`
			`ini[x] = div * x;`
			`ini[NUM_T] = rows;`

			`if(numselect > 0)`
			`{`
			`#pragma omp parallel for private(x,pos,y,z,fin,temp,w)`
			`for(i = 0; i < NUM_T; i++)`
			`{`
			`fin = ini[i+1];`
			`for(x = ini[i]; x < fin; x++)`
			`{`
			`pos = x * cols;`
			`for(y = 0; y < numselect; y += 2)`
			`{`
			`if(dop1[pos+select[y]] != select[y+1])`
			`break;`
			`}`
			`for(z = 0; z < numselfj; z++)`
			`{`
			`temp = dop1[pos+selfjoin[z]];`
			`w = z + 1;`
			`while(selfjoin[w] > -1)`
			`{`
			`if(temp != dop1[pos+selfjoin[w]])`
			`break;`
			`w++;`
			`}`
			`z = w;`
			`if(selfjoin[w] != -1)`
			`break;`
			`}`
			`if(y == numselect && z == numselfj)`
			`{`
			`for(y = 0; y < head_size; y++)`
			`vec[i].push_back(dop1[pos+project[y]]);`
			`}`
			`}`
			`}`
			`}`
			`else`
			`{`
			`if(numselfj > 0)`
			`{`
			`#pragma omp parallel for private(x,pos,y,z,fin,w,temp)`
			`for(i = 0; i < NUM_T; i++)`
			`{`
			`fin = ini[i+1];`
			`for(x = ini[i]; x < fin; x++)`
			`{`
			`pos = x * cols;`
			`for(z = 0; z < numselfj; z++)`
			`{`
			`temp = dop1[pos+selfjoin[z]];`
			`w = z + 1;`
			`while(selfjoin[w] > -1)`
			`{`
			`if(temp != dop1[pos+selfjoin[w]])`
			`break;`
			`w++;`
			`}`
			`z = w;`
			`if(selfjoin[w] != -1)`
			`break;`
			`}`
			`if(z == numselfj)`
			`{`
			`for(y = 0; y < head_size; y++)`
			`vec[i].push_back(dop1[pos+project[y]]);`
			`}`
			`}`
			`}`
			`}`
			`else`
			`{`
			`fres = (int *)malloc(rows * cols * sizeof(int));`
			`#pragma omp parallel for private(pos,y,z)`
			`for(x = 0; x < rows; x++)`
			`{`
			`pos = x * cols;`
			`z = pos;`
			`for(y = 0; y < head_size; y++, z++)`
			`fres[z] = dop1[pos+project[y]];`
			`}`
			`*ret = fres;`
			`return rows;`
			`}`
			`}`
			`for(x = 0; x < NUM_T; x++)`
			`{`
			`ini[x] = vec[x].size();`
			`size += ini[x];`
			`}`
			`fres = (int *)malloc(size * sizeof(int));`
			`ptr = fres;`
			`for(x = 0; x < NUM_T; x++)`
			`{`
			`memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));`
			`ptr += ini[x];`
			`}`
			`*ret = fres;`
			`return size / head_size;`
			`}`

			`int selectproyectcpu2(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list)`
			`{`
			`int size = 0, pos, temp;`
			`int i, x, y, z, w;`
			`int *fres, *lres, *ptr;`
			`int div, fin, ini[NUM_T + 1];`
			`int *vec[NUM_T];`
			`int *lis[NUM_T];`
			`int cont[NUM_T];`

			`//omp_set_num_threads(NUM_T);`
			`div = rows / NUM_T;`
			`ini[0] = 0;`
			`for(x = 1; x < NUM_T; x++)`
			`ini[x] = div * x;`
			`ini[NUM_T] = rows;`

			`pos = div + NUM_T;`
			`for(x = 0; x < NUM_T; x++)`
			`{`
			`vec[x] = (int *)malloc(pos * sizeof(int));`
			`lis[x] = (int *)malloc(pos * sizeof(int));`
			`cont[x] = 0;`
			`}`

			`/*cout << "numselect = " << numselect << endl;`
			`for(x = 0; x < numselect; x++)`
			`cout << select[x] << " ";`
			`cout << endl;*/`

			`if(numselect > 0)`
			`{`
			`#pragma omp parallel for private(x,pos,y,z,fin,temp,w)`
			`for(i = 0; i < NUM_T; i++)`
			`{`
			`fin = ini[i+1];`
			`for(x = ini[i]; x < fin; x++)`
			`{`
			`pos = x * cols;`
			`for(y = 0; y < numselect; y += 2)`
			`{`
			`if(dop1[pos+select[y]] != select[y+1])`
			`break;`
			`}`
			`for(z = 0; z < numselfj; z++)`
			`{`
			`temp = dop1[pos+selfjoin[z]];`
			`w = z + 1;`
			`while(selfjoin[w] > -1)`
			`{`
			`if(temp != dop1[pos+selfjoin[w]])`
			`break;`
			`w++;`
			`}`
			`z = w;`
			`if(selfjoin[w] != -1)`
			`break;`
			`}`
			`if(y == numselect && z == numselfj)`
			`{`
			`lis[i][cont[i]] = x;`
			`if(list != NULL)`
			`vec[i][cont[i]] = dop1[pos+wj];`
			`cont[i]++;`
			`}`
			`}`
			`}`
			`}`
			`else`
			`{`
			`if(numselfj > 0)`
			`{`
			`#pragma omp parallel for private(x,pos,y,z,fin,w,temp) firstprivate(cont)`
			`for(i = 0; i < NUM_T; i++)`
			`{`
			`fin = ini[i+1];`
			`for(x = ini[i]; x < fin; x++)`
			`{`
			`pos = x * cols;`
			`for(z = 0; z < numselfj; z++)`
			`{`
			`temp = dop1[pos+selfjoin[z]];`
			`w = z + 1;`
			`while(selfjoin[w] > -1)`
			`{`
			`if(temp != dop1[pos+selfjoin[w]])`
			`break;`
			`w++;`
			`}`
			`z = w;`
			`if(selfjoin[w] != -1)`
			`break;`
			`}`
			`if(z == numselfj)`
			`{`
			`lis[i][cont[i]] = x;`
			`if(list != NULL)`
			`vec[i][cont[i]] = dop1[pos+wj];`
			`cont[i]++;`
			`}`
			`}`
			`}`
			`}`
			`}`

			`//cout << "despues sel" << endl;`

			`for(x = 0; x < NUM_T; x++)`
			`size += cont[x];`
			`lres = (int *)malloc(size * sizeof(int));`
			`ptr = lres;`
			`for(x = 0; x < NUM_T; x++)`
			`{`
			`memcpy(ptr, lis[x], cont[x] * sizeof(int));`
			`ptr += cont[x];`
			`}`
			`if(list != NULL)`
			`{`
			`fres = (int *)malloc(size * sizeof(int));`
			`ptr = fres;`
			`for(x = 0; x < NUM_T; x++)`
			`{`
			`memcpy(ptr, vec[x], cont[x] * sizeof(int));`
			`ptr += cont[x];`
			`}`
			`*ret = fres;`
			`*list = lres;`
			`}`
			`else`
			`*ret = lres;`
			`for(x = 0; x < NUM_T; x++)`
			`{`
			`free(lis[x]);`
			`free(vec[x]);`
			`}`
			`return size;`
			`}`

			`int selectproyectcpu3(int *dop1, int rows, int cols, int *select, int numselect, int *selfjoin, int numselfj, int wj, int **ret, int **list)`
			`{`
			`int size = 0, pos, temp;`
			`int i, x, y, z, w;`
			`int *fres, *lres, *ptr;`
			`int div, fin, ini[NUM_T + 1];`
			`vector<int> vec[NUM_T];`
			`vector<int> lis[NUM_T];`

			`for(x = 0; x < NUM_T; x++)`
			`{`
			`vec[x].reserve(INISIZE);`
			`lis[x].reserve(INISIZE);`
			`}`

			`//omp_set_num_threads(NUM_T);`
			`div = rows / NUM_T;`
			`ini[0] = 0;`
			`for(x = 1; x < NUM_T; x++)`
			`ini[x] = div * x;`
			`ini[NUM_T] = rows;`

			`/*cout << "numselect = " << numselect << endl;`
			`for(x = 0; x < numselect; x++)`
			`cout << select[x] << " ";`
			`cout << endl;*/`

			`if(numselect > 0)`
			`{`
			`#pragma omp parallel for private(x,pos,y,z,fin,temp,w)`
			`for(i = 0; i < NUM_T; i++)`
			`{`
			`fin = ini[i+1];`
			`for(x = ini[i]; x < fin; x++)`
			`{`
			`pos = x * cols;`
			`for(y = 0; y < numselect; y += 2)`
			`{`
			`if(dop1[pos+select[y]] != select[y+1])`
			`break;`
			`}`
			`for(z = 0; z < numselfj; z++)`
			`{`
			`temp = dop1[pos+selfjoin[z]];`
			`w = z + 1;`
			`while(selfjoin[w] > -1)`
			`{`
			`if(temp != dop1[pos+selfjoin[w]])`
			`break;`
			`w++;`
			`}`
			`z = w;`
			`if(selfjoin[w] != -1)`
			`break;`
			`}`
			`if(y == numselect && z == numselfj)`
			`{`
			`lis[i].push_back(x);`
			`if(list != NULL)`
			`vec[i].push_back(dop1[pos+wj]);`
			`}`
			`}`
			`}`
			`}`
			`else`
			`{`
			`if(numselfj > 0)`
			`{`
			`#pragma omp parallel for private(x,pos,y,z,fin,w,temp)`
			`for(i = 0; i < NUM_T; i++)`
			`{`
			`fin = ini[i+1];`
			`for(x = ini[i]; x < fin; x++)`
			`{`
			`pos = x * cols;`
			`for(z = 0; z < numselfj; z++)`
			`{`
			`temp = dop1[pos+selfjoin[z]];`
			`w = z + 1;`
			`while(selfjoin[w] > -1)`
			`{`
			`if(temp != dop1[pos+selfjoin[w]])`
			`break;`
			`w++;`
			`}`
			`z = w;`
			`if(selfjoin[w] != -1)`
			`break;`
			`}`
			`if(z == numselfj)`
			`{`
			`lis[i].push_back(x);`
			`if(list != NULL)`
			`vec[i].push_back(dop1[pos+wj]);`
			`}`
			`}`
			`}`
			`}`
			`}`

			`//cout << "despues sel" << endl;`

			`for(x = 0; x < NUM_T; x++)`
			`{`
			`ini[x] = lis[x].size();`
			`size += ini[x];`
			`}`
			`lres = (int *)malloc(size * sizeof(int));`
			`ptr = lres;`
			`for(x = 0; x < NUM_T; x++)`
			`{`
			`memcpy(ptr, lis[x].data(), ini[x] * sizeof(int));`
			`ptr += ini[x];`
			`}`
			`if(list != NULL)`
			`{`
			`fres = (int *)malloc(size * sizeof(int));`
			`ptr = fres;`
			`for(x = 0; x < NUM_T; x++)`
			`{`
			`memcpy(ptr, vec[x].data(), ini[x] * sizeof(int));`
			`ptr += ini[x];`
			`}`
			`*ret = fres;`
			`*list = lres;`
			`}`
			`else`
			`*ret = lres;`
			`return size;`
			`}`