yap-6.3/packages/cuda/memory.cu

#include <list>
#include <iostream>
#include <stdlib.h>
#include <algorithm>
#include <thrust/device_vector.h>
#include "lista.h"
#include "memory.h"

#define MAX_REC 200
#define HALF_REC (MAX_REC / 2)
#define MAX_FIX_POINTS 100

unsigned int avmem;
memnode temp_storage[MAX_REC];
list<memnode> GPUmem;
list<memnode> CPUmem;

bool compareiteration(const memnode &r1, const memnode &r2)
{
	return (r1.iteration < r2.iteration); 
}

bool comparename(const memnode &r1, const memnode &r2)
{
	return (r1.name > r2.name); 
}

void calcular_mem(int dev)
{
	cudaDeviceProp p;
	cudaGetDeviceProperties(&p, dev);
	avmem = p.totalGlobalMem;
	temp_storage[0].dev_address = NULL;
	temp_storage[0].size = 0;
	temp_storage[HALF_REC].dev_address = NULL;
	temp_storage[HALF_REC].size = 0;

	//cout << "Initial memory available " << avmem << endl;
}

template<class InputIterator>
InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
{
	while(first!=last) 
	{
		if(first->name == name) return first;
			++first;
	}
	return last;
}

list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum)
{
	int x = 1, sum = 0;
	memnode temp;

	temp.name = name;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
	while(rec.first != rec.second)
	{		
		
		//cout << "itr = " << itr << " rec.first = " << rec.first->name << endl;	
		
		if(rec.first->name == name)
		{
			temp_storage[x] = *rec.first;
			rec.first = GPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}	
		else
			rec.first++;
	}
	//if(x > 1)
	rec.first = GPUmem.insert(rec.first, temp);
	*totalrows = sum;
	*gpunum = x;
	return rec.first;
}

int buscarpornombrecpu(int name, int itr, int *totalrows)
{
	int x = HALF_REC + 1, sum = 0;
	memnode temp;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);

	/*if(rec.first != rec.second)
		cout << "bscnomcpu = " << rec.first->name << " " << rec.first->iteration << endl;*/

	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
		{
			temp_storage[x] = *rec.first;
			rec.first = CPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}	
		else
			rec.first++;
	}
	*totalrows += sum;
	return x;
}

void limpiar(const char s[], size_t sz)
{
	list<memnode>::iterator ini;
	memnode temp;

	if(GPUmem.size() == 0)
	{
		cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl;
		exit(1);
	}		

	ini = GPUmem.begin();
	if(ini->isrule)
	{	
		temp = *ini;
		temp.dev_address = (int *)malloc(ini->size);
		cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
		CPUmem.push_back(temp);
	}
	liberar(ini->dev_address, ini->size);
	GPUmem.erase(ini);
}

void limpiartodo(int *p1, int *p2)
{
	list<memnode>::iterator ini;
	memnode temp;
	int cont = 0;
	if(p1 != NULL)
		cont++;	
	if(p2 != NULL)
		cont++;
	ini = GPUmem.begin();

	/*cout << "ANTES" << endl;
	mostrar_memoria();
	mostrar_memcpu();
	cout << "FIN ANTES" << endl;*/
	//cout << "mem = " << GPUmem.size() << " " << avmem << endl;

	while(GPUmem.size() > cont)
	{
		if(ini->dev_address == p1 || ini->dev_address == p2)
		{
			ini++;
			continue;
		}
		if(ini->isrule)
		{
			temp = *ini; 
			temp.dev_address = (int *)malloc(ini->size);
			cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
			CPUmem.push_back(temp);
		}
		liberar(ini->dev_address, temp.size);
		ini = GPUmem.erase(ini);
	}

	/*cout << "DESPUES" << endl;
	mostrar_memoria();
	mostrar_memcpu();
	cout << "FIN DESPUES" << endl;*/
	//cout << "memfinal = " << GPUmem.size() << " " << avmem << endl;

}

void liberar(int *ptr, int size)
{
	//cout << "L " << avmem << " " << size; 

	cudaFree(ptr);
#ifdef DEBUG_MEM
	cerr << "- " << ptr << " " << size << endl;
#endif
	avmem += size;
	
	//cout << " " << avmem << endl;
}

void reservar(int **ptr, int size)
{
  //size_t free, total;
  //cudaMemGetInfo(      &free, &total	 );
  //	cerr << "? " << free << " " << size << endl;

        if (size == 0) { 
                *ptr = NULL; 
                return;
        }
	while(avmem < size)
		limpiar("not enough memory", size);
	while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
		limpiar("Error in memory allocation", size);
	if (! *ptr ) {
	  size_t free, total;
	  cudaMemGetInfo(      &free, &total	 );
	  cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;
	  cerr << "Exiting CUDA...." << endl;
	  exit(1);
	}
	avmem -= size;

	// cout << " " << avmem << endl;
}

void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{
	memnode temp;
	temp.name = name;
	temp.dev_address = ptr;
	temp.rows = rows;
	temp.size = rows * num_columns * sizeof(int);
	temp.iteration = itr;
	temp.isrule = rule;
	GPUmem.push_back(temp);
}

template<class InputIterator>
void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
{
	i->dev_address = ptr;
	i->rows = rows;
	i->size = rows * num_columns * sizeof(int);
}

int numrows(int name, int itr)
{
	int sum = 0;
	memnode temp;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
			sum += rec.first->rows;
		rec.first++;
	}
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
			sum += rec.first->rows;
		rec.first++;
	}
	return sum;
}


	extern "C" void * YAP_IntToAtom(int);
	extern  "C" char * YAP_AtomName(void *);


int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int size, itrant;
	list<memnode>::iterator i;
	memnode fact;

	if(is_fact)
	{
		i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
		if(i != GPUmem.end())
		{
			fact = *i;
			GPUmem.erase(i);
			fact.iteration = itr;
			*ptr = fact.dev_address;
			GPUmem.push_back(fact);
			return fact.rows;
		}
		size = num_rows * num_columns * sizeof(int);
		reservar(&temp, size);
#ifdef DEBUG_MEM
		cerr << "+ " << temp << " temp  " << size << endl;
#endif
		cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
		registrar(name, num_columns, temp, num_rows, itr, 0);
		*ptr = temp;
		return num_rows;
	}
	if(itr > 0)
	{
		itrant = itr - 1;
		i = buscarpornombre(name, itrant, &totalrows, &numgpu);
		numcpu = buscarpornombrecpu(name, itrant, &totalrows);

		if((numgpu == 2) && (numcpu == (HALF_REC + 1)))
		{
			actualizar(num_columns, temp_storage[1].dev_address, temp_storage[1].rows, i);
			*ptr = temp_storage[1].dev_address;
			return temp_storage[1].rows;
		}
		size = totalrows * num_columns * sizeof(int);
		reservar(&temp, size);
#ifdef DEBUG_MEM
		cerr << "+ " << temp << " temp 2  " << size << endl;
#endif
		for(x = 1; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
			liberar(temp_storage[x].dev_address, temp_storage[x].size);
		}
		for(x = HALF_REC + 1; x < numcpu; x++)
		{
			cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
			free(temp_storage[x].dev_address);
		}
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}

int cargafinal(int name, int cols, int **ptr)
{
	int *temp, *ini, cont = 0;
	memnode bus;
	bus.name = name;
	GPUmem.sort(comparename);
	CPUmem.sort(comparename);
	list<memnode>::iterator endg = GPUmem.end();
	list<memnode>::iterator endc = CPUmem.end();
	list<memnode>::iterator pos = lower_bound(GPUmem.begin(), endg, bus, comparename);
	list<memnode>::iterator gpu = pos;
	while(pos != endg && pos->name == name)
	{
		cont += pos->rows;
		pos++;
	}
	pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
	list<memnode>::iterator cpu = pos;
	while(pos != endc && pos->name == name)
	{
		cont += pos->rows;
		pos++;
	}
	
	reservar(&temp, cont * cols * sizeof(int));
#ifdef DEBUG_MEM
	cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;
#endif
	ini = temp;	

	pos = gpu;
	while(pos != endg && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}
	pos = cpu;
	while(pos != endc && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}

	/*int x, y;
	int *hop1 = (int *)malloc(cont * cols * sizeof(int));
	cudaMemcpy(hop1, ini, cont * cols * sizeof(int), cudaMemcpyDeviceToHost);
	cout << "select finala" << endl;
	for(x = 0; x < cont; x++)
	{
		for(y = 0; y < cols; y++)
			cout << hop1[x * cols + y] << " ";
		cout << endl;
	}
	cout << "select finala" << endl;*/

	*ptr = ini;
	return cont;
}

bool generadas(int name, int filas, int cols, int itr)
{
	int r1, r2, x, fin;
	int *dop1, *dop2;

	r2 = numrows(name, itr);
	if(itr < MAX_FIX_POINTS)
		fin = itr;
	else
		fin = MAX_FIX_POINTS;
	for(x = 1; x <= fin; x++)
	{
		r1 = numrows(name, itr - x);
		if(r1 == r2)
		{
			r2 = cargar(name, filas, cols, 0, NULL, &dop2, itr + 1);
			thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
			r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
			thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);

			/*int y;
			int *a = (int *)malloc(r1 * cols * sizeof(int));
			cudaMemcpy(a, dop1, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
			for(x = 0; x < r1; x++)
			{
				for(y = 0; y < cols; y++)
					cout << a[x * cols + y] << " ";
			}
			cout << endl;
			cudaMemcpy(a, dop2, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
			for(x = 0; x < r1; x++)
			{
				for(y = 0; y < cols; y++)
					cout << a[x * cols + y] << " ";
			}
			cout << endl;
			free(a);*/

			if(thrust::equal(pt1, pt1 + r1, pt2) == true)
				return true;
		}
	}

	return false;
}

void mostrar_memoria()
{
	int x;
	list<memnode>::iterator i = GPUmem.begin();
	cout << "Memoria inicio GPU" << endl;
	for(x = 0; x < GPUmem.size(); x++, i++)
		cout << i->name << " " << i->iteration << " " << i->size << endl;
	cout << "Memoria fin GPU" << endl;
}

void mostrar_memcpu()
{
	int x;
	list<memnode>::iterator i = CPUmem.begin();
	cout << "Memoria inicio CPU" << endl;
	for(x = 0; x < CPUmem.size(); x++, i++)
		cout << i->name << " " << i->iteration << endl;
	cout << "Memoria fin CPU" << endl;
}

void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator last)
{
	GPUmem.sort(comparename);
	CPUmem.sort(comparename);
	list<memnode>::iterator gpu = GPUmem.begin();
	list<memnode>::iterator cpu = CPUmem.begin();
	int x, y, of, cols;
	int *temp, cont = 0;
	while(first != last)
	{
		while(first->name == gpu->name)
		{
			temp = (int *)malloc(gpu->size);
			cudaMemcpy(temp, gpu->dev_address, gpu->size, cudaMemcpyDeviceToHost);
			cols = gpu->size / (gpu->rows * sizeof(int));
			cont += gpu->rows;
			for(x = 0, of = 0; x < gpu->rows; x++)
			{
				for(y = 0; y < cols; y++, of++)
					cout << temp[of] << " ";
				cout << endl;
			}
			cudaFree(gpu->dev_address);
#ifdef DEBUG_MEM
			cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;
#endif
			free(temp);
			gpu++;
		}
		while(first->name == cpu->name)
		{
			cols = cpu->size / (cpu->rows * sizeof(int));
			cont += cpu->rows;
			for(x = 0, of = 0; x < cpu->rows; x++)
			{
				for(y = 0; y < cols; y++, of++)
					cout << cpu->dev_address[of] << " ";
				cout << endl;
			}
			free(cpu->dev_address);
			cpu++;
		}
		first++;
	}
	cout << cont << endl;
}

void clear_memory()
{
	list<memnode>::iterator ini;
	list<memnode>::iterator fin;
       	ini = GPUmem.begin();
	fin = GPUmem.end();
	while(ini != fin)
	{
	  if (ini->isrule) {
	    cudaFree(ini->dev_address);
#ifdef DEBUG_MEM
	    cerr << "- " << ini->dev_address << " ini->dev_address" << endl;
#endif
	    ini = GPUmem.erase(ini);
	  } else {
	    ini++;
	  }
	}
	ini = CPUmem.begin();
	fin = CPUmem.end();
	while(ini != fin)
	{
		free(ini->dev_address);
		ini++;
	}
	CPUmem.clear();
}
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`#include <list>`
			`#include <iostream>`
			`#include <stdlib.h>`
			`#include <algorithm>`
			`#include <thrust/device_vector.h>`
			`#include "lista.h"`
			`#include "memory.h"`

			`#define MAX_REC 200`
			`#define HALF_REC (MAX_REC / 2)`
			`#define MAX_FIX_POINTS 100`

			`unsigned int avmem;`
			`memnode temp_storage[MAX_REC];`
			`list<memnode> GPUmem;`
			`list<memnode> CPUmem;`

			`bool compareiteration(const memnode &r1, const memnode &r2)`
			`{`
			`return (r1.iteration < r2.iteration);`
			`}`

			`bool comparename(const memnode &r1, const memnode &r2)`
			`{`
			`return (r1.name > r2.name);`
			`}`

			`void calcular_mem(int dev)`
			`{`
			`cudaDeviceProp p;`
			`cudaGetDeviceProperties(&p, dev);`
			`avmem = p.totalGlobalMem;`
			`temp_storage[0].dev_address = NULL;`
			`temp_storage[0].size = 0;`
			`temp_storage[HALF_REC].dev_address = NULL;`
			`temp_storage[HALF_REC].size = 0;`

			`//cout << "Initial memory available " << avmem << endl;`
			`}`

			`template<class InputIterator>`
			`InputIterator buscarhecho(InputIterator first, InputIterator last, int name)`
			`{`
			`while(first!=last)`
			`{`
			`if(first->name == name) return first;`
			`++first;`
			`}`
			`return last;`
			`}`

			`list<memnode>::iterator buscarpornombre(int name, int itr, int totalrows, int gpunum)`
			`{`
			`int x = 1, sum = 0;`
			`memnode temp;`
more cuda checking 2013-10-12 01:11:21 +01:00
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`temp.name = name;`
			`temp.iteration = itr;`
			`pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);`
			`while(rec.first != rec.second)`
			`{`

			`//cout << "itr = " << itr << " rec.first = " << rec.first->name << endl;`

			`if(rec.first->name == name)`
			`{`
			`temp_storage[x] = *rec.first;`
			`rec.first = GPUmem.erase(rec.first);`
			`sum += temp_storage[x].rows;`
			`x++;`
			`}`
			`else`
			`rec.first++;`
			`}`
			`//if(x > 1)`
			`rec.first = GPUmem.insert(rec.first, temp);`
			`*totalrows = sum;`
			`*gpunum = x;`
			`return rec.first;`
			`}`

			`int buscarpornombrecpu(int name, int itr, int *totalrows)`
			`{`
			`int x = HALF_REC + 1, sum = 0;`
			`memnode temp;`
			`temp.iteration = itr;`
			`pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);`

			`/*if(rec.first != rec.second)`
			`cout << "bscnomcpu = " << rec.first->name << " " << rec.first->iteration << endl;*/`

			`while(rec.first != rec.second)`
			`{`
			`if(rec.first->name == name)`
			`{`
			`temp_storage[x] = *rec.first;`
			`rec.first = CPUmem.erase(rec.first);`
			`sum += temp_storage[x].rows;`
			`x++;`
			`}`
			`else`
			`rec.first++;`
			`}`
			`*totalrows += sum;`
			`return x;`
			`}`

debug memory allocations 2013-10-17 00:44:24 +01:00			`void limpiar(const char s[], size_t sz)`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`{`
			`list<memnode>::iterator ini;`
			`memnode temp;`

			`if(GPUmem.size() == 0)`
			`{`
debug memory allocations 2013-10-17 00:44:24 +01:00			`cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl;`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`exit(1);`
			`}`

			`ini = GPUmem.begin();`
			`if(ini->isrule)`
			`{`
			`temp = *ini;`
			`temp.dev_address = (int *)malloc(ini->size);`
			`cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);`
			`CPUmem.push_back(temp);`
			`}`
			`liberar(ini->dev_address, ini->size);`
			`GPUmem.erase(ini);`
			`}`

			`void limpiartodo(int p1, int p2)`
			`{`
			`list<memnode>::iterator ini;`
			`memnode temp;`
			`int cont = 0;`
			`if(p1 != NULL)`
			`cont++;`
			`if(p2 != NULL)`
			`cont++;`
			`ini = GPUmem.begin();`

			`/*cout << "ANTES" << endl;`
			`mostrar_memoria();`
			`mostrar_memcpu();`
			`cout << "FIN ANTES" << endl;*/`
			`//cout << "mem = " << GPUmem.size() << " " << avmem << endl;`

			`while(GPUmem.size() > cont)`
			`{`
			`if(ini->dev_address == p1 \|\| ini->dev_address == p2)`
			`{`
			`ini++;`
			`continue;`
			`}`
			`if(ini->isrule)`
			`{`
			`temp = *ini;`
			`temp.dev_address = (int *)malloc(ini->size);`
			`cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);`
			`CPUmem.push_back(temp);`
			`}`
			`liberar(ini->dev_address, temp.size);`
			`ini = GPUmem.erase(ini);`
			`}`

			`/*cout << "DESPUES" << endl;`
			`mostrar_memoria();`
			`mostrar_memcpu();`
			`cout << "FIN DESPUES" << endl;*/`
			`//cout << "memfinal = " << GPUmem.size() << " " << avmem << endl;`

			`}`

			`void liberar(int *ptr, int size)`
			`{`
			`//cout << "L " << avmem << " " << size;`

			`cudaFree(ptr);`
debug memory allocations 2013-10-17 00:44:24 +01:00			`#ifdef DEBUG_MEM`
			`cerr << "- " << ptr << " " << size << endl;`
			`#endif`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`avmem += size;`

			`//cout << " " << avmem << endl;`
			`}`

			`void reservar(int **ptr, int size)`
			`{`
more cuda checking 2013-10-12 01:11:21 +01:00			`//size_t free, total;`
			`//cudaMemGetInfo( &free, &total );`
eliminate leak and fix memory tracking 2013-10-16 17:33:49 +01:00			`// cerr << "? " << free << " " << size << endl;`
more cuda checking 2013-10-12 01:11:21 +01:00
cuda fixes by Carlos and other diverse (fact init, configure). 2013-10-12 12:46:01 +01:00			`if (size == 0) {`
			`*ptr = NULL;`
			`return;`
			`}`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`while(avmem < size)`
debug memory allocations 2013-10-17 00:44:24 +01:00			`limpiar("not enough memory", size);`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)`
debug memory allocations 2013-10-17 00:44:24 +01:00			`limpiar("Error in memory allocation", size);`
cuda fixes by Carlos and other diverse (fact init, configure). 2013-10-12 12:46:01 +01:00			`if (! *ptr ) {`
			`size_t free, total;`
			`cudaMemGetInfo( &free, &total );`
eliminate leak and fix memory tracking 2013-10-16 17:33:49 +01:00			`cerr << "Could not allocate " << size << " bytes, only " << free << " avaliable from total of " << total << " !!!" << endl;`
			`cerr << "Exiting CUDA...." << endl;`
cuda fixes by Carlos and other diverse (fact init, configure). 2013-10-12 12:46:01 +01:00			`exit(1);`
			`}`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`avmem -= size;`

more fixes; get coverage quickly 2013-10-09 11:23:45 +01:00			`// cout << " " << avmem << endl;`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`}`

			`void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)`
			`{`
			`memnode temp;`
			`temp.name = name;`
			`temp.dev_address = ptr;`
			`temp.rows = rows;`
			`temp.size = rows * num_columns * sizeof(int);`
			`temp.iteration = itr;`
			`temp.isrule = rule;`
			`GPUmem.push_back(temp);`
			`}`

			`template<class InputIterator>`
			`void actualizar(int num_columns, int *ptr, int rows, InputIterator i)`
			`{`
			`i->dev_address = ptr;`
			`i->rows = rows;`
			`i->size = rows * num_columns * sizeof(int);`
			`}`

			`int numrows(int name, int itr)`
			`{`
			`int sum = 0;`
			`memnode temp;`
			`temp.iteration = itr;`
			`pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);`
			`while(rec.first != rec.second)`
			`{`
			`if(rec.first->name == name)`
			`sum += rec.first->rows;`
			`rec.first++;`
			`}`
			`rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);`
			`while(rec.first != rec.second)`
			`{`
			`if(rec.first->name == name)`
			`sum += rec.first->rows;`
			`rec.first++;`
			`}`
			`return sum;`
			`}`

more cuda checking 2013-10-12 01:11:21 +01:00
			`extern "C" void * YAP_IntToAtom(int);`
			`extern "C" char * YAP_AtomName(void *);`


cuda package from Carlos 2013-10-07 12:20:00 +01:00			`int cargar(int name, int num_rows, int num_columns, int is_fact, int address_host_table, int *ptr, int itr)`
			`{`
			`int numgpu, numcpu, totalrows = 0;`
			`int *temp, x;`
			`int size, itrant;`
			`list<memnode>::iterator i;`
			`memnode fact;`
more cuda checking 2013-10-12 01:11:21 +01:00
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`if(is_fact)`
			`{`
			`i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);`
			`if(i != GPUmem.end())`
			`{`
			`fact = *i;`
			`GPUmem.erase(i);`
			`fact.iteration = itr;`
			`*ptr = fact.dev_address;`
			`GPUmem.push_back(fact);`
			`return fact.rows;`
			`}`
			`size = num_rows * num_columns * sizeof(int);`
			`reservar(&temp, size);`
debug memory allocations 2013-10-17 00:44:24 +01:00			`#ifdef DEBUG_MEM`
			`cerr << "+ " << temp << " temp " << size << endl;`
			`#endif`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);`
			`registrar(name, num_columns, temp, num_rows, itr, 0);`
			`*ptr = temp;`
			`return num_rows;`
			`}`
			`if(itr > 0)`
			`{`
			`itrant = itr - 1;`
			`i = buscarpornombre(name, itrant, &totalrows, &numgpu);`
			`numcpu = buscarpornombrecpu(name, itrant, &totalrows);`

			`if((numgpu == 2) && (numcpu == (HALF_REC + 1)))`
			`{`
			`actualizar(num_columns, temp_storage[1].dev_address, temp_storage[1].rows, i);`
			`*ptr = temp_storage[1].dev_address;`
			`return temp_storage[1].rows;`
			`}`
			`size = totalrows * num_columns * sizeof(int);`
			`reservar(&temp, size);`
debug memory allocations 2013-10-17 00:44:24 +01:00			`#ifdef DEBUG_MEM`
			`cerr << "+ " << temp << " temp 2 " << size << endl;`
			`#endif`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`for(x = 1; x < numgpu; x++)`
			`{`
			`cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);`
			`liberar(temp_storage[x].dev_address, temp_storage[x].size);`
			`}`
			`for(x = HALF_REC + 1; x < numcpu; x++)`
			`{`
			`cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);`
			`free(temp_storage[x].dev_address);`
			`}`
			`actualizar(num_columns, temp, totalrows, i);`
			`*ptr = temp;`
			`return totalrows;`
			`}`
			`return 0;`
			`}`

			`int cargafinal(int name, int cols, int **ptr)`
			`{`
			`int temp, ini, cont = 0;`
			`memnode bus;`
			`bus.name = name;`
			`GPUmem.sort(comparename);`
			`CPUmem.sort(comparename);`
			`list<memnode>::iterator endg = GPUmem.end();`
			`list<memnode>::iterator endc = CPUmem.end();`
			`list<memnode>::iterator pos = lower_bound(GPUmem.begin(), endg, bus, comparename);`
			`list<memnode>::iterator gpu = pos;`
			`while(pos != endg && pos->name == name)`
			`{`
			`cont += pos->rows;`
			`pos++;`
			`}`
			`pos = lower_bound(CPUmem.begin(), endc, bus, comparename);`
			`list<memnode>::iterator cpu = pos;`
			`while(pos != endc && pos->name == name)`
			`{`
			`cont += pos->rows;`
			`pos++;`
			`}`

			`reservar(&temp, cont * cols * sizeof(int));`
debug memory allocations 2013-10-17 00:44:24 +01:00			`#ifdef DEBUG_MEM`
			`cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;`
			`#endif`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`ini = temp;`

			`pos = gpu;`
			`while(pos != endg && pos->name == name)`
			`{`
			`cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);`
			`temp += pos->size / sizeof(int);`
			`pos++;`
			`}`
			`pos = cpu;`
			`while(pos != endc && pos->name == name)`
			`{`
			`cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);`
			`temp += pos->size / sizeof(int);`
			`pos++;`
			`}`

			`/*int x, y;`
			`int hop1 = (int )malloc(cont * cols * sizeof(int));`
			`cudaMemcpy(hop1, ini, cont * cols * sizeof(int), cudaMemcpyDeviceToHost);`
			`cout << "select finala" << endl;`
			`for(x = 0; x < cont; x++)`
			`{`
			`for(y = 0; y < cols; y++)`
			`cout << hop1[x * cols + y] << " ";`
			`cout << endl;`
			`}`
			`cout << "select finala" << endl;*/`

			`*ptr = ini;`
			`return cont;`
			`}`

			`bool generadas(int name, int filas, int cols, int itr)`
			`{`
			`int r1, r2, x, fin;`
			`int dop1, dop2;`

			`r2 = numrows(name, itr);`
			`if(itr < MAX_FIX_POINTS)`
			`fin = itr;`
			`else`
			`fin = MAX_FIX_POINTS;`
			`for(x = 1; x <= fin; x++)`
			`{`
			`r1 = numrows(name, itr - x);`
			`if(r1 == r2)`
			`{`
			`r2 = cargar(name, filas, cols, 0, NULL, &dop2, itr + 1);`
			`thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);`
			`r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);`
			`thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);`

			`/*int y;`
			`int a = (int )malloc(r1 * cols * sizeof(int));`
			`cudaMemcpy(a, dop1, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);`
			`for(x = 0; x < r1; x++)`
			`{`
			`for(y = 0; y < cols; y++)`
			`cout << a[x * cols + y] << " ";`
			`}`
			`cout << endl;`
			`cudaMemcpy(a, dop2, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);`
			`for(x = 0; x < r1; x++)`
			`{`
			`for(y = 0; y < cols; y++)`
			`cout << a[x * cols + y] << " ";`
			`}`
			`cout << endl;`
			`free(a);*/`

			`if(thrust::equal(pt1, pt1 + r1, pt2) == true)`
			`return true;`
			`}`
			`}`

			`return false;`
			`}`

			`void mostrar_memoria()`
			`{`
			`int x;`
			`list<memnode>::iterator i = GPUmem.begin();`
			`cout << "Memoria inicio GPU" << endl;`
			`for(x = 0; x < GPUmem.size(); x++, i++)`
			`cout << i->name << " " << i->iteration << " " << i->size << endl;`
			`cout << "Memoria fin GPU" << endl;`
			`}`

			`void mostrar_memcpu()`
			`{`
			`int x;`
			`list<memnode>::iterator i = CPUmem.begin();`
			`cout << "Memoria inicio CPU" << endl;`
			`for(x = 0; x < CPUmem.size(); x++, i++)`
			`cout << i->name << " " << i->iteration << endl;`
			`cout << "Memoria fin CPU" << endl;`
			`}`

			`void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator last)`
			`{`
			`GPUmem.sort(comparename);`
			`CPUmem.sort(comparename);`
			`list<memnode>::iterator gpu = GPUmem.begin();`
			`list<memnode>::iterator cpu = CPUmem.begin();`
			`int x, y, of, cols;`
			`int *temp, cont = 0;`
			`while(first != last)`
			`{`
			`while(first->name == gpu->name)`
			`{`
			`temp = (int *)malloc(gpu->size);`
			`cudaMemcpy(temp, gpu->dev_address, gpu->size, cudaMemcpyDeviceToHost);`
			`cols = gpu->size / (gpu->rows * sizeof(int));`
			`cont += gpu->rows;`
			`for(x = 0, of = 0; x < gpu->rows; x++)`
			`{`
			`for(y = 0; y < cols; y++, of++)`
			`cout << temp[of] << " ";`
			`cout << endl;`
			`}`
			`cudaFree(gpu->dev_address);`
debug memory allocations 2013-10-17 00:44:24 +01:00			`#ifdef DEBUG_MEM`
			`cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;`
			`#endif`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`free(temp);`
			`gpu++;`
			`}`
			`while(first->name == cpu->name)`
			`{`
			`cols = cpu->size / (cpu->rows * sizeof(int));`
			`cont += cpu->rows;`
			`for(x = 0, of = 0; x < cpu->rows; x++)`
			`{`
			`for(y = 0; y < cols; y++, of++)`
			`cout << cpu->dev_address[of] << " ";`
			`cout << endl;`
			`}`
			`free(cpu->dev_address);`
			`cpu++;`
			`}`
			`first++;`
			`}`
			`cout << cont << endl;`
			`}`

more fixes; get coverage quickly 2013-10-09 11:23:45 +01:00			`void clear_memory()`
cuda package from Carlos 2013-10-07 12:20:00 +01:00			`{`
more fixes; get coverage quickly 2013-10-09 11:23:45 +01:00			`list<memnode>::iterator ini;`
			`list<memnode>::iterator fin;`
more cuda checking 2013-10-12 01:11:21 +01:00			`ini = GPUmem.begin();`
more fixes; get coverage quickly 2013-10-09 11:23:45 +01:00			`fin = GPUmem.end();`
			`while(ini != fin)`
			`{`
more cuda checking 2013-10-12 01:11:21 +01:00			`if (ini->isrule) {`
			`cudaFree(ini->dev_address);`
debug memory allocations 2013-10-17 00:44:24 +01:00			`#ifdef DEBUG_MEM`
			`cerr << "- " << ini->dev_address << " ini->dev_address" << endl;`
			`#endif`
more cuda checking 2013-10-12 01:11:21 +01:00			`ini = GPUmem.erase(ini);`
			`} else {`
			`ini++;`
			`}`
more fixes; get coverage quickly 2013-10-09 11:23:45 +01:00			`}`
			`ini = CPUmem.begin();`
			`fin = CPUmem.end();`
			`while(ini != fin)`
			`{`
			`free(ini->dev_address);`
			`ini++;`
			`}`
			`CPUmem.clear();`
			`}`