#include <list>
#include <iostream>
#include <stdlib.h>
#include <algorithm>
#include <thrust/device_vector.h>
#include "lista.h"
#include "memory.h"
#include "pred.h"

#define MAX_REC 200
#define MAX_FIX_POINTS 100

memnode temp_storage[MAX_REC];
/*List used to store information (address, size, etc.) about facts and rule results loaded in the GPU*/
list<memnode> GPUmem;
/*List used to store information about rule results offloaded from the GPU to the CPU*/
list<memnode> CPUmem;

/*Auxiliary function to sort rule list*/
bool comparer(const rulenode &r1, const rulenode &r2)
{
	return (r1.name > r2.name);
}

/*Used in search functions to compare iterations*/
bool compareiteration(const memnode &r1, const memnode &r2)
{
	return (r1.iteration < r2.iteration);
}

/*Used in search functions to compare names*/
bool comparename(const memnode &r1, const memnode &r2)
{
	return (r1.name > r2.name);
}

/*Linear search of 'name' fact*/
template<class InputIterator>
InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
{
	while(first != last)
	{
		if(first->name == name && first->isrule == 0) return first;
		++first;
	}
	return last;
}

/*Finds all results of rule 'name' in iteration 'itr' in both CPU and GPU memory. Every result found is removed from its respective list*/
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
{
	int x = 0, sum = 0;
	memnode temp;
	list<memnode>::iterator i;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name && rec.first->isrule == 1)
		{
			temp_storage[x] = *rec.first;
			rec.first = GPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	*gpunum = x;
	temp.name = name;
	temp.isrule = 1;
	i = GPUmem.insert(rec.first, temp);
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name && rec.first->isrule == 1)
		{
			temp_storage[x] = *rec.first;
			rec.first = CPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	*totalrows = sum;
	*cpunum = x;
	return i;
}
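
/* Usage sketch (not part of the original code; 'rulename' and 'itr' are hypothetical
   caller variables). buscarpornombre() leaves the blocks it removed in temp_storage:
   entries [0, gpunum) came from GPUmem and entries [gpunum, cpunum) from CPUmem, i.e.
   cpunum is the combined count, not the CPU-only count.

   int rows, ngpu, ntotal;
   list<memnode>::iterator slot = buscarpornombre(rulename, itr - 1, &rows, &ngpu, &ntotal);
   // temp_storage[0 .. ngpu-1]      -> blocks that were resident on the GPU
   // temp_storage[ngpu .. ntotal-1] -> blocks previously offloaded to the CPU
   // 'slot' is the placeholder entry inserted into GPUmem for the merged result.
*/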

list<memnode>::iterator buscarpornombrecpu(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
{
	int x = 0, sum = 0;
	memnode temp;
	list<memnode>::iterator i;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
		{
			temp_storage[x] = *rec.first;
			rec.first = GPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}

	*gpunum = x;
	temp.name = name;
	temp.isrule = 1;
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);

	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
		{
			temp_storage[x] = *rec.first;
			rec.first = CPUmem.erase(rec.first);
			sum += temp_storage[x].rows;
			x++;
		}
		else
			rec.first++;
	}
	i = CPUmem.insert(rec.first, temp);
	*totalrows = sum;
	*cpunum = x;
	return i;
}

/*Removes the least recently used memory block from GPU memory, sending it to CPU memory if it's a rule result.
If there are no used memory blocks in the GPU and we still don't have enough memory, the program exits with error*/
void limpiar(const char s[], size_t sz)
{
	list<memnode>::iterator ini;
	memnode temp;
	size_t free, total;

	if(GPUmem.size() == 0)
	{
		cudaMemGetInfo(&free, &total);
		cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
		exit(1);
	}

	ini = GPUmem.begin();
	if(ini->isrule)
	{
		temp = *ini;
		temp.dev_address = (int *)malloc(ini->size);
		cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
		list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
		CPUmem.insert(pos, temp);
	}
	cudaFree(ini->dev_address);
	GPUmem.erase(ini);
}
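
/* Illustrative effect of eviction (a sketch, not part of the original code): after
   limpiar() runs, a rule result that was at the front of GPUmem lives in CPUmem, so a
   later cargar()/cargafinal() call for that rule copies it back with cudaMemcpyHostToDevice.

   limpiar("join", 1024 * sizeof(int));   // the label "join" and the size are hypothetical
*/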

/*Allocates 'size' bytes in GPU memory. If not enough memory is available, removes least recently used memory blocks until
enough space is available*/
void reservar(int **ptr, size_t size)
{
	size_t free, total;

	if(size == 0)
	{
		*ptr = NULL;
		return;
	}

	cudaMemGetInfo(&free, &total);
	while(free < size)
	{
		cout << "Freed GPU memory " << free << " " << total << endl;
		limpiar("not enough memory", size);
		cudaMemGetInfo(&free, &total);
	}

	while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
		limpiar("Error in memory allocation", size);
	if(!*ptr)
	{
		cudaMemGetInfo(&free, &total);
		cerr << "Could not allocate " << size << " bytes, only " << free << " available from total of " << total << " !!!" << endl;
		cerr << "Exiting CUDA...." << endl;
		exit(1);
	}
}
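
/* Typical allocation pattern used throughout this file (illustrative sketch; 'rows',
   'cols', 'name', 'itr' and 'host_data' are hypothetical caller variables):

   int *dest;
   size_t bytes = rows * cols * sizeof(int);
   reservar(&dest, bytes);                                      // evicts LRU blocks if needed
   cudaMemcpyAsync(dest, host_data, bytes, cudaMemcpyHostToDevice);
   registrar(name, cols, dest, rows, itr, 0);                   // track the block in GPUmem
*/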

/*Creates a new entry in the GPU memory list*/
void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{
	memnode temp;
	temp.name = name;
	temp.dev_address = ptr;
	temp.rows = rows;
	temp.size = rows * num_columns * sizeof(int);
	temp.iteration = itr;
	temp.isrule = rule;
	GPUmem.push_back(temp);
}

/*Creates a new entry in the CPU memory list*/
void registrarcpu(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{
	memnode temp;
	temp.name = name;
	temp.dev_address = ptr;
	temp.rows = rows;
	temp.size = rows * num_columns * sizeof(int);
	temp.iteration = itr;
	temp.isrule = rule;
	CPUmem.push_back(temp);
}

/*Updates the information of an element in a list*/
template<class InputIterator>
void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
{
	i->dev_address = ptr;
	i->rows = rows;
	i->size = rows * num_columns * sizeof(int);
}

/*Counts the total number of rows generated by rule 'name' in iteration 'itr'*/
int numrows(int name, int itr)
{
	int sum = 0;
	memnode temp;
	temp.iteration = itr;
	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
			sum += rec.first->rows;
		rec.first++;
	}
	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
	while(rec.first != rec.second)
	{
		if(rec.first->name == name)
			sum += rec.first->rows;
		rec.first++;
	}
	return sum;
}

extern "C" void * YAP_IntToAtom(int);
extern "C" char * YAP_AtomName(void *);

/*Loads facts or rule results in GPU memory. If a fact is already in GPU memory, its pointer is simply returned. Otherwise,
memory is reserved and the fact is loaded. Rule results are loaded based on the current iteration 'itr' and both GPU and
CPU memories are searched for all instances of said results. The instances are combined into a single one in GPU memory.*/
int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int size, itrant, inc = 0;
	list<memnode>::iterator i;
	memnode fact;

	if(is_fact)
	{
		i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
		if(i != GPUmem.end())
		{
			fact = *i;
			GPUmem.erase(i);
			fact.iteration = itr;
			*ptr = fact.dev_address;
			GPUmem.push_back(fact);
			return fact.rows;
		}
		size = num_rows * num_columns * sizeof(int);
		reservar(&temp, size);
		cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
		registrar(name, num_columns, temp, num_rows, itr, 0);
		*ptr = temp;
		return num_rows;
	}
	if(itr > 0)
	{
		itrant = itr - 1;
		i = buscarpornombre(name, itrant, &totalrows, &numgpu, &numcpu);
		if((numgpu == 1) && (numcpu == 1))
		{
			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
			*ptr = temp_storage[0].dev_address;
			return temp_storage[0].rows;
		}
		size = totalrows * num_columns * sizeof(int);
		reservar(&temp, size);
		for(x = 0; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
			inc += temp_storage[x].size / sizeof(int);
			cudaFree(temp_storage[x].dev_address);
		}
		for(; x < numcpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
			inc += temp_storage[x].size / sizeof(int);
			free(temp_storage[x].dev_address);
		}
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}
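
/* Usage sketch (assumptions: 'fact_table' is a host-side row-major int array and the
   other variables are caller state; this is not code from the original file):

   int *dfact;
   int n = cargar(fact_name, fact_rows, fact_cols, 1, fact_table, &dfact, itr);
   // dfact now points at the fact in GPU memory; n == fact_rows.

   int *drule;
   int m = cargar(rule_name, 0, rule_cols, 0, NULL, &drule, itr);
   // drule points at all results of rule_name from iteration itr-1, merged on the GPU.
*/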

int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int size, itrant, inc = 0;
	list<memnode>::iterator i;

	if(is_fact)
	{
		*ptr = address_host_table;
		return num_rows;
	}
	if(itr > 0)
	{
		itrant = itr - 1;
		i = buscarpornombrecpu(name, itrant, &totalrows, &numgpu, &numcpu);

		if((numgpu == 0) && (numcpu == 1))
		{
			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
			*ptr = temp_storage[0].dev_address;
			return temp_storage[0].rows;
		}
		size = totalrows * num_columns * sizeof(int);
		temp = (int *)malloc(size);
		for(x = 0; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
			inc += temp_storage[x].size / sizeof(int);
			cudaFree(temp_storage[x].dev_address);
		}
		for(; x < numcpu; x++)
		{
			memcpy(temp + inc, temp_storage[x].dev_address, temp_storage[x].size);
			inc += temp_storage[x].size / sizeof(int);
			free(temp_storage[x].dev_address);
		}
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}

/*Loads all results of rule 'name' from both GPU and CPU memories into the GPU*/
int cargafinal(int name, int cols, int **ptr)
{
	int *temp, *ini, cont = 0, numg = 0, numc = 0;
	memnode bus;
	bus.name = name;
	GPUmem.sort(comparename);
	CPUmem.sort(comparename);
	list<memnode>::iterator endg = GPUmem.end();
	list<memnode>::iterator endc = CPUmem.end();
	list<memnode>::iterator pos = lower_bound(GPUmem.begin(), endg, bus, comparename);
	list<memnode>::iterator gpu = pos;
	while(pos != endg && pos->name == name)
	{
		cont += pos->rows;
		numg++;
		pos++;
	}
	pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
	list<memnode>::iterator cpu = pos;
	while(pos != endc && pos->name == name)
	{
		cont += pos->rows;
		numc++;
		pos++;
	}

	if(numg == 0 && numc == 0)
		return 0;
	if(numg == 1 && numc == 0)
	{
		pos = gpu;
		*ptr = pos->dev_address;
		cont = pos->rows;
		GPUmem.erase(pos);
		#ifdef TUFFY
		return -cont;
		#else
		return cont;
		#endif
	}
	if(numg == 0 && numc == 1)
	{
		pos = cpu;
		cont = pos->rows;
		#ifdef TUFFY
		reservar(&temp, pos->size);
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
		*ptr = temp;
		#else
		*ptr = pos->dev_address;
		#endif
		CPUmem.erase(pos);
		return -cont;
	}

	reservar(&temp, cont * cols * sizeof(int));
	ini = temp;
	pos = gpu;
	while(pos != endg && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}
	pos = cpu;
	while(pos != endc && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}
	*ptr = ini;
	return cont;
}
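
/* Reading of the return value (an interpretation of the code above, not a documented
   contract): the single-block cases return a negated row count, so a caller would take
   the absolute value to get the number of rows. Illustrative call:

   int *res;
   int n = cargafinal(rule_name, rule_cols, &res);
   int final_rows = (n < 0) ? -n : n;
*/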

/*Compares the results of the current iteration against the results of older iterations.
Used to avoid infinite computations when the result is not a single fixed-point, but an
orbit of points.*/
bool generadas(int name, int filas, int cols, int itr)
{
	int r1, r2, x, fin;
	int *dop1, *dop2;

	r2 = numrows(name, itr);
	if(itr < MAX_FIX_POINTS)
		fin = itr;
	else
		fin = MAX_FIX_POINTS;
	for(x = 1; x <= fin; x++)
	{
		r1 = numrows(name, itr - x);
		if(r1 == r2)
		{
			r2 = cargar(name, filas, cols, 0, NULL, &dop2, itr + 1);
			thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
			r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
			thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
			if(thrust::equal(pt1, pt1 + r1, pt2) == true)
				return true;
		}
	}
	return false;
}
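
/* Sketch of how the fixed-point test would be driven by the evaluation loop
   (illustrative; the real loop lives in the caller and these names are hypothetical):

   if(generadas(rule_name, rule_rows, rule_cols, itr))
       stop = true;   // the current result already appeared in an earlier iteration
*/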

/*Prints the contents of the GPU memory list (debugging aid)*/
void mostrar_memoria()
{
	unsigned int x;
	list<memnode>::iterator i = GPUmem.begin();
	cout << "GPU memory start" << endl;
	for(x = 0; x < GPUmem.size(); x++, i++)
		cout << i->name << " " << i->iteration << " " << i->isrule << " " << i->rows << " " << i->size << endl;
	cout << "GPU memory end" << endl;
}

/*Prints the contents of the CPU memory list (debugging aid)*/
void mostrar_memcpu()
{
	unsigned int x;
	list<memnode>::iterator i = CPUmem.begin();
	cout << "CPU memory start" << endl;
	for(x = 0; x < CPUmem.size(); x++, i++)
		cout << i->name << " " << i->iteration << endl;
	cout << "CPU memory end" << endl;
}

/*Clears all rule results from both GPU and CPU memory*/
void clear_memory()
{
	list<memnode>::iterator ini;
	list<memnode>::iterator fin;
	ini = GPUmem.begin();
	fin = GPUmem.end();
	while(ini != fin)
	{
		if(ini->isrule)
		{
			cudaFree(ini->dev_address);
			ini = GPUmem.erase(ini);
		}
		else
			ini++;
	}
	ini = CPUmem.begin();
	fin = CPUmem.end();
	while(ini != fin)
	{
		free(ini->dev_address);
		ini++;
	}
	CPUmem.clear();
}

/*Clears everything from both GPU and CPU memory*/
void clear_memory_all()
{
	list<memnode>::iterator ini;
	list<memnode>::iterator fin;
	ini = GPUmem.begin();
	fin = GPUmem.end();
	while(ini != fin)
	{
		cudaFree(ini->dev_address);
		ini++;
	}
	GPUmem.clear();
	ini = CPUmem.begin();
	fin = CPUmem.end();
	while(ini != fin)
	{
		free(ini->dev_address);
		ini++;
	}
	CPUmem.clear();
}

/*Removes all instances of fact 'name' from both CPU and GPU memories*/
void liberar(int name)
{
	list<memnode>::iterator i;
	memnode fact;
	i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
	if(i != GPUmem.end())
	{
		fact = *i;
		GPUmem.erase(i);
		cudaFree(fact.dev_address);
	}
	i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
	if(i != CPUmem.end())
	{
		fact = *i;
		CPUmem.erase(i);
		free(fact.dev_address);
	}
}

/*Adds all rows in 'dop1' to the fact 'name' by creating a new array capable of holding both.*/
void sumar(int name, int *dop1, int cols, int rows)
{
	list<memnode>::iterator i;
	memnode fact;
	i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
	int *res, newrows, offset;
	if(i != GPUmem.end())
	{
		fact = *i;
		newrows = rows + fact.rows;
		reservar(&res, newrows * cols * sizeof(int));
		offset = fact.rows * cols;
		cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
		GPUmem.erase(i);
		registrar(name, cols, res, newrows, 0, 0);
		cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
		cudaFree(fact.dev_address);
	}
}