hiatory
This commit is contained in:
parent
3d68f0e06b
commit
cd41d373db
1
C/init.c
1
C/init.c
@ -1438,5 +1438,6 @@ void Yap_exit(int value) {
|
||||
Yap_ShutdownLoadForeign();
|
||||
}
|
||||
Yap_CloseStreams(false);
|
||||
Yap_CloseReadline();
|
||||
exit(value);
|
||||
}
|
||||
|
@ -185,7 +185,7 @@ available in experimental implementations.
|
||||
*/
|
||||
YAP_FLAG(FILE_NAME_VARIABLES_FLAG, "file_name_variables", true, booleanFlag,
|
||||
"true", NULL),
|
||||
YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15e",
|
||||
YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15f",
|
||||
NULL), /**< + `float_format `
|
||||
|
||||
C-library `printf()` format specification used by write/1 and
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*************************************************************************
|
||||
/*************************************************************************
|
||||
* *
|
||||
* YAP Prolog %W% %G% *
|
||||
* Yap Prolog was developed at NCCUP - Universidade do Porto *
|
||||
@ -301,6 +301,7 @@ extern void Yap_DebugErrorPutc(int n);
|
||||
extern void Yap_DebugErrorPuts(const char *s);
|
||||
extern void Yap_DebugWriteIndicator(struct pred_entry *ap);
|
||||
void Yap_PlWriteToStream(Term, int, int);
|
||||
void Yap_CloseReadline(void);
|
||||
/* depth_lim.c */
|
||||
bool Yap_InitReadline(Term t);
|
||||
void Yap_InitItDeepenPreds(void);
|
||||
|
14
H/Yatom.h
14
H/Yatom.h
@ -659,6 +659,19 @@ INLINE_ONLY inline EXTERN PropFlags IsPredProperty(int flags) {
|
||||
return (PropFlags)((flags == PEProp));
|
||||
}
|
||||
|
||||
INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe);
|
||||
|
||||
INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe) {
|
||||
if (pe->ModuleOfPred == IDB_MODULE) {
|
||||
return NULL;
|
||||
} else if (pe->ArityOfPE == 0) {
|
||||
return (Atom)pe->FunctorOfPred;
|
||||
} else {
|
||||
Functor f = pe->FunctorOfPred;
|
||||
return NameOfFunctor(f);
|
||||
}
|
||||
}
|
||||
|
||||
/* Flags for code or dbase entry */
|
||||
/* There are several flags for code and data base entries */
|
||||
typedef enum {
|
||||
@ -1322,7 +1335,6 @@ INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags);
|
||||
|
||||
INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags flags) {
|
||||
return flags == FlagProperty;
|
||||
|
||||
}
|
||||
|
||||
/* Proto types */
|
||||
|
@ -474,6 +474,8 @@
|
||||
|
||||
#define LOCAL_search_atoms LOCAL->search_atoms_
|
||||
#define REMOTE_search_atoms(wid) REMOTE(wid)->search_atoms_
|
||||
#define LOCAL_SearchPreds LOCAL->SearchPreds_
|
||||
#define REMOTE_SearchPreds(wid) REMOTE(wid)->SearchPreds_
|
||||
|
||||
#define LOCAL_CurSlot LOCAL->CurSlot_
|
||||
#define REMOTE_CurSlot(wid) REMOTE(wid)->CurSlot_
|
||||
|
@ -268,6 +268,7 @@ const char* Error_Function_;
|
||||
UInt exo_arg_;
|
||||
// atom completion
|
||||
struct scan_atoms* search_atoms_;
|
||||
struct pred_entry* SearchPreds_;
|
||||
// Slots
|
||||
yhandle_t CurSlot_;
|
||||
yhandle_t NSlots_;
|
||||
|
@ -269,6 +269,7 @@ static void InitWorker(int wid) {
|
||||
|
||||
|
||||
|
||||
|
||||
REMOTE_CurSlot(wid) = 0;
|
||||
REMOTE_NSlots(wid) = 0;
|
||||
REMOTE_SlotBase(wid) = InitHandles(wid);
|
||||
|
@ -279,4 +279,5 @@ static void RestoreWorker(int wid USES_REGS) {
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@ -312,6 +312,7 @@ UInt exo_arg =0
|
||||
|
||||
// atom completion
|
||||
struct scan_atoms* search_atoms void
|
||||
struct pred_entry* SearchPreds void
|
||||
|
||||
// Slots
|
||||
yhandle_t CurSlot =0
|
||||
|
18
os/charsio.c
18
os/charsio.c
@ -470,20 +470,13 @@ code with _C_.
|
||||
|
||||
*/
|
||||
static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */
|
||||
int sno = Yap_CheckStream(ARG1, Input_Stream_f, "get_byte/2");
|
||||
int sno = Yap_CheckBinaryStream(ARG1, Input_Stream_f, "get_byte/2");
|
||||
Int status;
|
||||
Term out;
|
||||
|
||||
if (sno < 0)
|
||||
return (FALSE);
|
||||
status = GLOBAL_Stream[sno].status;
|
||||
if (!(status & Binary_Stream_f)
|
||||
//&& strictISOFlag()
|
||||
) {
|
||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
||||
Yap_Error(PERMISSION_ERROR_INPUT_STREAM, ARG1, "get_byte/2");
|
||||
return (FALSE);
|
||||
}
|
||||
out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno));
|
||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
||||
return Yap_unify_constant(ARG2, out);
|
||||
@ -812,16 +805,9 @@ static Int put_byte(USES_REGS1) { /* '$put_byte'(Stream,N) */
|
||||
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, t2, "put_code/1");
|
||||
return FALSE;
|
||||
}
|
||||
int sno = Yap_CheckStream(ARG1, Output_Stream_f, "put/2");
|
||||
int sno = Yap_CheckBinaryStream(ARG1, Output_Stream_f, "put/2");
|
||||
if (sno < 0)
|
||||
return (FALSE);
|
||||
if (!(GLOBAL_Stream[sno].status & Binary_Stream_f)
|
||||
// && strictISOFlag()
|
||||
) {
|
||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
||||
Yap_Error(PERMISSION_ERROR_OUTPUT_BINARY_STREAM, ARG1, NULL);
|
||||
return false;
|
||||
}
|
||||
GLOBAL_Stream[sno].stream_putc(sno, ch);
|
||||
/*
|
||||
* if (!(GLOBAL_Stream[sno].status & Null_Stream_f))
|
||||
|
18
os/iopreds.c
18
os/iopreds.c
@ -1576,6 +1576,24 @@ int Yap_CheckTextStream__(const char *file, const char *f, int line, Term arg,
|
||||
return sno;
|
||||
}
|
||||
|
||||
int Yap_CheckBinaryStream__(const char *file, const char *f, int line, Term arg,
|
||||
int kind, const char *msg) {
|
||||
int sno;
|
||||
if ((sno = CheckStream__(file, f, line, arg, kind, msg)) < 0)
|
||||
return -1;
|
||||
if ((GLOBAL_Stream[sno].status & Binary_Stream_f)) {
|
||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
||||
if (kind == Input_Stream_f)
|
||||
PlIOError__(file, f, line, PERMISSION_ERROR_INPUT_TEXT_STREAM, arg,
|
||||
msg);
|
||||
else
|
||||
PlIOError__(file, f, line, PERMISSION_ERROR_OUTPUT_TEXT_STREAM, arg,
|
||||
msg);
|
||||
return -1;
|
||||
}
|
||||
return sno;
|
||||
}
|
||||
|
||||
/* used from C-interface */
|
||||
int Yap_GetFreeStreamDForReading(void) {
|
||||
int sno = GetFreeStreamD();
|
||||
|
@ -45,6 +45,10 @@ extern int Yap_CheckStream__(const char *, const char *, int, Term, int,
|
||||
Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
|
||||
extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int,
|
||||
const char *);
|
||||
#define Yap_CheckBinaryStream(arg, kind, msg) \
|
||||
Yap_CheckBinaryStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
|
||||
extern int Yap_CheckBinaryStream__(const char *, const char *, int, Term, int,
|
||||
const char *);
|
||||
|
||||
extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name,
|
||||
encoding_t encoding, stream_flags_t flags,
|
||||
|
@ -149,7 +149,7 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
} else {
|
||||
Term cmod;
|
||||
p = LOCAL_SearchPreds;
|
||||
cmod = (p->ModuleOfPred != PROLOG_MODULE ? p->ModuleOfPred : TermProlog );
|
||||
cmod = (p->ModuleOfPred != PROLOG_MODULE ? p->ModuleOfPred : TermProlog);
|
||||
mod = Yap_GetModuleEntry(cmod);
|
||||
}
|
||||
while (mod) {
|
||||
@ -168,8 +168,7 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
p = mod->PredForME;
|
||||
}
|
||||
char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
|
||||
if (strlen(c) > strlen(prefix) &&
|
||||
strstr(c, prefix) == c &&
|
||||
if (strlen(c) > strlen(prefix) && strstr(c, prefix) == c &&
|
||||
!(p->PredFlags & HiddenPredFlag)) {
|
||||
LOCAL_SearchPreds = p;
|
||||
arity_t ar = p->ArityOfPE;
|
||||
@ -177,19 +176,16 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
|
||||
return c;
|
||||
}
|
||||
size_t sz = strlen(c);
|
||||
chain_t *el = (chain_t *)malloc(sizeof(chain_t)+sz);
|
||||
strncpy(LOCAL_FileNameBuf, c, YAP_FILENAME_MAX);
|
||||
strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
|
||||
return LOCAL_FileNameBuf;
|
||||
}
|
||||
|
||||
}
|
||||
LOCAL_SearchPreds = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static char *predicate_generator(const char *prefix, int state) {
|
||||
static char *predicate_generator(const char *prefix, int state) {
|
||||
char *s = predicate_enumerate(prefix, state);
|
||||
|
||||
if (s) {
|
||||
@ -201,15 +197,15 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
static char **prolog_completion(const char *text, int start, int end) {
|
||||
static char **prolog_completion(const char *text, int start, int end) {
|
||||
char **matches = NULL;
|
||||
|
||||
if (start == 0 && isalpha(text[0])) {
|
||||
int i = 0;
|
||||
while (i < end) {
|
||||
if (isalnum(text[i]))
|
||||
if (isalnum(text[i]) || text[i] == '_')
|
||||
i++;
|
||||
else
|
||||
break;
|
||||
@ -225,16 +221,20 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
;
|
||||
p = text + i;
|
||||
|
||||
if ((strstr(p,"[") == p) || (strstr(p,"compile(") == p) ||
|
||||
(strstr(p,"consult(") == p) || (strstr(p,"load_files(") == p) ||
|
||||
(strstr(p,"reconsult(") == p) || (strstr(p,"use_module(") == p))
|
||||
if ((strstr(p, "[") == p) || (strstr(p, "compile(") == p) ||
|
||||
(strstr(p, "consult(") == p) || (strstr(p, "load_files(") == p) ||
|
||||
(strstr(p, "reconsult(") == p) || (strstr(p, "use_module(") == p) ||
|
||||
(strstr(p, "cd(") == p))
|
||||
matches = rl_completion_matches((char *)text, /* for pre-4.2 */
|
||||
rl_filename_completion_function);
|
||||
return matches;
|
||||
}
|
||||
int i = end, ch = '\0';
|
||||
while (i > start) {
|
||||
ch = text[-i];
|
||||
ch = text[--i];
|
||||
if (ch == '\'')
|
||||
return rl_completion_matches((char *)text, /* for pre-4.2 */
|
||||
rl_filename_completion_function);
|
||||
if (isalnum(text[i]))
|
||||
continue;
|
||||
break;
|
||||
@ -243,16 +243,16 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
return rl_completion_matches((char *)text, atom_generator);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void Yap_ReadlineFlush(int sno) {
|
||||
void Yap_ReadlineFlush(int sno) {
|
||||
if (GLOBAL_Stream[sno].status & Tty_Stream_f &&
|
||||
GLOBAL_Stream[sno].status & Output_Stream_f) {
|
||||
rl_redisplay();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Yap_ReadlinePrompt(StreamDesc * s) {
|
||||
bool Yap_ReadlinePrompt(StreamDesc *s) {
|
||||
if (s->status & Tty_Stream_f) {
|
||||
s->stream_getc = ReadlineGetc;
|
||||
if (GLOBAL_Stream[0].status & Tty_Stream_f &&
|
||||
@ -261,9 +261,9 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool Yap_ReadlineOps(StreamDesc * s) {
|
||||
bool Yap_ReadlineOps(StreamDesc *s) {
|
||||
if (s->status & Tty_Stream_f) {
|
||||
if (GLOBAL_Stream[0].status & Tty_Stream_f &&
|
||||
is_same_tty(s->file, GLOBAL_Stream[0].file))
|
||||
@ -273,9 +273,9 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int prolog_complete(int ignore, int key) {
|
||||
static int prolog_complete(int ignore, int key) {
|
||||
if (rl_point > 0 && rl_line_buffer[rl_point - 1] != ' ') {
|
||||
#if HAVE_DECL_RL_CATCH_SIGNALS_ /* actually version >= 1.2, or true readline \
|
||||
*/
|
||||
@ -292,9 +292,9 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
rl_complete(ignore, key);
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool Yap_InitReadline(Term enable) {
|
||||
bool Yap_InitReadline(Term enable) {
|
||||
// don't call readline within emacs
|
||||
// if (getenv("ËMACS"))
|
||||
// return;
|
||||
@ -303,12 +303,13 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
|
||||
GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
|
||||
GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
|
||||
#if _MSC_VER || defined(__MINGW32__)
|
||||
#if _WIN32
|
||||
rl_instream = stdin;
|
||||
#endif
|
||||
rl_outstream = stderr;
|
||||
using_history();
|
||||
const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
|
||||
history_file = s;
|
||||
if (read_history(s) != 0) {
|
||||
FILE *f = fopen(s, "a");
|
||||
if (f) {
|
||||
@ -323,9 +324,9 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
rl_add_defun("prolog-complete", (void *)prolog_complete, '\t');
|
||||
#endif
|
||||
return Yap_ReadlineOps(GLOBAL_Stream + StdInStream);
|
||||
}
|
||||
}
|
||||
|
||||
static bool getLine(int inp, int out) {
|
||||
static bool getLine(int inp, int out) {
|
||||
CACHE_REGS
|
||||
rl_instream = GLOBAL_Stream[inp].file;
|
||||
rl_outstream = GLOBAL_Stream[out].file;
|
||||
@ -362,14 +363,13 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
return false;
|
||||
if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
|
||||
add_history((char *)myrl_line);
|
||||
write_history(history_file);
|
||||
fflush(NULL);
|
||||
}
|
||||
s->u.irl.ptr = s->u.irl.buf = myrl_line;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static int ReadlinePutc(int sno, int ch) {
|
||||
static int ReadlinePutc(int sno, int ch) {
|
||||
CACHE_REGS
|
||||
StreamDesc *s = &GLOBAL_Stream[sno];
|
||||
#if MAC || _MSC_VER || defined(__MINGW32__)
|
||||
@ -384,15 +384,15 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
LOCAL_newline = true;
|
||||
}
|
||||
return ((int)ch);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
@brief reading from the console is complicated because we need to
|
||||
know whether to prompt and so on...
|
||||
|
||||
EOF must be handled by resetting the file.
|
||||
*/
|
||||
static int ReadlineGetc(int sno) {
|
||||
*/
|
||||
static int ReadlineGetc(int sno) {
|
||||
StreamDesc *s = &GLOBAL_Stream[sno];
|
||||
int ch;
|
||||
bool fetch = (s->u.irl.buf == NULL);
|
||||
@ -409,17 +409,17 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
return EOF;
|
||||
}
|
||||
return console_post_process_read_char(ch, s);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
@brief Yap_ReadlinePeekChar peeks the next char from the
|
||||
readline buffer, but does not actually grab it.
|
||||
|
||||
The idea is to take advantage of the buffering. Special care must be taken
|
||||
with EOF, though.
|
||||
|
||||
*/
|
||||
Int Yap_ReadlinePeekChar(int sno) {
|
||||
*/
|
||||
Int Yap_ReadlinePeekChar(int sno) {
|
||||
StreamDesc *s = &GLOBAL_Stream[sno];
|
||||
int ch;
|
||||
|
||||
@ -446,9 +446,9 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
return EOF;
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
|
||||
int Yap_ReadlineForSIGINT(void) {
|
||||
int Yap_ReadlineForSIGINT(void) {
|
||||
CACHE_REGS
|
||||
int ch;
|
||||
StreamDesc *s = &GLOBAL_Stream[StdInStream];
|
||||
@ -471,20 +471,26 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static Int has_readline(USES_REGS1) {
|
||||
void Yap_CloseReadline(void) {
|
||||
#if USE_READLINE
|
||||
write_history(history_file);
|
||||
#endif
|
||||
}
|
||||
|
||||
static Int has_readline(USES_REGS1) {
|
||||
#if USE_READLINE
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void Yap_InitReadlinePreds(void) {
|
||||
void Yap_InitReadlinePreds(void) {
|
||||
Yap_InitCPred("$has_readline", 0, has_readline,
|
||||
SafePredFlag | HiddenPredFlag);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
bool Yap_InitReadline(Term enable) {
|
||||
|
@ -390,6 +390,8 @@ write1 ( USES_REGS1 )
|
||||
if (output_stream == -1) output_stream = 1;
|
||||
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||
if (args == NULL) {
|
||||
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||
if (LOCAL_Error_TYPE)
|
||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||
return false;
|
||||
@ -415,6 +417,8 @@ write_canonical1 ( USES_REGS1 )
|
||||
if (output_stream == -1) output_stream = 1;
|
||||
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||
if (args == NULL) {
|
||||
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||
if (LOCAL_Error_TYPE)
|
||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||
return false;
|
||||
@ -440,6 +444,8 @@ write_canonical ( USES_REGS1 )
|
||||
we cannot make recursive Prolog calls */
|
||||
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||
if (args == NULL) {
|
||||
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||
if (LOCAL_Error_TYPE)
|
||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||
return false;
|
||||
@ -467,6 +473,8 @@ writeq1 ( USES_REGS1 )
|
||||
we cannot make recursive Prolog calls */
|
||||
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||
if (args == NULL) {
|
||||
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||
if (LOCAL_Error_TYPE)
|
||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||
return false;
|
||||
@ -495,6 +503,8 @@ writeq ( USES_REGS1 )
|
||||
we cannot make recursive Prolog calls */
|
||||
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||
if (args == NULL) {
|
||||
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||
if (LOCAL_Error_TYPE)
|
||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||
return false;
|
||||
@ -523,6 +533,8 @@ print1 ( USES_REGS1 )
|
||||
we cannot make recursive Prolog calls */
|
||||
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||
if (args == NULL) {
|
||||
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||
if (LOCAL_Error_TYPE)
|
||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||
return false;
|
||||
@ -551,6 +563,8 @@ print ( USES_REGS1 )
|
||||
we cannot make recursive Prolog calls */
|
||||
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||
if (args == NULL) {
|
||||
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||
if (LOCAL_Error_TYPE)
|
||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||
return false;
|
||||
|
@ -54,11 +54,28 @@ if (CUDA_FOUND)
|
||||
macro_optional_find_package (Thrust ON)
|
||||
|
||||
set (CUDA_SOURCES
|
||||
lista.cu
|
||||
memory.cu
|
||||
cuda.c
|
||||
CC_CSSTree.cu
|
||||
bpreds.cu
|
||||
dbio.cu
|
||||
lista.cu
|
||||
memory.cu
|
||||
selectproyect.cu
|
||||
treeb.cu
|
||||
union2.cu
|
||||
)
|
||||
|
||||
set (CXX_SOURCES
|
||||
bpredscpu.cpp
|
||||
joincpu.cpp
|
||||
selectproyectcpu.cpp
|
||||
unioncpu2.cpp
|
||||
)
|
||||
|
||||
set (C_SOURCES
|
||||
creator2.c
|
||||
cuda.c
|
||||
)
|
||||
|
||||
set (PL_SOURCES
|
||||
cuda.yap
|
||||
)
|
||||
|
15
packages/cuda/Makefile.in
Normal file → Executable file
15
packages/cuda/Makefile.in
Normal file → Executable file
@ -23,7 +23,7 @@ CC=@CC@
|
||||
NVCC=@NVCC@
|
||||
CFLAGS= @SHLIB_CFLAGS@ $(YAP_EXTRAS) $(DEFS) -I$(srcdir) -I../.. -I$(srcdir)/../../include @CUDA_CPPFLAGS@
|
||||
NVCCFLAGS=@CUDA_CPPFLAGS@ -I$(srcdir) -I../.. -I$(srcdir)/../../include
|
||||
CUDA_LDFLAGS=@CUDA_LDFLAGS@
|
||||
LDFLAGS=@LDFLAGS@
|
||||
#
|
||||
#
|
||||
# You shouldn't need to change what follows.
|
||||
@ -39,7 +39,7 @@ SO=@SO@
|
||||
CWD=$(PWD)
|
||||
#
|
||||
|
||||
CUDA_PROLOG= \
|
||||
BDD_PROLOG= \
|
||||
$(srcdir)/cuda.yap
|
||||
|
||||
OBJS=cuda.o memory.o lista.o
|
||||
@ -62,16 +62,11 @@ memory.o: $(srcdir)/memory.cu $(srcdir)/pred.h
|
||||
@DO_SECOND_LD@cuda.@SO@: $(OBJS)
|
||||
@DO_SECOND_LD@ @CUDA_SHLIB_LD@ $(CUDA_LDFLAGS) -o cuda.@SO@ $(OBJS)
|
||||
|
||||
install: all install-examples
|
||||
install: all
|
||||
mkdir -p $(DESTDIR)$(SHAREDIR)
|
||||
for h in $(CUDA_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
|
||||
for h in $(BDD_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
|
||||
$(INSTALL_PROGRAM) $(SOBJS) $(DESTDIR)$(YAPLIBDIR)
|
||||
|
||||
install-examples:
|
||||
|
||||
clean:
|
||||
rm -f *.o *~ $(OBJS) *.BAK
|
||||
|
||||
distclean: clean
|
||||
rm -f $(SOBJS) Makefile
|
||||
rm -f *.o *~ $(OBJS) $(SOBJS) *.BAK
|
||||
|
||||
|
499
packages/cuda/bpreds.cu
Normal file → Executable file
499
packages/cuda/bpreds.cu
Normal file → Executable file
@ -1,4 +1,113 @@
|
||||
__global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <cstdarg>
|
||||
#include "pred.h"
|
||||
|
||||
/*Determines the maximum from a set of values*/
|
||||
int maximo(int count, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int j, temp, mx = 0;
|
||||
va_start(ap, count);
|
||||
|
||||
for(j = 0; j < count; j++)
|
||||
{
|
||||
temp = va_arg(ap, int);
|
||||
if(temp > mx)
|
||||
mx = temp;
|
||||
}
|
||||
|
||||
va_end(ap);
|
||||
return mx;
|
||||
}
|
||||
|
||||
|
||||
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int x, rowact, rowact1, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
rowact1 = id * of1;
|
||||
rowact = id * of2;
|
||||
for(x = nx; x < numc; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
if(op1 < 0)
|
||||
op1 = dop1[rowact1 - op1 - 1];
|
||||
else
|
||||
op1 = dop2[rowact + op1];
|
||||
op2 = shared[x+2];
|
||||
if(op2 < 0)
|
||||
op2 = dop1[rowact1 - op2 - 1];
|
||||
else
|
||||
op2 = dop2[rowact + op2];
|
||||
switch(shared[x] - BPOFFSET)
|
||||
{
|
||||
case SBG_EQ: if(op1 != op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_GT: if(op1 <= op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_LT: if(op1 >= op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_GE: if(op1 < op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_LE: if(op1 > op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_DF: if(op1 == op2)
|
||||
return;
|
||||
}
|
||||
}
|
||||
if(res2 != NULL)
|
||||
res2[id] = 1;
|
||||
for(x = 0; x < nx; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
if(op1 < 0)
|
||||
op1 *= -1;
|
||||
else
|
||||
op1 = dop2[rowact + op1];
|
||||
op2 = shared[x+2];
|
||||
if(op2 < 0)
|
||||
op2 *= -1;
|
||||
else
|
||||
op2 = dop2[rowact + op2];
|
||||
switch(shared[x])
|
||||
{
|
||||
case SBG_EQ: if(op1 != op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_GT: if(op1 <= op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_LT: if(op1 >= op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_GE: if(op1 < op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_LE: if(op1 > op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_DF: if(op1 == op2)
|
||||
return;
|
||||
}
|
||||
}
|
||||
res[id] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*Mark all rows that comply with the comparison predicates*/
|
||||
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
@ -8,7 +117,7 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
rowact = id * cols;
|
||||
rowact = id * of1;
|
||||
for(x = 0; x < numc; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
@ -46,98 +155,306 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
|
||||
}
|
||||
}
|
||||
|
||||
int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
|
||||
/*Unmark all rows that do not comply with the comparison predicates*/
|
||||
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
int *temp;
|
||||
int tmplen = rows + 1;
|
||||
int size = tmplen * sizeof(int);
|
||||
reservar(&temp, size);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << temp << " temp bpreds " << size << endl;
|
||||
#endif
|
||||
cudaMemset(temp, 0, size);
|
||||
|
||||
#if TIMER
|
||||
cuda_stats.builtins++;
|
||||
#endif
|
||||
int *dhead;
|
||||
int predn = numpreds.x * 3;
|
||||
int spredn = predn * sizeof(int);
|
||||
int sproj = numpreds.z * sizeof(int);
|
||||
int hsize;
|
||||
if(predn > numpreds.z)
|
||||
hsize = spredn;
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
if(res[id] == 0)
|
||||
return;
|
||||
rowact = id * of1;
|
||||
for(x = 0; x < numc; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
if(op1 < 0)
|
||||
op1 *= -1;
|
||||
else
|
||||
hsize = sproj;
|
||||
reservar(&dhead, hsize);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << dhead << " dhead " << hsize << endl;
|
||||
#endif
|
||||
cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
|
||||
|
||||
int blockllen = rows / 1024 + 1;
|
||||
int numthreads = 1024;
|
||||
|
||||
/*int x;
|
||||
cout << "arraypreds = ";
|
||||
for(x = 0; x < predn; x++)
|
||||
cout << bin[x] << " ";
|
||||
cout << endl;
|
||||
cout << "temptable = ";
|
||||
for(x = 0; x < numpreds.z; x++)
|
||||
cout << bin[x+predn] << " ";
|
||||
cout << endl;
|
||||
int y;
|
||||
int *hop1 = (int *)malloc(numpreds.y * rows * sizeof(int));
|
||||
cudaMemcpy(hop1, dop1, numpreds.y * rows * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
for(x = 0; x < rows; x++)
|
||||
op1 = dop1[rowact + op1];
|
||||
op2 = shared[x+2];
|
||||
if(op2 < 0)
|
||||
op2 *= -1;
|
||||
else
|
||||
op2 = dop1[rowact + op2];
|
||||
switch(shared[x])
|
||||
{
|
||||
for(y = 0; y < numpreds.y; y++)
|
||||
cout << hop1[x * numpreds.y + y] << " ";
|
||||
cout << endl;
|
||||
}
|
||||
free(hop1);*/
|
||||
|
||||
predicates<<<blockllen, numthreads, spredn>>>(dop1, rows, numpreds.y, dhead, predn, temp + 1);
|
||||
|
||||
/*int y;
|
||||
int *hop1 = (int *)malloc((rows + 1) * sizeof(int));
|
||||
cudaMemcpy(hop1, temp, (rows + 1) * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
for(x = 0; x < (rows + 1); x++)
|
||||
cout << hop1[x] << " ";
|
||||
cout << endl;
|
||||
free(hop1);*/
|
||||
|
||||
thrust::device_ptr<int> res;
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||
int num = res[rows];
|
||||
if(num == 0)
|
||||
return 0;
|
||||
|
||||
int *fres;
|
||||
reservar(&fres, num * sproj);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << fres << " fres " << num * sproj << endl;
|
||||
#endif
|
||||
cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
|
||||
|
||||
/*int y;
|
||||
int *hop1 = (int *)malloc(numpreds.z * num * sizeof(int));
|
||||
cudaMemcpy(hop1, fres, numpreds.z * num * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
for(x = 0; x < num; x++)
|
||||
case SBG_EQ: if(op1 != op2)
|
||||
{
|
||||
for(y = 0; y < numpreds.z; y++)
|
||||
cout << hop1[x * numpreds.z + y] << " ";
|
||||
cout << endl;
|
||||
res[id] = 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_GT: if(op1 <= op2)
|
||||
{
|
||||
res[id] = 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_LT: if(op1 >= op2)
|
||||
{
|
||||
res[id] = 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_GE: if(op1 < op2)
|
||||
{
|
||||
res[id] = 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_LE: if(op1 > op2)
|
||||
{
|
||||
res[id] = 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_DF: if(op1 == op2)
|
||||
{
|
||||
res[id] = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(hop1);*/
|
||||
|
||||
liberar(dhead, hsize);
|
||||
liberar(temp, size);
|
||||
liberar(dop1, rows * cols * sizeof(int));
|
||||
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
|
||||
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int x, rowact, rowact1, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
rowact1 = id * of1;
|
||||
rowact = id * of2;
|
||||
for(x = nx; x < numc; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
if(op1 < 0)
|
||||
op1 = dop1[rowact1 - op1 - 1];
|
||||
else
|
||||
op1 = dop2[rowact + op1];
|
||||
op2 = shared[x+2];
|
||||
if(op2 < 0)
|
||||
op2 = dop1[rowact1 - op2 - 1];
|
||||
else
|
||||
op2 = dop2[rowact + op2];
|
||||
switch(shared[x] - BPOFFSET)
|
||||
{
|
||||
case SBG_EQ: if(op1 == op2)
|
||||
{
|
||||
res2[id] = 1;
|
||||
x = numc;
|
||||
}
|
||||
break;
|
||||
case SBG_GT: if(op1 > op2)
|
||||
{
|
||||
res2[id] = 1;
|
||||
x = numc;
|
||||
}
|
||||
break;
|
||||
case SBG_LT: if(op1 < op2)
|
||||
{
|
||||
res2[id] = 1;
|
||||
x = numc;
|
||||
}
|
||||
break;
|
||||
case SBG_GE: if(op1 >= op2)
|
||||
{
|
||||
res2[id] = 1;
|
||||
x = numc;
|
||||
}
|
||||
break;
|
||||
case SBG_LE: if(op1 <= op2)
|
||||
{
|
||||
res2[id] = 1;
|
||||
x = numc;
|
||||
}
|
||||
break;
|
||||
case SBG_DF: if(op1 != op2)
|
||||
{
|
||||
res2[id] = 1;
|
||||
x = numc;
|
||||
}
|
||||
}
|
||||
}
|
||||
for(x = 0; x < nx; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
if(op1 < 0)
|
||||
op1 *= -1;
|
||||
else
|
||||
op1 = dop2[rowact + op1];
|
||||
op2 = shared[x+2];
|
||||
if(op2 < 0)
|
||||
op2 *= -1;
|
||||
else
|
||||
op2 = dop2[rowact + op2];
|
||||
switch(shared[x])
|
||||
{
|
||||
case SBG_EQ: if(op1 == op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_GT: if(op1 > op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_LT: if(op1 < op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_GE: if(op1 >= op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_LE: if(op1 <= op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_DF: if(op1 != op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*Mark all rows that comply with the comparison predicates using disjunctions (i.e. a row is marked if it complies with at least one predicate)*/
|
||||
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
rowact = id * of1;
|
||||
for(x = 0; x < numc; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
if(op1 < 0)
|
||||
op1 *= -1;
|
||||
else
|
||||
op1 = dop1[rowact + op1];
|
||||
op2 = shared[x+2];
|
||||
if(op2 < 0)
|
||||
op2 *= -1;
|
||||
else
|
||||
op2 = dop1[rowact + op2];
|
||||
switch(shared[x])
|
||||
{
|
||||
case SBG_EQ: if(op1 == op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_GT: if(op1 > op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_LT: if(op1 < op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_GE: if(op1 >= op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_LE: if(op1 <= op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case SBG_DF: if(op1 != op2)
|
||||
{
|
||||
res[id] = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/*Unmark all rows that do not comply with the comparison predicates using disjunctions (i.e. a row is unmarked only if it complies with none of the predicates)*/
|
||||
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
if(res[id] == 0)
|
||||
return;
|
||||
rowact = id * of1;
|
||||
for(x = 0; x < numc; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
if(op1 < 0)
|
||||
op1 *= -1;
|
||||
else
|
||||
op1 = dop1[rowact + op1];
|
||||
op2 = shared[x+2];
|
||||
if(op2 < 0)
|
||||
op2 *= -1;
|
||||
else
|
||||
op2 = dop1[rowact + op2];
|
||||
switch(shared[x])
|
||||
{
|
||||
case SBG_EQ: if(op1 == op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_GT: if(op1 > op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_LT: if(op1 < op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_GE: if(op1 >= op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_LE: if(op1 <= op2)
|
||||
return;
|
||||
break;
|
||||
case SBG_DF: if(op1 != op2)
|
||||
return;
|
||||
}
|
||||
}
|
||||
res[id] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
228
packages/cuda/cuda.c
Normal file → Executable file
228
packages/cuda/cuda.c
Normal file → Executable file
@ -6,19 +6,25 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
#include "pred.h"
|
||||
|
||||
#define MAXARG 100
|
||||
|
||||
YAP_Atom AtomEq,
|
||||
AtomGt,
|
||||
AtomLt,
|
||||
AtomGe,
|
||||
AtomLe,
|
||||
AtomDf;
|
||||
AtomDf,
|
||||
AtomNt;
|
||||
|
||||
predicate *facts[100]; /*Temporary solution to maintain facts and rules*/
|
||||
predicate *rules[100];
|
||||
predicate *facts[MAXARG]; /*Temporary solution to maintain facts and rules*/
|
||||
predicate *rules[MAXARG];
|
||||
int32_t cf = 0, cr = 0;
|
||||
|
||||
char names[1024];
|
||||
|
||||
// initialize CUDA system
|
||||
void Cuda_Initialize( void );
|
||||
|
||||
@ -39,6 +45,19 @@ void init_cuda( void );
|
||||
|
||||
//#define DEBUG_INTERFACE 1
|
||||
|
||||
#ifdef ROCKIT
|
||||
static int32_t query[100];
|
||||
static int32_t qcont = 0;
|
||||
static int cuda_init_query(void)
|
||||
{
|
||||
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG1));
|
||||
query[qcont] = pname;
|
||||
qcont++;
|
||||
query[qcont] = 0;
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if DEBUG_INTERFACE
|
||||
static void
|
||||
dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
|
||||
@ -83,8 +102,18 @@ int32_t Cuda_NewFacts(predicate *pe)
|
||||
#if DEBUG_INTERFACE
|
||||
dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
|
||||
#endif
|
||||
|
||||
#ifdef ROCKIT
|
||||
if(cf >= 0)
|
||||
{
|
||||
facts[cf] = pe;
|
||||
cf++;
|
||||
}
|
||||
#else
|
||||
facts[cf] = pe;
|
||||
cf++;
|
||||
#endif
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@ -115,7 +144,7 @@ int32_t Cuda_Erase(predicate *pe)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static YAP_Bool
|
||||
static int
|
||||
load_facts( void ) {
|
||||
|
||||
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||
@ -164,15 +193,18 @@ load_facts( void ) {
|
||||
static int currentFact = 0;
|
||||
static predicate *currentPred = NULL;
|
||||
|
||||
static YAP_Bool
|
||||
static int
|
||||
cuda_init_facts( void ) {
|
||||
|
||||
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
|
||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
|
||||
int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
|
||||
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
|
||||
predicate *pred;
|
||||
|
||||
strcat(names, YAP_AtomName(YAP_AtomOfTerm(YAP_ARG3)));
|
||||
strcat(names, " ");
|
||||
|
||||
if (!mat)
|
||||
return FALSE;
|
||||
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||
@ -198,14 +230,16 @@ cuda_init_facts( void ) {
|
||||
}
|
||||
}
|
||||
|
||||
static YAP_Bool
|
||||
static int
|
||||
cuda_load_fact( void ) {
|
||||
YAP_Term th = YAP_ARG1;
|
||||
|
||||
int i, j;
|
||||
int i = currentFact;
|
||||
|
||||
#if defined(DATALOG) || defined(TUFFY)
|
||||
YAP_Term th = YAP_ARG1;
|
||||
int ncols = currentPred->num_columns;
|
||||
int j;
|
||||
int *mat = currentPred->address_host_table;
|
||||
i = currentFact;
|
||||
for (j = 0; j < ncols; j++) {
|
||||
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||
if (YAP_IsAtomTerm(ta)) {
|
||||
@ -214,6 +248,8 @@ cuda_load_fact( void ) {
|
||||
mat[i*ncols+j] = YAP_IntOfTerm(ta);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
i++;
|
||||
if (i == currentPred->num_rows) {
|
||||
Cuda_NewFacts(currentPred);
|
||||
@ -225,21 +261,26 @@ cuda_load_fact( void ) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static YAP_Bool
|
||||
static int
|
||||
load_rule( void ) {
|
||||
// maximum of 2K symbols per rule, should be enough for ILP
|
||||
int32_t vec[2048], *ptr = vec, *nvec;
|
||||
int32_t vec[2048], *ptr = vec, *nvec, neg[2048];
|
||||
// qK different variables;
|
||||
YAP_Term vars[1024];
|
||||
int32_t nvars = 0;
|
||||
int32_t nvars = 0, x;
|
||||
int32_t ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */
|
||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
|
||||
YAP_Term t3 = YAP_ARG3;
|
||||
int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
|
||||
YAP_Atom name = YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3)));
|
||||
int32_t pname = YAP_AtomToInt(name);
|
||||
|
||||
const char *strname = YAP_AtomName(name);
|
||||
predicate *pred;
|
||||
int32_t cont = 0;
|
||||
memset(neg, 0x0, 2048 * sizeof(int32_t));
|
||||
|
||||
while(YAP_IsPairTerm(t3)) {
|
||||
int32_t j = 0;
|
||||
int32_t j = 0, m;
|
||||
YAP_Term th = YAP_HeadOfTerm(t3);
|
||||
YAP_Functor f = YAP_FunctorOfTerm( th );
|
||||
int32_t n = YAP_ArityOfFunctor( f );
|
||||
@ -257,8 +298,17 @@ load_rule( void ) {
|
||||
*ptr++ = SBG_LE;
|
||||
else if (at == AtomDf)
|
||||
*ptr++ = SBG_DF;
|
||||
else if (at == AtomNt)
|
||||
{
|
||||
neg[cont] = 1;
|
||||
cont++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*ptr++ = YAP_AtomToInt( at );
|
||||
cont++;
|
||||
}
|
||||
|
||||
for (j = 0; j < n; j++) {
|
||||
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||
|
||||
@ -277,6 +327,34 @@ load_rule( void ) {
|
||||
}
|
||||
} else if (YAP_IsAtomTerm(ta)) {
|
||||
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||
} else if (YAP_IsApplTerm(ta)) {
|
||||
f = YAP_FunctorOfTerm( ta );
|
||||
at = YAP_NameOfFunctor( f );
|
||||
m = YAP_ArityOfFunctor( f );
|
||||
*ptr++ = YAP_AtomToInt( at );
|
||||
|
||||
for (x = 0; x < m; x++) {
|
||||
YAP_Term ta2 = YAP_ArgOfTerm(x+1, ta);
|
||||
|
||||
if (YAP_IsVarTerm(ta2)) {
|
||||
int32_t k;
|
||||
for (k = 0; k < nvars; k++) {
|
||||
if (vars[k] == ta2) {
|
||||
*ptr++ = k+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (k == nvars) {
|
||||
vars[k] = ta2;
|
||||
*ptr++ = k+1;
|
||||
nvars++;
|
||||
}
|
||||
} else if (YAP_IsAtomTerm(ta2)) {
|
||||
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||
} else {
|
||||
*ptr++ = -YAP_IntOfTerm(ta);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
*ptr++ = -YAP_IntOfTerm(ta);
|
||||
}
|
||||
@ -296,53 +374,136 @@ load_rule( void ) {
|
||||
pred->num_rows = ngoals;
|
||||
pred->num_columns = ncols;
|
||||
pred->is_fact = FALSE;
|
||||
x = (strlen(strname) + 1) * sizeof(char);
|
||||
pred->predname = (char *)malloc(x);
|
||||
memcpy(pred->predname, strname, x);
|
||||
nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
|
||||
memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
|
||||
pred->address_host_table = nvec;
|
||||
pred->negatives = (int32_t *)malloc(sizeof(int32_t) * cont);
|
||||
memcpy(pred->negatives, neg, sizeof(int32_t) * cont);
|
||||
Cuda_NewRule( pred );
|
||||
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
|
||||
}
|
||||
|
||||
static YAP_Bool
|
||||
static int
|
||||
cuda_erase( void )
|
||||
{
|
||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||
return Cuda_Erase( ptr );
|
||||
}
|
||||
|
||||
static YAP_Bool
|
||||
void setQuery(YAP_Term t1, int32_t **res)
|
||||
{
|
||||
int32_t *query = (int32_t *)malloc(MAXARG * sizeof(int32_t));
|
||||
int32_t x, y = 0, *itr;
|
||||
predicate *ptr = NULL;
|
||||
if(YAP_IsPairTerm(t1))
|
||||
{
|
||||
while(YAP_IsPairTerm(t1))
|
||||
{
|
||||
ptr = (predicate *)YAP_IntOfTerm(YAP_HeadOfTerm(t1));
|
||||
query[y] = ptr->name;
|
||||
itr = ptr->address_host_table;
|
||||
x = 2;
|
||||
while(itr[x] != 0)
|
||||
x++;
|
||||
query[y+1] = itr[x+1];
|
||||
t1 = YAP_TailOfTerm(t1);
|
||||
y+=2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = (predicate *)YAP_IntOfTerm(t1);
|
||||
query[y] = ptr->name;
|
||||
itr = ptr->address_host_table;
|
||||
x = 2;
|
||||
while(itr[x] != 0)
|
||||
x++;
|
||||
query[y+1] = itr[x+1];
|
||||
y += 2;
|
||||
}
|
||||
query[y] = -1;
|
||||
query[y+1] = -1;
|
||||
*res = query;
|
||||
}
|
||||
|
||||
static int
|
||||
cuda_eval( void )
|
||||
{
|
||||
int32_t *mat;
|
||||
|
||||
#if defined(DATALOG) || defined(TUFFY)
|
||||
int32_t *query = NULL;
|
||||
setQuery(YAP_ARG1, &query);
|
||||
#endif
|
||||
|
||||
int32_t finalDR = YAP_IntOfTerm(YAP_ARG3);
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, names, finalDR);
|
||||
|
||||
#ifdef TUFFY
|
||||
cf = 0;
|
||||
#endif
|
||||
#ifdef ROCKIT
|
||||
if(cf > 0)
|
||||
cf *= -1;
|
||||
#endif
|
||||
#if defined(TUFFY) || defined(ROCKIT)
|
||||
cr = 0;
|
||||
names[0] = '\0';
|
||||
return FALSE;
|
||||
#else
|
||||
int32_t i;
|
||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
||||
int32_t ncols = ptr->num_columns;
|
||||
YAP_Term out = YAP_TermNil();
|
||||
YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
|
||||
YAP_Term vec[256];
|
||||
int32_t i;
|
||||
|
||||
YAP_Atom at;
|
||||
|
||||
if (n < 0)
|
||||
return FALSE;
|
||||
for (i=0; i<n; i++) {
|
||||
int32_t ni = ((n-1)-i)*ncols, j;
|
||||
|
||||
printf("%s(", YAP_AtomName(YAP_IntToAtom(ptr->name)));
|
||||
|
||||
for (j=0; j<ncols; j++) {
|
||||
vec[j] = YAP_MkIntTerm(mat[ni+j]);
|
||||
|
||||
at = YAP_IntToAtom(mat[ni+j]);
|
||||
if(at != NULL)
|
||||
printf("%s", YAP_AtomName(at));
|
||||
else
|
||||
printf("%d", mat[ni+j]);
|
||||
if(j < (ncols - 1))
|
||||
printf(",");
|
||||
|
||||
}
|
||||
out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);
|
||||
|
||||
printf(")\n");
|
||||
|
||||
}
|
||||
if (n > 0)
|
||||
free( mat );
|
||||
return YAP_Unify(YAP_ARG2, out);
|
||||
#endif
|
||||
}
|
||||
|
||||
static YAP_Bool
|
||||
static int
|
||||
cuda_coverage( void )
|
||||
{
|
||||
int32_t *mat;
|
||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
||||
int32_t ncols = ptr->num_columns;
|
||||
|
||||
#if defined(DATALOG) || defined(TUFFY)
|
||||
int32_t *query = NULL;
|
||||
setQuery(YAP_ARG1, &query);
|
||||
#endif
|
||||
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
|
||||
int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
|
||||
int32_t i = n/2, min = 0, max = n-1;
|
||||
int32_t t0, t1;
|
||||
@ -384,11 +545,16 @@ cuda_coverage( void )
|
||||
} while ( TRUE );
|
||||
}
|
||||
|
||||
static YAP_Bool cuda_count( void )
|
||||
static int cuda_count( void )
|
||||
{
|
||||
int32_t *mat;
|
||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
||||
|
||||
#if defined(DATALOG) || defined(TUFFY)
|
||||
int32_t *query = NULL;
|
||||
setQuery(YAP_ARG1, &query);
|
||||
#endif
|
||||
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
|
||||
|
||||
if (n < 0)
|
||||
return FALSE;
|
||||
@ -396,7 +562,7 @@ static YAP_Bool cuda_count( void )
|
||||
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
|
||||
}
|
||||
|
||||
static YAP_Bool cuda_statistics( void )
|
||||
static int cuda_statistics( void )
|
||||
{
|
||||
Cuda_Statistics();
|
||||
return TRUE;
|
||||
@ -416,14 +582,20 @@ init_cuda(void)
|
||||
AtomGe = YAP_LookupAtom(">=");
|
||||
AtomLe = YAP_LookupAtom("=<");
|
||||
AtomDf = YAP_LookupAtom("\\=");
|
||||
AtomNt = YAP_LookupAtom("not");
|
||||
YAP_UserCPredicate("load_facts", load_facts, 4);
|
||||
YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
|
||||
YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
|
||||
YAP_UserCPredicate("load_rule", load_rule, 4);
|
||||
YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
|
||||
YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
|
||||
YAP_UserCPredicate("cuda_eval", cuda_eval, 3);
|
||||
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
|
||||
YAP_UserCPredicate("cuda_count", cuda_count, 2);
|
||||
YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
|
||||
|
||||
#ifdef ROCKIT
|
||||
YAP_UserCPredicate("cuda_init_query", cuda_init_query, 1);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
9
packages/cuda/cuda.yap
Normal file → Executable file
9
packages/cuda/cuda.yap
Normal file → Executable file
@ -2,10 +2,11 @@
|
||||
cuda_inline/2,
|
||||
cuda_rule/2,
|
||||
cuda_erase/1,
|
||||
cuda_eval/2,
|
||||
cuda_eval/3,
|
||||
cuda_coverage/4,
|
||||
cuda_statistics/0,
|
||||
cuda_count/2]).
|
||||
cuda_count/2,
|
||||
cuda_query/1]).
|
||||
|
||||
tell_warning :-
|
||||
print_message(warning,functionality(cuda)).
|
||||
@ -40,7 +41,7 @@ count_answers(G, N) :-
|
||||
|
||||
cuda_rule((Head :- Body) , IdRules) :-
|
||||
body_to_list( Body, L, [], 1, N),
|
||||
functor(Head, _Na, Ar),
|
||||
functor(Head, Na, Ar),
|
||||
load_rule( N, Ar, [Head|L], IdRules ).
|
||||
|
||||
|
||||
@ -54,3 +55,5 @@ body_to_list( B, NL, L, N0, N) :-
|
||||
body_to_list( B, [B|L], L, N0, N) :-
|
||||
N is N0+1.
|
||||
|
||||
cuda_query(Call) :-
|
||||
cuda_init_query(Call).
|
||||
|
965
packages/cuda/lista.cu
Normal file → Executable file
965
packages/cuda/lista.cu
Normal file → Executable file
File diff suppressed because it is too large
Load Diff
7
packages/cuda/lista.h
Normal file → Executable file
7
packages/cuda/lista.h
Normal file → Executable file
@ -25,8 +25,11 @@ typedef struct auxiliar{
|
||||
int *numselfj;
|
||||
int **wherejoin;
|
||||
int *numjoin;
|
||||
int3 num_bpreds;
|
||||
int *builtin;
|
||||
int totalpreds;
|
||||
int **preds;
|
||||
int2 *numpreds;
|
||||
int *negatives;
|
||||
char *rulename;
|
||||
int gen_act;
|
||||
int gen_ant;
|
||||
}rulenode;
|
||||
|
493
packages/cuda/memory.cu
Normal file → Executable file
493
packages/cuda/memory.cu
Normal file → Executable file
@ -5,63 +5,101 @@
|
||||
#include <thrust/device_vector.h>
|
||||
#include "lista.h"
|
||||
#include "memory.h"
|
||||
#include "pred.h"
|
||||
|
||||
#define MAX_REC 200
|
||||
#define HALF_REC (MAX_REC / 2)
|
||||
#define MAX_FIX_POINTS 100
|
||||
|
||||
unsigned int avmem;
|
||||
memnode temp_storage[MAX_REC];
|
||||
/*List used to store information (address, size, etc.) about facts and rule results loaded in the GPU*/
|
||||
list<memnode> GPUmem;
|
||||
/*List used to store information about rule results offloaded from the GPU to the CPU*/
|
||||
list<memnode> CPUmem;
|
||||
|
||||
/*Auxiliary function to sort rule list*/
|
||||
bool comparer(const rulenode &r1, const rulenode &r2)
|
||||
{
|
||||
return (r1.name > r2.name);
|
||||
}
|
||||
|
||||
/*Used in search functions to compare iterations*/
|
||||
bool compareiteration(const memnode &r1, const memnode &r2)
|
||||
{
|
||||
return (r1.iteration < r2.iteration);
|
||||
}
|
||||
|
||||
/*Used in search functions to compare names*/
|
||||
bool comparename(const memnode &r1, const memnode &r2)
|
||||
{
|
||||
return (r1.name > r2.name);
|
||||
}
|
||||
|
||||
void calcular_mem(int dev)
|
||||
{
|
||||
cudaDeviceProp p;
|
||||
cudaGetDeviceProperties(&p, dev);
|
||||
avmem = p.totalGlobalMem;
|
||||
temp_storage[0].dev_address = NULL;
|
||||
temp_storage[0].size = 0;
|
||||
temp_storage[HALF_REC].dev_address = NULL;
|
||||
temp_storage[HALF_REC].size = 0;
|
||||
|
||||
//cout << "Initial memory available " << avmem << endl;
|
||||
}
|
||||
|
||||
/*Linear search of 'name' fact*/
|
||||
template<class InputIterator>
|
||||
InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
|
||||
{
|
||||
while(first!=last)
|
||||
{
|
||||
if(first->name == name) return first;
|
||||
if(first->name == name && first->isrule == 0) return first;
|
||||
++first;
|
||||
}
|
||||
return last;
|
||||
}
|
||||
|
||||
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum)
|
||||
/*Finds all results of rule 'name' in iteration 'itr' in both CPU and GPU memory. Every result found is removed from its respective list*/
|
||||
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
|
||||
{
|
||||
int x = 1, sum = 0;
|
||||
int x = 0, sum = 0;
|
||||
memnode temp;
|
||||
|
||||
temp.name = name;
|
||||
list<memnode>::iterator i;
|
||||
temp.iteration = itr;
|
||||
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
|
||||
|
||||
while(rec.first != rec.second)
|
||||
{
|
||||
if(rec.first->name == name && rec.first->isrule == 1)
|
||||
{
|
||||
temp_storage[x] = *rec.first;
|
||||
rec.first = GPUmem.erase(rec.first);
|
||||
sum += temp_storage[x].rows;
|
||||
x++;
|
||||
}
|
||||
else
|
||||
rec.first++;
|
||||
}
|
||||
*gpunum = x;
|
||||
temp.name = name;
|
||||
temp.isrule = 1;
|
||||
i = GPUmem.insert(rec.first, temp);
|
||||
rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||
|
||||
//cout << "itr = " << itr << " rec.first = " << rec.first->name << endl;
|
||||
while(rec.first != rec.second)
|
||||
{
|
||||
if(rec.first->name == name && rec.first->isrule == 1)
|
||||
{
|
||||
temp_storage[x] = *rec.first;
|
||||
rec.first = CPUmem.erase(rec.first);
|
||||
sum += temp_storage[x].rows;
|
||||
x++;
|
||||
}
|
||||
else
|
||||
rec.first++;
|
||||
}
|
||||
*totalrows = sum;
|
||||
*cpunum = x;
|
||||
return i;
|
||||
}
|
||||
|
||||
list<memnode>::iterator buscarpornombrecpu(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
|
||||
{
|
||||
int x = 0, sum = 0;
|
||||
memnode temp;
|
||||
list<memnode>::iterator i;
|
||||
temp.iteration = itr;
|
||||
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
|
||||
|
||||
while(rec.first != rec.second)
|
||||
{
|
||||
if(rec.first->name == name)
|
||||
{
|
||||
temp_storage[x] = *rec.first;
|
||||
@ -72,22 +110,11 @@ list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *
|
||||
else
|
||||
rec.first++;
|
||||
}
|
||||
//if(x > 1)
|
||||
rec.first = GPUmem.insert(rec.first, temp);
|
||||
*totalrows = sum;
|
||||
|
||||
*gpunum = x;
|
||||
return rec.first;
|
||||
}
|
||||
|
||||
int buscarpornombrecpu(int name, int itr, int *totalrows)
|
||||
{
|
||||
int x = HALF_REC + 1, sum = 0;
|
||||
memnode temp;
|
||||
temp.iteration = itr;
|
||||
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||
|
||||
/*if(rec.first != rec.second)
|
||||
cout << "bscnomcpu = " << rec.first->name << " " << rec.first->iteration << endl;*/
|
||||
temp.name = name;
|
||||
temp.isrule = 1;
|
||||
rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||
|
||||
while(rec.first != rec.second)
|
||||
{
|
||||
@ -101,18 +128,24 @@ int buscarpornombrecpu(int name, int itr, int *totalrows)
|
||||
else
|
||||
rec.first++;
|
||||
}
|
||||
*totalrows += sum;
|
||||
return x;
|
||||
i = CPUmem.insert(rec.first, temp);
|
||||
*totalrows = sum;
|
||||
*cpunum = x;
|
||||
return i;
|
||||
}
|
||||
|
||||
/*Removes the least recently used memory block from GPU memory, sending it to CPU memory if it's a rule result.
|
||||
If there are no used memory blocks in the GPU and we still don't have enough memory, the program exits with error*/
|
||||
void limpiar(const char s[], size_t sz)
|
||||
{
|
||||
list<memnode>::iterator ini;
|
||||
memnode temp;
|
||||
size_t free, total;
|
||||
|
||||
if(GPUmem.size() == 0)
|
||||
{
|
||||
cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl;
|
||||
cudaMemGetInfo(&free,&total);
|
||||
cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -122,80 +155,32 @@ void limpiar(const char s[], size_t sz)
|
||||
temp = *ini;
|
||||
temp.dev_address = (int *)malloc(ini->size);
|
||||
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
||||
CPUmem.push_back(temp);
|
||||
list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||
CPUmem.insert(pos, temp);
|
||||
}
|
||||
liberar(ini->dev_address, ini->size);
|
||||
cudaFree(ini->dev_address);
|
||||
GPUmem.erase(ini);
|
||||
}
|
||||
|
||||
void limpiartodo(int *p1, int *p2)
|
||||
/*Allocs 'size' amount of bytes in GPU memory. If not enough memory is available, removes least recently used memory blocks until
|
||||
enough space is available*/
|
||||
void reservar(int **ptr, size_t size)
|
||||
{
|
||||
list<memnode>::iterator ini;
|
||||
memnode temp;
|
||||
int cont = 0;
|
||||
if(p1 != NULL)
|
||||
cont++;
|
||||
if(p2 != NULL)
|
||||
cont++;
|
||||
ini = GPUmem.begin();
|
||||
|
||||
/*cout << "ANTES" << endl;
|
||||
mostrar_memoria();
|
||||
mostrar_memcpu();
|
||||
cout << "FIN ANTES" << endl;*/
|
||||
//cout << "mem = " << GPUmem.size() << " " << avmem << endl;
|
||||
|
||||
while(GPUmem.size() > cont)
|
||||
{
|
||||
if(ini->dev_address == p1 || ini->dev_address == p2)
|
||||
{
|
||||
ini++;
|
||||
continue;
|
||||
}
|
||||
if(ini->isrule)
|
||||
{
|
||||
temp = *ini;
|
||||
temp.dev_address = (int *)malloc(ini->size);
|
||||
cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
||||
CPUmem.push_back(temp);
|
||||
}
|
||||
liberar(ini->dev_address, temp.size);
|
||||
ini = GPUmem.erase(ini);
|
||||
}
|
||||
|
||||
/*cout << "DESPUES" << endl;
|
||||
mostrar_memoria();
|
||||
mostrar_memcpu();
|
||||
cout << "FIN DESPUES" << endl;*/
|
||||
//cout << "memfinal = " << GPUmem.size() << " " << avmem << endl;
|
||||
|
||||
}
|
||||
|
||||
void liberar(int *ptr, int size)
|
||||
{
|
||||
//cout << "L " << avmem << " " << size;
|
||||
|
||||
cudaFree(ptr);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "- " << ptr << " " << size << endl;
|
||||
#endif
|
||||
avmem += size;
|
||||
|
||||
//cout << " " << avmem << endl;
|
||||
}
|
||||
|
||||
void reservar(int **ptr, int size)
|
||||
{
|
||||
//size_t free, total;
|
||||
//cudaMemGetInfo( &free, &total );
|
||||
// cerr << "? " << free << " " << size << endl;
|
||||
size_t free, total;
|
||||
|
||||
if (size == 0) {
|
||||
*ptr = NULL;
|
||||
return;
|
||||
}
|
||||
while(avmem < size)
|
||||
|
||||
cudaMemGetInfo(&free, &total);
|
||||
while(free < size)
|
||||
{
|
||||
cout << "Se limpio memoria " << free << " " << total << endl;
|
||||
limpiar("not enough memory", size);
|
||||
cudaMemGetInfo(&free, &total);
|
||||
}
|
||||
|
||||
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
||||
limpiar("Error in memory allocation", size);
|
||||
if (! *ptr ) {
|
||||
@ -205,11 +190,9 @@ void reservar(int **ptr, int size)
|
||||
cerr << "Exiting CUDA...." << endl;
|
||||
exit(1);
|
||||
}
|
||||
avmem -= size;
|
||||
|
||||
// cout << " " << avmem << endl;
|
||||
}
|
||||
|
||||
/*Creates a new entry in the GPU memory list*/
|
||||
void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
|
||||
{
|
||||
memnode temp;
|
||||
@ -222,6 +205,19 @@ void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
|
||||
GPUmem.push_back(temp);
|
||||
}
|
||||
|
||||
void registrarcpu(int name, int num_columns, int *ptr, int rows, int itr, int rule)
|
||||
{
|
||||
memnode temp;
|
||||
temp.name = name;
|
||||
temp.dev_address = ptr;
|
||||
temp.rows = rows;
|
||||
temp.size = rows * num_columns * sizeof(int);
|
||||
temp.iteration = itr;
|
||||
temp.isrule = rule;
|
||||
CPUmem.push_back(temp);
|
||||
}
|
||||
|
||||
/*Updates the information of an element in a list*/
|
||||
template<class InputIterator>
|
||||
void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
|
||||
{
|
||||
@ -230,6 +226,7 @@ void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
|
||||
i->size = rows * num_columns * sizeof(int);
|
||||
}
|
||||
|
||||
/*Count the total number of rows generated by rule 'name' in iteration 'iter'*/
|
||||
int numrows(int name, int itr)
|
||||
{
|
||||
int sum = 0;
|
||||
@ -252,16 +249,17 @@ int numrows(int name, int itr)
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
extern "C" void * YAP_IntToAtom(int);
|
||||
extern "C" char * YAP_AtomName(void *);
|
||||
|
||||
|
||||
/*Loads facts or rule results in GPU memory. If a fact is already in GPU memory, its pointer is simply returned. Otherwise,
|
||||
memory is reserved and the fact is loaded. Rule results are loaded based on the current iteration 'itr' and both GPU and
|
||||
CPU memories are searched for all instances of said results. The instances are combined into a single one in GPU memory.*/
|
||||
int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
|
||||
{
|
||||
int numgpu, numcpu, totalrows = 0;
|
||||
int *temp, x;
|
||||
int size, itrant;
|
||||
int size, itrant, inc = 0;
|
||||
list<memnode>::iterator i;
|
||||
memnode fact;
|
||||
|
||||
@ -279,9 +277,6 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
||||
}
|
||||
size = num_rows * num_columns * sizeof(int);
|
||||
reservar(&temp, size);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << temp << " temp " << size << endl;
|
||||
#endif
|
||||
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
|
||||
registrar(name, num_columns, temp, num_rows, itr, 0);
|
||||
*ptr = temp;
|
||||
@ -290,28 +285,25 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
||||
if(itr > 0)
|
||||
{
|
||||
itrant = itr - 1;
|
||||
i = buscarpornombre(name, itrant, &totalrows, &numgpu);
|
||||
numcpu = buscarpornombrecpu(name, itrant, &totalrows);
|
||||
|
||||
if((numgpu == 2) && (numcpu == (HALF_REC + 1)))
|
||||
i = buscarpornombre(name, itrant, &totalrows, &numgpu, &numcpu);
|
||||
if((numgpu == 1) && (numcpu == 1))
|
||||
{
|
||||
actualizar(num_columns, temp_storage[1].dev_address, temp_storage[1].rows, i);
|
||||
*ptr = temp_storage[1].dev_address;
|
||||
return temp_storage[1].rows;
|
||||
actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
|
||||
*ptr = temp_storage[0].dev_address;
|
||||
return temp_storage[0].rows;
|
||||
}
|
||||
size = totalrows * num_columns * sizeof(int);
|
||||
reservar(&temp, size);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << temp << " temp 2 " << size << endl;
|
||||
#endif
|
||||
for(x = 1; x < numgpu; x++)
|
||||
for(x = 0; x < numgpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
|
||||
liberar(temp_storage[x].dev_address, temp_storage[x].size);
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
cudaFree(temp_storage[x].dev_address);
|
||||
}
|
||||
for(x = HALF_REC + 1; x < numcpu; x++)
|
||||
for(; x < numcpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
free(temp_storage[x].dev_address);
|
||||
}
|
||||
actualizar(num_columns, temp, totalrows, i);
|
||||
@ -321,9 +313,54 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
|
||||
{
|
||||
int numgpu, numcpu, totalrows = 0;
|
||||
int *temp, x;
|
||||
int size, itrant, inc = 0;
|
||||
list<memnode>::iterator i;
|
||||
|
||||
if(is_fact)
|
||||
{
|
||||
*ptr = address_host_table;
|
||||
return num_rows;
|
||||
}
|
||||
if(itr > 0)
|
||||
{
|
||||
itrant = itr - 1;
|
||||
i = buscarpornombrecpu(name, itrant, &totalrows, &numgpu, &numcpu);
|
||||
|
||||
if((numgpu == 0) && (numcpu == 1))
|
||||
{
|
||||
actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
|
||||
*ptr = temp_storage[0].dev_address;
|
||||
return temp_storage[0].rows;
|
||||
}
|
||||
size = totalrows * num_columns * sizeof(int);
|
||||
temp = (int *)malloc(size);
|
||||
for(x = 0; x < numgpu; x++)
|
||||
{
|
||||
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
cudaFree(temp_storage[x].dev_address);
|
||||
}
|
||||
for(; x < numcpu; x++)
|
||||
{
|
||||
memcpy(temp + inc, temp_storage[x].dev_address, temp_storage[x].size);
|
||||
inc += temp_storage[x].size / sizeof(int);
|
||||
free(temp_storage[x].dev_address);
|
||||
}
|
||||
actualizar(num_columns, temp, totalrows, i);
|
||||
*ptr = temp;
|
||||
return totalrows;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*Loads all results of rule 'name' from both GPU and CPU memories into the GPU*/
|
||||
int cargafinal(int name, int cols, int **ptr)
|
||||
{
|
||||
int *temp, *ini, cont = 0;
|
||||
int *temp, *ini, cont = 0, numg = 0, numc = 0;
|
||||
memnode bus;
|
||||
bus.name = name;
|
||||
GPUmem.sort(comparename);
|
||||
@ -335,6 +372,7 @@ int cargafinal(int name, int cols, int **ptr)
|
||||
while(pos != endg && pos->name == name)
|
||||
{
|
||||
cont += pos->rows;
|
||||
numg++;
|
||||
pos++;
|
||||
}
|
||||
pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
|
||||
@ -342,15 +380,41 @@ int cargafinal(int name, int cols, int **ptr)
|
||||
while(pos != endc && pos->name == name)
|
||||
{
|
||||
cont += pos->rows;
|
||||
numc++;
|
||||
pos++;
|
||||
}
|
||||
|
||||
reservar(&temp, cont * cols * sizeof(int));
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;
|
||||
#endif
|
||||
ini = temp;
|
||||
if(numg == 0 && numc == 0)
|
||||
return 0;
|
||||
if(numg == 1 && numc == 0)
|
||||
{
|
||||
pos = gpu;
|
||||
*ptr = pos->dev_address;
|
||||
cont = pos->rows;
|
||||
GPUmem.erase(pos);
|
||||
#ifdef TUFFY
|
||||
return -cont;
|
||||
#else
|
||||
return cont;
|
||||
#endif
|
||||
}
|
||||
if(numg == 0 && numc == 1)
|
||||
{
|
||||
pos = cpu;
|
||||
cont = pos->rows;
|
||||
#ifdef TUFFY
|
||||
reservar(&temp, pos->size);
|
||||
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
|
||||
*ptr = temp;
|
||||
#else
|
||||
*ptr = pos->dev_address;
|
||||
#endif
|
||||
CPUmem.erase(pos);
|
||||
return -cont;
|
||||
}
|
||||
|
||||
reservar(&temp, cont * cols * sizeof(int));
|
||||
ini = temp;
|
||||
pos = gpu;
|
||||
while(pos != endg && pos->name == name)
|
||||
{
|
||||
@ -365,23 +429,13 @@ int cargafinal(int name, int cols, int **ptr)
|
||||
temp += pos->size / sizeof(int);
|
||||
pos++;
|
||||
}
|
||||
|
||||
/*int x, y;
|
||||
int *hop1 = (int *)malloc(cont * cols * sizeof(int));
|
||||
cudaMemcpy(hop1, ini, cont * cols * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
cout << "select finala" << endl;
|
||||
for(x = 0; x < cont; x++)
|
||||
{
|
||||
for(y = 0; y < cols; y++)
|
||||
cout << hop1[x * cols + y] << " ";
|
||||
cout << endl;
|
||||
}
|
||||
cout << "select finala" << endl;*/
|
||||
|
||||
*ptr = ini;
|
||||
return cont;
|
||||
}
|
||||
|
||||
/*Compares the results of the current iteration against the results of older iterations.
|
||||
Used to avoid infinite computations when the result is not a single fixed-point, but an
|
||||
orbit of points.*/
|
||||
bool generadas(int name, int filas, int cols, int itr)
|
||||
{
|
||||
int r1, r2, x, fin;
|
||||
@ -401,46 +455,26 @@ bool generadas(int name, int filas, int cols, int itr)
|
||||
thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
|
||||
r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
|
||||
thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
|
||||
|
||||
/*int y;
|
||||
int *a = (int *)malloc(r1 * cols * sizeof(int));
|
||||
cudaMemcpy(a, dop1, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
for(x = 0; x < r1; x++)
|
||||
{
|
||||
for(y = 0; y < cols; y++)
|
||||
cout << a[x * cols + y] << " ";
|
||||
}
|
||||
cout << endl;
|
||||
cudaMemcpy(a, dop2, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
for(x = 0; x < r1; x++)
|
||||
{
|
||||
for(y = 0; y < cols; y++)
|
||||
cout << a[x * cols + y] << " ";
|
||||
}
|
||||
cout << endl;
|
||||
free(a);*/
|
||||
|
||||
if(thrust::equal(pt1, pt1 + r1, pt2) == true)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void mostrar_memoria()
|
||||
{
|
||||
int x;
|
||||
unsigned int x;
|
||||
list<memnode>::iterator i = GPUmem.begin();
|
||||
cout << "Memoria inicio GPU" << endl;
|
||||
for(x = 0; x < GPUmem.size(); x++, i++)
|
||||
cout << i->name << " " << i->iteration << " " << i->size << endl;
|
||||
cout << i->name << " " << i->iteration << " " << i->isrule << " " << i->rows << " " << i->size << endl;
|
||||
cout << "Memoria fin GPU" << endl;
|
||||
}
|
||||
|
||||
void mostrar_memcpu()
|
||||
{
|
||||
int x;
|
||||
unsigned int x;
|
||||
list<memnode>::iterator i = CPUmem.begin();
|
||||
cout << "Memoria inicio CPU" << endl;
|
||||
for(x = 0; x < CPUmem.size(); x++, i++)
|
||||
@ -448,53 +482,7 @@ void mostrar_memcpu()
|
||||
cout << "Memoria fin CPU" << endl;
|
||||
}
|
||||
|
||||
void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator last)
|
||||
{
|
||||
GPUmem.sort(comparename);
|
||||
CPUmem.sort(comparename);
|
||||
list<memnode>::iterator gpu = GPUmem.begin();
|
||||
list<memnode>::iterator cpu = CPUmem.begin();
|
||||
int x, y, of, cols;
|
||||
int *temp, cont = 0;
|
||||
while(first != last)
|
||||
{
|
||||
while(first->name == gpu->name)
|
||||
{
|
||||
temp = (int *)malloc(gpu->size);
|
||||
cudaMemcpy(temp, gpu->dev_address, gpu->size, cudaMemcpyDeviceToHost);
|
||||
cols = gpu->size / (gpu->rows * sizeof(int));
|
||||
cont += gpu->rows;
|
||||
for(x = 0, of = 0; x < gpu->rows; x++)
|
||||
{
|
||||
for(y = 0; y < cols; y++, of++)
|
||||
cout << temp[of] << " ";
|
||||
cout << endl;
|
||||
}
|
||||
cudaFree(gpu->dev_address);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;
|
||||
#endif
|
||||
free(temp);
|
||||
gpu++;
|
||||
}
|
||||
while(first->name == cpu->name)
|
||||
{
|
||||
cols = cpu->size / (cpu->rows * sizeof(int));
|
||||
cont += cpu->rows;
|
||||
for(x = 0, of = 0; x < cpu->rows; x++)
|
||||
{
|
||||
for(y = 0; y < cols; y++, of++)
|
||||
cout << cpu->dev_address[of] << " ";
|
||||
cout << endl;
|
||||
}
|
||||
free(cpu->dev_address);
|
||||
cpu++;
|
||||
}
|
||||
first++;
|
||||
}
|
||||
cout << cont << endl;
|
||||
}
|
||||
|
||||
/*Clear all rule results from both GPU and CPU memory*/
|
||||
void clear_memory()
|
||||
{
|
||||
list<memnode>::iterator ini;
|
||||
@ -503,15 +491,13 @@ void clear_memory()
|
||||
fin = GPUmem.end();
|
||||
while(ini != fin)
|
||||
{
|
||||
if (ini->isrule) {
|
||||
if(ini->isrule)
|
||||
{
|
||||
cudaFree(ini->dev_address);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "- " << ini->dev_address << " ini->dev_address" << endl;
|
||||
#endif
|
||||
ini = GPUmem.erase(ini);
|
||||
} else {
|
||||
ini++;
|
||||
}
|
||||
else
|
||||
ini++;
|
||||
}
|
||||
ini = CPUmem.begin();
|
||||
fin = CPUmem.end();
|
||||
@ -522,3 +508,68 @@ void clear_memory()
|
||||
}
|
||||
CPUmem.clear();
|
||||
}
|
||||
|
||||
/*Clear everything from both GPU and CPU memory*/
|
||||
void clear_memory_all()
|
||||
{
|
||||
list<memnode>::iterator ini;
|
||||
list<memnode>::iterator fin;
|
||||
ini = GPUmem.begin();
|
||||
fin = GPUmem.end();
|
||||
while(ini != fin)
|
||||
{
|
||||
cudaFree(ini->dev_address);
|
||||
ini++;
|
||||
}
|
||||
GPUmem.clear();
|
||||
ini = CPUmem.begin();
|
||||
fin = CPUmem.end();
|
||||
while(ini != fin)
|
||||
{
|
||||
free(ini->dev_address);
|
||||
ini++;
|
||||
}
|
||||
CPUmem.clear();
|
||||
}
|
||||
|
||||
/*Remove all instances of fact 'name' from both CPU and GPU memories*/
|
||||
void liberar(int name)
|
||||
{
|
||||
list<memnode>::iterator i;
|
||||
memnode fact;
|
||||
i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
|
||||
if(i != GPUmem.end())
|
||||
{
|
||||
fact = *i;
|
||||
GPUmem.erase(i);
|
||||
cudaFree(fact.dev_address);
|
||||
}
|
||||
i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
|
||||
if(i != CPUmem.end())
|
||||
{
|
||||
fact = *i;
|
||||
CPUmem.erase(i);
|
||||
free(fact.dev_address);
|
||||
}
|
||||
}
|
||||
|
||||
/*Add all rows in 'dop1' to the fact 'name' by creating a new array capable of holding both.*/
|
||||
void sumar(int name, int *dop1, int cols, int rows)
|
||||
{
|
||||
list<memnode>::iterator i;
|
||||
memnode fact;
|
||||
i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
|
||||
int *res, newrows, offset;
|
||||
if(i != GPUmem.end())
|
||||
{
|
||||
fact = *i;
|
||||
newrows = rows + fact.rows;
|
||||
reservar(&res, newrows * cols * sizeof(int));
|
||||
offset = fact.rows * cols;
|
||||
cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
|
||||
GPUmem.erase(i);
|
||||
registrar(name, cols, res, newrows, 0, 0);
|
||||
cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
|
||||
cudaFree(fact.dev_address);
|
||||
}
|
||||
}
|
||||
|
13
packages/cuda/memory.h
Normal file → Executable file
13
packages/cuda/memory.h
Normal file → Executable file
@ -1,26 +1,27 @@
|
||||
#ifndef _MEMORY_H_
|
||||
#define _MEMORY_H_
|
||||
|
||||
//#include <thrust/device_vector.h>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include "lista.h"
|
||||
|
||||
using namespace std;
|
||||
//using namespace thrust;
|
||||
|
||||
void calcular_mem(int);
|
||||
void liberar(int*, int);
|
||||
bool comparer(const rulenode&, const rulenode&);
|
||||
void limpiar(const char [], size_t);
|
||||
void limpiartodo(int*, int*);
|
||||
int cargar(int, int, int, int, int*, int**, int);
|
||||
int cargarcpu(int, int, int, int, int*, int**, int);
|
||||
int cargafinal(int, int, int**);
|
||||
void reservar(int**, int);
|
||||
void reservar(int**, size_t);
|
||||
void registrar(int, int, int*, int, int, int);
|
||||
void registrarcpu(int, int, int*, int, int, int);
|
||||
bool generadas(int, int, int, int);
|
||||
void sumar(int, int*, int, int);
|
||||
void liberar(int);
|
||||
void mostrar_memoria(void);
|
||||
void mostrar_memcpu(void);
|
||||
void clear_memory(void);
|
||||
void resultados(vector<rulenode>::iterator, vector<rulenode>::iterator);
|
||||
void clear_memory_all(void);
|
||||
|
||||
#endif
|
||||
|
12
packages/cuda/pred.h
Normal file → Executable file
12
packages/cuda/pred.h
Normal file → Executable file
@ -9,11 +9,17 @@ typedef struct Nodo{
|
||||
int num_columns;
|
||||
int is_fact;
|
||||
int *address_host_table;
|
||||
int *negatives;
|
||||
char *predname;
|
||||
double *weight;
|
||||
}gpunode;
|
||||
|
||||
typedef gpunode predicate;
|
||||
|
||||
// #define TIMER 1
|
||||
//#define TIMER 1
|
||||
#define DATALOG 1
|
||||
#define NUM_T 4
|
||||
#define INISIZE 1000000
|
||||
|
||||
#if TIMER
|
||||
typedef struct Stats{
|
||||
@ -27,6 +33,8 @@ typedef struct Stats{
|
||||
extern statinfo cuda_stats;
|
||||
#endif
|
||||
|
||||
/*Constants used to mark comparison predicates*/
|
||||
#define BPOFFSET (-6)
|
||||
#define SBG_EQ (-1)
|
||||
#define SBG_GT (-2)
|
||||
#define SBG_LT (-3)
|
||||
@ -34,6 +42,6 @@ extern statinfo cuda_stats;
|
||||
#define SBG_LE (-5)
|
||||
#define SBG_DF (-6)
|
||||
|
||||
int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**);
|
||||
int Cuda_Eval(predicate**, int, predicate**, int, int*, int**, char*, int);
|
||||
void Cuda_Statistics( void );
|
||||
#endif
|
||||
|
188
packages/cuda/selectproyect.cu
Normal file → Executable file
188
packages/cuda/selectproyect.cu
Normal file → Executable file
@ -1,10 +1,11 @@
|
||||
#include <thrust/device_vector.h>
|
||||
//#include <thrust/device_ptr.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <stdlib.h>
|
||||
#include "memory.h"
|
||||
#include "bpreds.h"
|
||||
|
||||
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
|
||||
/*Mark all rows that comply with the selections*/
|
||||
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
@ -24,14 +25,14 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
|
||||
res[id] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
|
||||
/*If we already have an array of marks (perhaps because the selfjoin was applied first),
|
||||
we unmark any rows that do not comply with the selections*/
|
||||
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int *spos = &shared[numc];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int x, rowact, posact;
|
||||
if(threadIdx.x < (numc * 2))
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
@ -39,10 +40,10 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
|
||||
if(res[id] == 0)
|
||||
return;
|
||||
rowact = id * cols;
|
||||
for(x = 0; x < numc; x++)
|
||||
for(x = 0; x < numc; x += 2)
|
||||
{
|
||||
posact = rowact + spos[x];
|
||||
if(dop1[posact] != shared[x])
|
||||
posact = rowact + shared[x];
|
||||
if(dop1[posact] != shared[x+1])
|
||||
{
|
||||
res[id] = 0;
|
||||
return;
|
||||
@ -51,6 +52,7 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
|
||||
}
|
||||
}
|
||||
|
||||
/*Unmark all rows that do not comply with the selfjoins.*/
|
||||
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
@ -66,12 +68,12 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
|
||||
pos = id * cols;
|
||||
for(x = 0; x < cont; x++)
|
||||
{
|
||||
temp = shared[x];
|
||||
temp = dop1[pos+shared[x]];
|
||||
y = x + 1;
|
||||
temp2 = shared[y];
|
||||
while(temp2 > -1)
|
||||
{
|
||||
if(dop1[temp+pos] != dop1[temp2+pos])
|
||||
if(temp != dop1[temp2+pos])
|
||||
{
|
||||
res[id] = 0;
|
||||
return;
|
||||
@ -84,6 +86,7 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
|
||||
}
|
||||
}
|
||||
|
||||
/*Mark all rows that comply with the selfjoins*/
|
||||
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
@ -97,12 +100,12 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
|
||||
pos = id * cols;
|
||||
for(x = 0; x < cont; x++)
|
||||
{
|
||||
temp = shared[x];
|
||||
temp = dop1[pos+shared[x]];
|
||||
y = x + 1;
|
||||
temp2 = shared[y];
|
||||
while(temp2 > -1)
|
||||
{
|
||||
if(dop1[temp+pos] != dop1[temp2+pos])
|
||||
if(temp != dop1[temp2+pos])
|
||||
return;
|
||||
y++;
|
||||
temp2 = shared[y];
|
||||
@ -113,6 +116,7 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
|
||||
}
|
||||
}
|
||||
|
||||
/*Project all columns found in 'dhead' to a new array 'res'*/
|
||||
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
@ -130,76 +134,31 @@ __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize,
|
||||
}
|
||||
}
|
||||
|
||||
/*Project all columns found in 'dhead' using only the rows marked as valid (i.e. those that complied with
|
||||
selections, selfjoins, etc.). The array 'temp' holds the result of the prefix sum of said marks.*/
|
||||
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int pos, posr, x;
|
||||
if(threadIdx.x < cols)
|
||||
if(threadIdx.x < hsize)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
posr = temp[id+1];
|
||||
if(temp[id] != posr && posr > 0)
|
||||
posr = temp[id];
|
||||
if(temp[id+1] != posr)
|
||||
{
|
||||
pos = id * cols;
|
||||
posr = (posr - 1) * hsize;
|
||||
posr *= hsize;
|
||||
for(x = 0; x < hsize; x++, posr++)
|
||||
res[posr] = dop1[pos+shared[x]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*__global__ void removedup()
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if(threadIdx.x < cols)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(id < rows)
|
||||
{
|
||||
|
||||
}
|
||||
}*/
|
||||
|
||||
template<typename T> /*a libreria*/
|
||||
struct suma : public binary_function<T,T,T>
|
||||
{
|
||||
__host__ __device__
|
||||
T operator()(const T &r1, const T &r2)
|
||||
{
|
||||
if(r1 > -1)
|
||||
{
|
||||
if(r2 > 0)
|
||||
return r1 + r2;
|
||||
return -r1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(r2 > 0)
|
||||
return abs(r1) + r2;
|
||||
return r1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
int mayor(int a, int b, int c)
|
||||
{
|
||||
if(a > b)
|
||||
{
|
||||
if(a > c)
|
||||
return a;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(b > c)
|
||||
return b;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
|
||||
/*Performs selections, selfjoins and comparison predicates when the rule has a single normal predicate.*/
|
||||
int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *preds, int numpreds, int *project, int **ret, int ANDlogic)
|
||||
{
|
||||
int *fres = NULL, *temp = NULL;
|
||||
int *dhead = NULL, tmplen;
|
||||
@ -209,30 +168,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
#if TIMER
|
||||
cuda_stats.selects++;
|
||||
#endif
|
||||
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
|
||||
|
||||
int head_bytes = maximo(4, numselect, numselfj, numpreds, head_size) * sizeof(int);
|
||||
reservar(&dhead, head_bytes);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << dhead << " dhead " << head_bytes << endl;
|
||||
#endif
|
||||
|
||||
int blockllen = rows / 1024 + 1;
|
||||
int numthreads = 1024;
|
||||
//int numthreads = 32;
|
||||
int blockllen = rows / numthreads + 1;
|
||||
|
||||
#ifdef ROCKIT
|
||||
ANDlogic = 1;
|
||||
#endif
|
||||
|
||||
//removerep(dop1, rows, cols, dhead,)
|
||||
if(numselect > 0)
|
||||
{
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << temp << " temp select " << size2 << endl;
|
||||
#endif
|
||||
cudaMemset(temp, 0, size2);
|
||||
|
||||
size = numselect * sizeof(int);
|
||||
cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
|
||||
|
||||
marcar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
|
||||
marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
|
||||
|
||||
if(numselfj > 0)
|
||||
{
|
||||
@ -241,6 +197,16 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
}
|
||||
|
||||
if(numpreds > 0)
|
||||
{
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
}
|
||||
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||
num = res[rows];
|
||||
@ -249,13 +215,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << fres << " fres select " << num*size << endl;
|
||||
#endif
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
liberar(dhead, head_bytes);
|
||||
liberar(temp, size2);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
@ -266,15 +229,22 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << temp << " temp select " << size2 << endl;
|
||||
#endif
|
||||
cudaMemset(temp, 0, size2);
|
||||
|
||||
size = numselfj * sizeof(int);
|
||||
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
|
||||
samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||
|
||||
if(numpreds > 0)
|
||||
{
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
if(ANDlogic)
|
||||
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
|
||||
}
|
||||
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||
num = res[rows];
|
||||
@ -283,13 +253,41 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << fres << " fres select again " << num*size << endl;
|
||||
#endif
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
liberar(dhead, head_bytes);
|
||||
liberar(temp, size2);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(numpreds > 0)
|
||||
{
|
||||
tmplen = rows + 1;
|
||||
size2 = tmplen * sizeof(int);
|
||||
reservar(&temp, size2);
|
||||
cudaMemset(temp, 0, size2);
|
||||
size = numpreds * sizeof(int);
|
||||
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||
|
||||
if(ANDlogic)
|
||||
bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
else
|
||||
bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||
num = res[rows];
|
||||
|
||||
if(num == 0)
|
||||
return 0;
|
||||
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, num * size);
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||
cudaFree(dhead);
|
||||
cudaFree(temp);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
||||
@ -297,14 +295,12 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
||||
{
|
||||
size = head_size * sizeof(int);
|
||||
reservar(&fres, rows * size);
|
||||
#ifdef DEBUG_MEM
|
||||
cerr << "+ " << fres << " fres select third " << rows*size << endl;
|
||||
#endif
|
||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
|
||||
liberar(dhead, head_bytes);
|
||||
cudaFree(dhead);
|
||||
*ret = fres;
|
||||
return rows;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
889
packages/cuda/union2.cu
Normal file → Executable file
889
packages/cuda/union2.cu
Normal file → Executable file
File diff suppressed because it is too large
Load Diff
@ -2158,12 +2158,14 @@ static foreign_t init_python(void) {
|
||||
char **argv;
|
||||
term_t t = PL_new_term_ref();
|
||||
YAP_Argv(&argv);
|
||||
if (argv) {
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
Py_SetProgramName(argv[0]);
|
||||
#else
|
||||
wchar_t *buf = Py_DecodeLocale(argv[0], NULL);
|
||||
Py_SetProgramName(buf);
|
||||
#endif
|
||||
}
|
||||
Py_Initialize();
|
||||
py_Main = PyImport_AddModule("__main__");
|
||||
py_Builtin = PyImport_AddModule("__builtin__");
|
||||
|
Reference in New Issue
Block a user