This commit is contained in:
Vitor Santos Costa 2016-04-19 23:30:02 +01:00
parent 3d68f0e06b
commit cd41d373db
28 changed files with 3153 additions and 2229 deletions

View File

@ -1438,5 +1438,6 @@ void Yap_exit(int value) {
Yap_ShutdownLoadForeign(); Yap_ShutdownLoadForeign();
} }
Yap_CloseStreams(false); Yap_CloseStreams(false);
Yap_CloseReadline();
exit(value); exit(value);
} }

View File

@ -185,7 +185,7 @@ available in experimental implementations.
*/ */
YAP_FLAG(FILE_NAME_VARIABLES_FLAG, "file_name_variables", true, booleanFlag, YAP_FLAG(FILE_NAME_VARIABLES_FLAG, "file_name_variables", true, booleanFlag,
"true", NULL), "true", NULL),
YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15e", YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15f",
NULL), /**< + `float_format ` NULL), /**< + `float_format `
C-library `printf()` format specification used by write/1 and C-library `printf()` format specification used by write/1 and

View File

@ -1,4 +1,4 @@
/************************************************************************* /*************************************************************************
* * * *
* YAP Prolog %W% %G% * * YAP Prolog %W% %G% *
* Yap Prolog was developed at NCCUP - Universidade do Porto * * Yap Prolog was developed at NCCUP - Universidade do Porto *
@ -301,6 +301,7 @@ extern void Yap_DebugErrorPutc(int n);
extern void Yap_DebugErrorPuts(const char *s); extern void Yap_DebugErrorPuts(const char *s);
extern void Yap_DebugWriteIndicator(struct pred_entry *ap); extern void Yap_DebugWriteIndicator(struct pred_entry *ap);
void Yap_PlWriteToStream(Term, int, int); void Yap_PlWriteToStream(Term, int, int);
void Yap_CloseReadline(void);
/* depth_lim.c */ /* depth_lim.c */
bool Yap_InitReadline(Term t); bool Yap_InitReadline(Term t);
void Yap_InitItDeepenPreds(void); void Yap_InitItDeepenPreds(void);

View File

@ -659,6 +659,19 @@ INLINE_ONLY inline EXTERN PropFlags IsPredProperty(int flags) {
return (PropFlags)((flags == PEProp)); return (PropFlags)((flags == PEProp));
} }
INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe);

/* Returns the atom naming the predicate described by pe.
 *
 * - predicates whose module is IDB_MODULE yield NULL;
 * - for arity 0 the FunctorOfPred field is cast to Atom and returned as is;
 * - otherwise the name is taken from the functor via NameOfFunctor().
 *
 * Callers must be ready for the NULL result in the IDB case.
 */
INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe) {
  Functor fun;

  if (pe->ModuleOfPred == IDB_MODULE)
    return NULL;
  if (pe->ArityOfPE == 0)
    return (Atom)pe->FunctorOfPred;
  fun = pe->FunctorOfPred;
  return NameOfFunctor(fun);
}
/* Flags for code or dbase entry */ /* Flags for code or dbase entry */
/* There are several flags for code and data base entries */ /* There are several flags for code and data base entries */
typedef enum { typedef enum {
@ -1258,7 +1271,7 @@ INLINE_ONLY inline EXTERN Prop AbsBlobProp(YAP_BlobPropEntry *p) {
INLINE_ONLY inline EXTERN bool IsBlobProperty(PropFlags); INLINE_ONLY inline EXTERN bool IsBlobProperty(PropFlags);
INLINE_ONLY inline EXTERN bool IsBlobProperty(PropFlags flags) { INLINE_ONLY inline EXTERN bool IsBlobProperty(PropFlags flags) {
return flags == BlobProperty; return flags == BlobProperty;
} }
@ -1321,8 +1334,7 @@ INLINE_ONLY inline EXTERN Prop AbsFlagProp(FlagEntry *p) { return (Prop)(p); }
INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags); INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags);
INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags flags) { INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags flags) {
return flags == FlagProperty; return flags == FlagProperty;
} }
/* Proto types */ /* Proto types */

View File

@ -474,6 +474,8 @@
#define LOCAL_search_atoms LOCAL->search_atoms_ #define LOCAL_search_atoms LOCAL->search_atoms_
#define REMOTE_search_atoms(wid) REMOTE(wid)->search_atoms_ #define REMOTE_search_atoms(wid) REMOTE(wid)->search_atoms_
#define LOCAL_SearchPreds LOCAL->SearchPreds_
#define REMOTE_SearchPreds(wid) REMOTE(wid)->SearchPreds_
#define LOCAL_CurSlot LOCAL->CurSlot_ #define LOCAL_CurSlot LOCAL->CurSlot_
#define REMOTE_CurSlot(wid) REMOTE(wid)->CurSlot_ #define REMOTE_CurSlot(wid) REMOTE(wid)->CurSlot_

View File

@ -268,6 +268,7 @@ const char* Error_Function_;
UInt exo_arg_; UInt exo_arg_;
// atom completion // atom completion
struct scan_atoms* search_atoms_; struct scan_atoms* search_atoms_;
struct pred_entry* SearchPreds_;
// Slots // Slots
yhandle_t CurSlot_; yhandle_t CurSlot_;
yhandle_t NSlots_; yhandle_t NSlots_;

View File

@ -269,6 +269,7 @@ static void InitWorker(int wid) {
REMOTE_CurSlot(wid) = 0; REMOTE_CurSlot(wid) = 0;
REMOTE_NSlots(wid) = 0; REMOTE_NSlots(wid) = 0;
REMOTE_SlotBase(wid) = InitHandles(wid); REMOTE_SlotBase(wid) = InitHandles(wid);

View File

@ -279,4 +279,5 @@ static void RestoreWorker(int wid USES_REGS) {
} }

View File

@ -312,6 +312,7 @@ UInt exo_arg =0
// atom completion // atom completion
struct scan_atoms* search_atoms void struct scan_atoms* search_atoms void
struct pred_entry* SearchPreds void
// Slots // Slots
yhandle_t CurSlot =0 yhandle_t CurSlot =0

View File

@ -470,20 +470,13 @@ code with _C_.
*/ */
static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */ static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */
int sno = Yap_CheckStream(ARG1, Input_Stream_f, "get_byte/2"); int sno = Yap_CheckBinaryStream(ARG1, Input_Stream_f, "get_byte/2");
Int status; Int status;
Term out; Term out;
if (sno < 0) if (sno < 0)
return (FALSE); return (FALSE);
status = GLOBAL_Stream[sno].status; status = GLOBAL_Stream[sno].status;
if (!(status & Binary_Stream_f)
//&& strictISOFlag()
) {
UNLOCK(GLOBAL_Stream[sno].streamlock);
Yap_Error(PERMISSION_ERROR_INPUT_STREAM, ARG1, "get_byte/2");
return (FALSE);
}
out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno)); out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno));
UNLOCK(GLOBAL_Stream[sno].streamlock); UNLOCK(GLOBAL_Stream[sno].streamlock);
return Yap_unify_constant(ARG2, out); return Yap_unify_constant(ARG2, out);
@ -812,16 +805,9 @@ static Int put_byte(USES_REGS1) { /* '$put_byte'(Stream,N) */
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, t2, "put_code/1"); Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, t2, "put_code/1");
return FALSE; return FALSE;
} }
int sno = Yap_CheckStream(ARG1, Output_Stream_f, "put/2"); int sno = Yap_CheckBinaryStream(ARG1, Output_Stream_f, "put/2");
if (sno < 0) if (sno < 0)
return (FALSE); return (FALSE);
if (!(GLOBAL_Stream[sno].status & Binary_Stream_f)
// && strictISOFlag()
) {
UNLOCK(GLOBAL_Stream[sno].streamlock);
Yap_Error(PERMISSION_ERROR_OUTPUT_BINARY_STREAM, ARG1, NULL);
return false;
}
GLOBAL_Stream[sno].stream_putc(sno, ch); GLOBAL_Stream[sno].stream_putc(sno, ch);
/* /*
* if (!(GLOBAL_Stream[sno].status & Null_Stream_f)) * if (!(GLOBAL_Stream[sno].status & Null_Stream_f))

View File

@ -1576,6 +1576,24 @@ int Yap_CheckTextStream__(const char *file, const char *f, int line, Term arg,
return sno; return sno;
} }
/**
 * Check that arg denotes a stream of the requested kind AND that the stream
 * is a binary one.
 *
 * @param file, f, line  call-site information, forwarded to the error
 *                       reporting routines (filled in by the
 *                       Yap_CheckBinaryStream() macro).
 * @param arg            the term that should designate the stream.
 * @param kind           Input_Stream_f or Output_Stream_f.
 * @param msg            message forwarded with any error raised.
 *
 * @return the stream number on success, -1 on failure.
 *
 * On the failure path taken here the stream lock is released before the
 * error is reported; on success no unlock is performed by this function, so
 * the caller is expected to UNLOCK the stream when done (as get_byte and
 * put_byte do).
 */
int Yap_CheckBinaryStream__(const char *file, const char *f, int line, Term arg,
                            int kind, const char *msg) {
  int sno;
  if ((sno = CheckStream__(file, f, line, arg, kind, msg)) < 0)
    return -1;
  /* byte-oriented I/O is only allowed on binary streams: reject the stream
     when the binary flag is NOT set (i.e. it is a text stream) */
  if (!(GLOBAL_Stream[sno].status & Binary_Stream_f)) {
    UNLOCK(GLOBAL_Stream[sno].streamlock);
    if (kind == Input_Stream_f)
      PlIOError__(file, f, line, PERMISSION_ERROR_INPUT_TEXT_STREAM, arg,
                  msg);
    else
      PlIOError__(file, f, line, PERMISSION_ERROR_OUTPUT_TEXT_STREAM, arg,
                  msg);
    return -1;
  }
  return sno;
}
/* used from C-interface */ /* used from C-interface */
int Yap_GetFreeStreamDForReading(void) { int Yap_GetFreeStreamDForReading(void) {
int sno = GetFreeStreamD(); int sno = GetFreeStreamD();

View File

@ -45,6 +45,10 @@ extern int Yap_CheckStream__(const char *, const char *, int, Term, int,
Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg) Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int, extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int,
const char *); const char *);
#define Yap_CheckBinaryStream(arg, kind, msg) \
Yap_CheckBinaryStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
extern int Yap_CheckBinaryStream__(const char *, const char *, int, Term, int,
const char *);
extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name, extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name,
encoding_t encoding, stream_flags_t flags, encoding_t encoding, stream_flags_t flags,

View File

@ -130,8 +130,8 @@ static char *atom_generator(const char *prefix, int state) {
} }
typedef struct chain { typedef struct chain {
struct chain *next; struct chain *next;
char data[2]; char data[2];
} chain_t; } chain_t;
static char *predicate_enumerate(const char *prefix, int state) { static char *predicate_enumerate(const char *prefix, int state) {
@ -139,352 +139,358 @@ static char *predicate_enumerate(const char *prefix, int state) {
PredEntry *p; PredEntry *p;
ModEntry m0, *mod; ModEntry m0, *mod;
AtomEntry *ap; AtomEntry *ap;
if (!state) { if (!state) {
p = NULL; p = NULL;
mod = &m0; mod = &m0;
m0.NextME = CurrentModules; m0.NextME = CurrentModules;
if (mod->AtomOfME == AtomIDB) if (mod->AtomOfME == AtomIDB)
mod = mod->NextME; mod = mod->NextME;
} else { } else {
Term cmod; Term cmod;
p = LOCAL_SearchPreds; p = LOCAL_SearchPreds;
cmod = (p->ModuleOfPred != PROLOG_MODULE ? p->ModuleOfPred : TermProlog ); cmod = (p->ModuleOfPred != PROLOG_MODULE ? p->ModuleOfPred : TermProlog);
mod = Yap_GetModuleEntry(cmod); mod = Yap_GetModuleEntry(cmod);
} }
while (mod) { while (mod) {
// move to next o; // move to next o;
if (p) if (p)
p = p->NextPredOfModule; p = p->NextPredOfModule;
while (p == NULL) { while (p == NULL) {
mod = mod->NextME; mod = mod->NextME;
if (!mod) { if (!mod) {
// done // done
LOCAL_SearchPreds = NULL;
return NULL;
}
if (mod->AtomOfME == AtomIDB)
mod = mod->NextME;
p = mod->PredForME;
}
char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
if (strlen(c) > strlen(prefix) &&
strstr(c, prefix) == c &&
!(p->PredFlags & HiddenPredFlag)) {
LOCAL_SearchPreds = p;
arity_t ar = p->ArityOfPE;
int l, r;
if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
return c;
}
size_t sz = strlen(c);
chain_t *el = (chain_t *)malloc(sizeof(chain_t)+sz);
strncpy(LOCAL_FileNameBuf, c, YAP_FILENAME_MAX);
strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
return LOCAL_FileNameBuf;
}
}
LOCAL_SearchPreds = NULL; LOCAL_SearchPreds = NULL;
return NULL; return NULL;
}
if (mod->AtomOfME == AtomIDB)
mod = mod->NextME;
p = mod->PredForME;
}
char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
if (strlen(c) > strlen(prefix) && strstr(c, prefix) == c &&
!(p->PredFlags & HiddenPredFlag)) {
LOCAL_SearchPreds = p;
arity_t ar = p->ArityOfPE;
int l, r;
if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
return c;
}
strncpy(LOCAL_FileNameBuf, c, YAP_FILENAME_MAX);
strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
return LOCAL_FileNameBuf;
}
}
LOCAL_SearchPreds = NULL;
return NULL;
} }
static char *predicate_generator(const char *prefix, int state) { static char *predicate_generator(const char *prefix, int state) {
char *s = predicate_enumerate(prefix, state); char *s = predicate_enumerate(prefix, state);
if (s) { if (s) {
char *copy = malloc(1 + strlen(s)); char *copy = malloc(1 + strlen(s));
if (copy) /* else pretend no completion */ if (copy) /* else pretend no completion */
strcpy(copy, s); strcpy(copy, s);
s = copy; s = copy;
}
return s;
} }
static char **prolog_completion(const char *text, int start, int end) { return s;
char **matches = NULL; }
if (start == 0 && isalpha(text[0])) { static char **prolog_completion(const char *text, int start, int end) {
int i = 0; char **matches = NULL;
while (i < end) {
if (isalnum(text[i]))
i++;
else
break;
}
if (i == end) {
matches = rl_completion_matches((char *)text, predicate_generator);
}
return matches;
} else if (start == 0) {
int i = 0;
const char *p;
while (isblank(text[i++]) && i <= end)
;
p = text + i;
if ((strstr(p,"[") == p) || (strstr(p,"compile(") == p) || if (start == 0 && isalpha(text[0])) {
(strstr(p,"consult(") == p) || (strstr(p,"load_files(") == p) || int i = 0;
(strstr(p,"reconsult(") == p) || (strstr(p,"use_module(") == p)) while (i < end) {
matches = rl_completion_matches((char *)text, /* for pre-4.2 */ if (isalnum(text[i]) || text[i] == '_')
rl_filename_completion_function); i++;
return matches; else
break;
} }
int i = end, ch = '\0'; if (i == end) {
while (i > start) { matches = rl_completion_matches((char *)text, predicate_generator);
ch = text[-i];
if (isalnum(text[i]))
continue;
break;
} }
if (islower(ch)) return matches;
return rl_completion_matches((char *)text, atom_generator); } else if (start == 0) {
int i = 0;
const char *p;
while (isblank(text[i++]) && i <= end)
;
p = text + i;
return NULL; if ((strstr(p, "[") == p) || (strstr(p, "compile(") == p) ||
(strstr(p, "consult(") == p) || (strstr(p, "load_files(") == p) ||
(strstr(p, "reconsult(") == p) || (strstr(p, "use_module(") == p) ||
(strstr(p, "cd(") == p))
matches = rl_completion_matches((char *)text, /* for pre-4.2 */
rl_filename_completion_function);
return matches;
} }
int i = end, ch = '\0';
void Yap_ReadlineFlush(int sno) { while (i > start) {
if (GLOBAL_Stream[sno].status & Tty_Stream_f && ch = text[--i];
GLOBAL_Stream[sno].status & Output_Stream_f) { if (ch == '\'')
rl_redisplay(); return rl_completion_matches((char *)text, /* for pre-4.2 */
} rl_filename_completion_function);
if (isalnum(text[i]))
continue;
break;
} }
if (islower(ch))
return rl_completion_matches((char *)text, atom_generator);
bool Yap_ReadlinePrompt(StreamDesc * s) { return NULL;
if (s->status & Tty_Stream_f) { }
s->stream_getc = ReadlineGetc;
if (GLOBAL_Stream[0].status & Tty_Stream_f && void Yap_ReadlineFlush(int sno) {
s->name == GLOBAL_Stream[0].name) if (GLOBAL_Stream[sno].status & Tty_Stream_f &&
s->stream_putc = ReadlinePutc; GLOBAL_Stream[sno].status & Output_Stream_f) {
return true; rl_redisplay();
}
return false;
} }
}
bool Yap_ReadlineOps(StreamDesc * s) { bool Yap_ReadlinePrompt(StreamDesc *s) {
if (s->status & Tty_Stream_f) { if (s->status & Tty_Stream_f) {
if (GLOBAL_Stream[0].status & Tty_Stream_f && s->stream_getc = ReadlineGetc;
is_same_tty(s->file, GLOBAL_Stream[0].file)) if (GLOBAL_Stream[0].status & Tty_Stream_f &&
s->stream_putc = ReadlinePutc; s->name == GLOBAL_Stream[0].name)
s->stream_getc = ReadlineGetc; s->stream_putc = ReadlinePutc;
s->status |= Readline_Stream_f;
return true;
}
return false;
}
static int prolog_complete(int ignore, int key) {
if (rl_point > 0 && rl_line_buffer[rl_point - 1] != ' ') {
#if HAVE_DECL_RL_CATCH_SIGNALS_ /* actually version >= 1.2, or true readline \
*/
rl_begin_undo_group();
rl_complete(ignore, key);
if (rl_point > 0 && rl_line_buffer[rl_point - 1] == ' ') {
rl_delete_text(rl_point - 1, rl_point);
rl_point -= 1;
rl_delete(-1, key);
}
rl_end_undo_group();
#endif
} else
rl_complete(ignore, key);
return 0;
}
bool Yap_InitReadline(Term enable) {
// don't call readline within emacs
// if (getenv("ËMACS"))
// return;
if (enable == TermFalse)
return true;
GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
#if _MSC_VER || defined(__MINGW32__)
rl_instream = stdin;
#endif
rl_outstream = stderr;
using_history();
const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
if (read_history(s) != 0) {
FILE *f = fopen(s, "a");
if (f) {
fclose(f);
}
}
rl_readline_name = "Prolog";
rl_attempted_completion_function = prolog_completion;
#ifdef HAVE_RL_COMPLETION_FUNC_T
rl_add_defun("prolog-complete", prolog_complete, '\t');
#else
rl_add_defun("prolog-complete", (void *)prolog_complete, '\t');
#endif
return Yap_ReadlineOps(GLOBAL_Stream + StdInStream);
}
static bool getLine(int inp, int out) {
CACHE_REGS
rl_instream = GLOBAL_Stream[inp].file;
rl_outstream = GLOBAL_Stream[out].file;
const unsigned char *myrl_line;
StreamDesc *s = GLOBAL_Stream + inp;
if (!(s->status & Tty_Stream_f))
return false;
/* window of vulnerability opened */
fflush(NULL);
LOCAL_PrologMode |= ConsoleGetcMode;
if (LOCAL_newline) { // no output so far
myrl_line = (unsigned char *)readline(LOCAL_Prompt);
} else {
myrl_line = (unsigned char *)readline(NULL);
}
/* Do it the gnu way */
if (LOCAL_PrologMode & InterruptMode) {
Yap_external_signal(0, YAP_INT_SIGNAL);
LOCAL_PrologMode &= ~ConsoleGetcMode;
if (LOCAL_PrologMode & AbortMode) {
Yap_Error(ABORT_EVENT, TermNil, "");
LOCAL_ErrorMessage = "Abort";
return console_post_process_eof(s);
}
} else {
LOCAL_PrologMode &= ~ConsoleGetcMode;
LOCAL_newline = true;
}
strncpy(LOCAL_Prompt, RepAtom(LOCAL_AtPrompt)->StrOfAE, MAX_PROMPT);
/* window of vulnerability closed */
if (myrl_line == NULL)
return false;
if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
add_history((char *)myrl_line);
write_history(history_file);
fflush(NULL);
}
s->u.irl.ptr = s->u.irl.buf = myrl_line;
return true; return true;
} }
return false;
}
static int ReadlinePutc(int sno, int ch) { bool Yap_ReadlineOps(StreamDesc *s) {
CACHE_REGS if (s->status & Tty_Stream_f) {
StreamDesc *s = &GLOBAL_Stream[sno]; if (GLOBAL_Stream[0].status & Tty_Stream_f &&
#if MAC || _MSC_VER || defined(__MINGW32__) is_same_tty(s->file, GLOBAL_Stream[0].file))
if (ch == 10) { s->stream_putc = ReadlinePutc;
putc('\n', s->file); s->stream_getc = ReadlineGetc;
} else s->status |= Readline_Stream_f;
return true;
}
return false;
}
static int prolog_complete(int ignore, int key) {
if (rl_point > 0 && rl_line_buffer[rl_point - 1] != ' ') {
#if HAVE_DECL_RL_CATCH_SIGNALS_ /* actually version >= 1.2, or true readline \
*/
rl_begin_undo_group();
rl_complete(ignore, key);
if (rl_point > 0 && rl_line_buffer[rl_point - 1] == ' ') {
rl_delete_text(rl_point - 1, rl_point);
rl_point -= 1;
rl_delete(-1, key);
}
rl_end_undo_group();
#endif #endif
putc(ch, s->file); } else
console_count_output_char(ch, s); rl_complete(ignore, key);
if (ch == 10) {
Yap_ReadlineFlush(sno); return 0;
LOCAL_newline = true; }
bool Yap_InitReadline(Term enable) {
// don't call readline within emacs
// if (getenv("ËMACS"))
// return;
if (enable == TermFalse)
return true;
GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
#if _WIN32
rl_instream = stdin;
#endif
rl_outstream = stderr;
using_history();
const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
history_file = s;
if (read_history(s) != 0) {
FILE *f = fopen(s, "a");
if (f) {
fclose(f);
} }
return ((int)ch);
} }
rl_readline_name = "Prolog";
rl_attempted_completion_function = prolog_completion;
#ifdef HAVE_RL_COMPLETION_FUNC_T
rl_add_defun("prolog-complete", prolog_complete, '\t');
#else
rl_add_defun("prolog-complete", (void *)prolog_complete, '\t');
#endif
return Yap_ReadlineOps(GLOBAL_Stream + StdInStream);
}
/** static bool getLine(int inp, int out) {
@brief reading from the console is complicated because we need to CACHE_REGS
know whether to prompt and so on... rl_instream = GLOBAL_Stream[inp].file;
rl_outstream = GLOBAL_Stream[out].file;
const unsigned char *myrl_line;
StreamDesc *s = GLOBAL_Stream + inp;
EOF must be handled by resetting the file. if (!(s->status & Tty_Stream_f))
*/ return false;
static int ReadlineGetc(int sno) {
StreamDesc *s = &GLOBAL_Stream[sno];
int ch;
bool fetch = (s->u.irl.buf == NULL);
if (!fetch || getLine(sno, StdErrStream)) { /* window of vulnerability opened */
const unsigned char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf; fflush(NULL);
ch = *ttyptr; LOCAL_PrologMode |= ConsoleGetcMode;
if (ch == '\0') { if (LOCAL_newline) { // no output so far
ch = '\n'; myrl_line = (unsigned char *)readline(LOCAL_Prompt);
free((void *)myrl_line); } else {
s->u.irl.ptr = s->u.irl.buf = NULL; myrl_line = (unsigned char *)readline(NULL);
}
} else {
return EOF;
}
return console_post_process_read_char(ch, s);
} }
/* Do it the gnu way */
/** if (LOCAL_PrologMode & InterruptMode) {
@brief Yap_ReadlinePeekChar peeks the next char from the Yap_external_signal(0, YAP_INT_SIGNAL);
readline buffer, but does not actually grab it. LOCAL_PrologMode &= ~ConsoleGetcMode;
if (LOCAL_PrologMode & AbortMode) {
The idea is to take advantage of the buffering. Special care must be taken Yap_Error(ABORT_EVENT, TermNil, "");
with EOF, though. LOCAL_ErrorMessage = "Abort";
return console_post_process_eof(s);
*/
Int Yap_ReadlinePeekChar(int sno) {
StreamDesc *s = &GLOBAL_Stream[sno];
int ch;
if (s->u.irl.buf) {
const unsigned char *ttyptr = s->u.irl.ptr;
ch = *ttyptr;
if (ch == '\0') {
ch = '\n';
}
return ch;
} }
if (getLine(sno, StdErrStream)) { } else {
CACHE_REGS LOCAL_PrologMode &= ~ConsoleGetcMode;
ch = s->u.irl.ptr[0]; LOCAL_newline = true;
if (ch == '\0') { }
ch = '\n'; strncpy(LOCAL_Prompt, RepAtom(LOCAL_AtPrompt)->StrOfAE, MAX_PROMPT);
} /* window of vulnerability closed */
if (ch == '\n') { if (myrl_line == NULL)
LOCAL_newline = true; return false;
} else { if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
LOCAL_newline = false; add_history((char *)myrl_line);
} fflush(NULL);
} else { }
return EOF; s->u.irl.ptr = s->u.irl.buf = myrl_line;
return true;
}
static int ReadlinePutc(int sno, int ch) {
CACHE_REGS
StreamDesc *s = &GLOBAL_Stream[sno];
#if MAC || _MSC_VER || defined(__MINGW32__)
if (ch == 10) {
putc('\n', s->file);
} else
#endif
putc(ch, s->file);
console_count_output_char(ch, s);
if (ch == 10) {
Yap_ReadlineFlush(sno);
LOCAL_newline = true;
}
return ((int)ch);
}
/**
@brief reading from the console is complicated because we need to
know whether to prompt and so on...
EOF must be handled by resetting the file.
*/
static int ReadlineGetc(int sno) {
StreamDesc *s = &GLOBAL_Stream[sno];
int ch;
bool fetch = (s->u.irl.buf == NULL);
if (!fetch || getLine(sno, StdErrStream)) {
const unsigned char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf;
ch = *ttyptr;
if (ch == '\0') {
ch = '\n';
free((void *)myrl_line);
s->u.irl.ptr = s->u.irl.buf = NULL;
}
} else {
return EOF;
}
return console_post_process_read_char(ch, s);
}
/**
@brief Yap_ReadlinePeekChar peeks the next char from the
readline buffer, but does not actually grab it.
The idea is to take advantage of the buffering. Special care must be taken
with EOF, though.
*/
Int Yap_ReadlinePeekChar(int sno) {
StreamDesc *s = &GLOBAL_Stream[sno];
int ch;
if (s->u.irl.buf) {
const unsigned char *ttyptr = s->u.irl.ptr;
ch = *ttyptr;
if (ch == '\0') {
ch = '\n';
} }
return ch; return ch;
} }
if (getLine(sno, StdErrStream)) {
int Yap_ReadlineForSIGINT(void) {
CACHE_REGS CACHE_REGS
int ch; ch = s->u.irl.ptr[0];
StreamDesc *s = &GLOBAL_Stream[StdInStream]; if (ch == '\0') {
const unsigned char *myrl_line = s->u.irl.buf; ch = '\n';
}
if (ch == '\n') {
LOCAL_newline = true;
} else {
LOCAL_newline = false;
}
} else {
return EOF;
}
return ch;
}
if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != NULL) { int Yap_ReadlineForSIGINT(void) {
CACHE_REGS
int ch;
StreamDesc *s = &GLOBAL_Stream[StdInStream];
const unsigned char *myrl_line = s->u.irl.buf;
if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != NULL) {
ch = myrl_line[0];
free((void *)myrl_line);
myrl_line = NULL;
return ch;
} else {
myrl_line = (const unsigned char *)readline("Action (h for help): ");
if (!myrl_line) {
ch = EOF;
return ch;
} else {
ch = myrl_line[0]; ch = myrl_line[0];
free((void *)myrl_line); free((void *)myrl_line);
myrl_line = NULL; myrl_line = NULL;
return ch; return ch;
} else {
myrl_line = (const unsigned char *)readline("Action (h for help): ");
if (!myrl_line) {
ch = EOF;
return ch;
} else {
ch = myrl_line[0];
free((void *)myrl_line);
myrl_line = NULL;
return ch;
}
} }
} }
}
static Int has_readline(USES_REGS1) { void Yap_CloseReadline(void) {
#if USE_READLINE #if USE_READLINE
return true; write_history(history_file);
#else
return false;
#endif #endif
} }
void Yap_InitReadlinePreds(void) { static Int has_readline(USES_REGS1) {
Yap_InitCPred("$has_readline", 0, has_readline, #if USE_READLINE
SafePredFlag | HiddenPredFlag); return true;
} #else
return false;
#endif
}
void Yap_InitReadlinePreds(void) {
Yap_InitCPred("$has_readline", 0, has_readline,
SafePredFlag | HiddenPredFlag);
}
#else #else
bool Yap_InitReadline(Term enable) { bool Yap_InitReadline(Term enable) {

View File

@ -390,7 +390,9 @@ write1 ( USES_REGS1 )
if (output_stream == -1) output_stream = 1; if (output_stream == -1) output_stream = 1;
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END ); xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
if (args == NULL) { if (args == NULL) {
if (LOCAL_Error_TYPE) if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
if (LOCAL_Error_TYPE)
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL); Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
return false; return false;
} }
@ -415,6 +417,8 @@ write_canonical1 ( USES_REGS1 )
if (output_stream == -1) output_stream = 1; if (output_stream == -1) output_stream = 1;
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END ); xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
if (args == NULL) { if (args == NULL) {
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
if (LOCAL_Error_TYPE) if (LOCAL_Error_TYPE)
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL); Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
return false; return false;
@ -440,6 +444,8 @@ write_canonical ( USES_REGS1 )
we cannot make recursive Prolog calls */ we cannot make recursive Prolog calls */
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END ); xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
if (args == NULL) { if (args == NULL) {
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
if (LOCAL_Error_TYPE) if (LOCAL_Error_TYPE)
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL); Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
return false; return false;
@ -467,7 +473,9 @@ writeq1 ( USES_REGS1 )
we cannot make recursive Prolog calls */ we cannot make recursive Prolog calls */
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END ); xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
if (args == NULL) { if (args == NULL) {
if (LOCAL_Error_TYPE) if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
if (LOCAL_Error_TYPE)
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL); Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
return false; return false;
} }
@ -495,6 +503,8 @@ writeq ( USES_REGS1 )
we cannot make recursive Prolog calls */ we cannot make recursive Prolog calls */
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END ); xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
if (args == NULL) { if (args == NULL) {
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
if (LOCAL_Error_TYPE) if (LOCAL_Error_TYPE)
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL); Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
return false; return false;
@ -523,7 +533,9 @@ print1 ( USES_REGS1 )
we cannot make recursive Prolog calls */ we cannot make recursive Prolog calls */
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END ); xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
if (args == NULL) { if (args == NULL) {
if (LOCAL_Error_TYPE) if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
if (LOCAL_Error_TYPE)
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL); Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
return false; return false;
} }
@ -551,7 +563,9 @@ print ( USES_REGS1 )
we cannot make recursive Prolog calls */ we cannot make recursive Prolog calls */
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END ); xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
if (args == NULL) { if (args == NULL) {
if (LOCAL_Error_TYPE) if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
if (LOCAL_Error_TYPE)
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL); Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
return false; return false;
} }

View File

@ -54,11 +54,28 @@ if (CUDA_FOUND)
macro_optional_find_package (Thrust ON) macro_optional_find_package (Thrust ON)
set (CUDA_SOURCES set (CUDA_SOURCES
lista.cu CC_CSSTree.cu
memory.cu bpreds.cu
cuda.c dbio.cu
lista.cu
memory.cu
selectproyect.cu
treeb.cu
union2.cu
) )
set (CXX_SOURCES
bpredscpu.cpp
joincpu.cpp
selectproyectcpu.cpp
unioncpu2.cpp
)
set (C_SOURCES
creator2.c
cuda.c
)
set (PL_SOURCES set (PL_SOURCES
cuda.yap cuda.yap
) )

15
packages/cuda/Makefile.in Normal file → Executable file
View File

@ -23,7 +23,7 @@ CC=@CC@
NVCC=@NVCC@ NVCC=@NVCC@
CFLAGS= @SHLIB_CFLAGS@ $(YAP_EXTRAS) $(DEFS) -I$(srcdir) -I../.. -I$(srcdir)/../../include @CUDA_CPPFLAGS@ CFLAGS= @SHLIB_CFLAGS@ $(YAP_EXTRAS) $(DEFS) -I$(srcdir) -I../.. -I$(srcdir)/../../include @CUDA_CPPFLAGS@
NVCCFLAGS=@CUDA_CPPFLAGS@ -I$(srcdir) -I../.. -I$(srcdir)/../../include NVCCFLAGS=@CUDA_CPPFLAGS@ -I$(srcdir) -I../.. -I$(srcdir)/../../include
CUDA_LDFLAGS=@CUDA_LDFLAGS@ LDFLAGS=@LDFLAGS@
# #
# #
# You shouldn't need to change what follows. # You shouldn't need to change what follows.
@ -39,7 +39,7 @@ SO=@SO@
CWD=$(PWD) CWD=$(PWD)
# #
CUDA_PROLOG= \ BDD_PROLOG= \
$(srcdir)/cuda.yap $(srcdir)/cuda.yap
OBJS=cuda.o memory.o lista.o OBJS=cuda.o memory.o lista.o
@ -62,16 +62,11 @@ memory.o: $(srcdir)/memory.cu $(srcdir)/pred.h
@DO_SECOND_LD@cuda.@SO@: $(OBJS) @DO_SECOND_LD@cuda.@SO@: $(OBJS)
@DO_SECOND_LD@ @CUDA_SHLIB_LD@ $(CUDA_LDFLAGS) -o cuda.@SO@ $(OBJS) @DO_SECOND_LD@ @CUDA_SHLIB_LD@ $(CUDA_LDFLAGS) -o cuda.@SO@ $(OBJS)
install: all install-examples install: all
mkdir -p $(DESTDIR)$(SHAREDIR) mkdir -p $(DESTDIR)$(SHAREDIR)
for h in $(CUDA_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done for h in $(BDD_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
$(INSTALL_PROGRAM) $(SOBJS) $(DESTDIR)$(YAPLIBDIR) $(INSTALL_PROGRAM) $(SOBJS) $(DESTDIR)$(YAPLIBDIR)
install-examples:
clean: clean:
rm -f *.o *~ $(OBJS) *.BAK rm -f *.o *~ $(OBJS) $(SOBJS) *.BAK
distclean: clean
rm -f $(SOBJS) Makefile

515
packages/cuda/bpreds.cu Normal file → Executable file
View File

@ -1,4 +1,113 @@
__global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, int *res) #include <thrust/device_vector.h>
#include <thrust/scan.h>
#include <cstdarg>
#include "pred.h"
/*Returns the largest of the 'count' int values passed after 'count'.
  The running maximum starts at 0, so the result is never negative and is 0
  when count is 0 or when every value is negative*/
int maximo(int count, ...)
{
	va_list args;
	int best = 0;

	va_start(args, count);
	for(int remaining = count; remaining > 0; remaining--)
	{
		const int candidate = va_arg(args, int);
		best = (candidate > best) ? candidate : best;
	}
	va_end(args);
	return best;
}
/*Tells whether comparison 'op' (one of the SBG_* codes) holds between a and b.
  Any other code is treated as holding, i.e. it never rejects a row*/
static __device__ __forceinline__ bool bpredsholds(int op, int a, int b)
{
	switch(op)
	{
		case SBG_EQ: return a == b;
		case SBG_GT: return a > b;
		case SBG_LT: return a < b;
		case SBG_GE: return a >= b;
		case SBG_LE: return a <= b;
		case SBG_DF: return a != b;
	}
	return true;
}

/*Marks the rows that satisfy a list of comparison predicates. One thread handles one row.

  Launch: 1D grid of 1D blocks covering at least 'rows' threads (only the .x
  dimensions are used). Dynamic shared memory must hold the predicate table
  'cons' (numc ints).

  cons is read as triples (operator, operand1, operand2) - presumably numc and
  nx are multiples of 3, confirm against the caller:
  - triples in [nx, numc): the operator is stored with BPOFFSET added; a
    negative operand v selects column (-v - 1) of row id in dop1 (row stride
    of1), a non-negative operand v selects column v of row id in dop2 (row
    stride of2).
  - triples in [0, nx): the operator is stored as is; a negative operand v is
    the constant -v, a non-negative operand v selects column v of row id in dop2.

  res2[id] is set to 1 (when res2 is not NULL) once the first group holds, and
  res[id] is set to 1 once both groups hold. Entries of rows that fail are left
  untouched, so the caller presumably clears res/res2 beforehand*/
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, rowact1, op1, op2;

	/*Block-strided copy: the whole table is staged even when numc is larger
	  than the number of threads in the block*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	__syncthreads();
	/*No barrier follows, so threads may return early from here on*/
	if(id < rows)
	{
		rowact1 = id * of1;
		rowact = id * of2;
		for(x = nx; x < numc; x += 3)
		{
			op1 = shared[x+1];
			if(op1 < 0)
				op1 = dop1[rowact1 - op1 - 1];
			else
				op1 = dop2[rowact + op1];
			op2 = shared[x+2];
			if(op2 < 0)
				op2 = dop1[rowact1 - op2 - 1];
			else
				op2 = dop2[rowact + op2];
			if(!bpredsholds(shared[x] - BPOFFSET, op1, op2))
				return;
		}

		if(res2 != NULL)
			res2[id] = 1;

		for(x = 0; x < nx; x += 3)
		{
			op1 = shared[x+1];
			if(op1 < 0)
				op1 *= -1;
			else
				op1 = dop2[rowact + op1];
			op2 = shared[x+2];
			if(op2 < 0)
				op2 *= -1;
			else
				op2 = dop2[rowact + op2];
			if(!bpredsholds(shared[x], op1, op2))
				return;
		}
		res[id] = 1;
	}
}
/*Mark all rows that comply with the comparison predicates*/
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = blockIdx.x * blockDim.x + threadIdx.x;
@ -8,7 +117,7 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
rowact = id * cols; rowact = id * of1;
for(x = 0; x < numc; x += 3) for(x = 0; x < numc; x += 3)
{ {
op1 = shared[x+1]; op1 = shared[x+1];
@ -23,21 +132,21 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
op2 = dop1[rowact + op2]; op2 = dop1[rowact + op2];
switch(shared[x]) switch(shared[x])
{ {
case SBG_EQ: if(op1 != op2) case SBG_EQ: if(op1 != op2)
return; return;
break; break;
case SBG_GT: if(op1 <= op2) case SBG_GT: if(op1 <= op2)
return; return;
break; break;
case SBG_LT: if(op1 >= op2) case SBG_LT: if(op1 >= op2)
return; return;
break; break;
case SBG_GE: if(op1 < op2) case SBG_GE: if(op1 < op2)
return; return;
break; break;
case SBG_LE: if(op1 > op2) case SBG_LE: if(op1 > op2)
return; return;
break; break;
case SBG_DF: if(op1 == op2) case SBG_DF: if(op1 == op2)
return; return;
} }
@ -46,98 +155,306 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
} }
} }
int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret) /*Unmark all rows that do not comply with the comparison predicates*/
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{ {
int *temp; extern __shared__ int shared[];
int tmplen = rows + 1; int id = blockIdx.x * blockDim.x + threadIdx.x;
int size = tmplen * sizeof(int); int x, rowact, op1, op2;
reservar(&temp, size); if(threadIdx.x < numc)
#ifdef DEBUG_MEM shared[threadIdx.x] = cons[threadIdx.x];
cerr << "+ " << temp << " temp bpreds " << size << endl; __syncthreads();
#endif if(id < rows)
cudaMemset(temp, 0, size);
#if TIMER
cuda_stats.builtins++;
#endif
int *dhead;
int predn = numpreds.x * 3;
int spredn = predn * sizeof(int);
int sproj = numpreds.z * sizeof(int);
int hsize;
if(predn > numpreds.z)
hsize = spredn;
else
hsize = sproj;
reservar(&dhead, hsize);
#ifdef DEBUG_MEM
cerr << "+ " << dhead << " dhead " << hsize << endl;
#endif
cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
int blockllen = rows / 1024 + 1;
int numthreads = 1024;
/*int x;
cout << "arraypreds = ";
for(x = 0; x < predn; x++)
cout << bin[x] << " ";
cout << endl;
cout << "temptable = ";
for(x = 0; x < numpreds.z; x++)
cout << bin[x+predn] << " ";
cout << endl;
int y;
int *hop1 = (int *)malloc(numpreds.y * rows * sizeof(int));
cudaMemcpy(hop1, dop1, numpreds.y * rows * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < rows; x++)
{ {
for(y = 0; y < numpreds.y; y++) if(res[id] == 0)
cout << hop1[x * numpreds.y + y] << " "; return;
cout << endl; rowact = id * of1;
for(x = 0; x < numc; x += 3)
{
op1 = shared[x+1];
if(op1 < 0)
op1 *= -1;
else
op1 = dop1[rowact + op1];
op2 = shared[x+2];
if(op2 < 0)
op2 *= -1;
else
op2 = dop1[rowact + op2];
switch(shared[x])
{
case SBG_EQ: if(op1 != op2)
{
res[id] = 0;
return;
}
break;
case SBG_GT: if(op1 <= op2)
{
res[id] = 0;
return;
}
break;
case SBG_LT: if(op1 >= op2)
{
res[id] = 0;
return;
}
break;
case SBG_GE: if(op1 < op2)
{
res[id] = 0;
return;
}
break;
case SBG_LE: if(op1 > op2)
{
res[id] = 0;
return;
}
break;
case SBG_DF: if(op1 == op2)
{
res[id] = 0;
return;
}
}
}
} }
free(hop1);*/
predicates<<<blockllen, numthreads, spredn>>>(dop1, rows, numpreds.y, dhead, predn, temp + 1);
/*int y;
int *hop1 = (int *)malloc((rows + 1) * sizeof(int));
cudaMemcpy(hop1, temp, (rows + 1) * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < (rows + 1); x++)
cout << hop1[x] << " ";
cout << endl;
free(hop1);*/
thrust::device_ptr<int> res;
res = thrust::device_pointer_cast(temp);
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
int num = res[rows];
if(num == 0)
return 0;
int *fres;
reservar(&fres, num * sproj);
#ifdef DEBUG_MEM
cerr << "+ " << fres << " fres " << num * sproj << endl;
#endif
cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
/*int y;
int *hop1 = (int *)malloc(numpreds.z * num * sizeof(int));
cudaMemcpy(hop1, fres, numpreds.z * num * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < num; x++)
{
for(y = 0; y < numpreds.z; y++)
cout << hop1[x * numpreds.z + y] << " ";
cout << endl;
}
free(hop1);*/
liberar(dhead, hsize);
liberar(temp, size);
liberar(dop1, rows * cols * sizeof(int));
*ret = fres;
return num;
} }
/*Disjunctive counterpart of bpreds: a row is marked as soon as one predicate of a group holds.
  'cons' holds 'numc' ints read as consecutive triples (operator, operand1, operand2):
  - triples in [nx, numc): the operator is stored shifted by BPOFFSET; a negative operand o
    reads element (-o - 1) of the row in 'dop1' (row stride 'of1'), a non-negative operand o
    reads element o of the row in 'dop2' (row stride 'of2'). The first triple that holds sets
    res2[id] to 1 and ends the scan of this group;
  - triples in [0, nx): a negative operand o stands for the literal value -o, a non-negative
    operand o reads element o of the row in 'dop2'. The first triple that holds sets res[id]
    to 1 and ends the thread.
  Rows for which no triple holds are left untouched.
  NOTE(review): unlike bpreds, 'res2' is written without a NULL check - confirm that callers
  always pass a valid pointer whenever nx < numc.
  Launch: 1D grid, one thread per row; the dynamic shared memory must hold 'numc' ints.*/
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, rowact1, op1, op2;
	/*Block-stride copy: stages all 'numc' constants even when numc > blockDim.x
	  (a single guarded store per thread would leave the tail of 'shared' uninitialized)*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	/*Reached by every thread of the block, including those with id >= rows*/
	__syncthreads();
	if(id < rows)
	{
		rowact1 = id * of1;
		rowact = id * of2;
		/*Predicates whose operands may come from either table*/
		for(x = nx; x < numc; x += 3)
		{
			op1 = shared[x+1];
			if(op1 < 0)
				op1 = dop1[rowact1 - op1 - 1];
			else
				op1 = dop2[rowact + op1];
			op2 = shared[x+2];
			if(op2 < 0)
				op2 = dop1[rowact1 - op2 - 1];
			else
				op2 = dop2[rowact + op2];
			/*On a match, 'x = numc' terminates the loop after marking the row*/
			switch(shared[x] - BPOFFSET)
			{
				case SBG_EQ:	if(op1 == op2)
						{
							res2[id] = 1;
							x = numc;
						}
						break;
				case SBG_GT:	if(op1 > op2)
						{
							res2[id] = 1;
							x = numc;
						}
						break;
				case SBG_LT:	if(op1 < op2)
						{
							res2[id] = 1;
							x = numc;
						}
						break;
				case SBG_GE:	if(op1 >= op2)
						{
							res2[id] = 1;
							x = numc;
						}
						break;
				case SBG_LE:	if(op1 <= op2)
						{
							res2[id] = 1;
							x = numc;
						}
						break;
				case SBG_DF:	if(op1 != op2)
						{
							res2[id] = 1;
							x = numc;
						}
			}
		}
		/*Predicates over 'dop2' elements and literal constants*/
		for(x = 0; x < nx; x += 3)
		{
			op1 = shared[x+1];
			if(op1 < 0)
				op1 *= -1;
			else
				op1 = dop2[rowact + op1];
			op2 = shared[x+2];
			if(op2 < 0)
				op2 *= -1;
			else
				op2 = dop2[rowact + op2];
			switch(shared[x])
			{
				case SBG_EQ:	if(op1 == op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_GT:	if(op1 > op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_LT:	if(op1 < op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_GE:	if(op1 >= op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_LE:	if(op1 <= op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_DF:	if(op1 != op2)
						{
							res[id] = 1;
							return;
						}
			}
		}
	}
}
/*Mark all rows that comply with the comparison predicates using disjunctions (i.e. a row is marked if it complies with at least one predicate).
  'cons' holds 'numc' ints read as consecutive triples (operator, operand1, operand2); a negative
  operand o stands for the literal value -o, a non-negative operand o reads element o of the row
  in 'dop1' (row stride 'of1'). The first triple that holds sets res[id] to 1; rows that satisfy
  no triple are left untouched.
  Launch: 1D grid, one thread per row; the dynamic shared memory must hold 'numc' ints.*/
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, op1, op2;
	/*Block-stride copy: stages all 'numc' constants even when numc > blockDim.x
	  (a single guarded store per thread would leave the tail of 'shared' uninitialized)*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	/*Reached by every thread of the block, including those with id >= rows*/
	__syncthreads();
	if(id < rows)
	{
		rowact = id * of1;
		for(x = 0; x < numc; x += 3)
		{
			op1 = shared[x+1];
			if(op1 < 0)
				op1 *= -1;
			else
				op1 = dop1[rowact + op1];
			op2 = shared[x+2];
			if(op2 < 0)
				op2 *= -1;
			else
				op2 = dop1[rowact + op2];
			/*One satisfied predicate is enough: mark the row and stop*/
			switch(shared[x])
			{
				case SBG_EQ:	if(op1 == op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_GT:	if(op1 > op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_LT:	if(op1 < op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_GE:	if(op1 >= op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_LE:	if(op1 <= op2)
						{
							res[id] = 1;
							return;
						}
						break;
				case SBG_DF:	if(op1 != op2)
						{
							res[id] = 1;
							return;
						}
			}
		}
	}
}
/*Unmark all rows that do not comply with the comparison predicates using disjunctions (i.e. a row is unmarked only if it complies with none of the predicates).
  'cons' holds 'numc' ints read as consecutive triples (operator, operand1, operand2); a negative
  operand o stands for the literal value -o, a non-negative operand o reads element o of the row
  in 'dop1' (row stride 'of1'). Rows whose res[id] is already 0 are skipped; a row that satisfies
  at least one triple keeps its current mark; otherwise res[id] is reset to 0.
  Launch: 1D grid, one thread per row; the dynamic shared memory must hold 'numc' ints.*/
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, op1, op2;
	/*Block-stride copy: stages all 'numc' constants even when numc > blockDim.x
	  (a single guarded store per thread would leave the tail of 'shared' uninitialized)*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	/*Reached by every thread of the block, including those with id >= rows*/
	__syncthreads();
	if(id < rows)
	{
		/*Already unmarked: nothing to decide*/
		if(res[id] == 0)
			return;
		rowact = id * of1;
		for(x = 0; x < numc; x += 3)
		{
			op1 = shared[x+1];
			if(op1 < 0)
				op1 *= -1;
			else
				op1 = dop1[rowact + op1];
			op2 = shared[x+2];
			if(op2 < 0)
				op2 *= -1;
			else
				op2 = dop1[rowact + op2];
			/*One satisfied predicate is enough to keep the mark*/
			switch(shared[x])
			{
				case SBG_EQ:	if(op1 == op2)
							return;
						break;
				case SBG_GT:	if(op1 > op2)
							return;
						break;
				case SBG_LT:	if(op1 < op2)
							return;
						break;
				case SBG_GE:	if(op1 >= op2)
							return;
						break;
				case SBG_LE:	if(op1 <= op2)
							return;
						break;
				case SBG_DF:	if(op1 != op2)
							return;
			}
		}
		/*No predicate held for this row*/
		res[id] = 0;
	}
}

230
packages/cuda/cuda.c Normal file → Executable file
View File

@ -6,19 +6,25 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include <inttypes.h>
#include "pred.h" #include "pred.h"
#define MAXARG 100
YAP_Atom AtomEq, YAP_Atom AtomEq,
AtomGt, AtomGt,
AtomLt, AtomLt,
AtomGe, AtomGe,
AtomLe, AtomLe,
AtomDf; AtomDf,
AtomNt;
predicate *facts[100]; /*Temporary solution to maintain facts and rules*/ predicate *facts[MAXARG]; /*Temporary solution to maintain facts and rules*/
predicate *rules[100]; predicate *rules[MAXARG];
int32_t cf = 0, cr = 0; int32_t cf = 0, cr = 0;
char names[1024];
// initialize CUDA system // initialize CUDA system
void Cuda_Initialize( void ); void Cuda_Initialize( void );
@ -39,6 +45,19 @@ void init_cuda( void );
//#define DEBUG_INTERFACE 1 //#define DEBUG_INTERFACE 1
#ifdef ROCKIT
static int32_t query[100];
static int32_t qcont = 0;
static int cuda_init_query(void)
{
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG1));
query[qcont] = pname;
qcont++;
query[qcont] = 0;
return TRUE;
}
#endif
#if DEBUG_INTERFACE #if DEBUG_INTERFACE
static void static void
dump_mat(int32_t mat[], int32_t nrows, int32_t ncols) dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
@ -83,8 +102,18 @@ int32_t Cuda_NewFacts(predicate *pe)
#if DEBUG_INTERFACE #if DEBUG_INTERFACE
dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns ); dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
#endif #endif
#ifdef ROCKIT
if(cf >= 0)
{
facts[cf] = pe;
cf++;
}
#else
facts[cf] = pe; facts[cf] = pe;
cf++; cf++;
#endif
return TRUE; return TRUE;
} }
@ -115,7 +144,7 @@ int32_t Cuda_Erase(predicate *pe)
return TRUE; return TRUE;
} }
static YAP_Bool static int
load_facts( void ) { load_facts( void ) {
int32_t nrows = YAP_IntOfTerm(YAP_ARG1); int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
@ -164,15 +193,18 @@ load_facts( void ) {
static int currentFact = 0; static int currentFact = 0;
static predicate *currentPred = NULL; static predicate *currentPred = NULL;
static YAP_Bool static int
cuda_init_facts( void ) { cuda_init_facts( void ) {
int32_t nrows = YAP_IntOfTerm(YAP_ARG1); int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0; int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols); int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3)); int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
predicate *pred; predicate *pred;
strcat(names, YAP_AtomName(YAP_AtomOfTerm(YAP_ARG3)));
strcat(names, " ");
if (!mat) if (!mat)
return FALSE; return FALSE;
if (YAP_IsVarTerm( YAP_ARG4)) { if (YAP_IsVarTerm( YAP_ARG4)) {
@ -198,14 +230,16 @@ cuda_init_facts( void ) {
} }
} }
static YAP_Bool static int
cuda_load_fact( void ) { cuda_load_fact( void ) {
YAP_Term th = YAP_ARG1;
int i, j; int i = currentFact;
#if defined(DATALOG) || defined(TUFFY)
YAP_Term th = YAP_ARG1;
int ncols = currentPred->num_columns; int ncols = currentPred->num_columns;
int j;
int *mat = currentPred->address_host_table; int *mat = currentPred->address_host_table;
i = currentFact;
for (j = 0; j < ncols; j++) { for (j = 0; j < ncols; j++) {
YAP_Term ta = YAP_ArgOfTerm(j+1, th); YAP_Term ta = YAP_ArgOfTerm(j+1, th);
if (YAP_IsAtomTerm(ta)) { if (YAP_IsAtomTerm(ta)) {
@ -214,6 +248,8 @@ cuda_load_fact( void ) {
mat[i*ncols+j] = YAP_IntOfTerm(ta); mat[i*ncols+j] = YAP_IntOfTerm(ta);
} }
} }
#endif
i++; i++;
if (i == currentPred->num_rows) { if (i == currentPred->num_rows) {
Cuda_NewFacts(currentPred); Cuda_NewFacts(currentPred);
@ -225,21 +261,26 @@ cuda_load_fact( void ) {
return TRUE; return TRUE;
} }
static YAP_Bool static int
load_rule( void ) { load_rule( void ) {
// maximum of 2K symbols per rule, should be enough for ILP // maximum of 2K symbols per rule, should be enough for ILP
int32_t vec[2048], *ptr = vec, *nvec; int32_t vec[2048], *ptr = vec, *nvec, neg[2048];
// qK different variables; // qK different variables;
YAP_Term vars[1024]; YAP_Term vars[1024];
int32_t nvars = 0; int32_t nvars = 0, x;
int32_t ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */ int32_t ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */
int32_t ncols = YAP_IntOfTerm(YAP_ARG2); int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
YAP_Term t3 = YAP_ARG3; YAP_Term t3 = YAP_ARG3;
int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3)))); YAP_Atom name = YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3)));
int32_t pname = YAP_AtomToInt(name);
const char *strname = YAP_AtomName(name);
predicate *pred; predicate *pred;
int32_t cont = 0;
memset(neg, 0x0, 2048 * sizeof(int32_t));
while(YAP_IsPairTerm(t3)) { while(YAP_IsPairTerm(t3)) {
int32_t j = 0; int32_t j = 0, m;
YAP_Term th = YAP_HeadOfTerm(t3); YAP_Term th = YAP_HeadOfTerm(t3);
YAP_Functor f = YAP_FunctorOfTerm( th ); YAP_Functor f = YAP_FunctorOfTerm( th );
int32_t n = YAP_ArityOfFunctor( f ); int32_t n = YAP_ArityOfFunctor( f );
@ -257,8 +298,17 @@ load_rule( void ) {
*ptr++ = SBG_LE; *ptr++ = SBG_LE;
else if (at == AtomDf) else if (at == AtomDf)
*ptr++ = SBG_DF; *ptr++ = SBG_DF;
else if (at == AtomNt)
{
neg[cont] = 1;
cont++;
}
else else
*ptr++ = YAP_AtomToInt( at ); {
*ptr++ = YAP_AtomToInt( at );
cont++;
}
for (j = 0; j < n; j++) { for (j = 0; j < n; j++) {
YAP_Term ta = YAP_ArgOfTerm(j+1, th); YAP_Term ta = YAP_ArgOfTerm(j+1, th);
@ -277,6 +327,34 @@ load_rule( void ) {
} }
} else if (YAP_IsAtomTerm(ta)) { } else if (YAP_IsAtomTerm(ta)) {
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta)); *ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
} else if (YAP_IsApplTerm(ta)) {
f = YAP_FunctorOfTerm( ta );
at = YAP_NameOfFunctor( f );
m = YAP_ArityOfFunctor( f );
*ptr++ = YAP_AtomToInt( at );
for (x = 0; x < m; x++) {
YAP_Term ta2 = YAP_ArgOfTerm(x+1, ta);
if (YAP_IsVarTerm(ta2)) {
int32_t k;
for (k = 0; k < nvars; k++) {
if (vars[k] == ta2) {
*ptr++ = k+1;
break;
}
}
if (k == nvars) {
vars[k] = ta2;
*ptr++ = k+1;
nvars++;
}
} else if (YAP_IsAtomTerm(ta2)) {
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
} else {
*ptr++ = -YAP_IntOfTerm(ta);
}
}
} else { } else {
*ptr++ = -YAP_IntOfTerm(ta); *ptr++ = -YAP_IntOfTerm(ta);
} }
@ -296,53 +374,136 @@ load_rule( void ) {
pred->num_rows = ngoals; pred->num_rows = ngoals;
pred->num_columns = ncols; pred->num_columns = ncols;
pred->is_fact = FALSE; pred->is_fact = FALSE;
x = (strlen(strname) + 1) * sizeof(char);
pred->predname = (char *)malloc(x);
memcpy(pred->predname, strname, x);
nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec)); nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec)); memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
pred->address_host_table = nvec; pred->address_host_table = nvec;
pred->negatives = (int32_t *)malloc(sizeof(int32_t) * cont);
memcpy(pred->negatives, neg, sizeof(int32_t) * cont);
Cuda_NewRule( pred ); Cuda_NewRule( pred );
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred)); return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
} }
static YAP_Bool static int
cuda_erase( void ) cuda_erase( void )
{ {
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1); predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
return Cuda_Erase( ptr ); return Cuda_Erase( ptr );
} }
static YAP_Bool void setQuery(YAP_Term t1, int32_t **res)
{
int32_t *query = (int32_t *)malloc(MAXARG * sizeof(int32_t));
int32_t x, y = 0, *itr;
predicate *ptr = NULL;
if(YAP_IsPairTerm(t1))
{
while(YAP_IsPairTerm(t1))
{
ptr = (predicate *)YAP_IntOfTerm(YAP_HeadOfTerm(t1));
query[y] = ptr->name;
itr = ptr->address_host_table;
x = 2;
while(itr[x] != 0)
x++;
query[y+1] = itr[x+1];
t1 = YAP_TailOfTerm(t1);
y+=2;
}
}
else
{
ptr = (predicate *)YAP_IntOfTerm(t1);
query[y] = ptr->name;
itr = ptr->address_host_table;
x = 2;
while(itr[x] != 0)
x++;
query[y+1] = itr[x+1];
y += 2;
}
query[y] = -1;
query[y+1] = -1;
*res = query;
}
static int
cuda_eval( void ) cuda_eval( void )
{ {
int32_t *mat; int32_t *mat;
#if defined(DATALOG) || defined(TUFFY)
int32_t *query = NULL;
setQuery(YAP_ARG1, &query);
#endif
int32_t finalDR = YAP_IntOfTerm(YAP_ARG3);
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, names, finalDR);
#ifdef TUFFY
cf = 0;
#endif
#ifdef ROCKIT
if(cf > 0)
cf *= -1;
#endif
#if defined(TUFFY) || defined(ROCKIT)
cr = 0;
names[0] = '\0';
return FALSE;
#else
int32_t i;
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1); predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
int32_t ncols = ptr->num_columns; int32_t ncols = ptr->num_columns;
YAP_Term out = YAP_TermNil(); YAP_Term out = YAP_TermNil();
YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols); YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
YAP_Term vec[256]; YAP_Term vec[256];
int32_t i;
YAP_Atom at;
if (n < 0) if (n < 0)
return FALSE; return FALSE;
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
int32_t ni = ((n-1)-i)*ncols, j; int32_t ni = ((n-1)-i)*ncols, j;
printf("%s(", YAP_AtomName(YAP_IntToAtom(ptr->name)));
for (j=0; j<ncols; j++) { for (j=0; j<ncols; j++) {
vec[j] = YAP_MkIntTerm(mat[ni+j]); vec[j] = YAP_MkIntTerm(mat[ni+j]);
at = YAP_IntToAtom(mat[ni+j]);
if(at != NULL)
printf("%s", YAP_AtomName(at));
else
printf("%d", mat[ni+j]);
if(j < (ncols - 1))
printf(",");
} }
out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out); out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);
printf(")\n");
} }
if (n > 0) if (n > 0)
free( mat ); free( mat );
return YAP_Unify(YAP_ARG2, out); return YAP_Unify(YAP_ARG2, out);
#endif
} }
static YAP_Bool static int
cuda_coverage( void ) cuda_coverage( void )
{ {
int32_t *mat; int32_t *mat;
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat); #if defined(DATALOG) || defined(TUFFY)
int32_t ncols = ptr->num_columns; int32_t *query = NULL;
setQuery(YAP_ARG1, &query);
#endif
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2)); int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
int32_t i = n/2, min = 0, max = n-1; int32_t i = n/2, min = 0, max = n-1;
int32_t t0, t1; int32_t t0, t1;
@ -384,11 +545,16 @@ cuda_coverage( void )
} while ( TRUE ); } while ( TRUE );
} }
static YAP_Bool cuda_count( void ) static int cuda_count( void )
{ {
int32_t *mat; int32_t *mat;
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat); #if defined(DATALOG) || defined(TUFFY)
int32_t *query = NULL;
setQuery(YAP_ARG1, &query);
#endif
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
if (n < 0) if (n < 0)
return FALSE; return FALSE;
@ -396,7 +562,7 @@ static YAP_Bool cuda_count( void )
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n)); return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
} }
static YAP_Bool cuda_statistics( void ) static int cuda_statistics( void )
{ {
Cuda_Statistics(); Cuda_Statistics();
return TRUE; return TRUE;
@ -416,14 +582,20 @@ init_cuda(void)
AtomGe = YAP_LookupAtom(">="); AtomGe = YAP_LookupAtom(">=");
AtomLe = YAP_LookupAtom("=<"); AtomLe = YAP_LookupAtom("=<");
AtomDf = YAP_LookupAtom("\\="); AtomDf = YAP_LookupAtom("\\=");
AtomNt = YAP_LookupAtom("not");
YAP_UserCPredicate("load_facts", load_facts, 4); YAP_UserCPredicate("load_facts", load_facts, 4);
YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4); YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1); YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
YAP_UserCPredicate("load_rule", load_rule, 4); YAP_UserCPredicate("load_rule", load_rule, 4);
YAP_UserCPredicate("cuda_erase", cuda_erase, 1); YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
YAP_UserCPredicate("cuda_eval", cuda_eval, 2); YAP_UserCPredicate("cuda_eval", cuda_eval, 3);
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4); YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
YAP_UserCPredicate("cuda_count", cuda_count, 2); YAP_UserCPredicate("cuda_count", cuda_count, 2);
YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0); YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
#ifdef ROCKIT
YAP_UserCPredicate("cuda_init_query", cuda_init_query, 1);
#endif
} }

9
packages/cuda/cuda.yap Normal file → Executable file
View File

@ -2,10 +2,11 @@
cuda_inline/2, cuda_inline/2,
cuda_rule/2, cuda_rule/2,
cuda_erase/1, cuda_erase/1,
cuda_eval/2, cuda_eval/3,
cuda_coverage/4, cuda_coverage/4,
cuda_statistics/0, cuda_statistics/0,
cuda_count/2]). cuda_count/2,
cuda_query/1]).
tell_warning :- tell_warning :-
print_message(warning,functionality(cuda)). print_message(warning,functionality(cuda)).
@ -40,7 +41,7 @@ count_answers(G, N) :-
cuda_rule((Head :- Body) , IdRules) :- cuda_rule((Head :- Body) , IdRules) :-
body_to_list( Body, L, [], 1, N), body_to_list( Body, L, [], 1, N),
functor(Head, _Na, Ar), functor(Head, Na, Ar),
load_rule( N, Ar, [Head|L], IdRules ). load_rule( N, Ar, [Head|L], IdRules ).
@ -54,3 +55,5 @@ body_to_list( B, NL, L, N0, N) :-
body_to_list( B, [B|L], L, N0, N) :- body_to_list( B, [B|L], L, N0, N) :-
N is N0+1. N is N0+1.
cuda_query(Call) :-
cuda_init_query(Call).

1085
packages/cuda/lista.cu Normal file → Executable file

File diff suppressed because it is too large Load Diff

7
packages/cuda/lista.h Normal file → Executable file
View File

@ -25,8 +25,11 @@ typedef struct auxiliar{
int *numselfj; int *numselfj;
int **wherejoin; int **wherejoin;
int *numjoin; int *numjoin;
int3 num_bpreds; int totalpreds;
int *builtin; int **preds;
int2 *numpreds;
int *negatives;
char *rulename;
int gen_act; int gen_act;
int gen_ant; int gen_ant;
}rulenode; }rulenode;

509
packages/cuda/memory.cu Normal file → Executable file
View File

@ -5,63 +5,101 @@
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include "lista.h" #include "lista.h"
#include "memory.h" #include "memory.h"
#include "pred.h"
#define MAX_REC 200 #define MAX_REC 200
#define HALF_REC (MAX_REC / 2)
#define MAX_FIX_POINTS 100 #define MAX_FIX_POINTS 100
unsigned int avmem;
memnode temp_storage[MAX_REC]; memnode temp_storage[MAX_REC];
/*List used to store information (address, size, etc.) about facts and rule results loaded in the GPU*/
list<memnode> GPUmem; list<memnode> GPUmem;
/*List used to store information about rule results offloaded from the GPU to the CPU*/
list<memnode> CPUmem; list<memnode> CPUmem;
/*Ordering predicate for the rule list: true when the name of 'r1' is greater than the name of 'r2'*/
bool comparer(const rulenode &r1, const rulenode &r2)
{
	const bool r1_goes_first = (r1.name > r2.name);
	return r1_goes_first;
}
/*Used in search functions to compare iterations*/
bool compareiteration(const memnode &r1, const memnode &r2) bool compareiteration(const memnode &r1, const memnode &r2)
{ {
return (r1.iteration < r2.iteration); return (r1.iteration < r2.iteration);
} }
/*Used in search functions to compare names*/
bool comparename(const memnode &r1, const memnode &r2) bool comparename(const memnode &r1, const memnode &r2)
{ {
return (r1.name > r2.name); return (r1.name > r2.name);
} }
void calcular_mem(int dev) /*Linear search of 'name' fact*/
{
cudaDeviceProp p;
cudaGetDeviceProperties(&p, dev);
avmem = p.totalGlobalMem;
temp_storage[0].dev_address = NULL;
temp_storage[0].size = 0;
temp_storage[HALF_REC].dev_address = NULL;
temp_storage[HALF_REC].size = 0;
//cout << "Initial memory available " << avmem << endl;
}
template<class InputIterator> template<class InputIterator>
InputIterator buscarhecho(InputIterator first, InputIterator last, int name) InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
{ {
while(first!=last) while(first!=last)
{ {
if(first->name == name) return first; if(first->name == name && first->isrule == 0) return first;
++first; ++first;
} }
return last; return last;
} }
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum) /*Finds all results of rule 'name' in iteration 'itr' in both CPU and GPU memory. Every result found is removed from its respective list*/
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
{ {
int x = 1, sum = 0; int x = 0, sum = 0;
memnode temp; memnode temp;
list<memnode>::iterator i;
temp.name = name;
temp.iteration = itr; temp.iteration = itr;
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration); pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
while(rec.first != rec.second) while(rec.first != rec.second)
{ {
if(rec.first->name == name && rec.first->isrule == 1)
//cout << "itr = " << itr << " rec.first = " << rec.first->name << endl; {
temp_storage[x] = *rec.first;
rec.first = GPUmem.erase(rec.first);
sum += temp_storage[x].rows;
x++;
}
else
rec.first++;
}
*gpunum = x;
temp.name = name;
temp.isrule = 1;
i = GPUmem.insert(rec.first, temp);
rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
while(rec.first != rec.second)
{
if(rec.first->name == name && rec.first->isrule == 1)
{
temp_storage[x] = *rec.first;
rec.first = CPUmem.erase(rec.first);
sum += temp_storage[x].rows;
x++;
}
else
rec.first++;
}
*totalrows = sum;
*cpunum = x;
return i;
}
list<memnode>::iterator buscarpornombrecpu(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
{
int x = 0, sum = 0;
memnode temp;
list<memnode>::iterator i;
temp.iteration = itr;
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
while(rec.first != rec.second)
{
if(rec.first->name == name) if(rec.first->name == name)
{ {
temp_storage[x] = *rec.first; temp_storage[x] = *rec.first;
@ -72,25 +110,14 @@ list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *
else else
rec.first++; rec.first++;
} }
//if(x > 1)
rec.first = GPUmem.insert(rec.first, temp);
*totalrows = sum;
*gpunum = x; *gpunum = x;
return rec.first; temp.name = name;
} temp.isrule = 1;
rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
int buscarpornombrecpu(int name, int itr, int *totalrows)
{
int x = HALF_REC + 1, sum = 0;
memnode temp;
temp.iteration = itr;
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
/*if(rec.first != rec.second)
cout << "bscnomcpu = " << rec.first->name << " " << rec.first->iteration << endl;*/
while(rec.first != rec.second) while(rec.first != rec.second)
{ {
if(rec.first->name == name) if(rec.first->name == name)
{ {
temp_storage[x] = *rec.first; temp_storage[x] = *rec.first;
@ -101,18 +128,24 @@ int buscarpornombrecpu(int name, int itr, int *totalrows)
else else
rec.first++; rec.first++;
} }
*totalrows += sum; i = CPUmem.insert(rec.first, temp);
return x; *totalrows = sum;
*cpunum = x;
return i;
} }
/*Removes the least recently used memory block from GPU memory, sending it to CPU memory if it's a rule result.
If there are no used memory blocks in the GPU and we still don't have enough memory, the program exits with error*/
void limpiar(const char s[], size_t sz) void limpiar(const char s[], size_t sz)
{ {
list<memnode>::iterator ini; list<memnode>::iterator ini;
memnode temp; memnode temp;
size_t free, total;
if(GPUmem.size() == 0) if(GPUmem.size() == 0)
{ {
cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl; cudaMemGetInfo(&free,&total);
cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
exit(1); exit(1);
} }
@ -122,80 +155,32 @@ void limpiar(const char s[], size_t sz)
temp = *ini; temp = *ini;
temp.dev_address = (int *)malloc(ini->size); temp.dev_address = (int *)malloc(ini->size);
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost); cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
CPUmem.push_back(temp); list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
CPUmem.insert(pos, temp);
} }
liberar(ini->dev_address, ini->size); cudaFree(ini->dev_address);
GPUmem.erase(ini); GPUmem.erase(ini);
} }
void limpiartodo(int *p1, int *p2) /*Allocs 'size' amount of bytes in GPU memory. If not enough memory is available, removes least recently used memory blocks until
enough space is available*/
void reservar(int **ptr, size_t size)
{ {
list<memnode>::iterator ini; size_t free, total;
memnode temp;
int cont = 0;
if(p1 != NULL)
cont++;
if(p2 != NULL)
cont++;
ini = GPUmem.begin();
/*cout << "ANTES" << endl;
mostrar_memoria();
mostrar_memcpu();
cout << "FIN ANTES" << endl;*/
//cout << "mem = " << GPUmem.size() << " " << avmem << endl;
while(GPUmem.size() > cont)
{
if(ini->dev_address == p1 || ini->dev_address == p2)
{
ini++;
continue;
}
if(ini->isrule)
{
temp = *ini;
temp.dev_address = (int *)malloc(ini->size);
cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
CPUmem.push_back(temp);
}
liberar(ini->dev_address, temp.size);
ini = GPUmem.erase(ini);
}
/*cout << "DESPUES" << endl;
mostrar_memoria();
mostrar_memcpu();
cout << "FIN DESPUES" << endl;*/
//cout << "memfinal = " << GPUmem.size() << " " << avmem << endl;
}
void liberar(int *ptr, int size)
{
//cout << "L " << avmem << " " << size;
cudaFree(ptr);
#ifdef DEBUG_MEM
cerr << "- " << ptr << " " << size << endl;
#endif
avmem += size;
//cout << " " << avmem << endl;
}
void reservar(int **ptr, int size)
{
//size_t free, total;
//cudaMemGetInfo( &free, &total );
// cerr << "? " << free << " " << size << endl;
if (size == 0) { if (size == 0) {
*ptr = NULL; *ptr = NULL;
return; return;
} }
while(avmem < size)
cudaMemGetInfo(&free, &total);
while(free < size)
{
cout << "Se limpio memoria " << free << " " << total << endl;
limpiar("not enough memory", size); limpiar("not enough memory", size);
cudaMemGetInfo(&free, &total);
}
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation) while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
limpiar("Error in memory allocation", size); limpiar("Error in memory allocation", size);
if (! *ptr ) { if (! *ptr ) {
@ -205,11 +190,9 @@ void reservar(int **ptr, int size)
cerr << "Exiting CUDA...." << endl; cerr << "Exiting CUDA...." << endl;
exit(1); exit(1);
} }
avmem -= size;
// cout << " " << avmem << endl;
} }
/*Creates a new entry in the GPU memory list*/
void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule) void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{ {
memnode temp; memnode temp;
@ -222,6 +205,19 @@ void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
GPUmem.push_back(temp); GPUmem.push_back(temp);
} }
void registrarcpu(int name, int num_columns, int *ptr, int rows, int itr, int rule)
{
memnode temp;
temp.name = name;
temp.dev_address = ptr;
temp.rows = rows;
temp.size = rows * num_columns * sizeof(int);
temp.iteration = itr;
temp.isrule = rule;
CPUmem.push_back(temp);
}
/*Updates the information of an element in a list*/
template<class InputIterator> template<class InputIterator>
void actualizar(int num_columns, int *ptr, int rows, InputIterator i) void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
{ {
@ -230,6 +226,7 @@ void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
i->size = rows * num_columns * sizeof(int); i->size = rows * num_columns * sizeof(int);
} }
/*Count the total number of rows generated by rule 'name' in iteration 'iter'*/
int numrows(int name, int itr) int numrows(int name, int itr)
{ {
int sum = 0; int sum = 0;
@ -252,16 +249,17 @@ int numrows(int name, int itr)
return sum; return sum;
} }
extern "C" void * YAP_IntToAtom(int); extern "C" void * YAP_IntToAtom(int);
extern "C" char * YAP_AtomName(void *); extern "C" char * YAP_AtomName(void *);
/*Loads facts or rule results in GPU memory. If a fact is already in GPU memory, its pointer is simply returned. Otherwise,
memory is reserved and the fact is loaded. Rule results are loaded based on the current iteration 'itr' and both GPU and
CPU memories are searched for all instances of said results. The instances are combined into a single one in GPU memory.*/
int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr) int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{ {
int numgpu, numcpu, totalrows = 0; int numgpu, numcpu, totalrows = 0;
int *temp, x; int *temp, x;
int size, itrant; int size, itrant, inc = 0;
list<memnode>::iterator i; list<memnode>::iterator i;
memnode fact; memnode fact;
@ -279,9 +277,6 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
} }
size = num_rows * num_columns * sizeof(int); size = num_rows * num_columns * sizeof(int);
reservar(&temp, size); reservar(&temp, size);
#ifdef DEBUG_MEM
cerr << "+ " << temp << " temp " << size << endl;
#endif
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice); cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
registrar(name, num_columns, temp, num_rows, itr, 0); registrar(name, num_columns, temp, num_rows, itr, 0);
*ptr = temp; *ptr = temp;
@ -290,28 +285,25 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
if(itr > 0) if(itr > 0)
{ {
itrant = itr - 1; itrant = itr - 1;
i = buscarpornombre(name, itrant, &totalrows, &numgpu); i = buscarpornombre(name, itrant, &totalrows, &numgpu, &numcpu);
numcpu = buscarpornombrecpu(name, itrant, &totalrows); if((numgpu == 1) && (numcpu == 1))
if((numgpu == 2) && (numcpu == (HALF_REC + 1)))
{ {
actualizar(num_columns, temp_storage[1].dev_address, temp_storage[1].rows, i); actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
*ptr = temp_storage[1].dev_address; *ptr = temp_storage[0].dev_address;
return temp_storage[1].rows; return temp_storage[0].rows;
} }
size = totalrows * num_columns * sizeof(int); size = totalrows * num_columns * sizeof(int);
reservar(&temp, size); reservar(&temp, size);
#ifdef DEBUG_MEM for(x = 0; x < numgpu; x++)
cerr << "+ " << temp << " temp 2 " << size << endl;
#endif
for(x = 1; x < numgpu; x++)
{ {
cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice); cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
liberar(temp_storage[x].dev_address, temp_storage[x].size); inc += temp_storage[x].size / sizeof(int);
cudaFree(temp_storage[x].dev_address);
} }
for(x = HALF_REC + 1; x < numcpu; x++) for(; x < numcpu; x++)
{ {
cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice); cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
inc += temp_storage[x].size / sizeof(int);
free(temp_storage[x].dev_address); free(temp_storage[x].dev_address);
} }
actualizar(num_columns, temp, totalrows, i); actualizar(num_columns, temp, totalrows, i);
@ -321,9 +313,54 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
return 0; return 0;
} }
/*Host-memory counterpart of cargar().
  For a fact (is_fact != 0) the host table is returned as is: '*ptr' is set to
  'address_host_table' and 'num_rows' is returned.
  For a rule with itr > 0, the results of iteration 'itr - 1' are looked up with
  buscarpornombrecpu(). If exactly one instance exists and it is not on the GPU
  (numgpu == 0, numcpu == 1), its pointer is returned directly. Otherwise a new
  host array is allocated and every instance listed in 'temp_storage' is
  concatenated into it: entries [0, numgpu) are copied from the device and
  released with cudaFree, entries [numgpu, numcpu) are copied with memcpy and
  released with free. The list entry 'i' is then updated to describe the result.
  Returns the number of rows available through '*ptr', or 0 (leaving '*ptr'
  untouched) for a rule when itr <= 0.
  NOTE(review): 'temp_storage', 'numgpu' and 'numcpu' are presumably filled in by
  buscarpornombrecpu(), with 'numcpu' being the end index of the host entries -
  confirm against that function.*/
int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int itrant;
	size_t size, inc = 0;	/*byte size and element offset; size_t avoids int overflow on big tables*/
	list<memnode>::iterator i;

	if(is_fact)
	{
		*ptr = address_host_table;
		return num_rows;
	}
	if(itr > 0)
	{
		itrant = itr - 1;
		i = buscarpornombrecpu(name, itrant, &totalrows, &numgpu, &numcpu);
		if((numgpu == 0) && (numcpu == 1))
		{
			/*Single instance already in host memory: reuse it without copying*/
			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
			*ptr = temp_storage[0].dev_address;
			return temp_storage[0].rows;
		}

		/*Widen before multiplying so the product is computed in size_t*/
		size = (size_t)totalrows * num_columns * sizeof(int);
		temp = (int *)malloc(size);
		if(temp == NULL && size > 0)
		{
			/*Same policy as reservar(): an allocation failure is fatal*/
			cerr << "Error in host memory allocation of " << size << " bytes" << endl;
			cerr << "Exiting CUDA...." << endl;
			exit(1);
		}

		/*Instances that live in GPU memory*/
		for(x = 0; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
			inc += temp_storage[x].size / sizeof(int);
			cudaFree(temp_storage[x].dev_address);
		}
		/*Instances that live in host memory; x continues from numgpu*/
		for(; x < numcpu; x++)
		{
			memcpy(temp + inc, temp_storage[x].dev_address, temp_storage[x].size);
			inc += temp_storage[x].size / sizeof(int);
			free(temp_storage[x].dev_address);
		}
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}
/*Loads all results of rule 'name' from both GPU and CPU memories into the GPU*/
int cargafinal(int name, int cols, int **ptr) int cargafinal(int name, int cols, int **ptr)
{ {
int *temp, *ini, cont = 0; int *temp, *ini, cont = 0, numg = 0, numc = 0;
memnode bus; memnode bus;
bus.name = name; bus.name = name;
GPUmem.sort(comparename); GPUmem.sort(comparename);
@ -335,6 +372,7 @@ int cargafinal(int name, int cols, int **ptr)
while(pos != endg && pos->name == name) while(pos != endg && pos->name == name)
{ {
cont += pos->rows; cont += pos->rows;
numg++;
pos++; pos++;
} }
pos = lower_bound(CPUmem.begin(), endc, bus, comparename); pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
@ -342,15 +380,41 @@ int cargafinal(int name, int cols, int **ptr)
while(pos != endc && pos->name == name) while(pos != endc && pos->name == name)
{ {
cont += pos->rows; cont += pos->rows;
numc++;
pos++; pos++;
} }
reservar(&temp, cont * cols * sizeof(int));
#ifdef DEBUG_MEM
cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;
#endif
ini = temp;
if(numg == 0 && numc == 0)
return 0;
if(numg == 1 && numc == 0)
{
pos = gpu;
*ptr = pos->dev_address;
cont = pos->rows;
GPUmem.erase(pos);
#ifdef TUFFY
return -cont;
#else
return cont;
#endif
}
if(numg == 0 && numc == 1)
{
pos = cpu;
cont = pos->rows;
#ifdef TUFFY
reservar(&temp, pos->size);
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
*ptr = temp;
#else
*ptr = pos->dev_address;
#endif
CPUmem.erase(pos);
return -cont;
}
reservar(&temp, cont * cols * sizeof(int));
ini = temp;
pos = gpu; pos = gpu;
while(pos != endg && pos->name == name) while(pos != endg && pos->name == name)
{ {
@ -365,23 +429,13 @@ int cargafinal(int name, int cols, int **ptr)
temp += pos->size / sizeof(int); temp += pos->size / sizeof(int);
pos++; pos++;
} }
/*int x, y;
int *hop1 = (int *)malloc(cont * cols * sizeof(int));
cudaMemcpy(hop1, ini, cont * cols * sizeof(int), cudaMemcpyDeviceToHost);
cout << "select finala" << endl;
for(x = 0; x < cont; x++)
{
for(y = 0; y < cols; y++)
cout << hop1[x * cols + y] << " ";
cout << endl;
}
cout << "select finala" << endl;*/
*ptr = ini; *ptr = ini;
return cont; return cont;
} }
/*Compares the results of the current iteration against the results of older iterations.
Used to avoid infinite computations when the result is not a single fixed-point, but an
orbit of points.*/
bool generadas(int name, int filas, int cols, int itr) bool generadas(int name, int filas, int cols, int itr)
{ {
int r1, r2, x, fin; int r1, r2, x, fin;
@ -401,46 +455,26 @@ bool generadas(int name, int filas, int cols, int itr)
thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2); thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1); r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1); thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
/*int y;
int *a = (int *)malloc(r1 * cols * sizeof(int));
cudaMemcpy(a, dop1, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < r1; x++)
{
for(y = 0; y < cols; y++)
cout << a[x * cols + y] << " ";
}
cout << endl;
cudaMemcpy(a, dop2, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
for(x = 0; x < r1; x++)
{
for(y = 0; y < cols; y++)
cout << a[x * cols + y] << " ";
}
cout << endl;
free(a);*/
if(thrust::equal(pt1, pt1 + r1, pt2) == true) if(thrust::equal(pt1, pt1 + r1, pt2) == true)
return true; return true;
} }
} }
return false; return false;
} }
void mostrar_memoria() void mostrar_memoria()
{ {
int x; unsigned int x;
list<memnode>::iterator i = GPUmem.begin(); list<memnode>::iterator i = GPUmem.begin();
cout << "Memoria inicio GPU" << endl; cout << "Memoria inicio GPU" << endl;
for(x = 0; x < GPUmem.size(); x++, i++) for(x = 0; x < GPUmem.size(); x++, i++)
cout << i->name << " " << i->iteration << " " << i->size << endl; cout << i->name << " " << i->iteration << " " << i->isrule << " " << i->rows << " " << i->size << endl;
cout << "Memoria fin GPU" << endl; cout << "Memoria fin GPU" << endl;
} }
void mostrar_memcpu() void mostrar_memcpu()
{ {
int x; unsigned int x;
list<memnode>::iterator i = CPUmem.begin(); list<memnode>::iterator i = CPUmem.begin();
cout << "Memoria inicio CPU" << endl; cout << "Memoria inicio CPU" << endl;
for(x = 0; x < CPUmem.size(); x++, i++) for(x = 0; x < CPUmem.size(); x++, i++)
@ -448,53 +482,7 @@ void mostrar_memcpu()
cout << "Memoria fin CPU" << endl; cout << "Memoria fin CPU" << endl;
} }
void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator last) /*Clear all rule results from both GPU and CPU memory*/
{
GPUmem.sort(comparename);
CPUmem.sort(comparename);
list<memnode>::iterator gpu = GPUmem.begin();
list<memnode>::iterator cpu = CPUmem.begin();
int x, y, of, cols;
int *temp, cont = 0;
while(first != last)
{
while(first->name == gpu->name)
{
temp = (int *)malloc(gpu->size);
cudaMemcpy(temp, gpu->dev_address, gpu->size, cudaMemcpyDeviceToHost);
cols = gpu->size / (gpu->rows * sizeof(int));
cont += gpu->rows;
for(x = 0, of = 0; x < gpu->rows; x++)
{
for(y = 0; y < cols; y++, of++)
cout << temp[of] << " ";
cout << endl;
}
cudaFree(gpu->dev_address);
#ifdef DEBUG_MEM
cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;
#endif
free(temp);
gpu++;
}
while(first->name == cpu->name)
{
cols = cpu->size / (cpu->rows * sizeof(int));
cont += cpu->rows;
for(x = 0, of = 0; x < cpu->rows; x++)
{
for(y = 0; y < cols; y++, of++)
cout << cpu->dev_address[of] << " ";
cout << endl;
}
free(cpu->dev_address);
cpu++;
}
first++;
}
cout << cont << endl;
}
void clear_memory() void clear_memory()
{ {
list<memnode>::iterator ini; list<memnode>::iterator ini;
@ -503,15 +491,13 @@ void clear_memory()
fin = GPUmem.end(); fin = GPUmem.end();
while(ini != fin) while(ini != fin)
{ {
if (ini->isrule) { if(ini->isrule)
cudaFree(ini->dev_address); {
#ifdef DEBUG_MEM cudaFree(ini->dev_address);
cerr << "- " << ini->dev_address << " ini->dev_address" << endl; ini = GPUmem.erase(ini);
#endif }
ini = GPUmem.erase(ini); else
} else { ini++;
ini++;
}
} }
ini = CPUmem.begin(); ini = CPUmem.begin();
fin = CPUmem.end(); fin = CPUmem.end();
@ -522,3 +508,68 @@ void clear_memory()
} }
CPUmem.clear(); CPUmem.clear();
} }
/*Releases every table tracked in the GPU list (with cudaFree) and in the CPU
  list (with free), then empties both lists.*/
void clear_memory_all()
{
	list<memnode>::iterator node;

	/*Device-side tables*/
	for(node = GPUmem.begin(); node != GPUmem.end(); node++)
		cudaFree(node->dev_address);
	GPUmem.clear();

	/*Host-side tables*/
	for(node = CPUmem.begin(); node != CPUmem.end(); node++)
		free(node->dev_address);
	CPUmem.clear();
}
/*Removes fact 'name' from the GPU and CPU memory lists and releases its storage.
  Each list is searched once with buscarhecho(), so at most one entry per list is
  removed per call. The GPU table is released with cudaFree, the CPU table with free.*/
void liberar(int name)
{
	list<memnode>::iterator hit;
	int *addr;

	hit = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
	if(hit != GPUmem.end())
	{
		/*Save the address first: erase() invalidates the iterator*/
		addr = hit->dev_address;
		GPUmem.erase(hit);
		cudaFree(addr);
	}

	hit = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
	if(hit != CPUmem.end())
	{
		addr = hit->dev_address;
		CPUmem.erase(hit);
		free(addr);
	}
}
/*Appends the 'rows' rows of 'dop1' to fact 'name' in GPU memory.
  A new device array large enough for the old and the new rows is reserved, the
  existing rows are copied to its start and the rows of 'dop1' right after them
  (both copies are device-to-device, so 'dop1' must be a device pointer). The old
  list entry is replaced by one registered with iteration 0 and isrule 0, and the
  old array is released. If the fact is not found in the GPU list nothing is done.*/
void sumar(int name, int *dop1, int cols, int rows)
{
	list<memnode>::iterator pos = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
	if(pos == GPUmem.end())
		return;

	/*Copy the entry before any call that may touch the list*/
	memnode old = *pos;
	int total = rows + old.rows;
	int *merged;
	reservar(&merged, total * cols * sizeof(int));

	/*Number of ints occupied by the rows already stored*/
	int oldints = old.rows * cols;
	cudaMemcpyAsync(merged, old.dev_address, oldints * sizeof(int), cudaMemcpyDeviceToDevice);
	GPUmem.erase(pos);
	registrar(name, cols, merged, total, 0, 0);
	cudaMemcpyAsync(merged + oldints, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
	cudaFree(old.dev_address);
}

13
packages/cuda/memory.h Normal file → Executable file
View File

@ -1,26 +1,27 @@
#ifndef _MEMORY_H_ #ifndef _MEMORY_H_
#define _MEMORY_H_ #define _MEMORY_H_
//#include <thrust/device_vector.h>
#include <list> #include <list>
#include <vector> #include <vector>
#include "lista.h" #include "lista.h"
using namespace std; using namespace std;
//using namespace thrust;
void calcular_mem(int); bool comparer(const rulenode&, const rulenode&);
void liberar(int*, int);
void limpiar(const char [], size_t); void limpiar(const char [], size_t);
void limpiartodo(int*, int*); void limpiartodo(int*, int*);
int cargar(int, int, int, int, int*, int**, int); int cargar(int, int, int, int, int*, int**, int);
int cargarcpu(int, int, int, int, int*, int**, int);
int cargafinal(int, int, int**); int cargafinal(int, int, int**);
void reservar(int**, int); void reservar(int**, size_t);
void registrar(int, int, int*, int, int, int); void registrar(int, int, int*, int, int, int);
void registrarcpu(int, int, int*, int, int, int);
bool generadas(int, int, int, int); bool generadas(int, int, int, int);
void sumar(int, int*, int, int);
void liberar(int);
void mostrar_memoria(void); void mostrar_memoria(void);
void mostrar_memcpu(void); void mostrar_memcpu(void);
void clear_memory(void); void clear_memory(void);
void resultados(vector<rulenode>::iterator, vector<rulenode>::iterator); void clear_memory_all(void);
#endif #endif

12
packages/cuda/pred.h Normal file → Executable file
View File

@ -9,11 +9,17 @@ typedef struct Nodo{
int num_columns; int num_columns;
int is_fact; int is_fact;
int *address_host_table; int *address_host_table;
int *negatives;
char *predname;
double *weight;
}gpunode; }gpunode;
typedef gpunode predicate; typedef gpunode predicate;
// #define TIMER 1 //#define TIMER 1
#define DATALOG 1
#define NUM_T 4
#define INISIZE 1000000
#if TIMER #if TIMER
typedef struct Stats{ typedef struct Stats{
@ -27,6 +33,8 @@ typedef struct Stats{
extern statinfo cuda_stats; extern statinfo cuda_stats;
#endif #endif
/*Constants used to mark comparison predicates*/
#define BPOFFSET (-6)
#define SBG_EQ (-1) #define SBG_EQ (-1)
#define SBG_GT (-2) #define SBG_GT (-2)
#define SBG_LT (-3) #define SBG_LT (-3)
@ -34,6 +42,6 @@ extern statinfo cuda_stats;
#define SBG_LE (-5) #define SBG_LE (-5)
#define SBG_DF (-6) #define SBG_DF (-6)
int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**); int Cuda_Eval(predicate**, int, predicate**, int, int*, int**, char*, int);
void Cuda_Statistics( void ); void Cuda_Statistics( void );
#endif #endif

202
packages/cuda/selectproyect.cu Normal file → Executable file
View File

@ -1,10 +1,11 @@
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
//#include <thrust/device_ptr.h>
#include <thrust/scan.h> #include <thrust/scan.h>
#include <stdlib.h> #include <stdlib.h>
#include "memory.h" #include "memory.h"
#include "bpreds.h"
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/ /*Mark all rows that comply with the selections*/
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = blockIdx.x * blockDim.x + threadIdx.x;
@ -24,14 +25,14 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
res[id] = 1; res[id] = 1;
} }
} }
/*If we already have an array of marks (perhaps because the selfjoin was applied first),
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/ we unmark any rows that do not comply with the selections*/
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int *spos = &shared[numc];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = blockIdx.x * blockDim.x + threadIdx.x;
int x, rowact, posact; int x, rowact, posact;
if(threadIdx.x < (numc * 2)) if(threadIdx.x < numc)
shared[threadIdx.x] = cons[threadIdx.x]; shared[threadIdx.x] = cons[threadIdx.x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
@ -39,10 +40,10 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
if(res[id] == 0) if(res[id] == 0)
return; return;
rowact = id * cols; rowact = id * cols;
for(x = 0; x < numc; x++) for(x = 0; x < numc; x += 2)
{ {
posact = rowact + spos[x]; posact = rowact + shared[x];
if(dop1[posact] != shared[x]) if(dop1[posact] != shared[x+1])
{ {
res[id] = 0; res[id] = 0;
return; return;
@ -51,6 +52,7 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
} }
} }
/*Unmark all rows that do not comply with the selfjoins.*/
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res) __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
@ -66,12 +68,12 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
pos = id * cols; pos = id * cols;
for(x = 0; x < cont; x++) for(x = 0; x < cont; x++)
{ {
temp = shared[x]; temp = dop1[pos+shared[x]];
y = x + 1; y = x + 1;
temp2 = shared[y]; temp2 = shared[y];
while(temp2 > -1) while(temp2 > -1)
{ {
if(dop1[temp+pos] != dop1[temp2+pos]) if(temp != dop1[temp2+pos])
{ {
res[id] = 0; res[id] = 0;
return; return;
@ -84,6 +86,7 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
} }
} }
/*Mark all rows that comply with the selfjoins*/
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res) __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
@ -97,12 +100,12 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
pos = id * cols; pos = id * cols;
for(x = 0; x < cont; x++) for(x = 0; x < cont; x++)
{ {
temp = shared[x]; temp = dop1[pos+shared[x]];
y = x + 1; y = x + 1;
temp2 = shared[y]; temp2 = shared[y];
while(temp2 > -1) while(temp2 > -1)
{ {
if(dop1[temp+pos] != dop1[temp2+pos]) if(temp != dop1[temp2+pos])
return; return;
y++; y++;
temp2 = shared[y]; temp2 = shared[y];
@ -113,6 +116,7 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
} }
} }
/*Project all columns found in 'dhead' to a new array 'res'*/
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res) __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
@ -130,76 +134,31 @@ __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize,
} }
} }
/*Project all columns found in 'dhead' using only the rows marked as valid (i.e. those that complied with
selections, selfjoins, etc.). The array 'temp' holds the result of the prefix sum of said marks.*/
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res) __global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
{ {
extern __shared__ int shared[]; extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = blockIdx.x * blockDim.x + threadIdx.x;
int pos, posr, x; int pos, posr, x;
if(threadIdx.x < cols) if(threadIdx.x < hsize)
shared[threadIdx.x] = dhead[threadIdx.x]; shared[threadIdx.x] = dhead[threadIdx.x];
__syncthreads(); __syncthreads();
if(id < rows) if(id < rows)
{ {
posr = temp[id+1]; posr = temp[id];
if(temp[id] != posr && posr > 0) if(temp[id+1] != posr)
{ {
pos = id * cols; pos = id * cols;
posr = (posr - 1) * hsize; posr *= hsize;
for(x = 0; x < hsize; x++, posr++) for(x = 0; x < hsize; x++, posr++)
res[posr] = dop1[pos+shared[x]]; res[posr] = dop1[pos+shared[x]];
} }
} }
} }
/*__global__ void removedup() /*Performs selections, selfjoins and comparison predicates when the rule has a single normal predicate.*/
{ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *preds, int numpreds, int *project, int **ret, int ANDlogic)
extern __shared__ int shared[];
int id = blockIdx.x * blockDim.x + threadIdx.x;
if(threadIdx.x < cols)
shared[threadIdx.x] = dhead[threadIdx.x];
if(id < rows)
{
}
}*/
template<typename T> /*a libreria*/
struct suma : public binary_function<T,T,T>
{
__host__ __device__
T operator()(const T &r1, const T &r2)
{
if(r1 > -1)
{
if(r2 > 0)
return r1 + r2;
return -r1;
}
else
{
if(r2 > 0)
return abs(r1) + r2;
return r1;
}
}
};
int mayor(int a, int b, int c)
{
if(a > b)
{
if(a > c)
return a;
}
else
{
if(b > c)
return b;
}
return c;
}
int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
{ {
int *fres = NULL, *temp = NULL; int *fres = NULL, *temp = NULL;
int *dhead = NULL, tmplen; int *dhead = NULL, tmplen;
@ -209,30 +168,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
#if TIMER #if TIMER
cuda_stats.selects++; cuda_stats.selects++;
#endif #endif
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
int head_bytes = maximo(4, numselect, numselfj, numpreds, head_size) * sizeof(int);
reservar(&dhead, head_bytes); reservar(&dhead, head_bytes);
#ifdef DEBUG_MEM
cerr << "+ " << dhead << " dhead " << head_bytes << endl;
#endif
int blockllen = rows / 1024 + 1;
int numthreads = 1024; int numthreads = 1024;
//int numthreads = 32;
int blockllen = rows / numthreads + 1;
#ifdef ROCKIT
ANDlogic = 1;
#endif
//removerep(dop1, rows, cols, dhead,)
if(numselect > 0) if(numselect > 0)
{ {
tmplen = rows + 1; tmplen = rows + 1;
size2 = tmplen * sizeof(int); size2 = tmplen * sizeof(int);
reservar(&temp, size2); reservar(&temp, size2);
#ifdef DEBUG_MEM
cerr << "+ " << temp << " temp select " << size2 << endl;
#endif
cudaMemset(temp, 0, size2); cudaMemset(temp, 0, size2);
size = numselect * sizeof(int); size = numselect * sizeof(int);
cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice); cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
marcar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1); marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
if(numselfj > 0) if(numselfj > 0)
{ {
@ -241,6 +197,16 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1); samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
} }
if(numpreds > 0)
{
size = numpreds * sizeof(int);
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
else
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
}
res = thrust::device_pointer_cast(temp); res = thrust::device_pointer_cast(temp);
thrust::inclusive_scan(res + 1, res + tmplen, res + 1); thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
num = res[rows]; num = res[rows];
@ -249,13 +215,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
size = head_size * sizeof(int); size = head_size * sizeof(int);
reservar(&fres, num * size); reservar(&fres, num * size);
#ifdef DEBUG_MEM
cerr << "+ " << fres << " fres select " << num*size << endl;
#endif
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres); llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
liberar(dhead, head_bytes); cudaFree(dhead);
liberar(temp, size2); cudaFree(temp);
*ret = fres; *ret = fres;
return num; return num;
} }
@ -266,15 +229,22 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
tmplen = rows + 1; tmplen = rows + 1;
size2 = tmplen * sizeof(int); size2 = tmplen * sizeof(int);
reservar(&temp, size2); reservar(&temp, size2);
#ifdef DEBUG_MEM
cerr << "+ " << temp << " temp select " << size2 << endl;
#endif
cudaMemset(temp, 0, size2); cudaMemset(temp, 0, size2);
size = numselfj * sizeof(int); size = numselfj * sizeof(int);
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice); cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1); samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
if(numpreds > 0)
{
size = numpreds * sizeof(int);
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
if(ANDlogic)
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
else
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
}
res = thrust::device_pointer_cast(temp); res = thrust::device_pointer_cast(temp);
thrust::inclusive_scan(res + 1, res + tmplen, res + 1); thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
num = res[rows]; num = res[rows];
@ -283,28 +253,54 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
size = head_size * sizeof(int); size = head_size * sizeof(int);
reservar(&fres, num * size); reservar(&fres, num * size);
#ifdef DEBUG_MEM
cerr << "+ " << fres << " fres select again " << num*size << endl;
#endif
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres); llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
liberar(dhead, head_bytes); cudaFree(dhead);
liberar(temp, size2); cudaFree(temp);
*ret = fres; *ret = fres;
return num; return num;
} }
else else
{ {
size = head_size * sizeof(int); if(numpreds > 0)
reservar(&fres, rows * size); {
#ifdef DEBUG_MEM tmplen = rows + 1;
cerr << "+ " << fres << " fres select third " << rows*size << endl; size2 = tmplen * sizeof(int);
#endif reservar(&temp, size2);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice); cudaMemset(temp, 0, size2);
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres); size = numpreds * sizeof(int);
liberar(dhead, head_bytes); cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
*ret = fres;
return rows; if(ANDlogic)
bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
else
bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
res = thrust::device_pointer_cast(temp);
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
num = res[rows];
if(num == 0)
return 0;
size = head_size * sizeof(int);
reservar(&fres, num * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
cudaFree(dhead);
cudaFree(temp);
*ret = fres;
return num;
}
else
{
size = head_size * sizeof(int);
reservar(&fres, rows * size);
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
cudaFree(dhead);
*ret = fres;
return rows;
}
} }
} }
} }

File diff suppressed because it is too large Load Diff

935
packages/cuda/union2.cu Normal file → Executable file

File diff suppressed because it is too large Load Diff

View File

@ -2158,12 +2158,14 @@ static foreign_t init_python(void) {
char **argv; char **argv;
term_t t = PL_new_term_ref(); term_t t = PL_new_term_ref();
YAP_Argv(&argv); YAP_Argv(&argv);
if (argv) {
#if PY_MAJOR_VERSION < 3 #if PY_MAJOR_VERSION < 3
Py_SetProgramName(argv[0]); Py_SetProgramName(argv[0]);
#else #else
wchar_t *buf = Py_DecodeLocale(argv[0], NULL); wchar_t *buf = Py_DecodeLocale(argv[0], NULL);
Py_SetProgramName(buf); Py_SetProgramName(buf);
#endif #endif
}
Py_Initialize(); Py_Initialize();
py_Main = PyImport_AddModule("__main__"); py_Main = PyImport_AddModule("__main__");
py_Builtin = PyImport_AddModule("__builtin__"); py_Builtin = PyImport_AddModule("__builtin__");