history
This commit is contained in:
parent
3d68f0e06b
commit
cd41d373db
1
C/init.c
1
C/init.c
@ -1438,5 +1438,6 @@ void Yap_exit(int value) {
|
|||||||
Yap_ShutdownLoadForeign();
|
Yap_ShutdownLoadForeign();
|
||||||
}
|
}
|
||||||
Yap_CloseStreams(false);
|
Yap_CloseStreams(false);
|
||||||
|
Yap_CloseReadline();
|
||||||
exit(value);
|
exit(value);
|
||||||
}
|
}
|
||||||
|
@ -185,7 +185,7 @@ available in experimental implementations.
|
|||||||
*/
|
*/
|
||||||
YAP_FLAG(FILE_NAME_VARIABLES_FLAG, "file_name_variables", true, booleanFlag,
|
YAP_FLAG(FILE_NAME_VARIABLES_FLAG, "file_name_variables", true, booleanFlag,
|
||||||
"true", NULL),
|
"true", NULL),
|
||||||
YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15e",
|
YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15f",
|
||||||
NULL), /**< + `float_format `
|
NULL), /**< + `float_format `
|
||||||
|
|
||||||
C-library `printf()` format specification used by write/1 and
|
C-library `printf()` format specification used by write/1 and
|
||||||
|
@ -301,6 +301,7 @@ extern void Yap_DebugErrorPutc(int n);
|
|||||||
extern void Yap_DebugErrorPuts(const char *s);
|
extern void Yap_DebugErrorPuts(const char *s);
|
||||||
extern void Yap_DebugWriteIndicator(struct pred_entry *ap);
|
extern void Yap_DebugWriteIndicator(struct pred_entry *ap);
|
||||||
void Yap_PlWriteToStream(Term, int, int);
|
void Yap_PlWriteToStream(Term, int, int);
|
||||||
|
void Yap_CloseReadline(void);
|
||||||
/* depth_lim.c */
|
/* depth_lim.c */
|
||||||
bool Yap_InitReadline(Term t);
|
bool Yap_InitReadline(Term t);
|
||||||
void Yap_InitItDeepenPreds(void);
|
void Yap_InitItDeepenPreds(void);
|
||||||
|
14
H/Yatom.h
14
H/Yatom.h
@ -659,6 +659,19 @@ INLINE_ONLY inline EXTERN PropFlags IsPredProperty(int flags) {
|
|||||||
return (PropFlags)((flags == PEProp));
|
return (PropFlags)((flags == PEProp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe);
|
||||||
|
|
||||||
|
INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe) {
|
||||||
|
if (pe->ModuleOfPred == IDB_MODULE) {
|
||||||
|
return NULL;
|
||||||
|
} else if (pe->ArityOfPE == 0) {
|
||||||
|
return (Atom)pe->FunctorOfPred;
|
||||||
|
} else {
|
||||||
|
Functor f = pe->FunctorOfPred;
|
||||||
|
return NameOfFunctor(f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Flags for code or dbase entry */
|
/* Flags for code or dbase entry */
|
||||||
/* There are several flags for code and data base entries */
|
/* There are several flags for code and data base entries */
|
||||||
typedef enum {
|
typedef enum {
|
||||||
@ -1322,7 +1335,6 @@ INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags);
|
|||||||
|
|
||||||
INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags flags) {
|
INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags flags) {
|
||||||
return flags == FlagProperty;
|
return flags == FlagProperty;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Proto types */
|
/* Proto types */
|
||||||
|
@ -474,6 +474,8 @@
|
|||||||
|
|
||||||
#define LOCAL_search_atoms LOCAL->search_atoms_
|
#define LOCAL_search_atoms LOCAL->search_atoms_
|
||||||
#define REMOTE_search_atoms(wid) REMOTE(wid)->search_atoms_
|
#define REMOTE_search_atoms(wid) REMOTE(wid)->search_atoms_
|
||||||
|
#define LOCAL_SearchPreds LOCAL->SearchPreds_
|
||||||
|
#define REMOTE_SearchPreds(wid) REMOTE(wid)->SearchPreds_
|
||||||
|
|
||||||
#define LOCAL_CurSlot LOCAL->CurSlot_
|
#define LOCAL_CurSlot LOCAL->CurSlot_
|
||||||
#define REMOTE_CurSlot(wid) REMOTE(wid)->CurSlot_
|
#define REMOTE_CurSlot(wid) REMOTE(wid)->CurSlot_
|
||||||
|
@ -268,6 +268,7 @@ const char* Error_Function_;
|
|||||||
UInt exo_arg_;
|
UInt exo_arg_;
|
||||||
// atom completion
|
// atom completion
|
||||||
struct scan_atoms* search_atoms_;
|
struct scan_atoms* search_atoms_;
|
||||||
|
struct pred_entry* SearchPreds_;
|
||||||
// Slots
|
// Slots
|
||||||
yhandle_t CurSlot_;
|
yhandle_t CurSlot_;
|
||||||
yhandle_t NSlots_;
|
yhandle_t NSlots_;
|
||||||
|
@ -269,6 +269,7 @@ static void InitWorker(int wid) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
REMOTE_CurSlot(wid) = 0;
|
REMOTE_CurSlot(wid) = 0;
|
||||||
REMOTE_NSlots(wid) = 0;
|
REMOTE_NSlots(wid) = 0;
|
||||||
REMOTE_SlotBase(wid) = InitHandles(wid);
|
REMOTE_SlotBase(wid) = InitHandles(wid);
|
||||||
|
@ -279,4 +279,5 @@ static void RestoreWorker(int wid USES_REGS) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -312,6 +312,7 @@ UInt exo_arg =0
|
|||||||
|
|
||||||
// atom completion
|
// atom completion
|
||||||
struct scan_atoms* search_atoms void
|
struct scan_atoms* search_atoms void
|
||||||
|
struct pred_entry* SearchPreds void
|
||||||
|
|
||||||
// Slots
|
// Slots
|
||||||
yhandle_t CurSlot =0
|
yhandle_t CurSlot =0
|
||||||
|
18
os/charsio.c
18
os/charsio.c
@ -470,20 +470,13 @@ code with _C_.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */
|
static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */
|
||||||
int sno = Yap_CheckStream(ARG1, Input_Stream_f, "get_byte/2");
|
int sno = Yap_CheckBinaryStream(ARG1, Input_Stream_f, "get_byte/2");
|
||||||
Int status;
|
Int status;
|
||||||
Term out;
|
Term out;
|
||||||
|
|
||||||
if (sno < 0)
|
if (sno < 0)
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
status = GLOBAL_Stream[sno].status;
|
status = GLOBAL_Stream[sno].status;
|
||||||
if (!(status & Binary_Stream_f)
|
|
||||||
//&& strictISOFlag()
|
|
||||||
) {
|
|
||||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
|
||||||
Yap_Error(PERMISSION_ERROR_INPUT_STREAM, ARG1, "get_byte/2");
|
|
||||||
return (FALSE);
|
|
||||||
}
|
|
||||||
out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno));
|
out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno));
|
||||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
||||||
return Yap_unify_constant(ARG2, out);
|
return Yap_unify_constant(ARG2, out);
|
||||||
@ -812,16 +805,9 @@ static Int put_byte(USES_REGS1) { /* '$put_byte'(Stream,N) */
|
|||||||
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, t2, "put_code/1");
|
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, t2, "put_code/1");
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
int sno = Yap_CheckStream(ARG1, Output_Stream_f, "put/2");
|
int sno = Yap_CheckBinaryStream(ARG1, Output_Stream_f, "put/2");
|
||||||
if (sno < 0)
|
if (sno < 0)
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
if (!(GLOBAL_Stream[sno].status & Binary_Stream_f)
|
|
||||||
// && strictISOFlag()
|
|
||||||
) {
|
|
||||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
|
||||||
Yap_Error(PERMISSION_ERROR_OUTPUT_BINARY_STREAM, ARG1, NULL);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
GLOBAL_Stream[sno].stream_putc(sno, ch);
|
GLOBAL_Stream[sno].stream_putc(sno, ch);
|
||||||
/*
|
/*
|
||||||
* if (!(GLOBAL_Stream[sno].status & Null_Stream_f))
|
* if (!(GLOBAL_Stream[sno].status & Null_Stream_f))
|
||||||
|
18
os/iopreds.c
18
os/iopreds.c
@ -1576,6 +1576,24 @@ int Yap_CheckTextStream__(const char *file, const char *f, int line, Term arg,
|
|||||||
return sno;
|
return sno;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Validate that `arg` denotes an open stream of the requested `kind` AND that
 * the stream is a binary stream.
 *
 * @param file, f, line  source location of the caller, forwarded to the
 *                       error-reporting routines.
 * @param arg            the Prolog term naming the stream.
 * @param kind           stream direction flag(s) handed to CheckStream__().
 * @param msg            context string attached to any error raised.
 *
 * @return the stream number on success; -1 after an error has been raised.
 *         On success the value comes straight from CheckStream__(); the
 *         byte-I/O callers release GLOBAL_Stream[sno].streamlock themselves
 *         once they are done. On the "not a binary stream" failure the lock
 *         is released here before reporting.
 *
 * The test must reject streams that are NOT binary: this mirrors the inline
 * checks that get_byte/put_byte used before they were routed through this
 * helper. The error reported is a permission error on a text stream.
 */
int Yap_CheckBinaryStream__(const char *file, const char *f, int line, Term arg,
                            int kind, const char *msg) {
  int sno;
  if ((sno = CheckStream__(file, f, line, arg, kind, msg)) < 0)
    return -1;
  if (!(GLOBAL_Stream[sno].status & Binary_Stream_f)) {
    /* byte-oriented I/O was attempted on a text stream */
    UNLOCK(GLOBAL_Stream[sno].streamlock);
    if (kind == Input_Stream_f)
      PlIOError__(file, f, line, PERMISSION_ERROR_INPUT_TEXT_STREAM, arg,
                  msg);
    else
      PlIOError__(file, f, line, PERMISSION_ERROR_OUTPUT_TEXT_STREAM, arg,
                  msg);
    return -1;
  }
  return sno;
}
|
||||||
|
|
||||||
/* used from C-interface */
|
/* used from C-interface */
|
||||||
int Yap_GetFreeStreamDForReading(void) {
|
int Yap_GetFreeStreamDForReading(void) {
|
||||||
int sno = GetFreeStreamD();
|
int sno = GetFreeStreamD();
|
||||||
|
@ -45,6 +45,10 @@ extern int Yap_CheckStream__(const char *, const char *, int, Term, int,
|
|||||||
Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
|
Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
|
||||||
extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int,
|
extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int,
|
||||||
const char *);
|
const char *);
|
||||||
|
#define Yap_CheckBinaryStream(arg, kind, msg) \
|
||||||
|
Yap_CheckBinaryStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
|
||||||
|
extern int Yap_CheckBinaryStream__(const char *, const char *, int, Term, int,
|
||||||
|
const char *);
|
||||||
|
|
||||||
extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name,
|
extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name,
|
||||||
encoding_t encoding, stream_flags_t flags,
|
encoding_t encoding, stream_flags_t flags,
|
||||||
|
@ -168,8 +168,7 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
|||||||
p = mod->PredForME;
|
p = mod->PredForME;
|
||||||
}
|
}
|
||||||
char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
|
char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
|
||||||
if (strlen(c) > strlen(prefix) &&
|
if (strlen(c) > strlen(prefix) && strstr(c, prefix) == c &&
|
||||||
strstr(c, prefix) == c &&
|
|
||||||
!(p->PredFlags & HiddenPredFlag)) {
|
!(p->PredFlags & HiddenPredFlag)) {
|
||||||
LOCAL_SearchPreds = p;
|
LOCAL_SearchPreds = p;
|
||||||
arity_t ar = p->ArityOfPE;
|
arity_t ar = p->ArityOfPE;
|
||||||
@ -177,13 +176,10 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
|||||||
if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
|
if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
size_t sz = strlen(c);
|
|
||||||
chain_t *el = (chain_t *)malloc(sizeof(chain_t)+sz);
|
|
||||||
strncpy(LOCAL_FileNameBuf, c, YAP_FILENAME_MAX);
|
strncpy(LOCAL_FileNameBuf, c, YAP_FILENAME_MAX);
|
||||||
strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
|
strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
|
||||||
return LOCAL_FileNameBuf;
|
return LOCAL_FileNameBuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
LOCAL_SearchPreds = NULL;
|
LOCAL_SearchPreds = NULL;
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -209,7 +205,7 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
|||||||
if (start == 0 && isalpha(text[0])) {
|
if (start == 0 && isalpha(text[0])) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while (i < end) {
|
while (i < end) {
|
||||||
if (isalnum(text[i]))
|
if (isalnum(text[i]) || text[i] == '_')
|
||||||
i++;
|
i++;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
@ -227,14 +223,18 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
|||||||
|
|
||||||
if ((strstr(p, "[") == p) || (strstr(p, "compile(") == p) ||
|
if ((strstr(p, "[") == p) || (strstr(p, "compile(") == p) ||
|
||||||
(strstr(p, "consult(") == p) || (strstr(p, "load_files(") == p) ||
|
(strstr(p, "consult(") == p) || (strstr(p, "load_files(") == p) ||
|
||||||
(strstr(p,"reconsult(") == p) || (strstr(p,"use_module(") == p))
|
(strstr(p, "reconsult(") == p) || (strstr(p, "use_module(") == p) ||
|
||||||
|
(strstr(p, "cd(") == p))
|
||||||
matches = rl_completion_matches((char *)text, /* for pre-4.2 */
|
matches = rl_completion_matches((char *)text, /* for pre-4.2 */
|
||||||
rl_filename_completion_function);
|
rl_filename_completion_function);
|
||||||
return matches;
|
return matches;
|
||||||
}
|
}
|
||||||
int i = end, ch = '\0';
|
int i = end, ch = '\0';
|
||||||
while (i > start) {
|
while (i > start) {
|
||||||
ch = text[-i];
|
ch = text[--i];
|
||||||
|
if (ch == '\'')
|
||||||
|
return rl_completion_matches((char *)text, /* for pre-4.2 */
|
||||||
|
rl_filename_completion_function);
|
||||||
if (isalnum(text[i]))
|
if (isalnum(text[i]))
|
||||||
continue;
|
continue;
|
||||||
break;
|
break;
|
||||||
@ -303,12 +303,13 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
|||||||
GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
|
GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
|
||||||
GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
|
GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
|
||||||
GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
|
GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
|
||||||
#if _MSC_VER || defined(__MINGW32__)
|
#if _WIN32
|
||||||
rl_instream = stdin;
|
rl_instream = stdin;
|
||||||
#endif
|
#endif
|
||||||
rl_outstream = stderr;
|
rl_outstream = stderr;
|
||||||
using_history();
|
using_history();
|
||||||
const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
|
const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
|
||||||
|
history_file = s;
|
||||||
if (read_history(s) != 0) {
|
if (read_history(s) != 0) {
|
||||||
FILE *f = fopen(s, "a");
|
FILE *f = fopen(s, "a");
|
||||||
if (f) {
|
if (f) {
|
||||||
@ -362,7 +363,6 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
|||||||
return false;
|
return false;
|
||||||
if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
|
if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
|
||||||
add_history((char *)myrl_line);
|
add_history((char *)myrl_line);
|
||||||
write_history(history_file);
|
|
||||||
fflush(NULL);
|
fflush(NULL);
|
||||||
}
|
}
|
||||||
s->u.irl.ptr = s->u.irl.buf = myrl_line;
|
s->u.irl.ptr = s->u.irl.buf = myrl_line;
|
||||||
@ -473,6 +473,12 @@ static char *predicate_enumerate(const char *prefix, int state) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Save the interactive command history to disk.
 *
 * Does nothing when YAP is built without readline support. When readline is
 * compiled in, the history is written only if a history file name has been
 * recorded: write_history() treats a NULL file name as "~/.history", so an
 * unguarded call would write to an unrelated file whenever this routine runs
 * without readline having been set up.
 * NOTE(review): assumes history_file starts out NULL until readline
 * initialisation assigns it -- confirm at its declaration.
 */
void Yap_CloseReadline(void) {
#if USE_READLINE
  if (history_file)
    write_history(history_file);
#endif
}
|
||||||
|
|
||||||
static Int has_readline(USES_REGS1) {
|
static Int has_readline(USES_REGS1) {
|
||||||
#if USE_READLINE
|
#if USE_READLINE
|
||||||
return true;
|
return true;
|
||||||
|
@ -390,6 +390,8 @@ write1 ( USES_REGS1 )
|
|||||||
if (output_stream == -1) output_stream = 1;
|
if (output_stream == -1) output_stream = 1;
|
||||||
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||||
if (args == NULL) {
|
if (args == NULL) {
|
||||||
|
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||||
|
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||||
if (LOCAL_Error_TYPE)
|
if (LOCAL_Error_TYPE)
|
||||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||||
return false;
|
return false;
|
||||||
@ -415,6 +417,8 @@ write_canonical1 ( USES_REGS1 )
|
|||||||
if (output_stream == -1) output_stream = 1;
|
if (output_stream == -1) output_stream = 1;
|
||||||
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||||
if (args == NULL) {
|
if (args == NULL) {
|
||||||
|
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||||
|
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||||
if (LOCAL_Error_TYPE)
|
if (LOCAL_Error_TYPE)
|
||||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||||
return false;
|
return false;
|
||||||
@ -440,6 +444,8 @@ write_canonical ( USES_REGS1 )
|
|||||||
we cannot make recursive Prolog calls */
|
we cannot make recursive Prolog calls */
|
||||||
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||||
if (args == NULL) {
|
if (args == NULL) {
|
||||||
|
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||||
|
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||||
if (LOCAL_Error_TYPE)
|
if (LOCAL_Error_TYPE)
|
||||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||||
return false;
|
return false;
|
||||||
@ -467,6 +473,8 @@ writeq1 ( USES_REGS1 )
|
|||||||
we cannot make recursive Prolog calls */
|
we cannot make recursive Prolog calls */
|
||||||
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||||
if (args == NULL) {
|
if (args == NULL) {
|
||||||
|
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||||
|
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||||
if (LOCAL_Error_TYPE)
|
if (LOCAL_Error_TYPE)
|
||||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||||
return false;
|
return false;
|
||||||
@ -495,6 +503,8 @@ writeq ( USES_REGS1 )
|
|||||||
we cannot make recursive Prolog calls */
|
we cannot make recursive Prolog calls */
|
||||||
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||||
if (args == NULL) {
|
if (args == NULL) {
|
||||||
|
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||||
|
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||||
if (LOCAL_Error_TYPE)
|
if (LOCAL_Error_TYPE)
|
||||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||||
return false;
|
return false;
|
||||||
@ -523,6 +533,8 @@ print1 ( USES_REGS1 )
|
|||||||
we cannot make recursive Prolog calls */
|
we cannot make recursive Prolog calls */
|
||||||
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||||
if (args == NULL) {
|
if (args == NULL) {
|
||||||
|
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||||
|
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||||
if (LOCAL_Error_TYPE)
|
if (LOCAL_Error_TYPE)
|
||||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||||
return false;
|
return false;
|
||||||
@ -551,6 +563,8 @@ print ( USES_REGS1 )
|
|||||||
we cannot make recursive Prolog calls */
|
we cannot make recursive Prolog calls */
|
||||||
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END );
|
||||||
if (args == NULL) {
|
if (args == NULL) {
|
||||||
|
if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
|
||||||
|
LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
|
||||||
if (LOCAL_Error_TYPE)
|
if (LOCAL_Error_TYPE)
|
||||||
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
|
||||||
return false;
|
return false;
|
||||||
|
@ -54,8 +54,25 @@ if (CUDA_FOUND)
|
|||||||
macro_optional_find_package (Thrust ON)
|
macro_optional_find_package (Thrust ON)
|
||||||
|
|
||||||
set (CUDA_SOURCES
|
set (CUDA_SOURCES
|
||||||
|
CC_CSSTree.cu
|
||||||
|
bpreds.cu
|
||||||
|
dbio.cu
|
||||||
lista.cu
|
lista.cu
|
||||||
memory.cu
|
memory.cu
|
||||||
|
selectproyect.cu
|
||||||
|
treeb.cu
|
||||||
|
union2.cu
|
||||||
|
)
|
||||||
|
|
||||||
|
set (CXX_SOURCES
|
||||||
|
bpredscpu.cpp
|
||||||
|
joincpu.cpp
|
||||||
|
selectproyectcpu.cpp
|
||||||
|
unioncpu2.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
set (C_SOURCES
|
||||||
|
creator2.c
|
||||||
cuda.c
|
cuda.c
|
||||||
)
|
)
|
||||||
|
|
||||||
|
15
packages/cuda/Makefile.in
Normal file → Executable file
15
packages/cuda/Makefile.in
Normal file → Executable file
@ -23,7 +23,7 @@ CC=@CC@
|
|||||||
NVCC=@NVCC@
|
NVCC=@NVCC@
|
||||||
CFLAGS= @SHLIB_CFLAGS@ $(YAP_EXTRAS) $(DEFS) -I$(srcdir) -I../.. -I$(srcdir)/../../include @CUDA_CPPFLAGS@
|
CFLAGS= @SHLIB_CFLAGS@ $(YAP_EXTRAS) $(DEFS) -I$(srcdir) -I../.. -I$(srcdir)/../../include @CUDA_CPPFLAGS@
|
||||||
NVCCFLAGS=@CUDA_CPPFLAGS@ -I$(srcdir) -I../.. -I$(srcdir)/../../include
|
NVCCFLAGS=@CUDA_CPPFLAGS@ -I$(srcdir) -I../.. -I$(srcdir)/../../include
|
||||||
CUDA_LDFLAGS=@CUDA_LDFLAGS@
|
LDFLAGS=@LDFLAGS@
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# You shouldn't need to change what follows.
|
# You shouldn't need to change what follows.
|
||||||
@ -39,7 +39,7 @@ SO=@SO@
|
|||||||
CWD=$(PWD)
|
CWD=$(PWD)
|
||||||
#
|
#
|
||||||
|
|
||||||
CUDA_PROLOG= \
|
BDD_PROLOG= \
|
||||||
$(srcdir)/cuda.yap
|
$(srcdir)/cuda.yap
|
||||||
|
|
||||||
OBJS=cuda.o memory.o lista.o
|
OBJS=cuda.o memory.o lista.o
|
||||||
@ -62,16 +62,11 @@ memory.o: $(srcdir)/memory.cu $(srcdir)/pred.h
|
|||||||
@DO_SECOND_LD@cuda.@SO@: $(OBJS)
|
@DO_SECOND_LD@cuda.@SO@: $(OBJS)
|
||||||
@DO_SECOND_LD@ @CUDA_SHLIB_LD@ $(CUDA_LDFLAGS) -o cuda.@SO@ $(OBJS)
|
@DO_SECOND_LD@ @CUDA_SHLIB_LD@ $(CUDA_LDFLAGS) -o cuda.@SO@ $(OBJS)
|
||||||
|
|
||||||
install: all install-examples
|
install: all
|
||||||
mkdir -p $(DESTDIR)$(SHAREDIR)
|
mkdir -p $(DESTDIR)$(SHAREDIR)
|
||||||
for h in $(CUDA_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
|
for h in $(BDD_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
|
||||||
$(INSTALL_PROGRAM) $(SOBJS) $(DESTDIR)$(YAPLIBDIR)
|
$(INSTALL_PROGRAM) $(SOBJS) $(DESTDIR)$(YAPLIBDIR)
|
||||||
|
|
||||||
install-examples:
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o *~ $(OBJS) *.BAK
|
rm -f *.o *~ $(OBJS) $(SOBJS) *.BAK
|
||||||
|
|
||||||
distclean: clean
|
|
||||||
rm -f $(SOBJS) Makefile
|
|
||||||
|
|
||||||
|
499
packages/cuda/bpreds.cu
Normal file → Executable file
499
packages/cuda/bpreds.cu
Normal file → Executable file
@ -1,4 +1,113 @@
|
|||||||
__global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
#include <thrust/device_vector.h>
|
||||||
|
#include <thrust/scan.h>
|
||||||
|
#include <cstdarg>
|
||||||
|
#include "pred.h"
|
||||||
|
|
||||||
|
/*
 * Returns the largest of the `count` int arguments that follow `count`.
 *
 * The running maximum starts at 0, so the result is never negative: when
 * `count` is 0 (or negative), or when every supplied value is <= 0, the
 * function returns 0.
 * All variadic arguments must be of type int.
 */
int maximo(int count, ...)
{
	va_list args;
	int best = 0;

	va_start(args, count);
	for(int remaining = count; remaining > 0; remaining--)
	{
		const int candidate = va_arg(args, int);
		best = (candidate > best) ? candidate : best;
	}
	va_end(args);

	return best;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Marks the rows that satisfy every comparison predicate (conjunction).
 *
 * `cons` holds `numc` ints read as consecutive triples (op, a, b):
 *   - triples in [nx, numc): the operator is `op - BPOFFSET`; a negative
 *     operand v selects dop1[id * of1 + (-v - 1)], a non-negative operand v
 *     selects dop2[id * of2 + v];
 *   - triples in [0, nx): the operator is `op`; a negative operand v is the
 *     constant -v, a non-negative operand v selects dop2[id * of2 + v].
 *
 * If every triple of the second group holds, res2[id] is set to 1 (skipped
 * when res2 is NULL); if, in addition, every triple of the first group holds,
 * res[id] is set to 1. The kernel never writes 0, so the caller presumably
 * clears res/res2 beforehand (TODO confirm at the launch site).
 *
 * Launch: 1D grid, one thread per row (threads with id >= rows only help
 * loading the predicates). Dynamic shared memory must hold at least `numc`
 * ints.
 */
__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, i, rowact, rowact1, op1, op2;
	bool holds;

	/* Block-stride copy: loads all numc entries even when numc > blockDim.x
	   (a single "threadIdx.x < numc" copy would leave the tail unset). */
	for(i = threadIdx.x; i < numc; i += blockDim.x)
		shared[i] = cons[i];
	/* Reached by every thread of the block, before any early exit. */
	__syncthreads();

	if(id >= rows)
		return;

	rowact1 = id * of1;
	rowact = id * of2;

	/* Predicates that may reference columns of both dop1 and dop2. */
	for(x = nx; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? dop1[rowact1 - op1 - 1] : dop2[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? dop1[rowact1 - op2 - 1] : dop2[rowact + op2];

		holds = true; /* unknown operator codes are ignored */
		switch(shared[x] - BPOFFSET)
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(!holds)
			return;
	}
	if(res2 != NULL)
		res2[id] = 1;

	/* Predicates over dop2 columns and constants. */
	for(x = 0; x < nx; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop2[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop2[rowact + op2];

		holds = true;
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(!holds)
			return;
	}
	res[id] = 1;
}
|
||||||
|
|
||||||
|
/*Mark all rows that comply with the comparison predicates*/
|
||||||
|
__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||||
{
|
{
|
||||||
extern __shared__ int shared[];
|
extern __shared__ int shared[];
|
||||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -8,7 +117,7 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
if(id < rows)
|
if(id < rows)
|
||||||
{
|
{
|
||||||
rowact = id * cols;
|
rowact = id * of1;
|
||||||
for(x = 0; x < numc; x += 3)
|
for(x = 0; x < numc; x += 3)
|
||||||
{
|
{
|
||||||
op1 = shared[x+1];
|
op1 = shared[x+1];
|
||||||
@ -46,98 +155,306 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
|
/*Unmark all rows that do not comply with the comparison predicates*/
|
||||||
|
__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
|
||||||
{
|
{
|
||||||
int *temp;
|
extern __shared__ int shared[];
|
||||||
int tmplen = rows + 1;
|
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int size = tmplen * sizeof(int);
|
int x, rowact, op1, op2;
|
||||||
reservar(&temp, size);
|
if(threadIdx.x < numc)
|
||||||
#ifdef DEBUG_MEM
|
shared[threadIdx.x] = cons[threadIdx.x];
|
||||||
cerr << "+ " << temp << " temp bpreds " << size << endl;
|
__syncthreads();
|
||||||
#endif
|
if(id < rows)
|
||||||
cudaMemset(temp, 0, size);
|
{
|
||||||
|
if(res[id] == 0)
|
||||||
#if TIMER
|
return;
|
||||||
cuda_stats.builtins++;
|
rowact = id * of1;
|
||||||
#endif
|
for(x = 0; x < numc; x += 3)
|
||||||
int *dhead;
|
{
|
||||||
int predn = numpreds.x * 3;
|
op1 = shared[x+1];
|
||||||
int spredn = predn * sizeof(int);
|
if(op1 < 0)
|
||||||
int sproj = numpreds.z * sizeof(int);
|
op1 *= -1;
|
||||||
int hsize;
|
|
||||||
if(predn > numpreds.z)
|
|
||||||
hsize = spredn;
|
|
||||||
else
|
else
|
||||||
hsize = sproj;
|
op1 = dop1[rowact + op1];
|
||||||
reservar(&dhead, hsize);
|
op2 = shared[x+2];
|
||||||
#ifdef DEBUG_MEM
|
if(op2 < 0)
|
||||||
cerr << "+ " << dhead << " dhead " << hsize << endl;
|
op2 *= -1;
|
||||||
#endif
|
else
|
||||||
cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
|
op2 = dop1[rowact + op2];
|
||||||
|
switch(shared[x])
|
||||||
int blockllen = rows / 1024 + 1;
|
|
||||||
int numthreads = 1024;
|
|
||||||
|
|
||||||
/*int x;
|
|
||||||
cout << "arraypreds = ";
|
|
||||||
for(x = 0; x < predn; x++)
|
|
||||||
cout << bin[x] << " ";
|
|
||||||
cout << endl;
|
|
||||||
cout << "temptable = ";
|
|
||||||
for(x = 0; x < numpreds.z; x++)
|
|
||||||
cout << bin[x+predn] << " ";
|
|
||||||
cout << endl;
|
|
||||||
int y;
|
|
||||||
int *hop1 = (int *)malloc(numpreds.y * rows * sizeof(int));
|
|
||||||
cudaMemcpy(hop1, dop1, numpreds.y * rows * sizeof(int), cudaMemcpyDeviceToHost);
|
|
||||||
for(x = 0; x < rows; x++)
|
|
||||||
{
|
{
|
||||||
for(y = 0; y < numpreds.y; y++)
|
case SBG_EQ: if(op1 != op2)
|
||||||
cout << hop1[x * numpreds.y + y] << " ";
|
|
||||||
cout << endl;
|
|
||||||
}
|
|
||||||
free(hop1);*/
|
|
||||||
|
|
||||||
predicates<<<blockllen, numthreads, spredn>>>(dop1, rows, numpreds.y, dhead, predn, temp + 1);
|
|
||||||
|
|
||||||
/*int y;
|
|
||||||
int *hop1 = (int *)malloc((rows + 1) * sizeof(int));
|
|
||||||
cudaMemcpy(hop1, temp, (rows + 1) * sizeof(int), cudaMemcpyDeviceToHost);
|
|
||||||
for(x = 0; x < (rows + 1); x++)
|
|
||||||
cout << hop1[x] << " ";
|
|
||||||
cout << endl;
|
|
||||||
free(hop1);*/
|
|
||||||
|
|
||||||
thrust::device_ptr<int> res;
|
|
||||||
res = thrust::device_pointer_cast(temp);
|
|
||||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
|
||||||
int num = res[rows];
|
|
||||||
if(num == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
int *fres;
|
|
||||||
reservar(&fres, num * sproj);
|
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "+ " << fres << " fres " << num * sproj << endl;
|
|
||||||
#endif
|
|
||||||
cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
|
|
||||||
llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
|
|
||||||
|
|
||||||
/*int y;
|
|
||||||
int *hop1 = (int *)malloc(numpreds.z * num * sizeof(int));
|
|
||||||
cudaMemcpy(hop1, fres, numpreds.z * num * sizeof(int), cudaMemcpyDeviceToHost);
|
|
||||||
for(x = 0; x < num; x++)
|
|
||||||
{
|
{
|
||||||
for(y = 0; y < numpreds.z; y++)
|
res[id] = 0;
|
||||||
cout << hop1[x * numpreds.z + y] << " ";
|
return;
|
||||||
cout << endl;
|
|
||||||
}
|
}
|
||||||
free(hop1);*/
|
break;
|
||||||
|
case SBG_GT: if(op1 <= op2)
|
||||||
liberar(dhead, hsize);
|
{
|
||||||
liberar(temp, size);
|
res[id] = 0;
|
||||||
liberar(dop1, rows * cols * sizeof(int));
|
return;
|
||||||
|
|
||||||
*ret = fres;
|
|
||||||
return num;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
case SBG_LT: if(op1 >= op2)
|
||||||
|
{
|
||||||
|
res[id] = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SBG_GE: if(op1 < op2)
|
||||||
|
{
|
||||||
|
res[id] = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SBG_LE: if(op1 > op2)
|
||||||
|
{
|
||||||
|
res[id] = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SBG_DF: if(op1 == op2)
|
||||||
|
{
|
||||||
|
res[id] = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Marks the rows that satisfy at least one comparison predicate
 * (disjunction), using the same predicate encoding as the conjunctive
 * two-table kernel.
 *
 * `cons` holds `numc` ints read as consecutive triples (op, a, b):
 *   - triples in [nx, numc): the operator is `op - BPOFFSET`; a negative
 *     operand v selects dop1[id * of1 + (-v - 1)], a non-negative operand v
 *     selects dop2[id * of2 + v]. As soon as one triple holds, res2[id] is
 *     set to 1 and the rest of this group is skipped;
 *   - triples in [0, nx): the operator is `op`; a negative operand v is the
 *     constant -v, a non-negative operand v selects dop2[id * of2 + v]. As
 *     soon as one triple holds, res[id] is set to 1 and the thread finishes.
 *
 * The second group is always evaluated, whatever the outcome of the first.
 * res2 may be NULL, in which case the res2 mark is skipped. The kernel never
 * writes 0, so the caller presumably clears res/res2 beforehand (TODO confirm
 * at the launch site).
 *
 * Launch: 1D grid, one thread per row. Dynamic shared memory must hold at
 * least `numc` ints.
 */
__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, i, rowact, rowact1, op1, op2;
	bool holds;

	/* Block-stride copy: loads all numc entries even when numc > blockDim.x
	   (a single "threadIdx.x < numc" copy would leave the tail unset). */
	for(i = threadIdx.x; i < numc; i += blockDim.x)
		shared[i] = cons[i];
	/* Reached by every thread of the block, before any early exit. */
	__syncthreads();

	if(id >= rows)
		return;

	rowact1 = id * of1;
	rowact = id * of2;

	/* Predicates that may reference columns of both dop1 and dop2. */
	for(x = nx; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? dop1[rowact1 - op1 - 1] : dop2[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? dop1[rowact1 - op2 - 1] : dop2[rowact + op2];

		holds = false; /* unknown operator codes never match */
		switch(shared[x] - BPOFFSET)
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(holds)
		{
			/* Same NULL tolerance as the conjunctive kernel. */
			if(res2 != NULL)
				res2[id] = 1;
			break; /* one match is enough for this group */
		}
	}

	/* Predicates over dop2 columns and constants. */
	for(x = 0; x < nx; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop2[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop2[rowact + op2];

		holds = false;
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(holds)
		{
			res[id] = 1;
			return;
		}
	}
}
|
||||||
|
|
||||||
|
/*
 * Mark all rows that comply with the comparison predicates using
 * disjunctions (i.e. a row is marked if it complies with at least one
 * predicate).
 *
 * `cons` holds `numc` ints read as consecutive triples (op, a, b). A negative
 * operand v is the constant -v; a non-negative operand v selects
 * dop1[id * of1 + v]. The first triple whose comparison holds sets res[id]
 * to 1 and ends the thread; rows matching no triple are left untouched, so
 * the caller presumably clears res beforehand (TODO confirm at the launch
 * site).
 *
 * Launch: 1D grid, one thread per row. Dynamic shared memory must hold at
 * least `numc` ints.
 */
__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, i, rowact, op1, op2;
	bool holds;

	/* Block-stride copy: loads all numc entries even when numc > blockDim.x
	   (a single "threadIdx.x < numc" copy would leave the tail unset). */
	for(i = threadIdx.x; i < numc; i += blockDim.x)
		shared[i] = cons[i];
	/* Reached by every thread of the block, before any early exit. */
	__syncthreads();

	if(id >= rows)
		return;

	rowact = id * of1;
	for(x = 0; x < numc; x += 3)
	{
		op1 = shared[x+1];
		op1 = (op1 < 0) ? -op1 : dop1[rowact + op1];
		op2 = shared[x+2];
		op2 = (op2 < 0) ? -op2 : dop1[rowact + op2];

		holds = false; /* unknown operator codes never match */
		switch(shared[x])
		{
			case SBG_EQ: holds = (op1 == op2); break;
			case SBG_GT: holds = (op1 > op2); break;
			case SBG_LT: holds = (op1 < op2); break;
			case SBG_GE: holds = (op1 >= op2); break;
			case SBG_LE: holds = (op1 <= op2); break;
			case SBG_DF: holds = (op1 != op2); break;
		}
		if(holds)
		{
			res[id] = 1;
			return;
		}
	}
}
|
||||||
|
|
||||||
|
/*Unmark all rows that do not comply with the comparison predicates using disjunctions (i.e. a row is unmarked only if it complies with none of the predicates).

  Each thread handles row 'id' of 'dop1'; a row starts at dop1[id * of1].
  'cons' holds 'numc' ints that are read as consecutive triples (operator, operand 1, operand 2):
    - the operator is one of the SBG_* comparison codes;
    - a negative operand is a constant whose value is its negation;
    - a non-negative operand is an offset into the current row.
  Rows whose res[id] is already 0 are skipped. For the others, res[id] is kept when at least one
  predicate holds and reset to 0 when none does.

  Launch requirements: a 1D grid with at least 'rows' threads and at least numc * sizeof(int) bytes
  of dynamic shared memory. Every loop iteration reads a full triple, so numc is expected to be a
  multiple of 3.*/
__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, op1, op2, hit;

	/*Stage the predicate triples in shared memory. The copy is strided by the block size so that
	  every entry is loaded even when numc is larger than the number of threads in the block.*/
	for(x = threadIdx.x; x < numc; x += blockDim.x)
		shared[x] = cons[x];
	__syncthreads();

	/*Threads past the last row only help with the copy above; no barrier follows, so leaving is safe.*/
	if(id >= rows)
		return;

	/*Already unmarked rows stay unmarked.*/
	if(res[id] == 0)
		return;

	rowact = id * of1;
	for(x = 0; x < numc; x += 3)
	{
		/*Resolve both operands: constant (negative encoding) or column of the current row.*/
		op1 = shared[x+1];
		if(op1 < 0)
			op1 *= -1;
		else
			op1 = dop1[rowact + op1];
		op2 = shared[x+2];
		if(op2 < 0)
			op2 *= -1;
		else
			op2 = dop1[rowact + op2];

		switch(shared[x])
		{
			case SBG_EQ: hit = (op1 == op2); break;
			case SBG_GT: hit = (op1 > op2); break;
			case SBG_LT: hit = (op1 < op2); break;
			case SBG_GE: hit = (op1 >= op2); break;
			case SBG_LE: hit = (op1 <= op2); break;
			case SBG_DF: hit = (op1 != op2); break;
			default: hit = 0; /*unknown operator codes never match*/
		}

		/*Disjunction: one satisfied predicate is enough to keep the mark.*/
		if(hit)
			return;
	}

	/*No predicate held for this row.*/
	res[id] = 0;
}
|
||||||
|
|
||||||
|
228
packages/cuda/cuda.c
Normal file → Executable file
228
packages/cuda/cuda.c
Normal file → Executable file
@ -6,19 +6,25 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <inttypes.h>
|
||||||
#include "pred.h"
|
#include "pred.h"
|
||||||
|
|
||||||
|
#define MAXARG 100
|
||||||
|
|
||||||
YAP_Atom AtomEq,
|
YAP_Atom AtomEq,
|
||||||
AtomGt,
|
AtomGt,
|
||||||
AtomLt,
|
AtomLt,
|
||||||
AtomGe,
|
AtomGe,
|
||||||
AtomLe,
|
AtomLe,
|
||||||
AtomDf;
|
AtomDf,
|
||||||
|
AtomNt;
|
||||||
|
|
||||||
predicate *facts[100]; /*Temporary solution to maintain facts and rules*/
|
predicate *facts[MAXARG]; /*Temporary solution to maintain facts and rules*/
|
||||||
predicate *rules[100];
|
predicate *rules[MAXARG];
|
||||||
int32_t cf = 0, cr = 0;
|
int32_t cf = 0, cr = 0;
|
||||||
|
|
||||||
|
char names[1024];
|
||||||
|
|
||||||
// initialize CUDA system
|
// initialize CUDA system
|
||||||
void Cuda_Initialize( void );
|
void Cuda_Initialize( void );
|
||||||
|
|
||||||
@ -39,6 +45,19 @@ void init_cuda( void );
|
|||||||
|
|
||||||
//#define DEBUG_INTERFACE 1
|
//#define DEBUG_INTERFACE 1
|
||||||
|
|
||||||
|
#ifdef ROCKIT
|
||||||
|
static int32_t query[100];
|
||||||
|
static int32_t qcont = 0;
|
||||||
|
static int cuda_init_query(void)
|
||||||
|
{
|
||||||
|
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG1));
|
||||||
|
query[qcont] = pname;
|
||||||
|
qcont++;
|
||||||
|
query[qcont] = 0;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#if DEBUG_INTERFACE
|
#if DEBUG_INTERFACE
|
||||||
static void
|
static void
|
||||||
dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
|
dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
|
||||||
@ -83,8 +102,18 @@ int32_t Cuda_NewFacts(predicate *pe)
|
|||||||
#if DEBUG_INTERFACE
|
#if DEBUG_INTERFACE
|
||||||
dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
|
dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ROCKIT
|
||||||
|
if(cf >= 0)
|
||||||
|
{
|
||||||
facts[cf] = pe;
|
facts[cf] = pe;
|
||||||
cf++;
|
cf++;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
facts[cf] = pe;
|
||||||
|
cf++;
|
||||||
|
#endif
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -115,7 +144,7 @@ int32_t Cuda_Erase(predicate *pe)
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static YAP_Bool
|
static int
|
||||||
load_facts( void ) {
|
load_facts( void ) {
|
||||||
|
|
||||||
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||||
@ -164,15 +193,18 @@ load_facts( void ) {
|
|||||||
static int currentFact = 0;
|
static int currentFact = 0;
|
||||||
static predicate *currentPred = NULL;
|
static predicate *currentPred = NULL;
|
||||||
|
|
||||||
static YAP_Bool
|
static int
|
||||||
cuda_init_facts( void ) {
|
cuda_init_facts( void ) {
|
||||||
|
|
||||||
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
|
int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
|
||||||
int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
|
int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
|
||||||
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
|
int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
|
||||||
predicate *pred;
|
predicate *pred;
|
||||||
|
|
||||||
|
strcat(names, YAP_AtomName(YAP_AtomOfTerm(YAP_ARG3)));
|
||||||
|
strcat(names, " ");
|
||||||
|
|
||||||
if (!mat)
|
if (!mat)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
if (YAP_IsVarTerm( YAP_ARG4)) {
|
if (YAP_IsVarTerm( YAP_ARG4)) {
|
||||||
@ -198,14 +230,16 @@ cuda_init_facts( void ) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static YAP_Bool
|
static int
|
||||||
cuda_load_fact( void ) {
|
cuda_load_fact( void ) {
|
||||||
YAP_Term th = YAP_ARG1;
|
|
||||||
|
|
||||||
int i, j;
|
int i = currentFact;
|
||||||
|
|
||||||
|
#if defined(DATALOG) || defined(TUFFY)
|
||||||
|
YAP_Term th = YAP_ARG1;
|
||||||
int ncols = currentPred->num_columns;
|
int ncols = currentPred->num_columns;
|
||||||
|
int j;
|
||||||
int *mat = currentPred->address_host_table;
|
int *mat = currentPred->address_host_table;
|
||||||
i = currentFact;
|
|
||||||
for (j = 0; j < ncols; j++) {
|
for (j = 0; j < ncols; j++) {
|
||||||
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||||
if (YAP_IsAtomTerm(ta)) {
|
if (YAP_IsAtomTerm(ta)) {
|
||||||
@ -214,6 +248,8 @@ cuda_load_fact( void ) {
|
|||||||
mat[i*ncols+j] = YAP_IntOfTerm(ta);
|
mat[i*ncols+j] = YAP_IntOfTerm(ta);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
i++;
|
i++;
|
||||||
if (i == currentPred->num_rows) {
|
if (i == currentPred->num_rows) {
|
||||||
Cuda_NewFacts(currentPred);
|
Cuda_NewFacts(currentPred);
|
||||||
@ -225,21 +261,26 @@ cuda_load_fact( void ) {
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static YAP_Bool
|
static int
|
||||||
load_rule( void ) {
|
load_rule( void ) {
|
||||||
// maximum of 2K symbols per rule, should be enough for ILP
|
// maximum of 2K symbols per rule, should be enough for ILP
|
||||||
int32_t vec[2048], *ptr = vec, *nvec;
|
int32_t vec[2048], *ptr = vec, *nvec, neg[2048];
|
||||||
// qK different variables;
|
// qK different variables;
|
||||||
YAP_Term vars[1024];
|
YAP_Term vars[1024];
|
||||||
int32_t nvars = 0;
|
int32_t nvars = 0, x;
|
||||||
int32_t ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */
|
int32_t ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */
|
||||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
|
int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
|
||||||
YAP_Term t3 = YAP_ARG3;
|
YAP_Term t3 = YAP_ARG3;
|
||||||
int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
|
YAP_Atom name = YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3)));
|
||||||
|
int32_t pname = YAP_AtomToInt(name);
|
||||||
|
|
||||||
|
const char *strname = YAP_AtomName(name);
|
||||||
predicate *pred;
|
predicate *pred;
|
||||||
|
int32_t cont = 0;
|
||||||
|
memset(neg, 0x0, 2048 * sizeof(int32_t));
|
||||||
|
|
||||||
while(YAP_IsPairTerm(t3)) {
|
while(YAP_IsPairTerm(t3)) {
|
||||||
int32_t j = 0;
|
int32_t j = 0, m;
|
||||||
YAP_Term th = YAP_HeadOfTerm(t3);
|
YAP_Term th = YAP_HeadOfTerm(t3);
|
||||||
YAP_Functor f = YAP_FunctorOfTerm( th );
|
YAP_Functor f = YAP_FunctorOfTerm( th );
|
||||||
int32_t n = YAP_ArityOfFunctor( f );
|
int32_t n = YAP_ArityOfFunctor( f );
|
||||||
@ -257,8 +298,17 @@ load_rule( void ) {
|
|||||||
*ptr++ = SBG_LE;
|
*ptr++ = SBG_LE;
|
||||||
else if (at == AtomDf)
|
else if (at == AtomDf)
|
||||||
*ptr++ = SBG_DF;
|
*ptr++ = SBG_DF;
|
||||||
|
else if (at == AtomNt)
|
||||||
|
{
|
||||||
|
neg[cont] = 1;
|
||||||
|
cont++;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
*ptr++ = YAP_AtomToInt( at );
|
*ptr++ = YAP_AtomToInt( at );
|
||||||
|
cont++;
|
||||||
|
}
|
||||||
|
|
||||||
for (j = 0; j < n; j++) {
|
for (j = 0; j < n; j++) {
|
||||||
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||||
|
|
||||||
@ -277,6 +327,34 @@ load_rule( void ) {
|
|||||||
}
|
}
|
||||||
} else if (YAP_IsAtomTerm(ta)) {
|
} else if (YAP_IsAtomTerm(ta)) {
|
||||||
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||||
|
} else if (YAP_IsApplTerm(ta)) {
|
||||||
|
f = YAP_FunctorOfTerm( ta );
|
||||||
|
at = YAP_NameOfFunctor( f );
|
||||||
|
m = YAP_ArityOfFunctor( f );
|
||||||
|
*ptr++ = YAP_AtomToInt( at );
|
||||||
|
|
||||||
|
for (x = 0; x < m; x++) {
|
||||||
|
YAP_Term ta2 = YAP_ArgOfTerm(x+1, ta);
|
||||||
|
|
||||||
|
if (YAP_IsVarTerm(ta2)) {
|
||||||
|
int32_t k;
|
||||||
|
for (k = 0; k < nvars; k++) {
|
||||||
|
if (vars[k] == ta2) {
|
||||||
|
*ptr++ = k+1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (k == nvars) {
|
||||||
|
vars[k] = ta2;
|
||||||
|
*ptr++ = k+1;
|
||||||
|
nvars++;
|
||||||
|
}
|
||||||
|
} else if (YAP_IsAtomTerm(ta2)) {
|
||||||
|
*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
|
||||||
|
} else {
|
||||||
|
*ptr++ = -YAP_IntOfTerm(ta);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
*ptr++ = -YAP_IntOfTerm(ta);
|
*ptr++ = -YAP_IntOfTerm(ta);
|
||||||
}
|
}
|
||||||
@ -296,53 +374,136 @@ load_rule( void ) {
|
|||||||
pred->num_rows = ngoals;
|
pred->num_rows = ngoals;
|
||||||
pred->num_columns = ncols;
|
pred->num_columns = ncols;
|
||||||
pred->is_fact = FALSE;
|
pred->is_fact = FALSE;
|
||||||
|
x = (strlen(strname) + 1) * sizeof(char);
|
||||||
|
pred->predname = (char *)malloc(x);
|
||||||
|
memcpy(pred->predname, strname, x);
|
||||||
nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
|
nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
|
||||||
memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
|
memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
|
||||||
pred->address_host_table = nvec;
|
pred->address_host_table = nvec;
|
||||||
|
pred->negatives = (int32_t *)malloc(sizeof(int32_t) * cont);
|
||||||
|
memcpy(pred->negatives, neg, sizeof(int32_t) * cont);
|
||||||
Cuda_NewRule( pred );
|
Cuda_NewRule( pred );
|
||||||
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
|
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
|
||||||
}
|
}
|
||||||
|
|
||||||
static YAP_Bool
|
static int
|
||||||
cuda_erase( void )
|
cuda_erase( void )
|
||||||
{
|
{
|
||||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||||
return Cuda_Erase( ptr );
|
return Cuda_Erase( ptr );
|
||||||
}
|
}
|
||||||
|
|
||||||
static YAP_Bool
|
/* Builds the query vector handed to Cuda_Eval.
   t1 is either a single predicate handle or a Prolog list of handles; a handle is an
   integer term holding a 'predicate *'. For every predicate two ints are emitted: its
   name, followed by the int stored right after the first 0 found in its
   address_host_table when scanning from index 2. The vector is closed with the pair
   (-1, -1).
   NOTE(review): the meaning of the value that follows the 0 is not visible here --
   confirm against the rule encoding produced by load_rule.
   On return *res points to a malloc'ed buffer of at least MAXARG ints, or is NULL if
   the allocation failed. The buffer is sized from the length of the list, so long
   lists no longer write past a fixed MAXARG-sized block. */
void setQuery(YAP_Term t1, int32_t **res)
{
	int is_list = YAP_IsPairTerm(t1);
	size_t entries = 0, slots;
	int32_t *query, *itr;
	int32_t x, y = 0;
	predicate *ptr = NULL;
	YAP_Term t;

	/* Count the predicates first so the buffer can be sized exactly. */
	if(is_list)
	{
		for(t = t1; YAP_IsPairTerm(t); t = YAP_TailOfTerm(t))
			entries++;
	}
	else
		entries = 1;

	/* Two ints per predicate plus the (-1, -1) terminator; never smaller than before. */
	slots = 2 * entries + 2;
	if(slots < MAXARG)
		slots = MAXARG;

	query = (int32_t *)malloc(slots * sizeof(int32_t));
	if(query == NULL)
	{
		*res = NULL;
		return;
	}

	t = t1;
	do
	{
		ptr = (predicate *)YAP_IntOfTerm(is_list ? YAP_HeadOfTerm(t) : t);
		query[y] = ptr->name;

		/* Skip to the first 0 at or after index 2 and take the int that follows it. */
		itr = ptr->address_host_table;
		x = 2;
		while(itr[x] != 0)
			x++;
		query[y+1] = itr[x+1];
		y += 2;

		if(is_list)
			t = YAP_TailOfTerm(t);
	} while(is_list && YAP_IsPairTerm(t));

	query[y] = -1;
	query[y+1] = -1;
	*res = query;
}
|
||||||
|
|
||||||
|
static int
|
||||||
cuda_eval( void )
|
cuda_eval( void )
|
||||||
{
|
{
|
||||||
int32_t *mat;
|
int32_t *mat;
|
||||||
|
|
||||||
|
#if defined(DATALOG) || defined(TUFFY)
|
||||||
|
int32_t *query = NULL;
|
||||||
|
setQuery(YAP_ARG1, &query);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int32_t finalDR = YAP_IntOfTerm(YAP_ARG3);
|
||||||
|
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, names, finalDR);
|
||||||
|
|
||||||
|
#ifdef TUFFY
|
||||||
|
cf = 0;
|
||||||
|
#endif
|
||||||
|
#ifdef ROCKIT
|
||||||
|
if(cf > 0)
|
||||||
|
cf *= -1;
|
||||||
|
#endif
|
||||||
|
#if defined(TUFFY) || defined(ROCKIT)
|
||||||
|
cr = 0;
|
||||||
|
names[0] = '\0';
|
||||||
|
return FALSE;
|
||||||
|
#else
|
||||||
|
int32_t i;
|
||||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
|
||||||
int32_t ncols = ptr->num_columns;
|
int32_t ncols = ptr->num_columns;
|
||||||
YAP_Term out = YAP_TermNil();
|
YAP_Term out = YAP_TermNil();
|
||||||
YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
|
YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
|
||||||
YAP_Term vec[256];
|
YAP_Term vec[256];
|
||||||
int32_t i;
|
|
||||||
|
YAP_Atom at;
|
||||||
|
|
||||||
if (n < 0)
|
if (n < 0)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
for (i=0; i<n; i++) {
|
for (i=0; i<n; i++) {
|
||||||
int32_t ni = ((n-1)-i)*ncols, j;
|
int32_t ni = ((n-1)-i)*ncols, j;
|
||||||
|
|
||||||
|
printf("%s(", YAP_AtomName(YAP_IntToAtom(ptr->name)));
|
||||||
|
|
||||||
for (j=0; j<ncols; j++) {
|
for (j=0; j<ncols; j++) {
|
||||||
vec[j] = YAP_MkIntTerm(mat[ni+j]);
|
vec[j] = YAP_MkIntTerm(mat[ni+j]);
|
||||||
|
|
||||||
|
at = YAP_IntToAtom(mat[ni+j]);
|
||||||
|
if(at != NULL)
|
||||||
|
printf("%s", YAP_AtomName(at));
|
||||||
|
else
|
||||||
|
printf("%d", mat[ni+j]);
|
||||||
|
if(j < (ncols - 1))
|
||||||
|
printf(",");
|
||||||
|
|
||||||
}
|
}
|
||||||
out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);
|
out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);
|
||||||
|
|
||||||
|
printf(")\n");
|
||||||
|
|
||||||
}
|
}
|
||||||
if (n > 0)
|
if (n > 0)
|
||||||
free( mat );
|
free( mat );
|
||||||
return YAP_Unify(YAP_ARG2, out);
|
return YAP_Unify(YAP_ARG2, out);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static YAP_Bool
|
static int
|
||||||
cuda_coverage( void )
|
cuda_coverage( void )
|
||||||
{
|
{
|
||||||
int32_t *mat;
|
int32_t *mat;
|
||||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
|
||||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
#if defined(DATALOG) || defined(TUFFY)
|
||||||
int32_t ncols = ptr->num_columns;
|
int32_t *query = NULL;
|
||||||
|
setQuery(YAP_ARG1, &query);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
|
||||||
int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
|
int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
|
||||||
int32_t i = n/2, min = 0, max = n-1;
|
int32_t i = n/2, min = 0, max = n-1;
|
||||||
int32_t t0, t1;
|
int32_t t0, t1;
|
||||||
@ -384,11 +545,16 @@ cuda_coverage( void )
|
|||||||
} while ( TRUE );
|
} while ( TRUE );
|
||||||
}
|
}
|
||||||
|
|
||||||
static YAP_Bool cuda_count( void )
|
static int cuda_count( void )
|
||||||
{
|
{
|
||||||
int32_t *mat;
|
int32_t *mat;
|
||||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
|
||||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
#if defined(DATALOG) || defined(TUFFY)
|
||||||
|
int32_t *query = NULL;
|
||||||
|
setQuery(YAP_ARG1, &query);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
|
||||||
|
|
||||||
if (n < 0)
|
if (n < 0)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
@ -396,7 +562,7 @@ static YAP_Bool cuda_count( void )
|
|||||||
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
|
return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
|
||||||
}
|
}
|
||||||
|
|
||||||
static YAP_Bool cuda_statistics( void )
|
static int cuda_statistics( void )
|
||||||
{
|
{
|
||||||
Cuda_Statistics();
|
Cuda_Statistics();
|
||||||
return TRUE;
|
return TRUE;
|
||||||
@ -416,14 +582,20 @@ init_cuda(void)
|
|||||||
AtomGe = YAP_LookupAtom(">=");
|
AtomGe = YAP_LookupAtom(">=");
|
||||||
AtomLe = YAP_LookupAtom("=<");
|
AtomLe = YAP_LookupAtom("=<");
|
||||||
AtomDf = YAP_LookupAtom("\\=");
|
AtomDf = YAP_LookupAtom("\\=");
|
||||||
|
AtomNt = YAP_LookupAtom("not");
|
||||||
YAP_UserCPredicate("load_facts", load_facts, 4);
|
YAP_UserCPredicate("load_facts", load_facts, 4);
|
||||||
YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
|
YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
|
||||||
YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
|
YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
|
||||||
YAP_UserCPredicate("load_rule", load_rule, 4);
|
YAP_UserCPredicate("load_rule", load_rule, 4);
|
||||||
YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
|
YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
|
||||||
YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
|
YAP_UserCPredicate("cuda_eval", cuda_eval, 3);
|
||||||
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
|
YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
|
||||||
YAP_UserCPredicate("cuda_count", cuda_count, 2);
|
YAP_UserCPredicate("cuda_count", cuda_count, 2);
|
||||||
YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
|
YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
|
||||||
|
|
||||||
|
#ifdef ROCKIT
|
||||||
|
YAP_UserCPredicate("cuda_init_query", cuda_init_query, 1);
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
9
packages/cuda/cuda.yap
Normal file → Executable file
9
packages/cuda/cuda.yap
Normal file → Executable file
@ -2,10 +2,11 @@
|
|||||||
cuda_inline/2,
|
cuda_inline/2,
|
||||||
cuda_rule/2,
|
cuda_rule/2,
|
||||||
cuda_erase/1,
|
cuda_erase/1,
|
||||||
cuda_eval/2,
|
cuda_eval/3,
|
||||||
cuda_coverage/4,
|
cuda_coverage/4,
|
||||||
cuda_statistics/0,
|
cuda_statistics/0,
|
||||||
cuda_count/2]).
|
cuda_count/2,
|
||||||
|
cuda_query/1]).
|
||||||
|
|
||||||
tell_warning :-
|
tell_warning :-
|
||||||
print_message(warning,functionality(cuda)).
|
print_message(warning,functionality(cuda)).
|
||||||
@ -40,7 +41,7 @@ count_answers(G, N) :-
|
|||||||
|
|
||||||
cuda_rule((Head :- Body) , IdRules) :-
|
cuda_rule((Head :- Body) , IdRules) :-
|
||||||
body_to_list( Body, L, [], 1, N),
|
body_to_list( Body, L, [], 1, N),
|
||||||
functor(Head, _Na, Ar),
|
functor(Head, Na, Ar),
|
||||||
load_rule( N, Ar, [Head|L], IdRules ).
|
load_rule( N, Ar, [Head|L], IdRules ).
|
||||||
|
|
||||||
|
|
||||||
@ -54,3 +55,5 @@ body_to_list( B, NL, L, N0, N) :-
|
|||||||
body_to_list( B, [B|L], L, N0, N) :-
|
body_to_list( B, [B|L], L, N0, N) :-
|
||||||
N is N0+1.
|
N is N0+1.
|
||||||
|
|
||||||
|
cuda_query(Call) :-
|
||||||
|
cuda_init_query(Call).
|
||||||
|
965
packages/cuda/lista.cu
Normal file → Executable file
965
packages/cuda/lista.cu
Normal file → Executable file
File diff suppressed because it is too large
Load Diff
7
packages/cuda/lista.h
Normal file → Executable file
7
packages/cuda/lista.h
Normal file → Executable file
@ -25,8 +25,11 @@ typedef struct auxiliar{
|
|||||||
int *numselfj;
|
int *numselfj;
|
||||||
int **wherejoin;
|
int **wherejoin;
|
||||||
int *numjoin;
|
int *numjoin;
|
||||||
int3 num_bpreds;
|
int totalpreds;
|
||||||
int *builtin;
|
int **preds;
|
||||||
|
int2 *numpreds;
|
||||||
|
int *negatives;
|
||||||
|
char *rulename;
|
||||||
int gen_act;
|
int gen_act;
|
||||||
int gen_ant;
|
int gen_ant;
|
||||||
}rulenode;
|
}rulenode;
|
||||||
|
491
packages/cuda/memory.cu
Normal file → Executable file
491
packages/cuda/memory.cu
Normal file → Executable file
@ -5,63 +5,101 @@
|
|||||||
#include <thrust/device_vector.h>
|
#include <thrust/device_vector.h>
|
||||||
#include "lista.h"
|
#include "lista.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
|
#include "pred.h"
|
||||||
|
|
||||||
#define MAX_REC 200
|
#define MAX_REC 200
|
||||||
#define HALF_REC (MAX_REC / 2)
|
|
||||||
#define MAX_FIX_POINTS 100
|
#define MAX_FIX_POINTS 100
|
||||||
|
|
||||||
unsigned int avmem;
|
|
||||||
memnode temp_storage[MAX_REC];
|
memnode temp_storage[MAX_REC];
|
||||||
|
/*List used to store information (address, size, etc.) about facts and rule results loaded in the GPU*/
|
||||||
list<memnode> GPUmem;
|
list<memnode> GPUmem;
|
||||||
|
/*List used to store information about rule results offloaded from the GPU to the CPU*/
|
||||||
list<memnode> CPUmem;
|
list<memnode> CPUmem;
|
||||||
|
|
||||||
|
/*Auxiliary function to sort rule list*/
|
||||||
|
bool comparer(const rulenode &r1, const rulenode &r2)
|
||||||
|
{
|
||||||
|
return (r1.name > r2.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Used in search functions to compare iterations*/
|
||||||
bool compareiteration(const memnode &r1, const memnode &r2)
|
bool compareiteration(const memnode &r1, const memnode &r2)
|
||||||
{
|
{
|
||||||
return (r1.iteration < r2.iteration);
|
return (r1.iteration < r2.iteration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Used in search functions to compare names*/
|
||||||
bool comparename(const memnode &r1, const memnode &r2)
|
bool comparename(const memnode &r1, const memnode &r2)
|
||||||
{
|
{
|
||||||
return (r1.name > r2.name);
|
return (r1.name > r2.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calcular_mem(int dev)
|
/*Linear search of 'name' fact*/
|
||||||
{
|
|
||||||
cudaDeviceProp p;
|
|
||||||
cudaGetDeviceProperties(&p, dev);
|
|
||||||
avmem = p.totalGlobalMem;
|
|
||||||
temp_storage[0].dev_address = NULL;
|
|
||||||
temp_storage[0].size = 0;
|
|
||||||
temp_storage[HALF_REC].dev_address = NULL;
|
|
||||||
temp_storage[HALF_REC].size = 0;
|
|
||||||
|
|
||||||
//cout << "Initial memory available " << avmem << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class InputIterator>
|
template<class InputIterator>
|
||||||
InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
|
InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
|
||||||
{
|
{
|
||||||
while(first!=last)
|
while(first!=last)
|
||||||
{
|
{
|
||||||
if(first->name == name) return first;
|
if(first->name == name && first->isrule == 0) return first;
|
||||||
++first;
|
++first;
|
||||||
}
|
}
|
||||||
return last;
|
return last;
|
||||||
}
|
}
|
||||||
|
|
||||||
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum)
|
/*Finds all results of rule 'name' in iteration 'itr' in both CPU and GPU memory. Every result found is removed from its respective list*/
|
||||||
|
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
|
||||||
{
|
{
|
||||||
int x = 1, sum = 0;
|
int x = 0, sum = 0;
|
||||||
memnode temp;
|
memnode temp;
|
||||||
|
list<memnode>::iterator i;
|
||||||
temp.name = name;
|
|
||||||
temp.iteration = itr;
|
temp.iteration = itr;
|
||||||
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
|
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
|
||||||
|
|
||||||
while(rec.first != rec.second)
|
while(rec.first != rec.second)
|
||||||
{
|
{
|
||||||
|
if(rec.first->name == name && rec.first->isrule == 1)
|
||||||
|
{
|
||||||
|
temp_storage[x] = *rec.first;
|
||||||
|
rec.first = GPUmem.erase(rec.first);
|
||||||
|
sum += temp_storage[x].rows;
|
||||||
|
x++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
rec.first++;
|
||||||
|
}
|
||||||
|
*gpunum = x;
|
||||||
|
temp.name = name;
|
||||||
|
temp.isrule = 1;
|
||||||
|
i = GPUmem.insert(rec.first, temp);
|
||||||
|
rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||||
|
|
||||||
//cout << "itr = " << itr << " rec.first = " << rec.first->name << endl;
|
while(rec.first != rec.second)
|
||||||
|
{
|
||||||
|
if(rec.first->name == name && rec.first->isrule == 1)
|
||||||
|
{
|
||||||
|
temp_storage[x] = *rec.first;
|
||||||
|
rec.first = CPUmem.erase(rec.first);
|
||||||
|
sum += temp_storage[x].rows;
|
||||||
|
x++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
rec.first++;
|
||||||
|
}
|
||||||
|
*totalrows = sum;
|
||||||
|
*cpunum = x;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
list<memnode>::iterator buscarpornombrecpu(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
|
||||||
|
{
|
||||||
|
int x = 0, sum = 0;
|
||||||
|
memnode temp;
|
||||||
|
list<memnode>::iterator i;
|
||||||
|
temp.iteration = itr;
|
||||||
|
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
|
||||||
|
|
||||||
|
while(rec.first != rec.second)
|
||||||
|
{
|
||||||
if(rec.first->name == name)
|
if(rec.first->name == name)
|
||||||
{
|
{
|
||||||
temp_storage[x] = *rec.first;
|
temp_storage[x] = *rec.first;
|
||||||
@ -72,22 +110,11 @@ list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *
|
|||||||
else
|
else
|
||||||
rec.first++;
|
rec.first++;
|
||||||
}
|
}
|
||||||
//if(x > 1)
|
|
||||||
rec.first = GPUmem.insert(rec.first, temp);
|
|
||||||
*totalrows = sum;
|
|
||||||
*gpunum = x;
|
*gpunum = x;
|
||||||
return rec.first;
|
temp.name = name;
|
||||||
}
|
temp.isrule = 1;
|
||||||
|
rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||||
int buscarpornombrecpu(int name, int itr, int *totalrows)
|
|
||||||
{
|
|
||||||
int x = HALF_REC + 1, sum = 0;
|
|
||||||
memnode temp;
|
|
||||||
temp.iteration = itr;
|
|
||||||
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
|
||||||
|
|
||||||
/*if(rec.first != rec.second)
|
|
||||||
cout << "bscnomcpu = " << rec.first->name << " " << rec.first->iteration << endl;*/
|
|
||||||
|
|
||||||
while(rec.first != rec.second)
|
while(rec.first != rec.second)
|
||||||
{
|
{
|
||||||
@ -101,18 +128,24 @@ int buscarpornombrecpu(int name, int itr, int *totalrows)
|
|||||||
else
|
else
|
||||||
rec.first++;
|
rec.first++;
|
||||||
}
|
}
|
||||||
*totalrows += sum;
|
i = CPUmem.insert(rec.first, temp);
|
||||||
return x;
|
*totalrows = sum;
|
||||||
|
*cpunum = x;
|
||||||
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Removes the least recently used memory block from GPU memory, sending it to CPU memory if it's a rule result.
|
||||||
|
If there are no used memory blocks in the GPU and we still don't have enough memory, the program exits with error*/
|
||||||
void limpiar(const char s[], size_t sz)
|
void limpiar(const char s[], size_t sz)
|
||||||
{
|
{
|
||||||
list<memnode>::iterator ini;
|
list<memnode>::iterator ini;
|
||||||
memnode temp;
|
memnode temp;
|
||||||
|
size_t free, total;
|
||||||
|
|
||||||
if(GPUmem.size() == 0)
|
if(GPUmem.size() == 0)
|
||||||
{
|
{
|
||||||
cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl;
|
cudaMemGetInfo(&free,&total);
|
||||||
|
cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -122,80 +155,32 @@ void limpiar(const char s[], size_t sz)
|
|||||||
temp = *ini;
|
temp = *ini;
|
||||||
temp.dev_address = (int *)malloc(ini->size);
|
temp.dev_address = (int *)malloc(ini->size);
|
||||||
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
||||||
CPUmem.push_back(temp);
|
list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||||
|
CPUmem.insert(pos, temp);
|
||||||
}
|
}
|
||||||
liberar(ini->dev_address, ini->size);
|
cudaFree(ini->dev_address);
|
||||||
GPUmem.erase(ini);
|
GPUmem.erase(ini);
|
||||||
}
|
}
|
||||||
|
|
||||||
void limpiartodo(int *p1, int *p2)
|
/*Allocs 'size' amount of bytes in GPU memory. If not enough memory is available, removes least recently used memory blocks until
|
||||||
|
enough space is available*/
|
||||||
|
void reservar(int **ptr, size_t size)
|
||||||
{
|
{
|
||||||
list<memnode>::iterator ini;
|
size_t free, total;
|
||||||
memnode temp;
|
|
||||||
int cont = 0;
|
|
||||||
if(p1 != NULL)
|
|
||||||
cont++;
|
|
||||||
if(p2 != NULL)
|
|
||||||
cont++;
|
|
||||||
ini = GPUmem.begin();
|
|
||||||
|
|
||||||
/*cout << "ANTES" << endl;
|
|
||||||
mostrar_memoria();
|
|
||||||
mostrar_memcpu();
|
|
||||||
cout << "FIN ANTES" << endl;*/
|
|
||||||
//cout << "mem = " << GPUmem.size() << " " << avmem << endl;
|
|
||||||
|
|
||||||
while(GPUmem.size() > cont)
|
|
||||||
{
|
|
||||||
if(ini->dev_address == p1 || ini->dev_address == p2)
|
|
||||||
{
|
|
||||||
ini++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if(ini->isrule)
|
|
||||||
{
|
|
||||||
temp = *ini;
|
|
||||||
temp.dev_address = (int *)malloc(ini->size);
|
|
||||||
cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
|
||||||
CPUmem.push_back(temp);
|
|
||||||
}
|
|
||||||
liberar(ini->dev_address, temp.size);
|
|
||||||
ini = GPUmem.erase(ini);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*cout << "DESPUES" << endl;
|
|
||||||
mostrar_memoria();
|
|
||||||
mostrar_memcpu();
|
|
||||||
cout << "FIN DESPUES" << endl;*/
|
|
||||||
//cout << "memfinal = " << GPUmem.size() << " " << avmem << endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void liberar(int *ptr, int size)
|
|
||||||
{
|
|
||||||
//cout << "L " << avmem << " " << size;
|
|
||||||
|
|
||||||
cudaFree(ptr);
|
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "- " << ptr << " " << size << endl;
|
|
||||||
#endif
|
|
||||||
avmem += size;
|
|
||||||
|
|
||||||
//cout << " " << avmem << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void reservar(int **ptr, int size)
|
|
||||||
{
|
|
||||||
//size_t free, total;
|
|
||||||
//cudaMemGetInfo( &free, &total );
|
|
||||||
// cerr << "? " << free << " " << size << endl;
|
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
*ptr = NULL;
|
*ptr = NULL;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while(avmem < size)
|
|
||||||
|
cudaMemGetInfo(&free, &total);
|
||||||
|
while(free < size)
|
||||||
|
{
|
||||||
|
cout << "Se limpio memoria " << free << " " << total << endl;
|
||||||
limpiar("not enough memory", size);
|
limpiar("not enough memory", size);
|
||||||
|
cudaMemGetInfo(&free, &total);
|
||||||
|
}
|
||||||
|
|
||||||
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
||||||
limpiar("Error in memory allocation", size);
|
limpiar("Error in memory allocation", size);
|
||||||
if (! *ptr ) {
|
if (! *ptr ) {
|
||||||
@ -205,11 +190,9 @@ void reservar(int **ptr, int size)
|
|||||||
cerr << "Exiting CUDA...." << endl;
|
cerr << "Exiting CUDA...." << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
avmem -= size;
|
|
||||||
|
|
||||||
// cout << " " << avmem << endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Creates a new entry in the GPU memory list*/
|
||||||
void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
|
void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
|
||||||
{
|
{
|
||||||
memnode temp;
|
memnode temp;
|
||||||
@ -222,6 +205,19 @@ void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
|
|||||||
GPUmem.push_back(temp);
|
GPUmem.push_back(temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void registrarcpu(int name, int num_columns, int *ptr, int rows, int itr, int rule)
|
||||||
|
{
|
||||||
|
memnode temp;
|
||||||
|
temp.name = name;
|
||||||
|
temp.dev_address = ptr;
|
||||||
|
temp.rows = rows;
|
||||||
|
temp.size = rows * num_columns * sizeof(int);
|
||||||
|
temp.iteration = itr;
|
||||||
|
temp.isrule = rule;
|
||||||
|
CPUmem.push_back(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Updates the information of an element in a list*/
|
||||||
template<class InputIterator>
|
template<class InputIterator>
|
||||||
void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
|
void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
|
||||||
{
|
{
|
||||||
@ -230,6 +226,7 @@ void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
|
|||||||
i->size = rows * num_columns * sizeof(int);
|
i->size = rows * num_columns * sizeof(int);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Count the total number of rows generated by rule 'name' in iteration 'iter'*/
|
||||||
int numrows(int name, int itr)
|
int numrows(int name, int itr)
|
||||||
{
|
{
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
@ -252,16 +249,17 @@ int numrows(int name, int itr)
|
|||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern "C" void * YAP_IntToAtom(int);
|
extern "C" void * YAP_IntToAtom(int);
|
||||||
extern "C" char * YAP_AtomName(void *);
|
extern "C" char * YAP_AtomName(void *);
|
||||||
|
|
||||||
|
/*Loads facts or rule results in GPU memory. If a fact is already in GPU memory, its pointer is simply returned. Otherwise,
|
||||||
|
memory is reserved and the fact is loaded. Rule results are loaded based on the current iteration 'itr' and both GPU and
|
||||||
|
CPU memories are searched for all instances of said results. The instances are combined into a single one in GPU memory.*/
|
||||||
int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
|
int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
|
||||||
{
|
{
|
||||||
int numgpu, numcpu, totalrows = 0;
|
int numgpu, numcpu, totalrows = 0;
|
||||||
int *temp, x;
|
int *temp, x;
|
||||||
int size, itrant;
|
int size, itrant, inc = 0;
|
||||||
list<memnode>::iterator i;
|
list<memnode>::iterator i;
|
||||||
memnode fact;
|
memnode fact;
|
||||||
|
|
||||||
@ -279,9 +277,6 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
|||||||
}
|
}
|
||||||
size = num_rows * num_columns * sizeof(int);
|
size = num_rows * num_columns * sizeof(int);
|
||||||
reservar(&temp, size);
|
reservar(&temp, size);
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "+ " << temp << " temp " << size << endl;
|
|
||||||
#endif
|
|
||||||
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
|
cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
|
||||||
registrar(name, num_columns, temp, num_rows, itr, 0);
|
registrar(name, num_columns, temp, num_rows, itr, 0);
|
||||||
*ptr = temp;
|
*ptr = temp;
|
||||||
@ -290,28 +285,25 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
|||||||
if(itr > 0)
|
if(itr > 0)
|
||||||
{
|
{
|
||||||
itrant = itr - 1;
|
itrant = itr - 1;
|
||||||
i = buscarpornombre(name, itrant, &totalrows, &numgpu);
|
i = buscarpornombre(name, itrant, &totalrows, &numgpu, &numcpu);
|
||||||
numcpu = buscarpornombrecpu(name, itrant, &totalrows);
|
if((numgpu == 1) && (numcpu == 1))
|
||||||
|
|
||||||
if((numgpu == 2) && (numcpu == (HALF_REC + 1)))
|
|
||||||
{
|
{
|
||||||
actualizar(num_columns, temp_storage[1].dev_address, temp_storage[1].rows, i);
|
actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
|
||||||
*ptr = temp_storage[1].dev_address;
|
*ptr = temp_storage[0].dev_address;
|
||||||
return temp_storage[1].rows;
|
return temp_storage[0].rows;
|
||||||
}
|
}
|
||||||
size = totalrows * num_columns * sizeof(int);
|
size = totalrows * num_columns * sizeof(int);
|
||||||
reservar(&temp, size);
|
reservar(&temp, size);
|
||||||
#ifdef DEBUG_MEM
|
for(x = 0; x < numgpu; x++)
|
||||||
cerr << "+ " << temp << " temp 2 " << size << endl;
|
|
||||||
#endif
|
|
||||||
for(x = 1; x < numgpu; x++)
|
|
||||||
{
|
{
|
||||||
cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
|
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
|
||||||
liberar(temp_storage[x].dev_address, temp_storage[x].size);
|
inc += temp_storage[x].size / sizeof(int);
|
||||||
|
cudaFree(temp_storage[x].dev_address);
|
||||||
}
|
}
|
||||||
for(x = HALF_REC + 1; x < numcpu; x++)
|
for(; x < numcpu; x++)
|
||||||
{
|
{
|
||||||
cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
|
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
|
||||||
|
inc += temp_storage[x].size / sizeof(int);
|
||||||
free(temp_storage[x].dev_address);
|
free(temp_storage[x].dev_address);
|
||||||
}
|
}
|
||||||
actualizar(num_columns, temp, totalrows, i);
|
actualizar(num_columns, temp, totalrows, i);
|
||||||
@ -321,9 +313,54 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
|
||||||
|
{
|
||||||
|
int numgpu, numcpu, totalrows = 0;
|
||||||
|
int *temp, x;
|
||||||
|
int size, itrant, inc = 0;
|
||||||
|
list<memnode>::iterator i;
|
||||||
|
|
||||||
|
if(is_fact)
|
||||||
|
{
|
||||||
|
*ptr = address_host_table;
|
||||||
|
return num_rows;
|
||||||
|
}
|
||||||
|
if(itr > 0)
|
||||||
|
{
|
||||||
|
itrant = itr - 1;
|
||||||
|
i = buscarpornombrecpu(name, itrant, &totalrows, &numgpu, &numcpu);
|
||||||
|
|
||||||
|
if((numgpu == 0) && (numcpu == 1))
|
||||||
|
{
|
||||||
|
actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
|
||||||
|
*ptr = temp_storage[0].dev_address;
|
||||||
|
return temp_storage[0].rows;
|
||||||
|
}
|
||||||
|
size = totalrows * num_columns * sizeof(int);
|
||||||
|
temp = (int *)malloc(size);
|
||||||
|
for(x = 0; x < numgpu; x++)
|
||||||
|
{
|
||||||
|
cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
|
||||||
|
inc += temp_storage[x].size / sizeof(int);
|
||||||
|
cudaFree(temp_storage[x].dev_address);
|
||||||
|
}
|
||||||
|
for(; x < numcpu; x++)
|
||||||
|
{
|
||||||
|
memcpy(temp + inc, temp_storage[x].dev_address, temp_storage[x].size);
|
||||||
|
inc += temp_storage[x].size / sizeof(int);
|
||||||
|
free(temp_storage[x].dev_address);
|
||||||
|
}
|
||||||
|
actualizar(num_columns, temp, totalrows, i);
|
||||||
|
*ptr = temp;
|
||||||
|
return totalrows;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Loads all results of rule 'name' from both GPU and CPU memories into the GPU*/
|
||||||
int cargafinal(int name, int cols, int **ptr)
|
int cargafinal(int name, int cols, int **ptr)
|
||||||
{
|
{
|
||||||
int *temp, *ini, cont = 0;
|
int *temp, *ini, cont = 0, numg = 0, numc = 0;
|
||||||
memnode bus;
|
memnode bus;
|
||||||
bus.name = name;
|
bus.name = name;
|
||||||
GPUmem.sort(comparename);
|
GPUmem.sort(comparename);
|
||||||
@ -335,6 +372,7 @@ int cargafinal(int name, int cols, int **ptr)
|
|||||||
while(pos != endg && pos->name == name)
|
while(pos != endg && pos->name == name)
|
||||||
{
|
{
|
||||||
cont += pos->rows;
|
cont += pos->rows;
|
||||||
|
numg++;
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
|
pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
|
||||||
@ -342,15 +380,41 @@ int cargafinal(int name, int cols, int **ptr)
|
|||||||
while(pos != endc && pos->name == name)
|
while(pos != endc && pos->name == name)
|
||||||
{
|
{
|
||||||
cont += pos->rows;
|
cont += pos->rows;
|
||||||
|
numc++;
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
|
|
||||||
reservar(&temp, cont * cols * sizeof(int));
|
if(numg == 0 && numc == 0)
|
||||||
#ifdef DEBUG_MEM
|
return 0;
|
||||||
cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;
|
if(numg == 1 && numc == 0)
|
||||||
|
{
|
||||||
|
pos = gpu;
|
||||||
|
*ptr = pos->dev_address;
|
||||||
|
cont = pos->rows;
|
||||||
|
GPUmem.erase(pos);
|
||||||
|
#ifdef TUFFY
|
||||||
|
return -cont;
|
||||||
|
#else
|
||||||
|
return cont;
|
||||||
#endif
|
#endif
|
||||||
ini = temp;
|
}
|
||||||
|
if(numg == 0 && numc == 1)
|
||||||
|
{
|
||||||
|
pos = cpu;
|
||||||
|
cont = pos->rows;
|
||||||
|
#ifdef TUFFY
|
||||||
|
reservar(&temp, pos->size);
|
||||||
|
cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
|
||||||
|
*ptr = temp;
|
||||||
|
#else
|
||||||
|
*ptr = pos->dev_address;
|
||||||
|
#endif
|
||||||
|
CPUmem.erase(pos);
|
||||||
|
return -cont;
|
||||||
|
}
|
||||||
|
|
||||||
|
reservar(&temp, cont * cols * sizeof(int));
|
||||||
|
ini = temp;
|
||||||
pos = gpu;
|
pos = gpu;
|
||||||
while(pos != endg && pos->name == name)
|
while(pos != endg && pos->name == name)
|
||||||
{
|
{
|
||||||
@ -365,23 +429,13 @@ int cargafinal(int name, int cols, int **ptr)
|
|||||||
temp += pos->size / sizeof(int);
|
temp += pos->size / sizeof(int);
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*int x, y;
|
|
||||||
int *hop1 = (int *)malloc(cont * cols * sizeof(int));
|
|
||||||
cudaMemcpy(hop1, ini, cont * cols * sizeof(int), cudaMemcpyDeviceToHost);
|
|
||||||
cout << "select finala" << endl;
|
|
||||||
for(x = 0; x < cont; x++)
|
|
||||||
{
|
|
||||||
for(y = 0; y < cols; y++)
|
|
||||||
cout << hop1[x * cols + y] << " ";
|
|
||||||
cout << endl;
|
|
||||||
}
|
|
||||||
cout << "select finala" << endl;*/
|
|
||||||
|
|
||||||
*ptr = ini;
|
*ptr = ini;
|
||||||
return cont;
|
return cont;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Compares the results of the current iteration against the results of older iterations.
|
||||||
|
Used to avoid infinite computations when the result is not a single fixed-point, but an
|
||||||
|
orbit of points.*/
|
||||||
bool generadas(int name, int filas, int cols, int itr)
|
bool generadas(int name, int filas, int cols, int itr)
|
||||||
{
|
{
|
||||||
int r1, r2, x, fin;
|
int r1, r2, x, fin;
|
||||||
@ -401,46 +455,26 @@ bool generadas(int name, int filas, int cols, int itr)
|
|||||||
thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
|
thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
|
||||||
r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
|
r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
|
||||||
thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
|
thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
|
||||||
|
|
||||||
/*int y;
|
|
||||||
int *a = (int *)malloc(r1 * cols * sizeof(int));
|
|
||||||
cudaMemcpy(a, dop1, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
|
|
||||||
for(x = 0; x < r1; x++)
|
|
||||||
{
|
|
||||||
for(y = 0; y < cols; y++)
|
|
||||||
cout << a[x * cols + y] << " ";
|
|
||||||
}
|
|
||||||
cout << endl;
|
|
||||||
cudaMemcpy(a, dop2, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
|
|
||||||
for(x = 0; x < r1; x++)
|
|
||||||
{
|
|
||||||
for(y = 0; y < cols; y++)
|
|
||||||
cout << a[x * cols + y] << " ";
|
|
||||||
}
|
|
||||||
cout << endl;
|
|
||||||
free(a);*/
|
|
||||||
|
|
||||||
if(thrust::equal(pt1, pt1 + r1, pt2) == true)
|
if(thrust::equal(pt1, pt1 + r1, pt2) == true)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mostrar_memoria()
|
void mostrar_memoria()
|
||||||
{
|
{
|
||||||
int x;
|
unsigned int x;
|
||||||
list<memnode>::iterator i = GPUmem.begin();
|
list<memnode>::iterator i = GPUmem.begin();
|
||||||
cout << "Memoria inicio GPU" << endl;
|
cout << "Memoria inicio GPU" << endl;
|
||||||
for(x = 0; x < GPUmem.size(); x++, i++)
|
for(x = 0; x < GPUmem.size(); x++, i++)
|
||||||
cout << i->name << " " << i->iteration << " " << i->size << endl;
|
cout << i->name << " " << i->iteration << " " << i->isrule << " " << i->rows << " " << i->size << endl;
|
||||||
cout << "Memoria fin GPU" << endl;
|
cout << "Memoria fin GPU" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mostrar_memcpu()
|
void mostrar_memcpu()
|
||||||
{
|
{
|
||||||
int x;
|
unsigned int x;
|
||||||
list<memnode>::iterator i = CPUmem.begin();
|
list<memnode>::iterator i = CPUmem.begin();
|
||||||
cout << "Memoria inicio CPU" << endl;
|
cout << "Memoria inicio CPU" << endl;
|
||||||
for(x = 0; x < CPUmem.size(); x++, i++)
|
for(x = 0; x < CPUmem.size(); x++, i++)
|
||||||
@ -448,53 +482,7 @@ void mostrar_memcpu()
|
|||||||
cout << "Memoria fin CPU" << endl;
|
cout << "Memoria fin CPU" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator last)
|
/*Clear all rule results from both GPU and CPU memory*/
|
||||||
{
|
|
||||||
GPUmem.sort(comparename);
|
|
||||||
CPUmem.sort(comparename);
|
|
||||||
list<memnode>::iterator gpu = GPUmem.begin();
|
|
||||||
list<memnode>::iterator cpu = CPUmem.begin();
|
|
||||||
int x, y, of, cols;
|
|
||||||
int *temp, cont = 0;
|
|
||||||
while(first != last)
|
|
||||||
{
|
|
||||||
while(first->name == gpu->name)
|
|
||||||
{
|
|
||||||
temp = (int *)malloc(gpu->size);
|
|
||||||
cudaMemcpy(temp, gpu->dev_address, gpu->size, cudaMemcpyDeviceToHost);
|
|
||||||
cols = gpu->size / (gpu->rows * sizeof(int));
|
|
||||||
cont += gpu->rows;
|
|
||||||
for(x = 0, of = 0; x < gpu->rows; x++)
|
|
||||||
{
|
|
||||||
for(y = 0; y < cols; y++, of++)
|
|
||||||
cout << temp[of] << " ";
|
|
||||||
cout << endl;
|
|
||||||
}
|
|
||||||
cudaFree(gpu->dev_address);
|
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;
|
|
||||||
#endif
|
|
||||||
free(temp);
|
|
||||||
gpu++;
|
|
||||||
}
|
|
||||||
while(first->name == cpu->name)
|
|
||||||
{
|
|
||||||
cols = cpu->size / (cpu->rows * sizeof(int));
|
|
||||||
cont += cpu->rows;
|
|
||||||
for(x = 0, of = 0; x < cpu->rows; x++)
|
|
||||||
{
|
|
||||||
for(y = 0; y < cols; y++, of++)
|
|
||||||
cout << cpu->dev_address[of] << " ";
|
|
||||||
cout << endl;
|
|
||||||
}
|
|
||||||
free(cpu->dev_address);
|
|
||||||
cpu++;
|
|
||||||
}
|
|
||||||
first++;
|
|
||||||
}
|
|
||||||
cout << cont << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear_memory()
|
void clear_memory()
|
||||||
{
|
{
|
||||||
list<memnode>::iterator ini;
|
list<memnode>::iterator ini;
|
||||||
@ -503,15 +491,13 @@ void clear_memory()
|
|||||||
fin = GPUmem.end();
|
fin = GPUmem.end();
|
||||||
while(ini != fin)
|
while(ini != fin)
|
||||||
{
|
{
|
||||||
if (ini->isrule) {
|
if(ini->isrule)
|
||||||
|
{
|
||||||
cudaFree(ini->dev_address);
|
cudaFree(ini->dev_address);
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "- " << ini->dev_address << " ini->dev_address" << endl;
|
|
||||||
#endif
|
|
||||||
ini = GPUmem.erase(ini);
|
ini = GPUmem.erase(ini);
|
||||||
} else {
|
|
||||||
ini++;
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
ini++;
|
||||||
}
|
}
|
||||||
ini = CPUmem.begin();
|
ini = CPUmem.begin();
|
||||||
fin = CPUmem.end();
|
fin = CPUmem.end();
|
||||||
@ -522,3 +508,68 @@ void clear_memory()
|
|||||||
}
|
}
|
||||||
CPUmem.clear();
|
CPUmem.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Clear everything from both GPU and CPU memory*/
|
||||||
|
void clear_memory_all()
|
||||||
|
{
|
||||||
|
list<memnode>::iterator ini;
|
||||||
|
list<memnode>::iterator fin;
|
||||||
|
ini = GPUmem.begin();
|
||||||
|
fin = GPUmem.end();
|
||||||
|
while(ini != fin)
|
||||||
|
{
|
||||||
|
cudaFree(ini->dev_address);
|
||||||
|
ini++;
|
||||||
|
}
|
||||||
|
GPUmem.clear();
|
||||||
|
ini = CPUmem.begin();
|
||||||
|
fin = CPUmem.end();
|
||||||
|
while(ini != fin)
|
||||||
|
{
|
||||||
|
free(ini->dev_address);
|
||||||
|
ini++;
|
||||||
|
}
|
||||||
|
CPUmem.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Remove all instances of fact 'name' from both CPU and GPU memories*/
|
||||||
|
void liberar(int name)
|
||||||
|
{
|
||||||
|
list<memnode>::iterator i;
|
||||||
|
memnode fact;
|
||||||
|
i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
|
||||||
|
if(i != GPUmem.end())
|
||||||
|
{
|
||||||
|
fact = *i;
|
||||||
|
GPUmem.erase(i);
|
||||||
|
cudaFree(fact.dev_address);
|
||||||
|
}
|
||||||
|
i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
|
||||||
|
if(i != CPUmem.end())
|
||||||
|
{
|
||||||
|
fact = *i;
|
||||||
|
CPUmem.erase(i);
|
||||||
|
free(fact.dev_address);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Add all rows in 'dop1' to the fact 'name' by creating a new array capable of holding both.*/
|
||||||
|
void sumar(int name, int *dop1, int cols, int rows)
|
||||||
|
{
|
||||||
|
list<memnode>::iterator i;
|
||||||
|
memnode fact;
|
||||||
|
i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
|
||||||
|
int *res, newrows, offset;
|
||||||
|
if(i != GPUmem.end())
|
||||||
|
{
|
||||||
|
fact = *i;
|
||||||
|
newrows = rows + fact.rows;
|
||||||
|
reservar(&res, newrows * cols * sizeof(int));
|
||||||
|
offset = fact.rows * cols;
|
||||||
|
cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
|
||||||
|
GPUmem.erase(i);
|
||||||
|
registrar(name, cols, res, newrows, 0, 0);
|
||||||
|
cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
|
||||||
|
cudaFree(fact.dev_address);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
13
packages/cuda/memory.h
Normal file → Executable file
13
packages/cuda/memory.h
Normal file → Executable file
@ -1,26 +1,27 @@
|
|||||||
#ifndef _MEMORY_H_
|
#ifndef _MEMORY_H_
|
||||||
#define _MEMORY_H_
|
#define _MEMORY_H_
|
||||||
|
|
||||||
//#include <thrust/device_vector.h>
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "lista.h"
|
#include "lista.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
//using namespace thrust;
|
|
||||||
|
|
||||||
void calcular_mem(int);
|
bool comparer(const rulenode&, const rulenode&);
|
||||||
void liberar(int*, int);
|
|
||||||
void limpiar(const char [], size_t);
|
void limpiar(const char [], size_t);
|
||||||
void limpiartodo(int*, int*);
|
void limpiartodo(int*, int*);
|
||||||
int cargar(int, int, int, int, int*, int**, int);
|
int cargar(int, int, int, int, int*, int**, int);
|
||||||
|
int cargarcpu(int, int, int, int, int*, int**, int);
|
||||||
int cargafinal(int, int, int**);
|
int cargafinal(int, int, int**);
|
||||||
void reservar(int**, int);
|
void reservar(int**, size_t);
|
||||||
void registrar(int, int, int*, int, int, int);
|
void registrar(int, int, int*, int, int, int);
|
||||||
|
void registrarcpu(int, int, int*, int, int, int);
|
||||||
bool generadas(int, int, int, int);
|
bool generadas(int, int, int, int);
|
||||||
|
void sumar(int, int*, int, int);
|
||||||
|
void liberar(int);
|
||||||
void mostrar_memoria(void);
|
void mostrar_memoria(void);
|
||||||
void mostrar_memcpu(void);
|
void mostrar_memcpu(void);
|
||||||
void clear_memory(void);
|
void clear_memory(void);
|
||||||
void resultados(vector<rulenode>::iterator, vector<rulenode>::iterator);
|
void clear_memory_all(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
10
packages/cuda/pred.h
Normal file → Executable file
10
packages/cuda/pred.h
Normal file → Executable file
@ -9,11 +9,17 @@ typedef struct Nodo{
|
|||||||
int num_columns;
|
int num_columns;
|
||||||
int is_fact;
|
int is_fact;
|
||||||
int *address_host_table;
|
int *address_host_table;
|
||||||
|
int *negatives;
|
||||||
|
char *predname;
|
||||||
|
double *weight;
|
||||||
}gpunode;
|
}gpunode;
|
||||||
|
|
||||||
typedef gpunode predicate;
|
typedef gpunode predicate;
|
||||||
|
|
||||||
//#define TIMER 1
|
//#define TIMER 1
|
||||||
|
#define DATALOG 1
|
||||||
|
#define NUM_T 4
|
||||||
|
#define INISIZE 1000000
|
||||||
|
|
||||||
#if TIMER
|
#if TIMER
|
||||||
typedef struct Stats{
|
typedef struct Stats{
|
||||||
@ -27,6 +33,8 @@ typedef struct Stats{
|
|||||||
extern statinfo cuda_stats;
|
extern statinfo cuda_stats;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*Constants used to mark comparison predicates*/
|
||||||
|
#define BPOFFSET (-6)
|
||||||
#define SBG_EQ (-1)
|
#define SBG_EQ (-1)
|
||||||
#define SBG_GT (-2)
|
#define SBG_GT (-2)
|
||||||
#define SBG_LT (-3)
|
#define SBG_LT (-3)
|
||||||
@ -34,6 +42,6 @@ extern statinfo cuda_stats;
|
|||||||
#define SBG_LE (-5)
|
#define SBG_LE (-5)
|
||||||
#define SBG_DF (-6)
|
#define SBG_DF (-6)
|
||||||
|
|
||||||
int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**);
|
int Cuda_Eval(predicate**, int, predicate**, int, int*, int**, char*, int);
|
||||||
void Cuda_Statistics( void );
|
void Cuda_Statistics( void );
|
||||||
#endif
|
#endif
|
||||||
|
188
packages/cuda/selectproyect.cu
Normal file → Executable file
188
packages/cuda/selectproyect.cu
Normal file → Executable file
@ -1,10 +1,11 @@
|
|||||||
#include <thrust/device_vector.h>
|
#include <thrust/device_vector.h>
|
||||||
//#include <thrust/device_ptr.h>
|
|
||||||
#include <thrust/scan.h>
|
#include <thrust/scan.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
|
#include "bpreds.h"
|
||||||
|
|
||||||
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
|
/*Mark all rows that comply with the selections*/
|
||||||
|
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||||
{
|
{
|
||||||
extern __shared__ int shared[];
|
extern __shared__ int shared[];
|
||||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -24,14 +25,14 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
|
|||||||
res[id] = 1;
|
res[id] = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*If we already have an array of marks (perhaps because the selfjoin was applied first),
|
||||||
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
|
we unmark any rows that do not comply with the selections*/
|
||||||
|
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||||
{
|
{
|
||||||
extern __shared__ int shared[];
|
extern __shared__ int shared[];
|
||||||
int *spos = &shared[numc];
|
|
||||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int x, rowact, posact;
|
int x, rowact, posact;
|
||||||
if(threadIdx.x < (numc * 2))
|
if(threadIdx.x < numc)
|
||||||
shared[threadIdx.x] = cons[threadIdx.x];
|
shared[threadIdx.x] = cons[threadIdx.x];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if(id < rows)
|
if(id < rows)
|
||||||
@ -39,10 +40,10 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
|
|||||||
if(res[id] == 0)
|
if(res[id] == 0)
|
||||||
return;
|
return;
|
||||||
rowact = id * cols;
|
rowact = id * cols;
|
||||||
for(x = 0; x < numc; x++)
|
for(x = 0; x < numc; x += 2)
|
||||||
{
|
{
|
||||||
posact = rowact + spos[x];
|
posact = rowact + shared[x];
|
||||||
if(dop1[posact] != shared[x])
|
if(dop1[posact] != shared[x+1])
|
||||||
{
|
{
|
||||||
res[id] = 0;
|
res[id] = 0;
|
||||||
return;
|
return;
|
||||||
@ -51,6 +52,7 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Unmark all rows that do not comply with the selfjoins.*/
|
||||||
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
||||||
{
|
{
|
||||||
extern __shared__ int shared[];
|
extern __shared__ int shared[];
|
||||||
@ -66,12 +68,12 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
|
|||||||
pos = id * cols;
|
pos = id * cols;
|
||||||
for(x = 0; x < cont; x++)
|
for(x = 0; x < cont; x++)
|
||||||
{
|
{
|
||||||
temp = shared[x];
|
temp = dop1[pos+shared[x]];
|
||||||
y = x + 1;
|
y = x + 1;
|
||||||
temp2 = shared[y];
|
temp2 = shared[y];
|
||||||
while(temp2 > -1)
|
while(temp2 > -1)
|
||||||
{
|
{
|
||||||
if(dop1[temp+pos] != dop1[temp2+pos])
|
if(temp != dop1[temp2+pos])
|
||||||
{
|
{
|
||||||
res[id] = 0;
|
res[id] = 0;
|
||||||
return;
|
return;
|
||||||
@ -84,6 +86,7 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Mark all rows that comply with the selfjoins*/
|
||||||
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
|
||||||
{
|
{
|
||||||
extern __shared__ int shared[];
|
extern __shared__ int shared[];
|
||||||
@ -97,12 +100,12 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
|
|||||||
pos = id * cols;
|
pos = id * cols;
|
||||||
for(x = 0; x < cont; x++)
|
for(x = 0; x < cont; x++)
|
||||||
{
|
{
|
||||||
temp = shared[x];
|
temp = dop1[pos+shared[x]];
|
||||||
y = x + 1;
|
y = x + 1;
|
||||||
temp2 = shared[y];
|
temp2 = shared[y];
|
||||||
while(temp2 > -1)
|
while(temp2 > -1)
|
||||||
{
|
{
|
||||||
if(dop1[temp+pos] != dop1[temp2+pos])
|
if(temp != dop1[temp2+pos])
|
||||||
return;
|
return;
|
||||||
y++;
|
y++;
|
||||||
temp2 = shared[y];
|
temp2 = shared[y];
|
||||||
@ -113,6 +116,7 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Project all columns found in 'dhead' to a new array 'res'*/
|
||||||
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
|
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
|
||||||
{
|
{
|
||||||
extern __shared__ int shared[];
|
extern __shared__ int shared[];
|
||||||
@ -130,76 +134,31 @@ __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*Project all columns found in 'dhead' using only the rows marked as valid (i.e. those that complied with
|
||||||
|
selections, selfjoins, etc.). The array 'temp' holds the result of the prefix sum of said marks.*/
|
||||||
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
|
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
|
||||||
{
|
{
|
||||||
extern __shared__ int shared[];
|
extern __shared__ int shared[];
|
||||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int pos, posr, x;
|
int pos, posr, x;
|
||||||
if(threadIdx.x < cols)
|
if(threadIdx.x < hsize)
|
||||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if(id < rows)
|
if(id < rows)
|
||||||
{
|
{
|
||||||
posr = temp[id+1];
|
posr = temp[id];
|
||||||
if(temp[id] != posr && posr > 0)
|
if(temp[id+1] != posr)
|
||||||
{
|
{
|
||||||
pos = id * cols;
|
pos = id * cols;
|
||||||
posr = (posr - 1) * hsize;
|
posr *= hsize;
|
||||||
for(x = 0; x < hsize; x++, posr++)
|
for(x = 0; x < hsize; x++, posr++)
|
||||||
res[posr] = dop1[pos+shared[x]];
|
res[posr] = dop1[pos+shared[x]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*__global__ void removedup()
|
/*Performs selections, selfjoins and comparison predicates when the rule has a single normal predicate.*/
|
||||||
{
|
int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *preds, int numpreds, int *project, int **ret, int ANDlogic)
|
||||||
extern __shared__ int shared[];
|
|
||||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
if(threadIdx.x < cols)
|
|
||||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
|
||||||
if(id < rows)
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
template<typename T> /*a libreria*/
|
|
||||||
struct suma : public binary_function<T,T,T>
|
|
||||||
{
|
|
||||||
__host__ __device__
|
|
||||||
T operator()(const T &r1, const T &r2)
|
|
||||||
{
|
|
||||||
if(r1 > -1)
|
|
||||||
{
|
|
||||||
if(r2 > 0)
|
|
||||||
return r1 + r2;
|
|
||||||
return -r1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(r2 > 0)
|
|
||||||
return abs(r1) + r2;
|
|
||||||
return r1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
int mayor(int a, int b, int c)
|
|
||||||
{
|
|
||||||
if(a > b)
|
|
||||||
{
|
|
||||||
if(a > c)
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(b > c)
|
|
||||||
return b;
|
|
||||||
}
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
|
|
||||||
{
|
{
|
||||||
int *fres = NULL, *temp = NULL;
|
int *fres = NULL, *temp = NULL;
|
||||||
int *dhead = NULL, tmplen;
|
int *dhead = NULL, tmplen;
|
||||||
@ -209,30 +168,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
#if TIMER
|
#if TIMER
|
||||||
cuda_stats.selects++;
|
cuda_stats.selects++;
|
||||||
#endif
|
#endif
|
||||||
int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
|
|
||||||
|
int head_bytes = maximo(4, numselect, numselfj, numpreds, head_size) * sizeof(int);
|
||||||
reservar(&dhead, head_bytes);
|
reservar(&dhead, head_bytes);
|
||||||
#ifdef DEBUG_MEM
|
int numthreads = 1024;
|
||||||
cerr << "+ " << dhead << " dhead " << head_bytes << endl;
|
//int numthreads = 32;
|
||||||
|
int blockllen = rows / numthreads + 1;
|
||||||
|
|
||||||
|
#ifdef ROCKIT
|
||||||
|
ANDlogic = 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int blockllen = rows / 1024 + 1;
|
|
||||||
int numthreads = 1024;
|
|
||||||
|
|
||||||
//removerep(dop1, rows, cols, dhead,)
|
|
||||||
if(numselect > 0)
|
if(numselect > 0)
|
||||||
{
|
{
|
||||||
tmplen = rows + 1;
|
tmplen = rows + 1;
|
||||||
size2 = tmplen * sizeof(int);
|
size2 = tmplen * sizeof(int);
|
||||||
reservar(&temp, size2);
|
reservar(&temp, size2);
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "+ " << temp << " temp select " << size2 << endl;
|
|
||||||
#endif
|
|
||||||
cudaMemset(temp, 0, size2);
|
cudaMemset(temp, 0, size2);
|
||||||
|
|
||||||
size = numselect * sizeof(int);
|
size = numselect * sizeof(int);
|
||||||
cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
marcar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
|
marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
|
||||||
|
|
||||||
if(numselfj > 0)
|
if(numselfj > 0)
|
||||||
{
|
{
|
||||||
@ -241,6 +197,16 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(numpreds > 0)
|
||||||
|
{
|
||||||
|
size = numpreds * sizeof(int);
|
||||||
|
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||||
|
if(ANDlogic)
|
||||||
|
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||||
|
else
|
||||||
|
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||||
|
}
|
||||||
|
|
||||||
res = thrust::device_pointer_cast(temp);
|
res = thrust::device_pointer_cast(temp);
|
||||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||||
num = res[rows];
|
num = res[rows];
|
||||||
@ -249,13 +215,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
|
|
||||||
size = head_size * sizeof(int);
|
size = head_size * sizeof(int);
|
||||||
reservar(&fres, num * size);
|
reservar(&fres, num * size);
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "+ " << fres << " fres select " << num*size << endl;
|
|
||||||
#endif
|
|
||||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||||
liberar(dhead, head_bytes);
|
cudaFree(dhead);
|
||||||
liberar(temp, size2);
|
cudaFree(temp);
|
||||||
*ret = fres;
|
*ret = fres;
|
||||||
return num;
|
return num;
|
||||||
}
|
}
|
||||||
@ -266,15 +229,22 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
tmplen = rows + 1;
|
tmplen = rows + 1;
|
||||||
size2 = tmplen * sizeof(int);
|
size2 = tmplen * sizeof(int);
|
||||||
reservar(&temp, size2);
|
reservar(&temp, size2);
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "+ " << temp << " temp select " << size2 << endl;
|
|
||||||
#endif
|
|
||||||
cudaMemset(temp, 0, size2);
|
cudaMemset(temp, 0, size2);
|
||||||
|
|
||||||
size = numselfj * sizeof(int);
|
size = numselfj * sizeof(int);
|
||||||
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
|
||||||
samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
|
||||||
|
|
||||||
|
if(numpreds > 0)
|
||||||
|
{
|
||||||
|
size = numpreds * sizeof(int);
|
||||||
|
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||||
|
if(ANDlogic)
|
||||||
|
bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||||
|
else
|
||||||
|
bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
res = thrust::device_pointer_cast(temp);
|
res = thrust::device_pointer_cast(temp);
|
||||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||||
num = res[rows];
|
num = res[rows];
|
||||||
@ -283,13 +253,41 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
|
|
||||||
size = head_size * sizeof(int);
|
size = head_size * sizeof(int);
|
||||||
reservar(&fres, num * size);
|
reservar(&fres, num * size);
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "+ " << fres << " fres select again " << num*size << endl;
|
|
||||||
#endif
|
|
||||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||||
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||||
liberar(dhead, head_bytes);
|
cudaFree(dhead);
|
||||||
liberar(temp, size2);
|
cudaFree(temp);
|
||||||
|
*ret = fres;
|
||||||
|
return num;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(numpreds > 0)
|
||||||
|
{
|
||||||
|
tmplen = rows + 1;
|
||||||
|
size2 = tmplen * sizeof(int);
|
||||||
|
reservar(&temp, size2);
|
||||||
|
cudaMemset(temp, 0, size2);
|
||||||
|
size = numpreds * sizeof(int);
|
||||||
|
cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
if(ANDlogic)
|
||||||
|
bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||||
|
else
|
||||||
|
bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
|
||||||
|
res = thrust::device_pointer_cast(temp);
|
||||||
|
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||||
|
num = res[rows];
|
||||||
|
|
||||||
|
if(num == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
size = head_size * sizeof(int);
|
||||||
|
reservar(&fres, num * size);
|
||||||
|
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||||
|
llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
|
||||||
|
cudaFree(dhead);
|
||||||
|
cudaFree(temp);
|
||||||
*ret = fres;
|
*ret = fres;
|
||||||
return num;
|
return num;
|
||||||
}
|
}
|
||||||
@ -297,14 +295,12 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
|
|||||||
{
|
{
|
||||||
size = head_size * sizeof(int);
|
size = head_size * sizeof(int);
|
||||||
reservar(&fres, rows * size);
|
reservar(&fres, rows * size);
|
||||||
#ifdef DEBUG_MEM
|
|
||||||
cerr << "+ " << fres << " fres select third " << rows*size << endl;
|
|
||||||
#endif
|
|
||||||
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
|
||||||
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
|
proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
|
||||||
liberar(dhead, head_bytes);
|
cudaFree(dhead);
|
||||||
*ret = fres;
|
*ret = fres;
|
||||||
return rows;
|
return rows;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
889
packages/cuda/union2.cu
Normal file → Executable file
889
packages/cuda/union2.cu
Normal file → Executable file
File diff suppressed because it is too large
Load Diff
@ -2158,12 +2158,14 @@ static foreign_t init_python(void) {
|
|||||||
char **argv;
|
char **argv;
|
||||||
term_t t = PL_new_term_ref();
|
term_t t = PL_new_term_ref();
|
||||||
YAP_Argv(&argv);
|
YAP_Argv(&argv);
|
||||||
|
if (argv) {
|
||||||
#if PY_MAJOR_VERSION < 3
|
#if PY_MAJOR_VERSION < 3
|
||||||
Py_SetProgramName(argv[0]);
|
Py_SetProgramName(argv[0]);
|
||||||
#else
|
#else
|
||||||
wchar_t *buf = Py_DecodeLocale(argv[0], NULL);
|
wchar_t *buf = Py_DecodeLocale(argv[0], NULL);
|
||||||
Py_SetProgramName(buf);
|
Py_SetProgramName(buf);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
Py_Initialize();
|
Py_Initialize();
|
||||||
py_Main = PyImport_AddModule("__main__");
|
py_Main = PyImport_AddModule("__main__");
|
||||||
py_Builtin = PyImport_AddModule("__builtin__");
|
py_Builtin = PyImport_AddModule("__builtin__");
|
||||||
|
Reference in New Issue
Block a user