hiatory

2016-04-19 23:30:02 +01:00
parent 3d68f0e06b
commit cd41d373db
28 changed files with 3153 additions and 2229 deletions
--- a/C/init.c
+++ b/C/init.c
@@ -1438,5 +1438,6 @@ void Yap_exit(int value) {
    Yap_ShutdownLoadForeign();
  }
  Yap_CloseStreams(false);
+  Yap_CloseReadline();
  exit(value);
 }
--- a/H/YapGFlagInfo.h
+++ b/H/YapGFlagInfo.h
@@ -185,7 +185,7 @@ available in experimental implementations.
 */
    YAP_FLAG(FILE_NAME_VARIABLES_FLAG, "file_name_variables", true, booleanFlag,
             "true", NULL),
-    YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15e",
+    YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15f",
             NULL),                                    /**< + `float_format `

                                    C-library `printf()` format specification used by write/1 and
--- a/H/Yapproto.h
+++ b/H/Yapproto.h
@@ -1,4 +1,4 @@
-/*************************************************************************
+  /*************************************************************************
 *									 *
 *	 YAP Prolog 	%W% %G% 					 *
 *	Yap Prolog was developed at NCCUP - Universidade do Porto	 *
@@ -301,6 +301,7 @@ extern void Yap_DebugErrorPutc(int n);
 extern void Yap_DebugErrorPuts(const char *s);
 extern void Yap_DebugWriteIndicator(struct pred_entry *ap);
 void Yap_PlWriteToStream(Term, int, int);
+void Yap_CloseReadline(void);
 /* depth_lim.c */
 bool   Yap_InitReadline(Term t);
 void Yap_InitItDeepenPreds(void);
--- a/H/Yatom.h
+++ b/H/Yatom.h
@@ -659,6 +659,19 @@ INLINE_ONLY inline EXTERN PropFlags IsPredProperty(int flags) {
  return (PropFlags)((flags == PEProp));
 }

+INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe);
+
+INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe) {
+  if (pe->ModuleOfPred == IDB_MODULE) {
+    return NULL;
+  } else if (pe->ArityOfPE == 0) {
+    return (Atom)pe->FunctorOfPred;
+  } else {
+    Functor f = pe->FunctorOfPred;
+    return NameOfFunctor(f);
+  }
+}
+
 /* Flags for code or dbase entry */
 /* There are several flags for code and data base entries */
 typedef enum {
@@ -1258,7 +1271,7 @@ INLINE_ONLY inline EXTERN Prop AbsBlobProp(YAP_BlobPropEntry *p) {

 INLINE_ONLY inline EXTERN bool IsBlobProperty(PropFlags);

-INLINE_ONLY inline EXTERN bool  IsBlobProperty(PropFlags flags) {
+INLINE_ONLY inline EXTERN bool IsBlobProperty(PropFlags flags) {
  return flags == BlobProperty;
 }

@@ -1321,8 +1334,7 @@ INLINE_ONLY inline EXTERN Prop AbsFlagProp(FlagEntry *p) { return (Prop)(p); }
 INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags);

 INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags flags) {
-	return flags == FlagProperty;
-
+  return flags == FlagProperty;
 }

 /* Proto types */
--- a/H/generated/dlocals.h
+++ b/H/generated/dlocals.h
@@ -474,6 +474,8 @@

 #define LOCAL_search_atoms LOCAL->search_atoms_
 #define REMOTE_search_atoms(wid) REMOTE(wid)->search_atoms_
+#define LOCAL_SearchPreds LOCAL->SearchPreds_
+#define REMOTE_SearchPreds(wid) REMOTE(wid)->SearchPreds_

 #define LOCAL_CurSlot LOCAL->CurSlot_
 #define REMOTE_CurSlot(wid) REMOTE(wid)->CurSlot_
--- a/H/generated/hlocals.h
+++ b/H/generated/hlocals.h
@@ -268,6 +268,7 @@ const char*  Error_Function_;
  UInt  exo_arg_;
 // atom completion
  struct scan_atoms*  search_atoms_;
+  struct pred_entry*  SearchPreds_;
 // Slots
  yhandle_t  CurSlot_;
  yhandle_t  NSlots_;
--- a/H/generated/ilocals.h
+++ b/H/generated/ilocals.h
@@ -269,6 +269,7 @@ static void InitWorker(int wid) {



+
  REMOTE_CurSlot(wid) = 0;
  REMOTE_NSlots(wid) = 0;
  REMOTE_SlotBase(wid) = InitHandles(wid);
--- a/H/generated/rlocals.h
+++ b/H/generated/rlocals.h
@@ -279,4 +279,5 @@ static void RestoreWorker(int wid USES_REGS) {



+
 }
--- a/misc/LOCALS
+++ b/misc/LOCALS
@@ -312,6 +312,7 @@ UInt				exo_arg					=0

 // atom completion
 struct scan_atoms*		search_atoms				void
+struct pred_entry*		SearchPreds				void

 // Slots
 yhandle_t				CurSlot					=0
--- a/os/charsio.c
+++ b/os/charsio.c
@@ -470,20 +470,13 @@ code with  _C_.

 */
 static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */
-  int sno = Yap_CheckStream(ARG1, Input_Stream_f, "get_byte/2");
+  int sno = Yap_CheckBinaryStream(ARG1, Input_Stream_f, "get_byte/2");
  Int status;
  Term out;

  if (sno < 0)
    return (FALSE);
  status = GLOBAL_Stream[sno].status;
-  if (!(status & Binary_Stream_f)
-      //&& strictISOFlag()
-      ) {
-    UNLOCK(GLOBAL_Stream[sno].streamlock);
-    Yap_Error(PERMISSION_ERROR_INPUT_STREAM, ARG1, "get_byte/2");
-    return (FALSE);
-  }
  out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno));
  UNLOCK(GLOBAL_Stream[sno].streamlock);
  return Yap_unify_constant(ARG2, out);
@@ -812,16 +805,9 @@ static Int put_byte(USES_REGS1) { /* '$put_byte'(Stream,N)                 */
    Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, t2, "put_code/1");
    return FALSE;
  }
-  int sno = Yap_CheckStream(ARG1, Output_Stream_f, "put/2");
+  int sno = Yap_CheckBinaryStream(ARG1, Output_Stream_f, "put/2");
  if (sno < 0)
    return (FALSE);
-  if (!(GLOBAL_Stream[sno].status & Binary_Stream_f)
-      // && strictISOFlag()
-      ) {
-    UNLOCK(GLOBAL_Stream[sno].streamlock);
-    Yap_Error(PERMISSION_ERROR_OUTPUT_BINARY_STREAM, ARG1, NULL);
-    return false;
-  }
  GLOBAL_Stream[sno].stream_putc(sno, ch);
  /*
   * if (!(GLOBAL_Stream[sno].status & Null_Stream_f))
--- a/os/iopreds.c
+++ b/os/iopreds.c
@@ -1576,6 +1576,24 @@ int Yap_CheckTextStream__(const char *file, const char *f, int line, Term arg,
  return sno;
 }

+int Yap_CheckBinaryStream__(const char *file, const char *f, int line, Term arg,
+                          int kind, const char *msg) {
+  int sno;
+  if ((sno = CheckStream__(file, f, line, arg, kind, msg)) < 0)
+    return -1;
+  if ((GLOBAL_Stream[sno].status & Binary_Stream_f)) {
+    UNLOCK(GLOBAL_Stream[sno].streamlock);
+    if (kind == Input_Stream_f)
+      PlIOError__(file, f, line, PERMISSION_ERROR_INPUT_TEXT_STREAM, arg,
+                  msg);
+    else
+      PlIOError__(file, f, line, PERMISSION_ERROR_OUTPUT_TEXT_STREAM, arg,
+                  msg);
+    return -1;
+  }
+  return sno;
+}
+
 /* used from C-interface */
 int Yap_GetFreeStreamDForReading(void) {
  int sno = GetFreeStreamD();
--- a/os/iopreds.h
+++ b/os/iopreds.h
@@ -45,6 +45,10 @@ extern int Yap_CheckStream__(const char *, const char *, int, Term, int,
  Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
 extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int,
                                 const char *);
+#define Yap_CheckBinaryStream(arg, kind, msg)                                    \
+  Yap_CheckBinaryStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
+extern int Yap_CheckBinaryStream__(const char *, const char *, int, Term, int,
+                                 const char *);

 extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name,
                           encoding_t encoding, stream_flags_t flags,
--- a/os/readline.c
+++ b/os/readline.c
@@ -130,8 +130,8 @@ static char *atom_generator(const char *prefix, int state) {
 }

 typedef struct chain {
-    struct chain *next;
-    char data[2];
+  struct chain *next;
+  char data[2];
 } chain_t;

 static char *predicate_enumerate(const char *prefix, int state) {
@@ -139,352 +139,358 @@ static char *predicate_enumerate(const char *prefix, int state) {
  PredEntry *p;
  ModEntry m0, *mod;
  AtomEntry *ap;
- 
-   if (!state) {
-       p = NULL;
-             mod = &m0;
-             m0.NextME = CurrentModules;
-                 if (mod->AtomOfME == AtomIDB)
-                    mod = mod->NextME;
-       } else {
-       Term cmod;
-        p = LOCAL_SearchPreds;
-        cmod = (p->ModuleOfPred != PROLOG_MODULE ? p->ModuleOfPred : TermProlog );
-        mod = Yap_GetModuleEntry(cmod);
-   }
-        while (mod) {
-  // move to next o;
-            if (p)
-              p = p->NextPredOfModule;
-              while (p == NULL) {
-                  mod = mod->NextME;
-                   if (!mod) {
-                      // done
-                       LOCAL_SearchPreds = NULL;
-                      return NULL;
-                  }
-                  if (mod->AtomOfME == AtomIDB)
-                    mod = mod->NextME;
-                 p = mod->PredForME;
-              }
-         char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
-         if (strlen(c) > strlen(prefix) &&
-            strstr(c, prefix) == c &&
-            !(p->PredFlags & HiddenPredFlag)) {
-          LOCAL_SearchPreds = p;
-          arity_t ar = p->ArityOfPE;
-          int l, r;
-          if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
-            return c;
-          }
-          size_t sz = strlen(c);
-          chain_t *el = (chain_t *)malloc(sizeof(chain_t)+sz);
-          strncpy(LOCAL_FileNameBuf,  c, YAP_FILENAME_MAX);
-          strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
-          return LOCAL_FileNameBuf;
-            } 
-           
-        }
+
+  if (!state) {
+    p = NULL;
+    mod = &m0;
+    m0.NextME = CurrentModules;
+    if (mod->AtomOfME == AtomIDB)
+      mod = mod->NextME;
+  } else {
+    Term cmod;
+    p = LOCAL_SearchPreds;
+    cmod = (p->ModuleOfPred != PROLOG_MODULE ? p->ModuleOfPred : TermProlog);
+    mod = Yap_GetModuleEntry(cmod);
+  }
+  while (mod) {
+    // move to next o;
+    if (p)
+      p = p->NextPredOfModule;
+    while (p == NULL) {
+      mod = mod->NextME;
+      if (!mod) {
+        // done
        LOCAL_SearchPreds = NULL;
-    return NULL;
+        return NULL;
+      }
+      if (mod->AtomOfME == AtomIDB)
+        mod = mod->NextME;
+      p = mod->PredForME;
+    }
+    char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
+    if (strlen(c) > strlen(prefix) && strstr(c, prefix) == c &&
+        !(p->PredFlags & HiddenPredFlag)) {
+      LOCAL_SearchPreds = p;
+      arity_t ar = p->ArityOfPE;
+      int l, r;
+      if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
+        return c;
+      }
+      strncpy(LOCAL_FileNameBuf, c, YAP_FILENAME_MAX);
+      strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
+      return LOCAL_FileNameBuf;
+    }
+  }
+  LOCAL_SearchPreds = NULL;
+  return NULL;
 }

-  static char *predicate_generator(const char *prefix, int state) {
-    char *s = predicate_enumerate(prefix, state);
+static char *predicate_generator(const char *prefix, int state) {
+  char *s = predicate_enumerate(prefix, state);

-    if (s) {
-      char *copy = malloc(1 + strlen(s));
+  if (s) {
+    char *copy = malloc(1 + strlen(s));

-      if (copy) /* else pretend no completion */
-        strcpy(copy, s);
-      s = copy;
-    }
-
-    return s;
+    if (copy) /* else pretend no completion */
+      strcpy(copy, s);
+    s = copy;
  }

-  static char **prolog_completion(const char *text, int start, int end) {
-    char **matches = NULL;
+  return s;
+}

-    if (start == 0 && isalpha(text[0])) {
-      int i = 0;
-      while (i < end) {
-        if (isalnum(text[i]))
-          i++;
-        else
-          break;
-      }
-      if (i == end) {
-        matches = rl_completion_matches((char *)text, predicate_generator);
-      }
-      return matches;
-    } else if (start == 0) {
-      int i = 0;
-      const char *p;
-      while (isblank(text[i++]) && i <= end)
-        ;
-      p = text + i;
+static char **prolog_completion(const char *text, int start, int end) {
+  char **matches = NULL;

-      if ((strstr(p,"[") == p) || (strstr(p,"compile(") == p) ||
-          (strstr(p,"consult(") == p) || (strstr(p,"load_files(") == p) ||
-          (strstr(p,"reconsult(") == p) || (strstr(p,"use_module(") == p))
-        matches = rl_completion_matches((char *)text, /* for pre-4.2 */
-                                        rl_filename_completion_function);
-      return matches;
+  if (start == 0 && isalpha(text[0])) {
+    int i = 0;
+    while (i < end) {
+      if (isalnum(text[i]) || text[i] == '_')
+        i++;
+      else
+        break;
    }
-    int i = end, ch = '\0';
-    while (i > start) {
-      ch = text[-i];
-      if (isalnum(text[i]))
-        continue;
-      break;
+    if (i == end) {
+      matches = rl_completion_matches((char *)text, predicate_generator);
    }
-    if (islower(ch))
-      return rl_completion_matches((char *)text, atom_generator);
+    return matches;
+  } else if (start == 0) {
+    int i = 0;
+    const char *p;
+    while (isblank(text[i++]) && i <= end)
+      ;
+    p = text + i;

-    return NULL;
+    if ((strstr(p, "[") == p) || (strstr(p, "compile(") == p) ||
+        (strstr(p, "consult(") == p) || (strstr(p, "load_files(") == p) ||
+        (strstr(p, "reconsult(") == p) || (strstr(p, "use_module(") == p) ||
+        (strstr(p, "cd(") == p))
+      matches = rl_completion_matches((char *)text, /* for pre-4.2 */
+                                      rl_filename_completion_function);
+    return matches;
  }
-
-  void Yap_ReadlineFlush(int sno) {
-    if (GLOBAL_Stream[sno].status & Tty_Stream_f &&
-        GLOBAL_Stream[sno].status & Output_Stream_f) {
-      rl_redisplay();
-    }
+  int i = end, ch = '\0';
+  while (i > start) {
+    ch = text[--i];
+    if (ch == '\'')
+      return rl_completion_matches((char *)text, /* for pre-4.2 */
+                                   rl_filename_completion_function);
+    if (isalnum(text[i]))
+      continue;
+    break;
  }
+  if (islower(ch))
+    return rl_completion_matches((char *)text, atom_generator);

-  bool Yap_ReadlinePrompt(StreamDesc * s) {
-    if (s->status & Tty_Stream_f) {
-      s->stream_getc = ReadlineGetc;
-      if (GLOBAL_Stream[0].status & Tty_Stream_f &&
-          s->name == GLOBAL_Stream[0].name)
-        s->stream_putc = ReadlinePutc;
-      return true;
-    }
-    return false;
+  return NULL;
+}
+
+void Yap_ReadlineFlush(int sno) {
+  if (GLOBAL_Stream[sno].status & Tty_Stream_f &&
+      GLOBAL_Stream[sno].status & Output_Stream_f) {
+    rl_redisplay();
  }
+}

-  bool Yap_ReadlineOps(StreamDesc * s) {
-    if (s->status & Tty_Stream_f) {
-      if (GLOBAL_Stream[0].status & Tty_Stream_f &&
-          is_same_tty(s->file, GLOBAL_Stream[0].file))
-        s->stream_putc = ReadlinePutc;
-      s->stream_getc = ReadlineGetc;
-      s->status |= Readline_Stream_f;
-      return true;
-    }
-    return false;
-  }
-
-  static int prolog_complete(int ignore, int key) {
-    if (rl_point > 0 && rl_line_buffer[rl_point - 1] != ' ') {
-#if HAVE_DECL_RL_CATCH_SIGNALS_ /* actually version >= 1.2, or true readline   \
-                                   */
-      rl_begin_undo_group();
-      rl_complete(ignore, key);
-      if (rl_point > 0 && rl_line_buffer[rl_point - 1] == ' ') {
-        rl_delete_text(rl_point - 1, rl_point);
-        rl_point -= 1;
-        rl_delete(-1, key);
-      }
-      rl_end_undo_group();
-#endif
-    } else
-      rl_complete(ignore, key);
-
-    return 0;
-  }
-
-  bool Yap_InitReadline(Term enable) {
-    // don't call readline within emacs
-    // if (getenv("ËMACS"))
-    //  return;
-    if (enable == TermFalse)
-      return true;
-    GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
-    GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
-    GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
-#if _MSC_VER || defined(__MINGW32__)
-    rl_instream = stdin;
-#endif
-    rl_outstream = stderr;
-    using_history();
-    const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
-    if (read_history(s) != 0) {
-      FILE *f = fopen(s, "a");
-      if (f) {
-        fclose(f);
-      }
-    }
-    rl_readline_name = "Prolog";
-    rl_attempted_completion_function = prolog_completion;
-#ifdef HAVE_RL_COMPLETION_FUNC_T
-    rl_add_defun("prolog-complete", prolog_complete, '\t');
-#else
-    rl_add_defun("prolog-complete", (void *)prolog_complete, '\t');
-#endif
-    return Yap_ReadlineOps(GLOBAL_Stream + StdInStream);
-  }
-
-  static bool getLine(int inp, int out) {
-    CACHE_REGS
-    rl_instream = GLOBAL_Stream[inp].file;
-    rl_outstream = GLOBAL_Stream[out].file;
-    const unsigned char *myrl_line;
-    StreamDesc *s = GLOBAL_Stream + inp;
-
-    if (!(s->status & Tty_Stream_f))
-      return false;
-
-    /* window of vulnerability opened */
-    fflush(NULL);
-    LOCAL_PrologMode |= ConsoleGetcMode;
-    if (LOCAL_newline) { // no output so far
-      myrl_line = (unsigned char *)readline(LOCAL_Prompt);
-    } else {
-      myrl_line = (unsigned char *)readline(NULL);
-    }
-    /* Do it the gnu way */
-    if (LOCAL_PrologMode & InterruptMode) {
-      Yap_external_signal(0, YAP_INT_SIGNAL);
-      LOCAL_PrologMode &= ~ConsoleGetcMode;
-      if (LOCAL_PrologMode & AbortMode) {
-        Yap_Error(ABORT_EVENT, TermNil, "");
-        LOCAL_ErrorMessage = "Abort";
-        return console_post_process_eof(s);
-      }
-    } else {
-      LOCAL_PrologMode &= ~ConsoleGetcMode;
-      LOCAL_newline = true;
-    }
-    strncpy(LOCAL_Prompt, RepAtom(LOCAL_AtPrompt)->StrOfAE, MAX_PROMPT);
-    /* window of vulnerability closed */
-    if (myrl_line == NULL)
-      return false;
-    if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
-      add_history((char *)myrl_line);
-      write_history(history_file);
-      fflush(NULL);
-    }
-    s->u.irl.ptr = s->u.irl.buf = myrl_line;
+bool Yap_ReadlinePrompt(StreamDesc *s) {
+  if (s->status & Tty_Stream_f) {
+    s->stream_getc = ReadlineGetc;
+    if (GLOBAL_Stream[0].status & Tty_Stream_f &&
+        s->name == GLOBAL_Stream[0].name)
+      s->stream_putc = ReadlinePutc;
    return true;
  }
+  return false;
+}

-  static int ReadlinePutc(int sno, int ch) {
-    CACHE_REGS
-    StreamDesc *s = &GLOBAL_Stream[sno];
-#if MAC || _MSC_VER || defined(__MINGW32__)
-    if (ch == 10) {
-      putc('\n', s->file);
-    } else
+bool Yap_ReadlineOps(StreamDesc *s) {
+  if (s->status & Tty_Stream_f) {
+    if (GLOBAL_Stream[0].status & Tty_Stream_f &&
+        is_same_tty(s->file, GLOBAL_Stream[0].file))
+      s->stream_putc = ReadlinePutc;
+    s->stream_getc = ReadlineGetc;
+    s->status |= Readline_Stream_f;
+    return true;
+  }
+  return false;
+}
+
+static int prolog_complete(int ignore, int key) {
+  if (rl_point > 0 && rl_line_buffer[rl_point - 1] != ' ') {
+#if HAVE_DECL_RL_CATCH_SIGNALS_ /* actually version >= 1.2, or true readline   \
+                                   */
+    rl_begin_undo_group();
+    rl_complete(ignore, key);
+    if (rl_point > 0 && rl_line_buffer[rl_point - 1] == ' ') {
+      rl_delete_text(rl_point - 1, rl_point);
+      rl_point -= 1;
+      rl_delete(-1, key);
+    }
+    rl_end_undo_group();
 #endif
-      putc(ch, s->file);
-    console_count_output_char(ch, s);
-    if (ch == 10) {
-      Yap_ReadlineFlush(sno);
-      LOCAL_newline = true;
+  } else
+    rl_complete(ignore, key);
+
+  return 0;
+}
+
+bool Yap_InitReadline(Term enable) {
+  // don't call readline within emacs
+  // if (getenv("ËMACS"))
+  //  return;
+  if (enable == TermFalse)
+    return true;
+  GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
+  GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
+  GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
+#if _WIN32
+  rl_instream = stdin;
+#endif
+  rl_outstream = stderr;
+  using_history();
+  const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
+  history_file = s;
+  if (read_history(s) != 0) {
+    FILE *f = fopen(s, "a");
+    if (f) {
+      fclose(f);
    }
-    return ((int)ch);
  }
+  rl_readline_name = "Prolog";
+  rl_attempted_completion_function = prolog_completion;
+#ifdef HAVE_RL_COMPLETION_FUNC_T
+  rl_add_defun("prolog-complete", prolog_complete, '\t');
+#else
+  rl_add_defun("prolog-complete", (void *)prolog_complete, '\t');
+#endif
+  return Yap_ReadlineOps(GLOBAL_Stream + StdInStream);
+}

-  /**
-    @brief reading from the console is complicated because we need to
-    know whether to prompt and so on...
+static bool getLine(int inp, int out) {
+  CACHE_REGS
+  rl_instream = GLOBAL_Stream[inp].file;
+  rl_outstream = GLOBAL_Stream[out].file;
+  const unsigned char *myrl_line;
+  StreamDesc *s = GLOBAL_Stream + inp;

-    EOF must be handled by resetting the file.
-  */
-  static int ReadlineGetc(int sno) {
-    StreamDesc *s = &GLOBAL_Stream[sno];
-    int ch;
-    bool fetch = (s->u.irl.buf == NULL);
+  if (!(s->status & Tty_Stream_f))
+    return false;

-    if (!fetch || getLine(sno, StdErrStream)) {
-      const unsigned char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf;
-      ch = *ttyptr;
-      if (ch == '\0') {
-        ch = '\n';
-        free((void *)myrl_line);
-        s->u.irl.ptr = s->u.irl.buf = NULL;
-      }
-    } else {
-      return EOF;
-    }
-    return console_post_process_read_char(ch, s);
+  /* window of vulnerability opened */
+  fflush(NULL);
+  LOCAL_PrologMode |= ConsoleGetcMode;
+  if (LOCAL_newline) { // no output so far
+    myrl_line = (unsigned char *)readline(LOCAL_Prompt);
+  } else {
+    myrl_line = (unsigned char *)readline(NULL);
  }
-
-  /**
-    @brief  Yap_ReadlinePeekChar peeks the next char from the
-    readline buffer, but does not actually grab it.
-
-    The idea is to take advantage of the buffering. Special care must be taken
-    with EOF, though.
-
-  */
-  Int Yap_ReadlinePeekChar(int sno) {
-    StreamDesc *s = &GLOBAL_Stream[sno];
-    int ch;
-
-    if (s->u.irl.buf) {
-      const unsigned char *ttyptr = s->u.irl.ptr;
-      ch = *ttyptr;
-      if (ch == '\0') {
-        ch = '\n';
-      }
-      return ch;
+  /* Do it the gnu way */
+  if (LOCAL_PrologMode & InterruptMode) {
+    Yap_external_signal(0, YAP_INT_SIGNAL);
+    LOCAL_PrologMode &= ~ConsoleGetcMode;
+    if (LOCAL_PrologMode & AbortMode) {
+      Yap_Error(ABORT_EVENT, TermNil, "");
+      LOCAL_ErrorMessage = "Abort";
+      return console_post_process_eof(s);
    }
-    if (getLine(sno, StdErrStream)) {
-      CACHE_REGS
-      ch = s->u.irl.ptr[0];
-      if (ch == '\0') {
-        ch = '\n';
-      }
-      if (ch == '\n') {
-        LOCAL_newline = true;
-      } else {
-        LOCAL_newline = false;
-      }
-    } else {
-      return EOF;
+  } else {
+    LOCAL_PrologMode &= ~ConsoleGetcMode;
+    LOCAL_newline = true;
+  }
+  strncpy(LOCAL_Prompt, RepAtom(LOCAL_AtPrompt)->StrOfAE, MAX_PROMPT);
+  /* window of vulnerability closed */
+  if (myrl_line == NULL)
+    return false;
+  if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
+    add_history((char *)myrl_line);
+    fflush(NULL);
+  }
+  s->u.irl.ptr = s->u.irl.buf = myrl_line;
+  return true;
+}
+
+static int ReadlinePutc(int sno, int ch) {
+  CACHE_REGS
+  StreamDesc *s = &GLOBAL_Stream[sno];
+#if MAC || _MSC_VER || defined(__MINGW32__)
+  if (ch == 10) {
+    putc('\n', s->file);
+  } else
+#endif
+    putc(ch, s->file);
+  console_count_output_char(ch, s);
+  if (ch == 10) {
+    Yap_ReadlineFlush(sno);
+    LOCAL_newline = true;
+  }
+  return ((int)ch);
+}
+
+/**
+  @brief reading from the console is complicated because we need to
+  know whether to prompt and so on...
+
+  EOF must be handled by resetting the file.
+*/
+static int ReadlineGetc(int sno) {
+  StreamDesc *s = &GLOBAL_Stream[sno];
+  int ch;
+  bool fetch = (s->u.irl.buf == NULL);
+
+  if (!fetch || getLine(sno, StdErrStream)) {
+    const unsigned char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf;
+    ch = *ttyptr;
+    if (ch == '\0') {
+      ch = '\n';
+      free((void *)myrl_line);
+      s->u.irl.ptr = s->u.irl.buf = NULL;
+    }
+  } else {
+    return EOF;
+  }
+  return console_post_process_read_char(ch, s);
+}
+
+/**
+  @brief  Yap_ReadlinePeekChar peeks the next char from the
+  readline buffer, but does not actually grab it.
+
+  The idea is to take advantage of the buffering. Special care must be taken
+  with EOF, though.
+
+*/
+Int Yap_ReadlinePeekChar(int sno) {
+  StreamDesc *s = &GLOBAL_Stream[sno];
+  int ch;
+
+  if (s->u.irl.buf) {
+    const unsigned char *ttyptr = s->u.irl.ptr;
+    ch = *ttyptr;
+    if (ch == '\0') {
+      ch = '\n';
    }
    return ch;
  }
-
-  int Yap_ReadlineForSIGINT(void) {
+  if (getLine(sno, StdErrStream)) {
    CACHE_REGS
-    int ch;
-    StreamDesc *s = &GLOBAL_Stream[StdInStream];
-    const unsigned char *myrl_line = s->u.irl.buf;
+    ch = s->u.irl.ptr[0];
+    if (ch == '\0') {
+      ch = '\n';
+    }
+    if (ch == '\n') {
+      LOCAL_newline = true;
+    } else {
+      LOCAL_newline = false;
+    }
+  } else {
+    return EOF;
+  }
+  return ch;
+}

-    if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != NULL) {
+int Yap_ReadlineForSIGINT(void) {
+  CACHE_REGS
+  int ch;
+  StreamDesc *s = &GLOBAL_Stream[StdInStream];
+  const unsigned char *myrl_line = s->u.irl.buf;
+
+  if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != NULL) {
+    ch = myrl_line[0];
+    free((void *)myrl_line);
+    myrl_line = NULL;
+    return ch;
+  } else {
+    myrl_line = (const unsigned char *)readline("Action (h for help): ");
+    if (!myrl_line) {
+      ch = EOF;
+      return ch;
+    } else {
      ch = myrl_line[0];
      free((void *)myrl_line);
      myrl_line = NULL;
      return ch;
-    } else {
-      myrl_line = (const unsigned char *)readline("Action (h for help): ");
-      if (!myrl_line) {
-        ch = EOF;
-        return ch;
-      } else {
-        ch = myrl_line[0];
-        free((void *)myrl_line);
-        myrl_line = NULL;
-        return ch;
-      }
    }
  }
+}

-  static Int has_readline(USES_REGS1) {
+void Yap_CloseReadline(void) {
 #if USE_READLINE
-    return true;
-#else
-    return false;
+  write_history(history_file);
 #endif
-  }
+}

-  void Yap_InitReadlinePreds(void) {
-    Yap_InitCPred("$has_readline", 0, has_readline,
-                  SafePredFlag | HiddenPredFlag);
-  }
+static Int has_readline(USES_REGS1) {
+#if USE_READLINE
+  return true;
+#else
+  return false;
+#endif
+}
+
+void Yap_InitReadlinePreds(void) {
+  Yap_InitCPred("$has_readline", 0, has_readline,
+                SafePredFlag | HiddenPredFlag);
+}

 #else
 bool Yap_InitReadline(Term enable) {
--- a/os/writeterm.c
+++ b/os/writeterm.c
@@ -390,7 +390,9 @@ write1 ( USES_REGS1 )
  if (output_stream == -1) output_stream = 1;
  xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
-    if (LOCAL_Error_TYPE)
+   if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
+      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
+     if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
  }
@@ -415,6 +417,8 @@ write_canonical1 ( USES_REGS1 )
  if (output_stream == -1) output_stream = 1;
  xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
+    if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
+      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
    if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
@@ -440,6 +444,8 @@ write_canonical ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
+    if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
+      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
    if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
@@ -467,7 +473,9 @@ writeq1 ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
-    if (LOCAL_Error_TYPE)
+     if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
+      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
+   if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
  }
@@ -495,6 +503,8 @@ writeq ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
+    if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
+      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
    if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
@@ -523,7 +533,9 @@ print1 ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
-    if (LOCAL_Error_TYPE)
+   if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
+      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
+     if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
  }
@@ -551,7 +563,9 @@ print ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
-    if (LOCAL_Error_TYPE)
+   if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
+      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
+     if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
  }
--- a/packages/cuda/CMakeLists.txt
+++ b/packages/cuda/CMakeLists.txt
@@ -54,11 +54,28 @@ if (CUDA_FOUND)
  macro_optional_find_package (Thrust ON)

  set (CUDA_SOURCES
-    lista.cu
-    memory.cu
-    cuda.c
+CC_CSSTree.cu
+bpreds.cu
+dbio.cu
+lista.cu
+memory.cu
+selectproyect.cu
+treeb.cu
+union2.cu
    )

+  set (CXX_SOURCES
+bpredscpu.cpp
+joincpu.cpp
+selectproyectcpu.cpp
+unioncpu2.cpp
+)
+
+  set (C_SOURCES
+creator2.c
+cuda.c
+)
+
  set (PL_SOURCES
    cuda.yap
    )
--- a/packages/cuda/Makefile.in
+++ b/packages/cuda/Makefile.in
@@ -23,7 +23,7 @@ CC=@CC@
 NVCC=@NVCC@
 CFLAGS= @SHLIB_CFLAGS@ $(YAP_EXTRAS) $(DEFS) -I$(srcdir) -I../.. -I$(srcdir)/../../include  @CUDA_CPPFLAGS@
 NVCCFLAGS=@CUDA_CPPFLAGS@ -I$(srcdir) -I../.. -I$(srcdir)/../../include
-CUDA_LDFLAGS=@CUDA_LDFLAGS@
+LDFLAGS=@LDFLAGS@
 #
 #
 # You shouldn't need to change what follows.
@@ -39,7 +39,7 @@ SO=@SO@
 CWD=$(PWD)
 #

-CUDA_PROLOG= \
+BDD_PROLOG= \
 	$(srcdir)/cuda.yap

 OBJS=cuda.o memory.o lista.o
@@ -62,16 +62,11 @@ memory.o: $(srcdir)/memory.cu $(srcdir)/pred.h
@DO_SECOND_LD@cuda.@SO@: $(OBJS)
@DO_SECOND_LD@	@CUDA_SHLIB_LD@ $(CUDA_LDFLAGS) -o cuda.@SO@ $(OBJS)

-install: all install-examples
+install: all
 	mkdir -p $(DESTDIR)$(SHAREDIR)
-	for h in $(CUDA_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
+	for h in $(BDD_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
 	$(INSTALL_PROGRAM) $(SOBJS) $(DESTDIR)$(YAPLIBDIR)

-install-examples:
-
 clean:
-	rm -f *.o *~ $(OBJS) *.BAK
-
-distclean: clean
-	rm -f $(SOBJS) Makefile
+	rm -f *.o *~ $(OBJS) $(SOBJS) *.BAK

--- a/packages/cuda/bpreds.cu
+++ b/packages/cuda/bpreds.cu
@@ -1,4 +1,113 @@
-__global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, int *res)
+#include <thrust/device_vector.h>
+#include <thrust/scan.h>
+#include <cstdarg>
+#include "pred.h"
+
+/*Determines the maximum from a set of values*/
+int maximo(int count, ...)
+{
+	va_list ap;
+    	int j, temp, mx = 0;
+    	va_start(ap, count);
+
+	for(j = 0; j < count; j++)
+	{
+		temp = va_arg(ap, int);
+		if(temp > mx)
+			mx = temp;
+	}
+
+    	va_end(ap);
+    	return mx;
+}
+
+
+__global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
+{
+ 	extern __shared__ int shared[];
+	int id = blockIdx.x * blockDim.x + threadIdx.x;
+	int x, rowact, rowact1, op1, op2;
+	if(threadIdx.x < numc)
+		shared[threadIdx.x] = cons[threadIdx.x];
+	__syncthreads();
+	if(id < rows)
+	{
+		rowact1 = id * of1;
+		rowact = id * of2;
+		for(x = nx; x < numc; x += 3)
+		{
+			op1 = shared[x+1];
+			if(op1 < 0)
+				op1 = dop1[rowact1 - op1 - 1];
+			else
+				op1 = dop2[rowact + op1];
+			op2 = shared[x+2];
+			if(op2 < 0)
+				op2 = dop1[rowact1 - op2 - 1];
+			else
+				op2 = dop2[rowact + op2];
+			switch(shared[x] - BPOFFSET)
+			{
+				case SBG_EQ: if(op1 != op2)
+						return;
+				break;
+				case SBG_GT: if(op1 <= op2)
+						return;
+				break;
+				case SBG_LT: if(op1 >= op2)
+						return;
+				break;
+				case SBG_GE: if(op1 < op2)
+						return;
+				break;
+				case SBG_LE: if(op1 > op2)
+						return;
+				break;
+				case SBG_DF: if(op1 == op2)
+						return;
+			}
+		}
+		if(res2 != NULL)
+			res2[id] = 1; 
+		for(x = 0; x < nx; x += 3)
+		{
+			op1 = shared[x+1];
+			if(op1 < 0)
+				op1 *= -1;
+			else
+				op1 = dop2[rowact + op1];
+			op2 = shared[x+2];
+			if(op2 < 0)
+				op2 *= -1;
+			else
+				op2 = dop2[rowact + op2];
+			switch(shared[x])
+			{
+				case SBG_EQ: if(op1 != op2)
+						return;
+				break;
+				case SBG_GT: if(op1 <= op2)
+						return;
+				break;
+				case SBG_LT: if(op1 >= op2)
+						return;
+				break;
+				case SBG_GE: if(op1 < op2)
+						return;
+				break;
+				case SBG_LE: if(op1 > op2)
+						return;
+				break;
+				case SBG_DF: if(op1 == op2)
+						return;
+			}
+		}
+		res[id] = 1;
+	}
+}
+
+/*Mark all rows that comply with the comparison predicates*/
+__global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
@@ -8,7 +117,7 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
 	__syncthreads();
 	if(id < rows)
 	{
-		rowact = id * cols;
+		rowact = id * of1; 
 		for(x = 0; x < numc; x += 3)
 		{
 			op1 = shared[x+1];
@@ -23,21 +132,21 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
 				op2 = dop1[rowact + op2];
 			switch(shared[x])
 			{
-				case SBG_EQ:  if(op1 != op2)
+				case SBG_EQ: if(op1 != op2)
 						return;
-				  break;
+				break;
 				case SBG_GT: if(op1 <= op2)
 						return;
-				  break;
+				break;
 				case SBG_LT: if(op1 >= op2)
 						return;
-				  break;
+				break;
 				case SBG_GE: if(op1 < op2)
 						return;
-				  break;
+				break;
 				case SBG_LE: if(op1 > op2)
 						return;
-				  break;
+				break;
 				case SBG_DF: if(op1 == op2)
 						return;
 			}
@@ -46,98 +155,306 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
 	}
 }

-int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
+/*Unmark all rows that do not comply with the comparison predicates*/
+__global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
-	int *temp;
-	int tmplen = rows + 1;
-	int size = tmplen * sizeof(int);
-	reservar(&temp, size);
-#ifdef DEBUG_MEM
-	 cerr << "+ " << temp << " temp bpreds " << size << endl;
-#endif
-	cudaMemset(temp, 0, size);
-
-#if TIMER
-	cuda_stats.builtins++;
-#endif
-	int *dhead;
-	int predn = numpreds.x * 3;
-	int spredn = predn * sizeof(int);
-	int sproj = numpreds.z * sizeof(int);
-	int hsize;
-	if(predn > numpreds.z)
-		hsize = spredn;
-	else
-		hsize = sproj;
-	reservar(&dhead, hsize);
-#ifdef DEBUG_MEM
-	cerr << "+ " << dhead << " dhead  " << hsize << endl;
-#endif
-	cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
-
-	int blockllen = rows / 1024 + 1;
-	int numthreads = 1024;
-
-	/*int x;
-	cout << "arraypreds = ";
-	for(x = 0; x < predn; x++)
-		cout << bin[x] << " ";
-	cout << endl;
-	cout << "temptable = ";
-	for(x = 0; x < numpreds.z; x++)
-		cout << bin[x+predn] << " ";
-	cout << endl; 
-	int y;
-	int *hop1 = (int *)malloc(numpreds.y * rows * sizeof(int));
-	cudaMemcpy(hop1, dop1, numpreds.y * rows * sizeof(int), cudaMemcpyDeviceToHost);
-	for(x = 0; x < rows; x++)
+ 	extern __shared__ int shared[];
+	int id = blockIdx.x * blockDim.x + threadIdx.x;
+	int x, rowact, op1, op2;
+	if(threadIdx.x < numc)
+		shared[threadIdx.x] = cons[threadIdx.x];
+	__syncthreads();
+	if(id < rows)
 	{
-		for(y = 0; y < numpreds.y; y++)
-			cout << hop1[x * numpreds.y + y] << " ";
-		cout << endl;
+		if(res[id] == 0)
+			return;
+		rowact = id * of1; 
+		for(x = 0; x < numc; x += 3)
+		{
+			op1 = shared[x+1];
+			if(op1 < 0)
+				op1 *= -1;
+			else
+				op1 = dop1[rowact + op1];
+			op2 = shared[x+2];
+			if(op2 < 0)
+				op2 *= -1;
+			else
+				op2 = dop1[rowact + op2];
+			switch(shared[x])
+			{
+				case SBG_EQ: if(op1 != op2)
+					     {
+						res[id] = 0;
+						return;
+					     }
+				break;
+				case SBG_GT: if(op1 <= op2)
+					     {
+						res[id] = 0;
+						return;
+					     }
+				break;
+				case SBG_LT: if(op1 >= op2)
+					     {
+						res[id] = 0;
+						return;
+					     }
+				break;
+				case SBG_GE: if(op1 < op2)
+					     {
+						res[id] = 0;
+						return;
+					     }
+				break;
+				case SBG_LE: if(op1 > op2)
+					     {
+						res[id] = 0;
+						return;
+					     }
+				break;
+				case SBG_DF: if(op1 == op2)
+					     {
+						res[id] = 0;
+						return;
+					     }
+			}
+		}
 	}
-	free(hop1);*/
-
-	predicates<<<blockllen, numthreads, spredn>>>(dop1, rows, numpreds.y, dhead, predn, temp + 1);
-
-	/*int y;
-	int *hop1 = (int *)malloc((rows + 1) * sizeof(int));
-	cudaMemcpy(hop1, temp, (rows + 1) * sizeof(int), cudaMemcpyDeviceToHost);
-	for(x = 0; x < (rows + 1); x++)
-		cout << hop1[x] << " ";
-	cout << endl;
-	free(hop1);*/
-
-	thrust::device_ptr<int> res;
-	res = thrust::device_pointer_cast(temp);
-	thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
-	int num = res[rows];
-	if(num == 0)
-		return 0;
-
-	int *fres;
-	reservar(&fres, num * sproj);
-#ifdef DEBUG_MEM
-	cerr << "+ " << fres << " fres  " << num * sproj << endl;
-#endif
-	cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
-	llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
-
-	/*int y;
-	int *hop1 = (int *)malloc(numpreds.z * num * sizeof(int));
-	cudaMemcpy(hop1, fres, numpreds.z * num * sizeof(int), cudaMemcpyDeviceToHost);
-	for(x = 0; x < num; x++)
-	{
-		for(y = 0; y < numpreds.z; y++)
-			cout << hop1[x * numpreds.z + y] << " ";
-		cout << endl;
-	}
-	free(hop1);*/
-
-	liberar(dhead, hsize);
-	liberar(temp, size);
-	liberar(dop1, rows * cols * sizeof(int));
-
-	*ret = fres;
-	return num;
 }
+
+__global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
+{
+ 	extern __shared__ int shared[];
+	int id = blockIdx.x * blockDim.x + threadIdx.x;
+	int x, rowact, rowact1, op1, op2;
+	if(threadIdx.x < numc)
+		shared[threadIdx.x] = cons[threadIdx.x];
+	__syncthreads();
+	if(id < rows)
+	{
+		rowact1 = id * of1;
+		rowact = id * of2;
+		for(x = nx; x < numc; x += 3)
+		{
+			op1 = shared[x+1];
+			if(op1 < 0)
+				op1 = dop1[rowact1 - op1 - 1];
+			else
+				op1 = dop2[rowact + op1];
+			op2 = shared[x+2];
+			if(op2 < 0)
+				op2 = dop1[rowact1 - op2 - 1];
+			else
+				op2 = dop2[rowact + op2];
+			switch(shared[x] - BPOFFSET)
+			{
+				case SBG_EQ: if(op1 == op2)
+					     {
+						res2[id] = 1;
+						x = numc;
+					     }
+				break;
+				case SBG_GT: if(op1 > op2)
+					     {
+						res2[id] = 1;
+						x = numc;
+					     }
+				break;
+				case SBG_LT: if(op1 < op2)
+					     {
+						res2[id] = 1;
+						x = numc;
+					     }
+				break;
+				case SBG_GE: if(op1 >= op2)
+					     {
+						res2[id] = 1;
+						x = numc;
+					     }
+				break;
+				case SBG_LE: if(op1 <= op2)
+					     {
+						res2[id] = 1;
+						x = numc;
+					     }
+				break;
+				case SBG_DF: if(op1 != op2)
+					     {
+						res2[id] = 1;
+						x = numc;
+					     }
+			}
+		}
+		for(x = 0; x < nx; x += 3)
+		{
+			op1 = shared[x+1];
+			if(op1 < 0)
+				op1 *= -1;
+			else
+				op1 = dop2[rowact + op1];
+			op2 = shared[x+2];
+			if(op2 < 0)
+				op2 *= -1;
+			else
+				op2 = dop2[rowact + op2];
+			switch(shared[x])
+			{
+				case SBG_EQ: if(op1 == op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_GT: if(op1 > op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_LT: if(op1 < op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_GE: if(op1 >= op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_LE: if(op1 <= op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_DF: if(op1 != op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+			}
+		}
+	}
+}
+
+/*Mark all rows that comply with the comparison predicates using disjunctions (i.e. a row is marked if it complies with at least one predicate)*/
+__global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
+{
+ 	extern __shared__ int shared[];
+	int id = blockIdx.x * blockDim.x + threadIdx.x;
+	int x, rowact, op1, op2;
+	if(threadIdx.x < numc)
+		shared[threadIdx.x] = cons[threadIdx.x];
+	__syncthreads();
+	if(id < rows)
+	{
+		rowact = id * of1; 
+		for(x = 0; x < numc; x += 3)
+		{
+			op1 = shared[x+1];
+			if(op1 < 0)
+				op1 *= -1;
+			else
+				op1 = dop1[rowact + op1];
+			op2 = shared[x+2];
+			if(op2 < 0)
+				op2 *= -1;
+			else
+				op2 = dop1[rowact + op2];
+			switch(shared[x])
+			{
+				case SBG_EQ: if(op1 == op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_GT: if(op1 > op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_LT: if(op1 < op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_GE: if(op1 >= op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_LE: if(op1 <= op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+				break;
+				case SBG_DF: if(op1 != op2)
+					     {
+						res[id] = 1;
+						return;
+					     }
+			}
+		}
+		
+	}
+}
+
+/*Unmark all rows that do not comply with the comparison predicates using disjunctions (i.e. a row is unmarked only if it complies with none of the predicates)*/
+__global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
+{
+ 	extern __shared__ int shared[];
+	int id = blockIdx.x * blockDim.x + threadIdx.x;
+	int x, rowact, op1, op2;
+	if(threadIdx.x < numc)
+		shared[threadIdx.x] = cons[threadIdx.x];
+	__syncthreads();
+	if(id < rows)
+	{
+		if(res[id] == 0)
+			return;
+		rowact = id * of1; 
+		for(x = 0; x < numc; x += 3)
+		{
+			op1 = shared[x+1];
+			if(op1 < 0)
+				op1 *= -1;
+			else
+				op1 = dop1[rowact + op1];
+			op2 = shared[x+2];
+			if(op2 < 0)
+				op2 *= -1;
+			else
+				op2 = dop1[rowact + op2];
+			switch(shared[x])
+			{
+				case SBG_EQ: if(op1 == op2)
+						return;
+				break;
+				case SBG_GT: if(op1 > op2)
+						return;
+				break;
+				case SBG_LT: if(op1 < op2)
+						return;
+				break;
+				case SBG_GE: if(op1 >= op2)
+						return;
+				break;
+				case SBG_LE: if(op1 <= op2)
+						return;
+				break;
+				case SBG_DF: if(op1 != op2)
+						return;
+			}
+		}
+		res[id] = 0;
+	}
+}
+
--- a/packages/cuda/cuda.c
+++ b/packages/cuda/cuda.c
@@ -6,19 +6,25 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
+#include <inttypes.h>
 #include "pred.h"

+#define MAXARG 100
+
 YAP_Atom AtomEq,
  AtomGt,
  AtomLt,
  AtomGe,
  AtomLe,
-  AtomDf;
+  AtomDf,
+  AtomNt;

-predicate *facts[100]; /*Temporary solution to maintain facts and rules*/
-predicate *rules[100];
+predicate *facts[MAXARG]; /*Temporary solution to maintain facts and rules*/
+predicate *rules[MAXARG];
 int32_t cf = 0, cr = 0;

+char names[1024];
+
 // initialize CUDA system
 void Cuda_Initialize( void );

@@ -39,6 +45,19 @@ void init_cuda( void );

 //#define DEBUG_INTERFACE 1

+#ifdef ROCKIT
+static int32_t query[100];
+static int32_t qcont = 0;
+static int cuda_init_query(void)
+{
+	int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG1));
+	query[qcont] = pname;
+	qcont++;
+	query[qcont] = 0;
+	return TRUE;
+}
+#endif
+
 #if DEBUG_INTERFACE
 static void
 dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
@@ -83,8 +102,18 @@ int32_t Cuda_NewFacts(predicate *pe)
 #if DEBUG_INTERFACE
  dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
 #endif
+
+#ifdef ROCKIT
+  if(cf >= 0)
+  {
+  	facts[cf] = pe;
+	cf++;
+  }
+#else
  facts[cf] = pe;
  cf++;
+#endif
+
  return TRUE;
 }

@@ -115,7 +144,7 @@ int32_t Cuda_Erase(predicate *pe)
  return TRUE;
 }

-static YAP_Bool
+static int
 load_facts( void ) {

  int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
@@ -164,15 +193,18 @@ load_facts( void ) {
 static int currentFact = 0;
 static predicate *currentPred = NULL;

-static YAP_Bool
+static int
 cuda_init_facts( void ) {

  int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
-  int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
+  int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
  int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
  int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
  predicate *pred;

+	strcat(names, YAP_AtomName(YAP_AtomOfTerm(YAP_ARG3)));
+	strcat(names, " ");
+
  if (!mat)
    return FALSE;
  if (YAP_IsVarTerm( YAP_ARG4)) {
@@ -198,14 +230,16 @@ cuda_init_facts( void ) {
  }
 }

-static YAP_Bool
+static int
 cuda_load_fact( void ) {
-  YAP_Term th = YAP_ARG1;

-  int i, j;
+  int i = currentFact;
+
+#if defined(DATALOG) || defined(TUFFY)
+  YAP_Term th = YAP_ARG1;
  int ncols = currentPred->num_columns;
+  int j;
  int *mat = currentPred->address_host_table;
-  i = currentFact;
  for (j = 0; j < ncols; j++) {
    YAP_Term ta = YAP_ArgOfTerm(j+1, th);
    if (YAP_IsAtomTerm(ta)) {
@@ -214,6 +248,8 @@ cuda_load_fact( void ) {
      mat[i*ncols+j] = YAP_IntOfTerm(ta);
    }
  }
+#endif
+
  i++;
  if (i == currentPred->num_rows) {
    Cuda_NewFacts(currentPred);
@@ -225,21 +261,26 @@ cuda_load_fact( void ) {
  return TRUE;
 }

-static YAP_Bool
+static int
 load_rule( void ) {
  // maximum of 2K symbols per rule, should be enough for ILP
-  int32_t vec[2048], *ptr = vec, *nvec;
+  int32_t vec[2048], *ptr = vec, *nvec, neg[2048];
  // qK different variables;
  YAP_Term vars[1024];
-  int32_t nvars = 0;
+  int32_t nvars = 0, x;
  int32_t ngoals = YAP_IntOfTerm(YAP_ARG1);   /* gives the number of goals */
  int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
  YAP_Term t3 = YAP_ARG3;
-  int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
+	YAP_Atom name = YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3)));
+  int32_t pname = YAP_AtomToInt(name);
+
+	const char *strname = YAP_AtomName(name);
  predicate *pred;
+  int32_t cont = 0;
+  memset(neg, 0x0, 2048 * sizeof(int32_t));

  while(YAP_IsPairTerm(t3)) {
-    int32_t j = 0;
+    int32_t j = 0, m;
    YAP_Term th = YAP_HeadOfTerm(t3);
    YAP_Functor f = YAP_FunctorOfTerm( th );
    int32_t n = YAP_ArityOfFunctor( f ); 
@@ -257,8 +298,17 @@ load_rule( void ) {
      *ptr++ = SBG_LE;
    else if (at == AtomDf)
      *ptr++ = SBG_DF;
+    else if (at == AtomNt)
+	{
+      		neg[cont] = 1;
+		cont++;
+	}
    else
-      *ptr++ = YAP_AtomToInt( at );
+	{
+      		*ptr++ = YAP_AtomToInt( at );
+		cont++;
+	}
+
    for (j = 0; j < n; j++) {
      YAP_Term ta = YAP_ArgOfTerm(j+1, th);

@@ -277,6 +327,34 @@ load_rule( void ) {
 	}
      } else if (YAP_IsAtomTerm(ta))  {
 	*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
+      } else if (YAP_IsApplTerm(ta))  {
+	f = YAP_FunctorOfTerm( ta );
+	at = YAP_NameOfFunctor( f );
+	m = YAP_ArityOfFunctor( f );
+	*ptr++ = YAP_AtomToInt( at );
+
+	for (x = 0; x < m; x++) {
+      		YAP_Term ta2 = YAP_ArgOfTerm(x+1, ta);
+
+      		if (YAP_IsVarTerm(ta2)) {
+			int32_t k;
+			for (k = 0; k < nvars; k++) {
+	  			if (vars[k] == ta2) {
+	    				*ptr++ = k+1;
+	    				break;
+	  			}
+			}
+			if (k == nvars) {
+	  			vars[k] = ta2;
+	  			*ptr++ = k+1;
+	  			nvars++;
+			}
+      		} else if (YAP_IsAtomTerm(ta2))  {
+			*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
+      		} else {
+			*ptr++ = -YAP_IntOfTerm(ta);
+      		}
+    	}
      } else {
 	*ptr++ = -YAP_IntOfTerm(ta);
      }
@@ -296,53 +374,136 @@ load_rule( void ) {
  pred->num_rows = ngoals;
  pred->num_columns = ncols;
  pred->is_fact = FALSE;
+	x = (strlen(strname) + 1) * sizeof(char);
+	pred->predname = (char *)malloc(x);
+	memcpy(pred->predname, strname, x); 
  nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
  memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
  pred->address_host_table =  nvec;
+  pred->negatives = (int32_t *)malloc(sizeof(int32_t) * cont);
+  memcpy(pred->negatives, neg, sizeof(int32_t) * cont);
  Cuda_NewRule( pred );
  return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
 }

-static YAP_Bool
+static int
 cuda_erase( void )
 {
  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
  return Cuda_Erase( ptr );
 }

-static YAP_Bool
+void setQuery(YAP_Term t1, int32_t **res)
+{
+	int32_t *query = (int32_t *)malloc(MAXARG * sizeof(int32_t));
+	int32_t x, y = 0, *itr;
+	predicate *ptr = NULL;
+	if(YAP_IsPairTerm(t1))
+	{
+		while(YAP_IsPairTerm(t1))
+		{
+			ptr = (predicate *)YAP_IntOfTerm(YAP_HeadOfTerm(t1));
+			query[y] = ptr->name;
+			itr = ptr->address_host_table;
+			x = 2;
+			while(itr[x] != 0)
+				x++;
+			query[y+1] = itr[x+1];
+			t1 = YAP_TailOfTerm(t1);
+			y+=2;
+		}
+	}
+	else
+	{
+		ptr = (predicate *)YAP_IntOfTerm(t1);
+		query[y] = ptr->name;
+		itr = ptr->address_host_table;
+		x = 2;
+		while(itr[x] != 0)
+			x++;
+		query[y+1] = itr[x+1];
+		y += 2;
+	}
+	query[y] = -1;
+	query[y+1] = -1;
+	*res = query;
+}
+
+static int
 cuda_eval( void )
 {
  int32_t *mat;
+
+#if defined(DATALOG) || defined(TUFFY)
+	int32_t *query = NULL;
+	setQuery(YAP_ARG1, &query);
+#endif
+
+	int32_t finalDR = YAP_IntOfTerm(YAP_ARG3);
+  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, names, finalDR);
+
+#ifdef TUFFY
+	cf = 0;
+#endif
+#ifdef ROCKIT
+	if(cf > 0)
+		cf *= -1;
+#endif
+#if defined(TUFFY) || defined(ROCKIT)
+	cr = 0;
+	names[0] = '\0';
+	return FALSE;
+#else
+  int32_t i;
  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
-  int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
  int32_t ncols = ptr->num_columns;
  YAP_Term out = YAP_TermNil();
  YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
  YAP_Term vec[256];
-  int32_t i;
+
+	YAP_Atom at;

  if (n < 0)
    return FALSE;
  for (i=0; i<n; i++) {
    int32_t ni = ((n-1)-i)*ncols, j;
+
+	printf("%s(", YAP_AtomName(YAP_IntToAtom(ptr->name)));
+
    for (j=0; j<ncols; j++) {
      vec[j] = YAP_MkIntTerm(mat[ni+j]);
+
+	at = YAP_IntToAtom(mat[ni+j]);
+	if(at != NULL)
+		printf("%s", YAP_AtomName(at));
+	else
+		printf("%d", mat[ni+j]);	
+	if(j < (ncols - 1))
+		printf(",");
+
    }
    out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);
+
+	printf(")\n");
+
  }
  if (n > 0)
    free( mat );
  return YAP_Unify(YAP_ARG2, out);
+#endif
 }

-static YAP_Bool
+static int
 cuda_coverage( void )
 {
  int32_t *mat;
-  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
-  int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
-  int32_t ncols = ptr->num_columns;
+
+#if defined(DATALOG) || defined(TUFFY)
+	int32_t *query = NULL;
+	setQuery(YAP_ARG1, &query);
+#endif
+
+  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
  int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
  int32_t i = n/2, min = 0, max = n-1;
  int32_t t0, t1;
@@ -384,11 +545,16 @@ cuda_coverage( void )
  } while ( TRUE );
 }

-static YAP_Bool cuda_count( void )
+static int cuda_count( void )
 {
  int32_t *mat;
-  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
-  int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
+
+#if defined(DATALOG) || defined(TUFFY)
+	int32_t *query = NULL;
+	setQuery(YAP_ARG1, &query);
+#endif
+
+  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);

  if (n < 0)
    return FALSE;
@@ -396,7 +562,7 @@ static YAP_Bool cuda_count( void )
  return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
 }

-static YAP_Bool cuda_statistics( void )
+static int cuda_statistics( void )
 {
  Cuda_Statistics();
  return TRUE;
@@ -416,14 +582,20 @@ init_cuda(void)
  AtomGe = YAP_LookupAtom(">=");
  AtomLe = YAP_LookupAtom("=<");
  AtomDf = YAP_LookupAtom("\\=");
+  AtomNt = YAP_LookupAtom("not");
  YAP_UserCPredicate("load_facts", load_facts, 4);
  YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
  YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
  YAP_UserCPredicate("load_rule", load_rule, 4);
  YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
-  YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
+  YAP_UserCPredicate("cuda_eval", cuda_eval, 3);
  YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
  YAP_UserCPredicate("cuda_count", cuda_count, 2);
  YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
+
+#ifdef ROCKIT
+  YAP_UserCPredicate("cuda_init_query", cuda_init_query, 1);
+#endif
+
 }

--- a/packages/cuda/cuda.yap
+++ b/packages/cuda/cuda.yap
@@ -2,10 +2,11 @@
 		 cuda_inline/2,
 		 cuda_rule/2,
 		 cuda_erase/1,
-		 cuda_eval/2,
+		 cuda_eval/3,
 		 cuda_coverage/4,
 		 cuda_statistics/0,
-		 cuda_count/2]).
+		 cuda_count/2,
+		 cuda_query/1]).

 tell_warning :-
 	print_message(warning,functionality(cuda)).
@@ -40,7 +41,7 @@ count_answers(G, N) :-

 cuda_rule((Head :- Body) , IdRules) :-
 	body_to_list( Body, L, [], 1, N),
-	functor(Head, _Na, Ar),
+	functor(Head, Na, Ar),
 	load_rule( N, Ar, [Head|L], IdRules ).


@@ -54,3 +55,5 @@ body_to_list( B, NL, L, N0, N) :-
 body_to_list( B, [B|L], L, N0, N) :-
 	N is N0+1.

+cuda_query(Call) :-
+	cuda_init_query(Call).
--- a/packages/cuda/lista.cu
+++ b/packages/cuda/lista.cu
--- a/packages/cuda/lista.h
+++ b/packages/cuda/lista.h
@@ -25,8 +25,11 @@ typedef struct auxiliar{
 	int *numselfj;
 	int **wherejoin;
 	int *numjoin;
-	int3 num_bpreds;
-	int *builtin;
+	int totalpreds;
+	int **preds;
+	int2 *numpreds;
+	int *negatives;
+	char *rulename;
 	int gen_act;
 	int gen_ant;
 }rulenode;
--- a/packages/cuda/memory.cu
+++ b/packages/cuda/memory.cu
@@ -5,63 +5,101 @@
 #include <thrust/device_vector.h>
 #include "lista.h"
 #include "memory.h"
+#include "pred.h"

 #define MAX_REC 200
-#define HALF_REC (MAX_REC / 2)
 #define MAX_FIX_POINTS 100

-unsigned int avmem;
 memnode temp_storage[MAX_REC];
+/*List used to store information (address, size, etc.) about facts and rule results loaded in the GPU*/
 list<memnode> GPUmem;
+/*List used to store information about rule results offloaded from the GPU to the CPU*/
 list<memnode> CPUmem;

+/*Auxiliary function to sort rule list*/
+bool comparer(const rulenode &r1, const rulenode &r2)
+{
+	return (r1.name > r2.name); 
+}
+
+/*Used in search functions to compare iterations*/
 bool compareiteration(const memnode &r1, const memnode &r2)
 {
 	return (r1.iteration < r2.iteration); 
 }

+/*Used in search functions to compare names*/
 bool comparename(const memnode &r1, const memnode &r2)
 {
 	return (r1.name > r2.name); 
 }

-void calcular_mem(int dev)
-{
-	cudaDeviceProp p;
-	cudaGetDeviceProperties(&p, dev);
-	avmem = p.totalGlobalMem;
-	temp_storage[0].dev_address = NULL;
-	temp_storage[0].size = 0;
-	temp_storage[HALF_REC].dev_address = NULL;
-	temp_storage[HALF_REC].size = 0;
-
-	//cout << "Initial memory available " << avmem << endl;
-}
-
+/*Linear search of 'name' fact*/
 template<class InputIterator>
 InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
 {
 	while(first!=last) 
 	{
-		if(first->name == name) return first;
+		if(first->name == name && first->isrule == 0) return first;
 			++first;
 	}
 	return last;
 }

-list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum)
+/*Finds all results of rule 'name' in iteration 'itr' in both CPU and GPU memory. Every result found is removed from its respective list*/
+list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
 {
-	int x = 1, sum = 0;
+	int x = 0, sum = 0;
 	memnode temp;
-
-	temp.name = name;
+	list<memnode>::iterator i;
 	temp.iteration = itr;
 	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
+
 	while(rec.first != rec.second)
-	{		
-		
-		//cout << "itr = " << itr << " rec.first = " << rec.first->name << endl;	
-		
+	{
+		if(rec.first->name == name && rec.first->isrule == 1)
+		{
+			temp_storage[x] = *rec.first;
+			rec.first = GPUmem.erase(rec.first);
+			sum += temp_storage[x].rows;
+			x++;
+		}	
+		else
+			rec.first++;
+	}
+	*gpunum = x;
+	temp.name = name;
+	temp.isrule = 1;
+	i = GPUmem.insert(rec.first, temp);
+	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
+
+	while(rec.first != rec.second)
+	{				
+		if(rec.first->name == name && rec.first->isrule == 1)
+		{
+			temp_storage[x] = *rec.first;
+			rec.first = CPUmem.erase(rec.first);
+			sum += temp_storage[x].rows;
+			x++;
+		}	
+		else
+			rec.first++;
+	}
+	*totalrows = sum;
+	*cpunum = x;
+	return i;
+}
+
+list<memnode>::iterator buscarpornombrecpu(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
+{
+	int x = 0, sum = 0;
+	memnode temp;
+	list<memnode>::iterator i;
+	temp.iteration = itr;
+	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
+
+	while(rec.first != rec.second)
+	{				
 		if(rec.first->name == name)
 		{
 			temp_storage[x] = *rec.first;
@@ -72,25 +110,14 @@ list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *
 		else
 			rec.first++;
 	}
-	//if(x > 1)
-	rec.first = GPUmem.insert(rec.first, temp);
-	*totalrows = sum;
+
 	*gpunum = x;
-	return rec.first;
-}
-
-int buscarpornombrecpu(int name, int itr, int *totalrows)
-{
-	int x = HALF_REC + 1, sum = 0;
-	memnode temp;
-	temp.iteration = itr;
-	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
-
-	/*if(rec.first != rec.second)
-		cout << "bscnomcpu = " << rec.first->name << " " << rec.first->iteration << endl;*/
+	temp.name = name;
+	temp.isrule = 1;
+	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);

 	while(rec.first != rec.second)
-	{
+	{				
 		if(rec.first->name == name)
 		{
 			temp_storage[x] = *rec.first;
@@ -101,18 +128,24 @@ int buscarpornombrecpu(int name, int itr, int *totalrows)
 		else
 			rec.first++;
 	}
-	*totalrows += sum;
-	return x;
+	i = CPUmem.insert(rec.first, temp);
+	*totalrows = sum;
+	*cpunum = x;
+	return i;
 }

+/*Removes the least recently used memory block from GPU memory, sending it to CPU memory if it's a rule result. 
+If there are no used memory blocks in the GPU and we still don't have enough memory, the program exits with error*/
 void limpiar(const char s[], size_t sz)
 {
 	list<memnode>::iterator ini;
 	memnode temp;
+	size_t free, total;

 	if(GPUmem.size() == 0)
 	{
-		cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl;
+		cudaMemGetInfo(&free,&total);
+		cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
 		exit(1);
 	}		

@@ -122,80 +155,32 @@ void limpiar(const char s[], size_t sz)
 		temp = *ini;
 		temp.dev_address = (int *)malloc(ini->size);
 		cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
-		CPUmem.push_back(temp);
+		list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
+		CPUmem.insert(pos, temp);
 	}
-	liberar(ini->dev_address, ini->size);
+	cudaFree(ini->dev_address);
 	GPUmem.erase(ini);
 }

-void limpiartodo(int *p1, int *p2)
+/*Allocs 'size' amount of bytes in GPU memory. If not enough memory is available, removes least recently used memory blocks until 
+enough space is available*/
+void reservar(int **ptr, size_t size)
 {
-	list<memnode>::iterator ini;
-	memnode temp;
-	int cont = 0;
-	if(p1 != NULL)
-		cont++;	
-	if(p2 != NULL)
-		cont++;
-	ini = GPUmem.begin();
-
-	/*cout << "ANTES" << endl;
-	mostrar_memoria();
-	mostrar_memcpu();
-	cout << "FIN ANTES" << endl;*/
-	//cout << "mem = " << GPUmem.size() << " " << avmem << endl;
-
-	while(GPUmem.size() > cont)
-	{
-		if(ini->dev_address == p1 || ini->dev_address == p2)
-		{
-			ini++;
-			continue;
-		}
-		if(ini->isrule)
-		{
-			temp = *ini; 
-			temp.dev_address = (int *)malloc(ini->size);
-			cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
-			CPUmem.push_back(temp);
-		}
-		liberar(ini->dev_address, temp.size);
-		ini = GPUmem.erase(ini);
-	}
-
-	/*cout << "DESPUES" << endl;
-	mostrar_memoria();
-	mostrar_memcpu();
-	cout << "FIN DESPUES" << endl;*/
-	//cout << "memfinal = " << GPUmem.size() << " " << avmem << endl;
-
-}
-
-void liberar(int *ptr, int size)
-{
-	//cout << "L " << avmem << " " << size; 
-
-	cudaFree(ptr);
-#ifdef DEBUG_MEM
-	cerr << "- " << ptr << " " << size << endl;
-#endif
-	avmem += size;
-	
-	//cout << " " << avmem << endl;
-}
-
-void reservar(int **ptr, int size)
-{
-  //size_t free, total;
-  //cudaMemGetInfo(      &free, &total	 );
-  //	cerr << "? " << free << " " << size << endl;
+	size_t free, total;

        if (size == 0) { 
                *ptr = NULL; 
                return;
        }
-	while(avmem < size)
+
+	cudaMemGetInfo(&free, &total);
+	while(free < size)
+	{
+		cout << "Se limpio memoria " << free << " " << total << endl;
 		limpiar("not enough memory", size);
+		cudaMemGetInfo(&free, &total);
+	}
+
 	while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
 		limpiar("Error in memory allocation", size);
 	if (! *ptr ) {
@@ -205,11 +190,9 @@ void reservar(int **ptr, int size)
 	  cerr << "Exiting CUDA...." << endl;
 	  exit(1);
 	}
-	avmem -= size;
-
-	// cout << " " << avmem << endl;
 }

+/*Creates a new entry in the GPU memory list*/
 void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
 {
 	memnode temp;
@@ -222,6 +205,19 @@ void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
 	GPUmem.push_back(temp);
 }

+void registrarcpu(int name, int num_columns, int *ptr, int rows, int itr, int rule)
+{
+	memnode temp;
+	temp.name = name;
+	temp.dev_address = ptr;
+	temp.rows = rows;
+	temp.size = rows * num_columns * sizeof(int);
+	temp.iteration = itr;
+	temp.isrule = rule;
+	CPUmem.push_back(temp);
+}
+
+/*Updates the information of an element in a list*/
 template<class InputIterator>
 void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
 {
@@ -230,6 +226,7 @@ void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
 	i->size = rows * num_columns * sizeof(int);
 }

+/*Count the total number of rows generated by rule 'name' in iteration 'iter'*/
 int numrows(int name, int itr)
 {
 	int sum = 0;
@@ -252,16 +249,17 @@ int numrows(int name, int itr)
 	return sum;
 }

-
 	extern "C" void * YAP_IntToAtom(int);
 	extern  "C" char * YAP_AtomName(void *);

-
+/*Loads facts or rule results in GPU memory. If a fact is already in GPU memory, its pointer is simply returned. Otherwise, 
+memory is reserved and the fact is loaded. Rule results are loaded based on the current iteration 'itr' and both GPU and 
+CPU memories are searched for all instances of said results. The instances are combined into a single one in GPU memory.*/
 int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
 {
 	int numgpu, numcpu, totalrows = 0;
 	int *temp, x;
-	int size, itrant;
+	int size, itrant, inc = 0;
 	list<memnode>::iterator i;
 	memnode fact;

@@ -279,9 +277,6 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
 		}
 		size = num_rows * num_columns * sizeof(int);
 		reservar(&temp, size);
-#ifdef DEBUG_MEM
-		cerr << "+ " << temp << " temp  " << size << endl;
-#endif
 		cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
 		registrar(name, num_columns, temp, num_rows, itr, 0);
 		*ptr = temp;
@@ -290,28 +285,25 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
 	if(itr > 0)
 	{
 		itrant = itr - 1;
-		i = buscarpornombre(name, itrant, &totalrows, &numgpu);
-		numcpu = buscarpornombrecpu(name, itrant, &totalrows);
-
-		if((numgpu == 2) && (numcpu == (HALF_REC + 1)))
+		i = buscarpornombre(name, itrant, &totalrows, &numgpu, &numcpu);
+		if((numgpu == 1) && (numcpu == 1))
 		{
-			actualizar(num_columns, temp_storage[1].dev_address, temp_storage[1].rows, i);
-			*ptr = temp_storage[1].dev_address;
-			return temp_storage[1].rows;
+			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
+			*ptr = temp_storage[0].dev_address;
+			return temp_storage[0].rows;
 		}
 		size = totalrows * num_columns * sizeof(int);
 		reservar(&temp, size);
-#ifdef DEBUG_MEM
-		cerr << "+ " << temp << " temp 2  " << size << endl;
-#endif
-		for(x = 1; x < numgpu; x++)
+		for(x = 0; x < numgpu; x++)
 		{
-			cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
-			liberar(temp_storage[x].dev_address, temp_storage[x].size);
+			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
+			inc += temp_storage[x].size / sizeof(int);
+			cudaFree(temp_storage[x].dev_address);
 		}
-		for(x = HALF_REC + 1; x < numcpu; x++)
+		for(; x < numcpu; x++)
 		{
-			cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
+			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
+			inc += temp_storage[x].size / sizeof(int);
 			free(temp_storage[x].dev_address);
 		}
 		actualizar(num_columns, temp, totalrows, i);
@@ -321,9 +313,54 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
 	return 0;
 }

+int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
+{
+	int numgpu, numcpu, totalrows = 0;
+	int *temp, x;
+	int size, itrant, inc = 0;
+	list<memnode>::iterator i;
+
+	if(is_fact)
+	{
+		*ptr = address_host_table;
+		return num_rows;
+	}
+	if(itr > 0)
+	{
+		itrant = itr - 1;
+		i = buscarpornombrecpu(name, itrant, &totalrows, &numgpu, &numcpu);
+
+		if((numgpu == 0) && (numcpu == 1))
+		{
+			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
+			*ptr = temp_storage[0].dev_address;
+			return temp_storage[0].rows;
+		}
+		size = totalrows * num_columns * sizeof(int);
+		temp = (int *)malloc(size);
+		for(x = 0; x < numgpu; x++)
+		{
+			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
+			inc += temp_storage[x].size / sizeof(int);
+			cudaFree(temp_storage[x].dev_address);
+		}
+		for(; x < numcpu; x++)
+		{
+			memcpy(temp + inc, temp_storage[x].dev_address, temp_storage[x].size);
+			inc += temp_storage[x].size / sizeof(int);
+			free(temp_storage[x].dev_address);
+		}
+		actualizar(num_columns, temp, totalrows, i);
+		*ptr = temp;
+		return totalrows;
+	}
+	return 0;
+}
+
+/*Loads all results of rule 'name' from both GPU and CPU memories into the GPU*/
 int cargafinal(int name, int cols, int **ptr)
 {
-	int *temp, *ini, cont = 0;
+	int *temp, *ini, cont = 0, numg = 0, numc = 0;
 	memnode bus;
 	bus.name = name;
 	GPUmem.sort(comparename);
@@ -335,6 +372,7 @@ int cargafinal(int name, int cols, int **ptr)
 	while(pos != endg && pos->name == name)
 	{
 		cont += pos->rows;
+		numg++;
 		pos++;
 	}
 	pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
@@ -342,15 +380,41 @@ int cargafinal(int name, int cols, int **ptr)
 	while(pos != endc && pos->name == name)
 	{
 		cont += pos->rows;
+		numc++;
 		pos++;
 	}
-	
-	reservar(&temp, cont * cols * sizeof(int));
-#ifdef DEBUG_MEM
-	cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;
-#endif
-	ini = temp;	

+	if(numg == 0 && numc == 0)
+		return 0;
+	if(numg == 1 && numc == 0) 
+	{
+		pos = gpu;
+		*ptr = pos->dev_address;
+		cont = pos->rows;
+		GPUmem.erase(pos);
+		#ifdef TUFFY
+		return -cont;
+		#else
+		return cont;
+		#endif
+	}
+	if(numg == 0 && numc == 1)
+	{
+		pos = cpu;
+		cont = pos->rows;
+		#ifdef TUFFY
+		reservar(&temp, pos->size);
+		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
+		*ptr = temp;
+		#else
+		*ptr = pos->dev_address;
+		#endif
+		CPUmem.erase(pos);
+		return -cont;
+	}
+
+	reservar(&temp, cont * cols * sizeof(int));
+	ini = temp;
 	pos = gpu;
 	while(pos != endg && pos->name == name)
 	{
@@ -365,23 +429,13 @@ int cargafinal(int name, int cols, int **ptr)
 		temp += pos->size / sizeof(int);
 		pos++;
 	}
-
-	/*int x, y;
-	int *hop1 = (int *)malloc(cont * cols * sizeof(int));
-	cudaMemcpy(hop1, ini, cont * cols * sizeof(int), cudaMemcpyDeviceToHost);
-	cout << "select finala" << endl;
-	for(x = 0; x < cont; x++)
-	{
-		for(y = 0; y < cols; y++)
-			cout << hop1[x * cols + y] << " ";
-		cout << endl;
-	}
-	cout << "select finala" << endl;*/
-
 	*ptr = ini;
 	return cont;
 }

+/*Compares the results of the current iteration against the results of older iterations. 
+Used to avoid infinite computations when the result is not a single fixed-point, but an 
+orbit of points.*/
 bool generadas(int name, int filas, int cols, int itr)
 {
 	int r1, r2, x, fin;
@@ -401,46 +455,26 @@ bool generadas(int name, int filas, int cols, int itr)
 			thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
 			r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
 			thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
-
-			/*int y;
-			int *a = (int *)malloc(r1 * cols * sizeof(int));
-			cudaMemcpy(a, dop1, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
-			for(x = 0; x < r1; x++)
-			{
-				for(y = 0; y < cols; y++)
-					cout << a[x * cols + y] << " ";
-			}
-			cout << endl;
-			cudaMemcpy(a, dop2, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
-			for(x = 0; x < r1; x++)
-			{
-				for(y = 0; y < cols; y++)
-					cout << a[x * cols + y] << " ";
-			}
-			cout << endl;
-			free(a);*/
-
 			if(thrust::equal(pt1, pt1 + r1, pt2) == true)
 				return true;
 		}
 	}
-
 	return false;
 }

 void mostrar_memoria()
 {
-	int x;
+	unsigned int x;
 	list<memnode>::iterator i = GPUmem.begin();
 	cout << "Memoria inicio GPU" << endl;
 	for(x = 0; x < GPUmem.size(); x++, i++)
-		cout << i->name << " " << i->iteration << " " << i->size << endl;
+		cout << i->name << " " << i->iteration << " " << i->isrule << " " << i->rows << " " << i->size << endl;
 	cout << "Memoria fin GPU" << endl;
 }

 void mostrar_memcpu()
 {
-	int x;
+	unsigned int x;
 	list<memnode>::iterator i = CPUmem.begin();
 	cout << "Memoria inicio CPU" << endl;
 	for(x = 0; x < CPUmem.size(); x++, i++)
@@ -448,53 +482,7 @@ void mostrar_memcpu()
 	cout << "Memoria fin CPU" << endl;
 }

-void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator last)
-{
-	GPUmem.sort(comparename);
-	CPUmem.sort(comparename);
-	list<memnode>::iterator gpu = GPUmem.begin();
-	list<memnode>::iterator cpu = CPUmem.begin();
-	int x, y, of, cols;
-	int *temp, cont = 0;
-	while(first != last)
-	{
-		while(first->name == gpu->name)
-		{
-			temp = (int *)malloc(gpu->size);
-			cudaMemcpy(temp, gpu->dev_address, gpu->size, cudaMemcpyDeviceToHost);
-			cols = gpu->size / (gpu->rows * sizeof(int));
-			cont += gpu->rows;
-			for(x = 0, of = 0; x < gpu->rows; x++)
-			{
-				for(y = 0; y < cols; y++, of++)
-					cout << temp[of] << " ";
-				cout << endl;
-			}
-			cudaFree(gpu->dev_address);
-#ifdef DEBUG_MEM
-			cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;
-#endif
-			free(temp);
-			gpu++;
-		}
-		while(first->name == cpu->name)
-		{
-			cols = cpu->size / (cpu->rows * sizeof(int));
-			cont += cpu->rows;
-			for(x = 0, of = 0; x < cpu->rows; x++)
-			{
-				for(y = 0; y < cols; y++, of++)
-					cout << cpu->dev_address[of] << " ";
-				cout << endl;
-			}
-			free(cpu->dev_address);
-			cpu++;
-		}
-		first++;
-	}
-	cout << cont << endl;
-}
-
+/*Clear all rule results from both GPU and CPU memory*/
 void clear_memory()
 {
 	list<memnode>::iterator ini;
@@ -503,15 +491,13 @@ void clear_memory()
 	fin = GPUmem.end();
 	while(ini != fin)
 	{
-	  if (ini->isrule) {
-	    cudaFree(ini->dev_address);
-#ifdef DEBUG_MEM
-	    cerr << "- " << ini->dev_address << " ini->dev_address" << endl;
-#endif
-	    ini = GPUmem.erase(ini);
-	  } else {
-	    ini++;
-	  }
+		if(ini->isrule)
+		{
+			cudaFree(ini->dev_address);
+			ini = GPUmem.erase(ini);
+		}
+		else
+			ini++;
 	}
 	ini = CPUmem.begin();
 	fin = CPUmem.end();
@@ -522,3 +508,68 @@ void clear_memory()
 	}
 	CPUmem.clear();
 }
+
+/*Clear everything from both GPU and CPU memory*/
+void clear_memory_all()
+{
+	list<memnode>::iterator ini;
+	list<memnode>::iterator fin;
+       	ini = GPUmem.begin();
+	fin = GPUmem.end();
+	while(ini != fin)
+	{
+		cudaFree(ini->dev_address);
+		ini++;
+	}
+	GPUmem.clear();
+	ini = CPUmem.begin();
+	fin = CPUmem.end();
+	while(ini != fin)
+	{
+		free(ini->dev_address);
+		ini++;
+	}
+	CPUmem.clear();
+}
+
+/*Remove all instances of fact 'name' from both CPU and GPU memories*/
+void liberar(int name)
+{
+	list<memnode>::iterator i;
+	memnode fact;
+	i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
+	if(i != GPUmem.end())
+	{
+		fact = *i;
+		GPUmem.erase(i);
+		cudaFree(fact.dev_address);
+	}
+	i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
+	if(i != CPUmem.end())
+	{
+		fact = *i;
+		CPUmem.erase(i);
+		free(fact.dev_address);
+	}
+}
+
+/*Add all rows in 'dop1' to the fact 'name' by creating a new array capable of holding both.*/
+void sumar(int name, int *dop1, int cols, int rows)
+{
+	list<memnode>::iterator i;
+	memnode fact;
+	i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
+	int *res, newrows, offset;
+	if(i != GPUmem.end())
+	{
+		fact = *i;
+		newrows = rows + fact.rows;
+		reservar(&res, newrows * cols * sizeof(int));
+		offset = fact.rows * cols;
+		cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
+		GPUmem.erase(i);
+		registrar(name, cols, res, newrows, 0, 0);
+		cudaMemcpyAsync(res + offset, dop1, rows * cols * sizeof(int), cudaMemcpyDeviceToDevice);
+		cudaFree(fact.dev_address);
+	}
+}
--- a/packages/cuda/memory.h
+++ b/packages/cuda/memory.h
@@ -1,26 +1,27 @@
 #ifndef _MEMORY_H_
 #define _MEMORY_H_

-//#include <thrust/device_vector.h>
 #include <list>
 #include <vector>
 #include "lista.h"

 using namespace std;
-//using namespace thrust;

-void calcular_mem(int);
-void liberar(int*, int);
+bool comparer(const rulenode&, const rulenode&);
 void limpiar(const char [], size_t);
 void limpiartodo(int*, int*);
 int cargar(int, int, int, int, int*, int**, int);
+int cargarcpu(int, int, int, int, int*, int**, int);
 int cargafinal(int, int, int**);
-void reservar(int**, int);
+void reservar(int**, size_t);
 void registrar(int, int, int*, int, int, int);
+void registrarcpu(int, int, int*, int, int, int);
 bool generadas(int, int, int, int);
+void sumar(int, int*, int, int);
+void liberar(int);
 void mostrar_memoria(void);
 void mostrar_memcpu(void);
 void clear_memory(void);
-void resultados(vector<rulenode>::iterator, vector<rulenode>::iterator);
+void clear_memory_all(void);

 #endif
--- a/packages/cuda/pred.h
+++ b/packages/cuda/pred.h
@@ -9,11 +9,17 @@ typedef struct Nodo{
 	int num_columns;
 	int is_fact;
 	int *address_host_table;
+	int *negatives;
+	char *predname;
+	double *weight;
 }gpunode;

 typedef gpunode predicate;

-// #define TIMER 1
+//#define TIMER 1
+#define DATALOG 1
+#define NUM_T 4
+#define INISIZE 1000000

 #if TIMER
 typedef struct Stats{
@@ -27,6 +33,8 @@ typedef struct Stats{
 extern statinfo cuda_stats;
 #endif

+/*Constants used to mark comparison predicates*/
+#define BPOFFSET (-6)
 #define SBG_EQ  (-1)
 #define SBG_GT  (-2)
 #define SBG_LT  (-3)
@@ -34,6 +42,6 @@ extern statinfo cuda_stats;
 #define SBG_LE  (-5)
 #define SBG_DF  (-6)

-int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**);
+int Cuda_Eval(predicate**, int, predicate**, int, int*, int**, char*, int);
 void  Cuda_Statistics( void );
 #endif
--- a/packages/cuda/selectproyect.cu
+++ b/packages/cuda/selectproyect.cu
@@ -1,10 +1,11 @@
 #include <thrust/device_vector.h>
-//#include <thrust/device_ptr.h>
 #include <thrust/scan.h>
 #include <stdlib.h>
 #include "memory.h"
+#include "bpreds.h"

-__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
+/*Mark all rows that comply with the selections*/
+__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
@@ -24,14 +25,14 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
 		res[id] = 1;
 	}
 }
-
-__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
+/*If we already have an array of marks (perhaps because the selfjoin was applied first), 
+we unmark any rows that do not comply with the selections*/
+__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
 {
- 	extern __shared__ int shared[];
-    	int *spos = &shared[numc];
+	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	int x, rowact, posact;
-	if(threadIdx.x < (numc * 2))
+	if(threadIdx.x < numc)
 		shared[threadIdx.x] = cons[threadIdx.x];
 	__syncthreads();
 	if(id < rows)
@@ -39,10 +40,10 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
 		if(res[id] == 0)
 			return;
 		rowact = id * cols;
-		for(x = 0; x < numc; x++)
+		for(x = 0; x < numc; x += 2)
 		{
-			posact = rowact + spos[x];
-			if(dop1[posact] != shared[x])
+			posact = rowact + shared[x];
+			if(dop1[posact] != shared[x+1])
 			{
 				res[id] = 0;
 				return;
@@ -51,6 +52,7 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
 	}
 }

+/*Unmark all rows that do not comply with the selfjoins.*/
 __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
 {
 	extern __shared__ int shared[];
@@ -66,12 +68,12 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
 		pos = id * cols;
 		for(x = 0; x < cont; x++)
 		{
-			temp = shared[x];
+			temp = dop1[pos+shared[x]];
 			y = x + 1;
 			temp2 = shared[y];
 			while(temp2 > -1)
 			{
-				if(dop1[temp+pos] != dop1[temp2+pos])
+				if(temp != dop1[temp2+pos])
 				{
 					res[id] = 0;
 					return;
@@ -84,6 +86,7 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
 	}
 }

+/*Mark all rows that comply with the selfjoins*/
 __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
 {
 	extern __shared__ int shared[];
@@ -97,12 +100,12 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
 		pos = id * cols;
 		for(x = 0; x < cont; x++)
 		{
-			temp = shared[x];
+			temp = dop1[pos+shared[x]];
 			y = x + 1;
 			temp2 = shared[y];
 			while(temp2 > -1)
 			{
-				if(dop1[temp+pos] != dop1[temp2+pos])
+				if(temp != dop1[temp2+pos])
 					return;
 				y++;
 				temp2 = shared[y];
@@ -113,6 +116,7 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
 	}
 }

+/*Project all columns found in 'dhead' to a new array 'res'*/
 __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
 {
 	extern __shared__ int shared[];
@@ -130,76 +134,31 @@ __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize,
 	}
 }

+/*Project all columns found in 'dhead' using only the rows marked as valid (i.e. those that complied with 
+selections, selfjoins, etc.). The array 'temp' holds the result of the prefix sum of said marks.*/
 __global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	int pos, posr, x;
-	if(threadIdx.x < cols)
+	if(threadIdx.x < hsize)
 		shared[threadIdx.x] = dhead[threadIdx.x];
 	__syncthreads();
 	if(id < rows)
 	{		
-		posr = temp[id+1];
-		if(temp[id] != posr && posr > 0)
+		posr = temp[id];
+		if(temp[id+1] != posr)
 		{
 			pos = id * cols;
-			posr = (posr - 1) * hsize;			
+			posr *= hsize;			
 			for(x = 0; x < hsize; x++, posr++)
 				res[posr] = dop1[pos+shared[x]];
 		}
 	}
 }

-/*__global__ void removedup()
-{
-	extern __shared__ int shared[];
-	int id = blockIdx.x * blockDim.x + threadIdx.x;
-	if(threadIdx.x < cols)
-		shared[threadIdx.x] = dhead[threadIdx.x];
-	if(id < rows)
-	{
-		
-	}
-}*/
-
-template<typename T> /*a libreria*/
-struct suma : public binary_function<T,T,T>
-{
-	__host__ __device__ 
-	T operator()(const T &r1, const T &r2)
-	{
-		if(r1 > -1)
-		{
-			if(r2 > 0)
-				return r1 + r2;
-			return -r1;
-		}
-		else
-		{
-			if(r2 > 0)
-				return abs(r1) + r2;
-			return r1;
-		}
-	}
-};
-
-int mayor(int a, int b, int c)
-{
-	if(a > b)
-	{
-		if(a > c)
-			return a;
-	}
-	else
-	{
-		if(b > c)
-			return b;
-	}
-	return c;
-}
-
-int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
+/*Performs selections, selfjoins and comparison predicates when the rule has a single normal predicate.*/
+int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *preds, int numpreds, int *project, int **ret, int ANDlogic)
 {
 	int *fres = NULL, *temp = NULL;
 	int *dhead = NULL, tmplen;
@@ -209,30 +168,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 #if TIMER
 	cuda_stats.selects++;
 #endif
-	int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
+
+	int head_bytes = maximo(4, numselect, numselfj, numpreds, head_size) * sizeof(int);
 	reservar(&dhead, head_bytes);
-#ifdef DEBUG_MEM
-	cerr << "+ " << dhead << " dhead  " << head_bytes << endl;
-#endif
-
-	int blockllen = rows / 1024 + 1;
 	int numthreads = 1024;
+	//int numthreads = 32;
+	int blockllen = rows / numthreads + 1;
+
+	#ifdef ROCKIT
+		ANDlogic = 1;
+	#endif

-	//removerep(dop1, rows, cols, dhead,) 
 	if(numselect > 0)
 	{		
 		tmplen = rows + 1;
 		size2 = tmplen * sizeof(int);
 		reservar(&temp, size2);
-#ifdef DEBUG_MEM
-		cerr << "+ " << temp << " temp  select " << size2 << endl;
-#endif
 		cudaMemset(temp, 0, size2);
-
 		size = numselect * sizeof(int);
 		cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);

-		marcar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
+		marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
 		
 		if(numselfj > 0)
 		{
@@ -241,6 +197,16 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 			samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
 		}

+		if(numpreds > 0)
+		{
+			size = numpreds * sizeof(int);
+			cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
+			if(ANDlogic)
+				bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+			else
+				bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+		}
+
 		res = thrust::device_pointer_cast(temp);
 		thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
 		num = res[rows];
@@ -249,13 +215,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int

 		size = head_size * sizeof(int);
 		reservar(&fres, num * size);
-#ifdef DEBUG_MEM
-		cerr << "+ " << fres << " fres select  " << num*size << endl;
-#endif
 		cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
 		llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
-		liberar(dhead, head_bytes);
-		liberar(temp, size2);
+		cudaFree(dhead);
+		cudaFree(temp);
 		*ret = fres;
 		return num;
 	}
@@ -266,15 +229,22 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 			tmplen = rows + 1;
 			size2 = tmplen * sizeof(int);
 			reservar(&temp, size2);
-#ifdef DEBUG_MEM
-			cerr << "+ " << temp << " temp select  " << size2 << endl;
-#endif
 			cudaMemset(temp, 0, size2);
-			
 			size = numselfj * sizeof(int);
 			cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
 			samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);

+			if(numpreds > 0)
+			{
+				size = numpreds * sizeof(int);
+				cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
+				if(ANDlogic)
+					bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+				else
+					bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+
+			}
+
 			res = thrust::device_pointer_cast(temp);
 			thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
 			num = res[rows];
@@ -283,28 +253,54 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int

 			size = head_size * sizeof(int);
 			reservar(&fres, num * size);
-#ifdef DEBUG_MEM
-			cerr << "+ " << fres << " fres select again  " << num*size << endl;
-#endif
 			cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
 			llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
-			liberar(dhead, head_bytes);
-			liberar(temp, size2);
+			cudaFree(dhead);
+			cudaFree(temp);
 			*ret = fres;
 			return num;
 		}
 		else
 		{
-			size = head_size * sizeof(int);
-			reservar(&fres, rows * size);
-#ifdef DEBUG_MEM
-			cerr << "+ " << fres << " fres select third  " << rows*size << endl;
-#endif
-			cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
-			proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
-			liberar(dhead, head_bytes);
-			*ret = fres;
-			return rows;
+			if(numpreds > 0)
+			{
+				tmplen = rows + 1;
+				size2 = tmplen * sizeof(int);
+				reservar(&temp, size2);
+				cudaMemset(temp, 0, size2);		
+				size = numpreds * sizeof(int);
+				cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
+
+				if(ANDlogic)
+					bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);					
+				else
+					bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
+				res = thrust::device_pointer_cast(temp);
+				thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
+				num = res[rows];
+
+				if(num == 0)
+					return 0;
+
+				size = head_size * sizeof(int);
+				reservar(&fres, num * size);
+				cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
+				llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
+				cudaFree(dhead);
+				cudaFree(temp);
+				*ret = fres;
+				return num;
+			}
+			else
+			{
+				size = head_size * sizeof(int);
+				reservar(&fres, rows * size);
+				cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
+				proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
+				cudaFree(dhead);
+				*ret = fres;
+				return rows;
+			}
 		}
 	}
 }
--- a/packages/cuda/treeb.cu
+++ b/packages/cuda/treeb.cu
--- a/packages/cuda/union2.cu
+++ b/packages/cuda/union2.cu
--- a/packages/python/python.c
+++ b/packages/python/python.c
@@ -2158,12 +2158,14 @@ static foreign_t init_python(void) {
  char **argv;
  term_t t = PL_new_term_ref();
  YAP_Argv(&argv);
+  if (argv) {
 #if PY_MAJOR_VERSION < 3
-  Py_SetProgramName(argv[0]);
+    Py_SetProgramName(argv[0]);
 #else
-  wchar_t *buf = Py_DecodeLocale(argv[0], NULL);
-  Py_SetProgramName(buf);
+    wchar_t *buf = Py_DecodeLocale(argv[0], NULL);
+    Py_SetProgramName(buf);
 #endif
+  }
  Py_Initialize();
  py_Main = PyImport_AddModule("__main__");
  py_Builtin = PyImport_AddModule("__builtin__");
				`@@ -279,4 +279,5 @@ static void RestoreWorker(int wid USES_REGS) {`




				`}`