history

2016-04-19 23:30:02 +01:00
parent 3d68f0e06b
commit cd41d373db
28 changed files with 3153 additions and 2229 deletions
--- a/C/init.c
+++ b/C/init.c
@@ -1438,5 +1438,6 @@ void Yap_exit(int value) {
    Yap_ShutdownLoadForeign();
  }
  Yap_CloseStreams(false);
  Yap_CloseReadline();
  exit(value);
 }
--- a/H/YapGFlagInfo.h
+++ b/H/YapGFlagInfo.h
@@ -185,7 +185,7 @@ available in experimental implementations.
 */
    YAP_FLAG(FILE_NAME_VARIABLES_FLAG, "file_name_variables", true, booleanFlag,
             "true", NULL),
-    YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15e",
+    YAP_FLAG(FLOAT_FORMAT_FLAG, "float_format", true, isatom, "%15f",
             NULL),                                    /**< + `float_format `
                                    C-library `printf()` format specification used by write/1 and
--- a/H/Yapproto.h
+++ b/H/Yapproto.h
@@ -1,4 +1,4 @@
-/*************************************************************************
+  /*************************************************************************
 *									 *
 *	 YAP Prolog 	%W% %G% 					 *
 *	Yap Prolog was developed at NCCUP - Universidade do Porto	 *
@@ -301,6 +301,7 @@ extern void Yap_DebugErrorPutc(int n);
 extern void Yap_DebugErrorPuts(const char *s);
 extern void Yap_DebugWriteIndicator(struct pred_entry *ap);
 void Yap_PlWriteToStream(Term, int, int);
 void Yap_CloseReadline(void);
 /* depth_lim.c */
 bool   Yap_InitReadline(Term t);
 void Yap_InitItDeepenPreds(void);
--- a/H/Yatom.h
+++ b/H/Yatom.h
@@ -659,6 +659,19 @@ INLINE_ONLY inline EXTERN PropFlags IsPredProperty(int flags) {
  return (PropFlags)((flags == PEProp));
 }
 /* Prototype precedes the definition, as for the other inline accessors here. */
 INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe);
 /**
  * Name atom of a predicate entry.
  *
  * @param pe predicate entry to inspect
  * @return NULL when the predicate belongs to IDB_MODULE; for arity 0 the
  *         FunctorOfPred field itself, cast to Atom; otherwise the result of
  *         NameOfFunctor() applied to FunctorOfPred.
  */
 INLINE_ONLY inline EXTERN Atom NameOfPred(PredEntry *pe) {
  /* Predicates of the IDB module have no name atom to report. */
  if (pe->ModuleOfPred == IDB_MODULE)
    return NULL;
  /* With arity > 0 the field holds a functor; ask it for its name. */
  if (pe->ArityOfPE != 0)
    return NameOfFunctor(pe->FunctorOfPred);
  /* With arity 0 the field is reinterpreted directly as the atom. */
  return (Atom)pe->FunctorOfPred;
 }
 /* Flags for code or dbase entry */
 /* There are several flags for code and data base entries */
 typedef enum {
@@ -1258,7 +1271,7 @@ INLINE_ONLY inline EXTERN Prop AbsBlobProp(YAP_BlobPropEntry *p) {
 INLINE_ONLY inline EXTERN bool IsBlobProperty(PropFlags);
-INLINE_ONLY inline EXTERN bool  IsBlobProperty(PropFlags flags) {
+INLINE_ONLY inline EXTERN bool IsBlobProperty(PropFlags flags) {
  return flags == BlobProperty;
 }
@@ -1321,8 +1334,7 @@ INLINE_ONLY inline EXTERN Prop AbsFlagProp(FlagEntry *p) { return (Prop)(p); }
 INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags);
 INLINE_ONLY inline EXTERN bool IsFlagProperty(PropFlags flags) {
-	return flags == FlagProperty;
+  return flags == FlagProperty;
 }
 /* Proto types */
--- a/H/generated/dlocals.h
+++ b/H/generated/dlocals.h
@@ -474,6 +474,8 @@
 #define LOCAL_search_atoms LOCAL->search_atoms_
 #define REMOTE_search_atoms(wid) REMOTE(wid)->search_atoms_
 #define LOCAL_SearchPreds LOCAL->SearchPreds_
 #define REMOTE_SearchPreds(wid) REMOTE(wid)->SearchPreds_
 #define LOCAL_CurSlot LOCAL->CurSlot_
 #define REMOTE_CurSlot(wid) REMOTE(wid)->CurSlot_
--- a/H/generated/hlocals.h
+++ b/H/generated/hlocals.h
@@ -268,6 +268,7 @@ const char*  Error_Function_;
  UInt  exo_arg_;
 // atom completion
  struct scan_atoms*  search_atoms_;
  struct pred_entry*  SearchPreds_;
 // Slots
  yhandle_t  CurSlot_;
  yhandle_t  NSlots_;
--- a/H/generated/ilocals.h
+++ b/H/generated/ilocals.h
@@ -269,6 +269,7 @@ static void InitWorker(int wid) {
  REMOTE_CurSlot(wid) = 0;
  REMOTE_NSlots(wid) = 0;
  REMOTE_SlotBase(wid) = InitHandles(wid);
--- a/H/generated/rlocals.h
+++ b/H/generated/rlocals.h
@@ -279,4 +279,5 @@ static void RestoreWorker(int wid USES_REGS) {
 }
--- a/misc/LOCALS
+++ b/misc/LOCALS
@@ -312,6 +312,7 @@ UInt				exo_arg					=0
 // atom completion
 struct scan_atoms*		search_atoms				void
 struct pred_entry*		SearchPreds				void
 // Slots
 yhandle_t				CurSlot					=0
--- a/os/charsio.c
+++ b/os/charsio.c
@@ -470,20 +470,13 @@ code with  _C_.
 */
 static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */
-  int sno = Yap_CheckStream(ARG1, Input_Stream_f, "get_byte/2");
+  int sno = Yap_CheckBinaryStream(ARG1, Input_Stream_f, "get_byte/2");
  Int status;
  Term out;
  if (sno < 0)
    return (FALSE);
  status = GLOBAL_Stream[sno].status;
  if (!(status & Binary_Stream_f)
      //&& strictISOFlag()
      ) {
    UNLOCK(GLOBAL_Stream[sno].streamlock);
    Yap_Error(PERMISSION_ERROR_INPUT_STREAM, ARG1, "get_byte/2");
    return (FALSE);
  }
  out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno));
  UNLOCK(GLOBAL_Stream[sno].streamlock);
  return Yap_unify_constant(ARG2, out);
@@ -812,16 +805,9 @@ static Int put_byte(USES_REGS1) { /* '$put_byte'(Stream,N)                 */
    Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, t2, "put_code/1");
    return FALSE;
  }
-  int sno = Yap_CheckStream(ARG1, Output_Stream_f, "put/2");
+  int sno = Yap_CheckBinaryStream(ARG1, Output_Stream_f, "put/2");
  if (sno < 0)
    return (FALSE);
  if (!(GLOBAL_Stream[sno].status & Binary_Stream_f)
      // && strictISOFlag()
      ) {
    UNLOCK(GLOBAL_Stream[sno].streamlock);
    Yap_Error(PERMISSION_ERROR_OUTPUT_BINARY_STREAM, ARG1, NULL);
    return false;
  }
  GLOBAL_Stream[sno].stream_putc(sno, ch);
  /*
   * if (!(GLOBAL_Stream[sno].status & Null_Stream_f))
--- a/os/iopreds.c
+++ b/os/iopreds.c
@@ -1576,6 +1576,24 @@ int Yap_CheckTextStream__(const char *file, const char *f, int line, Term arg,
  return sno;
 }
 /**
  * Resolve `arg` to a stream number and require that the stream is binary.
  *
  * The stream is first validated by CheckStream__() for the requested `kind`.
  * If it is not flagged Binary_Stream_f the stream lock is released, a
  * text-stream permission error (input or output, depending on `kind`) is
  * raised through PlIOError__(), and -1 is returned.
  *
  * @param file,f,line  call-site information forwarded to the error reporter
  * @param arg          term naming the stream
  * @param kind         Input_Stream_f selects the input error; any other
  *                     value selects the output error
  * @param msg          context message forwarded to the error reporter
  * @return the stream number on success, -1 on failure. On success the lock
  *         is not released here; the byte-I/O callers UNLOCK it themselves.
  */
 int Yap_CheckBinaryStream__(const char *file, const char *f, int line, Term arg,
                           int kind, const char *msg) {
  int sno;
  if ((sno = CheckStream__(file, f, line, arg, kind, msg)) < 0)
    return -1;
  /* Byte I/O is only legal on binary streams, so reject streams WITHOUT the
     binary flag. The previous test was inverted and rejected binary streams,
     which made get_byte/put_byte fail on every stream. */
  if (!(GLOBAL_Stream[sno].status & Binary_Stream_f)) {
    UNLOCK(GLOBAL_Stream[sno].streamlock);
    if (kind == Input_Stream_f)
      PlIOError__(file, f, line, PERMISSION_ERROR_INPUT_TEXT_STREAM, arg,
                  msg);
    else
      PlIOError__(file, f, line, PERMISSION_ERROR_OUTPUT_TEXT_STREAM, arg,
                  msg);
    return -1;
  }
  return sno;
 }
 /* used from C-interface */
 int Yap_GetFreeStreamDForReading(void) {
  int sno = GetFreeStreamD();
--- a/os/iopreds.h
+++ b/os/iopreds.h
@@ -45,6 +45,10 @@ extern int Yap_CheckStream__(const char *, const char *, int, Term, int,
  Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
 extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int,
                                 const char *);
 #define Yap_CheckBinaryStream(arg, kind, msg)                                    \
  Yap_CheckBinaryStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
 extern int Yap_CheckBinaryStream__(const char *, const char *, int, Term, int,
                                 const char *);
 extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name,
                           encoding_t encoding, stream_flags_t flags,
--- a/os/readline.c
+++ b/os/readline.c
@@ -130,8 +130,8 @@ static char *atom_generator(const char *prefix, int state) {
 }
 typedef struct chain {
-    struct chain *next;
+  struct chain *next;
-    char data[2];
+  char data[2];
 } chain_t;
 static char *predicate_enumerate(const char *prefix, int state) {
@@ -139,352 +139,358 @@ static char *predicate_enumerate(const char *prefix, int state) {
  PredEntry *p;
  ModEntry m0, *mod;
  AtomEntry *ap;
- 
+
-   if (!state) {
+  if (!state) {
-       p = NULL;
+    p = NULL;
-             mod = &m0;
+    mod = &m0;
-             m0.NextME = CurrentModules;
+    m0.NextME = CurrentModules;
-                 if (mod->AtomOfME == AtomIDB)
+    if (mod->AtomOfME == AtomIDB)
-                    mod = mod->NextME;
+      mod = mod->NextME;
-       } else {
+  } else {
-       Term cmod;
+    Term cmod;
-        p = LOCAL_SearchPreds;
+    p = LOCAL_SearchPreds;
-        cmod = (p->ModuleOfPred != PROLOG_MODULE ? p->ModuleOfPred : TermProlog );
+    cmod = (p->ModuleOfPred != PROLOG_MODULE ? p->ModuleOfPred : TermProlog);
-        mod = Yap_GetModuleEntry(cmod);
+    mod = Yap_GetModuleEntry(cmod);
-   }
+  }
-        while (mod) {
+  while (mod) {
-  // move to next o;
+    // move to next o;
-            if (p)
+    if (p)
-              p = p->NextPredOfModule;
+      p = p->NextPredOfModule;
-              while (p == NULL) {
+    while (p == NULL) {
-                  mod = mod->NextME;
+      mod = mod->NextME;
-                   if (!mod) {
+      if (!mod) {
-                      // done
+        // done
                       LOCAL_SearchPreds = NULL;
                      return NULL;
                  }
                  if (mod->AtomOfME == AtomIDB)
                    mod = mod->NextME;
                 p = mod->PredForME;
              }
         char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
         if (strlen(c) > strlen(prefix) &&
            strstr(c, prefix) == c &&
            !(p->PredFlags & HiddenPredFlag)) {
          LOCAL_SearchPreds = p;
          arity_t ar = p->ArityOfPE;
          int l, r;
          if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
            return c;
          }
          size_t sz = strlen(c);
          chain_t *el = (chain_t *)malloc(sizeof(chain_t)+sz);
          strncpy(LOCAL_FileNameBuf,  c, YAP_FILENAME_MAX);
          strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
          return LOCAL_FileNameBuf;
            } 
        }
        LOCAL_SearchPreds = NULL;
-    return NULL;
+        return NULL;
      }
      if (mod->AtomOfME == AtomIDB)
        mod = mod->NextME;
      p = mod->PredForME;
    }
    char *c = RepAtom(ap = NameOfPred(p))->StrOfAE;
    if (strlen(c) > strlen(prefix) && strstr(c, prefix) == c &&
        !(p->PredFlags & HiddenPredFlag)) {
      LOCAL_SearchPreds = p;
      arity_t ar = p->ArityOfPE;
      int l, r;
      if (Yap_IsPrefixOp(AbsAtom(ap), &l, &r) && ar == 1) {
        return c;
      }
      strncpy(LOCAL_FileNameBuf, c, YAP_FILENAME_MAX);
      strncat(LOCAL_FileNameBuf, "(", YAP_FILENAME_MAX);
      return LOCAL_FileNameBuf;
    }
  }
  LOCAL_SearchPreds = NULL;
  return NULL;
 }
-  static char *predicate_generator(const char *prefix, int state) {
+static char *predicate_generator(const char *prefix, int state) {
-    char *s = predicate_enumerate(prefix, state);
+  char *s = predicate_enumerate(prefix, state);
-    if (s) {
+  if (s) {
-      char *copy = malloc(1 + strlen(s));
+    char *copy = malloc(1 + strlen(s));
-      if (copy) /* else pretend no completion */
+    if (copy) /* else pretend no completion */
-        strcpy(copy, s);
+      strcpy(copy, s);
-      s = copy;
+    s = copy;
    }
    return s;
  }
-  static char **prolog_completion(const char *text, int start, int end) {
+  return s;
-    char **matches = NULL;
+}
-    if (start == 0 && isalpha(text[0])) {
+static char **prolog_completion(const char *text, int start, int end) {
-      int i = 0;
+  char **matches = NULL;
      while (i < end) {
        if (isalnum(text[i]))
          i++;
        else
          break;
      }
      if (i == end) {
        matches = rl_completion_matches((char *)text, predicate_generator);
      }
      return matches;
    } else if (start == 0) {
      int i = 0;
      const char *p;
      while (isblank(text[i++]) && i <= end)
        ;
      p = text + i;
-      if ((strstr(p,"[") == p) || (strstr(p,"compile(") == p) ||
+  if (start == 0 && isalpha(text[0])) {
-          (strstr(p,"consult(") == p) || (strstr(p,"load_files(") == p) ||
+    int i = 0;
-          (strstr(p,"reconsult(") == p) || (strstr(p,"use_module(") == p))
+    while (i < end) {
-        matches = rl_completion_matches((char *)text, /* for pre-4.2 */
+      if (isalnum(text[i]) || text[i] == '_')
-                                        rl_filename_completion_function);
+        i++;
-      return matches;
+      else
        break;
    }
-    int i = end, ch = '\0';
+    if (i == end) {
-    while (i > start) {
+      matches = rl_completion_matches((char *)text, predicate_generator);
      ch = text[-i];
      if (isalnum(text[i]))
        continue;
      break;
    }
-    if (islower(ch))
+    return matches;
-      return rl_completion_matches((char *)text, atom_generator);
+  } else if (start == 0) {
    int i = 0;
    const char *p;
    while (isblank(text[i++]) && i <= end)
      ;
    p = text + i;
-    return NULL;
+    if ((strstr(p, "[") == p) || (strstr(p, "compile(") == p) ||
        (strstr(p, "consult(") == p) || (strstr(p, "load_files(") == p) ||
        (strstr(p, "reconsult(") == p) || (strstr(p, "use_module(") == p) ||
        (strstr(p, "cd(") == p))
      matches = rl_completion_matches((char *)text, /* for pre-4.2 */
                                      rl_filename_completion_function);
    return matches;
  }
-
+  int i = end, ch = '\0';
-  void Yap_ReadlineFlush(int sno) {
+  while (i > start) {
-    if (GLOBAL_Stream[sno].status & Tty_Stream_f &&
+    ch = text[--i];
-        GLOBAL_Stream[sno].status & Output_Stream_f) {
+    if (ch == '\'')
-      rl_redisplay();
+      return rl_completion_matches((char *)text, /* for pre-4.2 */
-    }
+                                   rl_filename_completion_function);
    if (isalnum(text[i]))
      continue;
    break;
  }
  if (islower(ch))
    return rl_completion_matches((char *)text, atom_generator);
-  bool Yap_ReadlinePrompt(StreamDesc * s) {
+  return NULL;
-    if (s->status & Tty_Stream_f) {
+}
-      s->stream_getc = ReadlineGetc;
+
-      if (GLOBAL_Stream[0].status & Tty_Stream_f &&
+void Yap_ReadlineFlush(int sno) {
-          s->name == GLOBAL_Stream[0].name)
+  if (GLOBAL_Stream[sno].status & Tty_Stream_f &&
-        s->stream_putc = ReadlinePutc;
+      GLOBAL_Stream[sno].status & Output_Stream_f) {
-      return true;
+    rl_redisplay();
    }
    return false;
  }
 }
-  bool Yap_ReadlineOps(StreamDesc * s) {
+bool Yap_ReadlinePrompt(StreamDesc *s) {
-    if (s->status & Tty_Stream_f) {
+  if (s->status & Tty_Stream_f) {
-      if (GLOBAL_Stream[0].status & Tty_Stream_f &&
+    s->stream_getc = ReadlineGetc;
-          is_same_tty(s->file, GLOBAL_Stream[0].file))
+    if (GLOBAL_Stream[0].status & Tty_Stream_f &&
-        s->stream_putc = ReadlinePutc;
+        s->name == GLOBAL_Stream[0].name)
-      s->stream_getc = ReadlineGetc;
+      s->stream_putc = ReadlinePutc;
      s->status |= Readline_Stream_f;
      return true;
    }
    return false;
  }
  static int prolog_complete(int ignore, int key) {
    if (rl_point > 0 && rl_line_buffer[rl_point - 1] != ' ') {
 #if HAVE_DECL_RL_CATCH_SIGNALS_ /* actually version >= 1.2, or true readline   \
                                   */
      rl_begin_undo_group();
      rl_complete(ignore, key);
      if (rl_point > 0 && rl_line_buffer[rl_point - 1] == ' ') {
        rl_delete_text(rl_point - 1, rl_point);
        rl_point -= 1;
        rl_delete(-1, key);
      }
      rl_end_undo_group();
 #endif
    } else
      rl_complete(ignore, key);
    return 0;
  }
  bool Yap_InitReadline(Term enable) {
    // don't call readline within emacs
    // if (getenv("ËMACS"))
    //  return;
    if (enable == TermFalse)
      return true;
    GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
    GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
    GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
 #if _MSC_VER || defined(__MINGW32__)
    rl_instream = stdin;
 #endif
    rl_outstream = stderr;
    using_history();
    const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
    if (read_history(s) != 0) {
      FILE *f = fopen(s, "a");
      if (f) {
        fclose(f);
      }
    }
    rl_readline_name = "Prolog";
    rl_attempted_completion_function = prolog_completion;
 #ifdef HAVE_RL_COMPLETION_FUNC_T
    rl_add_defun("prolog-complete", prolog_complete, '\t');
 #else
    rl_add_defun("prolog-complete", (void *)prolog_complete, '\t');
 #endif
    return Yap_ReadlineOps(GLOBAL_Stream + StdInStream);
  }
  static bool getLine(int inp, int out) {
    CACHE_REGS
    rl_instream = GLOBAL_Stream[inp].file;
    rl_outstream = GLOBAL_Stream[out].file;
    const unsigned char *myrl_line;
    StreamDesc *s = GLOBAL_Stream + inp;
    if (!(s->status & Tty_Stream_f))
      return false;
    /* window of vulnerability opened */
    fflush(NULL);
    LOCAL_PrologMode |= ConsoleGetcMode;
    if (LOCAL_newline) { // no output so far
      myrl_line = (unsigned char *)readline(LOCAL_Prompt);
    } else {
      myrl_line = (unsigned char *)readline(NULL);
    }
    /* Do it the gnu way */
    if (LOCAL_PrologMode & InterruptMode) {
      Yap_external_signal(0, YAP_INT_SIGNAL);
      LOCAL_PrologMode &= ~ConsoleGetcMode;
      if (LOCAL_PrologMode & AbortMode) {
        Yap_Error(ABORT_EVENT, TermNil, "");
        LOCAL_ErrorMessage = "Abort";
        return console_post_process_eof(s);
      }
    } else {
      LOCAL_PrologMode &= ~ConsoleGetcMode;
      LOCAL_newline = true;
    }
    strncpy(LOCAL_Prompt, RepAtom(LOCAL_AtPrompt)->StrOfAE, MAX_PROMPT);
    /* window of vulnerability closed */
    if (myrl_line == NULL)
      return false;
    if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
      add_history((char *)myrl_line);
      write_history(history_file);
      fflush(NULL);
    }
    s->u.irl.ptr = s->u.irl.buf = myrl_line;
    return true;
  }
  return false;
 }
-  static int ReadlinePutc(int sno, int ch) {
+bool Yap_ReadlineOps(StreamDesc *s) {
-    CACHE_REGS
+  if (s->status & Tty_Stream_f) {
-    StreamDesc *s = &GLOBAL_Stream[sno];
+    if (GLOBAL_Stream[0].status & Tty_Stream_f &&
-#if MAC || _MSC_VER || defined(__MINGW32__)
+        is_same_tty(s->file, GLOBAL_Stream[0].file))
-    if (ch == 10) {
+      s->stream_putc = ReadlinePutc;
-      putc('\n', s->file);
+    s->stream_getc = ReadlineGetc;
-    } else
+    s->status |= Readline_Stream_f;
    return true;
  }
  return false;
 }
 static int prolog_complete(int ignore, int key) {
  if (rl_point > 0 && rl_line_buffer[rl_point - 1] != ' ') {
 #if HAVE_DECL_RL_CATCH_SIGNALS_ /* actually version >= 1.2, or true readline   \
                                   */
    rl_begin_undo_group();
    rl_complete(ignore, key);
    if (rl_point > 0 && rl_line_buffer[rl_point - 1] == ' ') {
      rl_delete_text(rl_point - 1, rl_point);
      rl_point -= 1;
      rl_delete(-1, key);
    }
    rl_end_undo_group();
 #endif
-      putc(ch, s->file);
+  } else
-    console_count_output_char(ch, s);
+    rl_complete(ignore, key);
-    if (ch == 10) {
+
-      Yap_ReadlineFlush(sno);
+  return 0;
-      LOCAL_newline = true;
+}
 bool Yap_InitReadline(Term enable) {
  // don't call readline within emacs
  // if (getenv("ËMACS"))
  //  return;
  if (enable == TermFalse)
    return true;
  GLOBAL_Stream[StdInStream].u.irl.buf = NULL;
  GLOBAL_Stream[StdInStream].u.irl.ptr = NULL;
  GLOBAL_Stream[StdInStream].status |= Readline_Stream_f;
 #if _WIN32
  rl_instream = stdin;
 #endif
  rl_outstream = stderr;
  using_history();
  const char *s = Yap_AbsoluteFile("~/.YAP.history", NULL, true);
  history_file = s;
  if (read_history(s) != 0) {
    FILE *f = fopen(s, "a");
    if (f) {
      fclose(f);
    }
    return ((int)ch);
  }
  rl_readline_name = "Prolog";
  rl_attempted_completion_function = prolog_completion;
 #ifdef HAVE_RL_COMPLETION_FUNC_T
  rl_add_defun("prolog-complete", prolog_complete, '\t');
 #else
  rl_add_defun("prolog-complete", (void *)prolog_complete, '\t');
 #endif
  return Yap_ReadlineOps(GLOBAL_Stream + StdInStream);
 }
-  /**
+static bool getLine(int inp, int out) {
-    @brief reading from the console is complicated because we need to
+  CACHE_REGS
-    know whether to prompt and so on...
+  rl_instream = GLOBAL_Stream[inp].file;
  rl_outstream = GLOBAL_Stream[out].file;
  const unsigned char *myrl_line;
  StreamDesc *s = GLOBAL_Stream + inp;
-    EOF must be handled by resetting the file.
+  if (!(s->status & Tty_Stream_f))
-  */
+    return false;
  static int ReadlineGetc(int sno) {
    StreamDesc *s = &GLOBAL_Stream[sno];
    int ch;
    bool fetch = (s->u.irl.buf == NULL);
-    if (!fetch || getLine(sno, StdErrStream)) {
+  /* window of vulnerability opened */
-      const unsigned char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf;
+  fflush(NULL);
-      ch = *ttyptr;
+  LOCAL_PrologMode |= ConsoleGetcMode;
-      if (ch == '\0') {
+  if (LOCAL_newline) { // no output so far
-        ch = '\n';
+    myrl_line = (unsigned char *)readline(LOCAL_Prompt);
-        free((void *)myrl_line);
+  } else {
-        s->u.irl.ptr = s->u.irl.buf = NULL;
+    myrl_line = (unsigned char *)readline(NULL);
      }
    } else {
      return EOF;
    }
    return console_post_process_read_char(ch, s);
  }
-
+  /* Do it the gnu way */
-  /**
+  if (LOCAL_PrologMode & InterruptMode) {
-    @brief  Yap_ReadlinePeekChar peeks the next char from the
+    Yap_external_signal(0, YAP_INT_SIGNAL);
-    readline buffer, but does not actually grab it.
+    LOCAL_PrologMode &= ~ConsoleGetcMode;
-
+    if (LOCAL_PrologMode & AbortMode) {
-    The idea is to take advantage of the buffering. Special care must be taken
+      Yap_Error(ABORT_EVENT, TermNil, "");
-    with EOF, though.
+      LOCAL_ErrorMessage = "Abort";
-
+      return console_post_process_eof(s);
  */
  Int Yap_ReadlinePeekChar(int sno) {
    StreamDesc *s = &GLOBAL_Stream[sno];
    int ch;
    if (s->u.irl.buf) {
      const unsigned char *ttyptr = s->u.irl.ptr;
      ch = *ttyptr;
      if (ch == '\0') {
        ch = '\n';
      }
      return ch;
    }
-    if (getLine(sno, StdErrStream)) {
+  } else {
-      CACHE_REGS
+    LOCAL_PrologMode &= ~ConsoleGetcMode;
-      ch = s->u.irl.ptr[0];
+    LOCAL_newline = true;
-      if (ch == '\0') {
+  }
-        ch = '\n';
+  strncpy(LOCAL_Prompt, RepAtom(LOCAL_AtPrompt)->StrOfAE, MAX_PROMPT);
-      }
+  /* window of vulnerability closed */
-      if (ch == '\n') {
+  if (myrl_line == NULL)
-        LOCAL_newline = true;
+    return false;
-      } else {
+  if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
-        LOCAL_newline = false;
+    add_history((char *)myrl_line);
-      }
+    fflush(NULL);
-    } else {
+  }
-      return EOF;
+  s->u.irl.ptr = s->u.irl.buf = myrl_line;
  return true;
 }
 static int ReadlinePutc(int sno, int ch) {
  CACHE_REGS
  StreamDesc *s = &GLOBAL_Stream[sno];
 #if MAC || _MSC_VER || defined(__MINGW32__)
  if (ch == 10) {
    putc('\n', s->file);
  } else
 #endif
    putc(ch, s->file);
  console_count_output_char(ch, s);
  if (ch == 10) {
    Yap_ReadlineFlush(sno);
    LOCAL_newline = true;
  }
  return ((int)ch);
 }
 /**
  @brief Read the next character of the line buffered for stream `sno`.
  A new line is requested through getLine() only when no buffer is pending;
  if that fails, EOF is returned. The terminating NUL of the buffered line is
  reported as '\n', at which point the buffer is freed and reset so that the
  next call fetches a fresh line.
 */
 static int ReadlineGetc(int sno) {
  StreamDesc *in = GLOBAL_Stream + sno;
  int ch;
  /* Nothing buffered and nothing more to read: end of input. */
  if (in->u.irl.buf == NULL && !getLine(sno, StdErrStream))
    return EOF;
  const unsigned char *line = in->u.irl.buf;
  const unsigned char *cursor = in->u.irl.ptr++;
  ch = *cursor;
  if (ch == '\0') {
    /* Line exhausted: release it and hand back the newline it stood for. */
    free((void *)line);
    in->u.irl.ptr = in->u.irl.buf = NULL;
    ch = '\n';
  }
  return console_post_process_read_char(ch, in);
 }
 /**
  @brief  Yap_ReadlinePeekChar peeks the next char from the
  readline buffer, but does not actually grab it.
  The idea is to take advantage of the buffering. Special care must be taken
  with EOF, though.
 */
 Int Yap_ReadlinePeekChar(int sno) {
  StreamDesc *s = &GLOBAL_Stream[sno];
  int ch;
  if (s->u.irl.buf) {
    const unsigned char *ttyptr = s->u.irl.ptr;
    ch = *ttyptr;
    if (ch == '\0') {
      ch = '\n';
    }
    return ch;
  }
-
+  if (getLine(sno, StdErrStream)) {
  int Yap_ReadlineForSIGINT(void) {
    CACHE_REGS
-    int ch;
+    ch = s->u.irl.ptr[0];
-    StreamDesc *s = &GLOBAL_Stream[StdInStream];
+    if (ch == '\0') {
-    const unsigned char *myrl_line = s->u.irl.buf;
+      ch = '\n';
    }
    if (ch == '\n') {
      LOCAL_newline = true;
    } else {
      LOCAL_newline = false;
    }
  } else {
    return EOF;
  }
  return ch;
 }
-    if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != NULL) {
+int Yap_ReadlineForSIGINT(void) {
  CACHE_REGS
  int ch;
  StreamDesc *s = &GLOBAL_Stream[StdInStream];
  const unsigned char *myrl_line = s->u.irl.buf;
  if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != NULL) {
    ch = myrl_line[0];
    free((void *)myrl_line);
    myrl_line = NULL;
    return ch;
  } else {
    myrl_line = (const unsigned char *)readline("Action (h for help): ");
    if (!myrl_line) {
      ch = EOF;
      return ch;
    } else {
      ch = myrl_line[0];
      free((void *)myrl_line);
      myrl_line = NULL;
      return ch;
    } else {
      myrl_line = (const unsigned char *)readline("Action (h for help): ");
      if (!myrl_line) {
        ch = EOF;
        return ch;
      } else {
        ch = myrl_line[0];
        free((void *)myrl_line);
        myrl_line = NULL;
        return ch;
      }
    }
  }
 }
-  static Int has_readline(USES_REGS1) {
+void Yap_CloseReadline(void) {
 #if USE_READLINE
-    return true;
+  write_history(history_file);
 #else
    return false;
 #endif
-  }
+}
-  void Yap_InitReadlinePreds(void) {
+static Int has_readline(USES_REGS1) {
-    Yap_InitCPred("$has_readline", 0, has_readline,
+#if USE_READLINE
-                  SafePredFlag | HiddenPredFlag);
+  return true;
-  }
+#else
  return false;
 #endif
 }
 /* Register the C predicate `$has_readline` (arity 0, implemented by
    has_readline) with the SafePredFlag and HiddenPredFlag flags. */
 void Yap_InitReadlinePreds(void) {
  Yap_InitCPred("$has_readline", 0, has_readline,
                SafePredFlag | HiddenPredFlag);
 }
 #else
 bool Yap_InitReadline(Term enable) {
--- a/os/writeterm.c
+++ b/os/writeterm.c
@@ -390,7 +390,9 @@ write1 ( USES_REGS1 )
  if (output_stream == -1) output_stream = 1;
  xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
-    if (LOCAL_Error_TYPE)
+   if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
     if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
  }
@@ -415,6 +417,8 @@ write_canonical1 ( USES_REGS1 )
  if (output_stream == -1) output_stream = 1;
  xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
    if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
    if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
@@ -440,6 +444,8 @@ write_canonical ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg * args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
    if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
    if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
@@ -467,7 +473,9 @@ writeq1 ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
-    if (LOCAL_Error_TYPE)
+     if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
   if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
  }
@@ -495,6 +503,8 @@ writeq ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
    if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
    if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
@@ -523,7 +533,9 @@ print1 ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
-    if (LOCAL_Error_TYPE)
+   if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
     if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
  }
@@ -551,7 +563,9 @@ print ( USES_REGS1 )
     we cannot make recursive Prolog calls */
  xarg *args = Yap_ArgListToVector ( TermNil, write_defs, WRITE_END  );
  if (args == NULL) {
-    if (LOCAL_Error_TYPE)
+   if (LOCAL_Error_TYPE == DOMAIN_ERROR_OUT_OF_RANGE)
      LOCAL_Error_TYPE = DOMAIN_ERROR_WRITE_OPTION;
     if (LOCAL_Error_TYPE)
      Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term, NULL);
    return false;
  }
--- a/packages/cuda/CMakeLists.txt
+++ b/packages/cuda/CMakeLists.txt
@@ -54,11 +54,28 @@ if (CUDA_FOUND)
  macro_optional_find_package (Thrust ON)
  set (CUDA_SOURCES
-    lista.cu
+CC_CSSTree.cu
-    memory.cu
+bpreds.cu
-    cuda.c
+dbio.cu
 lista.cu
 memory.cu
 selectproyect.cu
 treeb.cu
 union2.cu
    )
  set (CXX_SOURCES
 bpredscpu.cpp
 joincpu.cpp
 selectproyectcpu.cpp
 unioncpu2.cpp
 )
  set (C_SOURCES
 creator2.c
 cuda.c
 )
  set (PL_SOURCES
    cuda.yap
    )
--- a/packages/cuda/Makefile.in
+++ b/packages/cuda/Makefile.in
@@ -23,7 +23,7 @@ CC=@CC@
 NVCC=@NVCC@
 CFLAGS= @SHLIB_CFLAGS@ $(YAP_EXTRAS) $(DEFS) -I$(srcdir) -I../.. -I$(srcdir)/../../include  @CUDA_CPPFLAGS@
 NVCCFLAGS=@CUDA_CPPFLAGS@ -I$(srcdir) -I../.. -I$(srcdir)/../../include
-CUDA_LDFLAGS=@CUDA_LDFLAGS@
+LDFLAGS=@LDFLAGS@
 #
 #
 # You shouldn't need to change what follows.
@@ -39,7 +39,7 @@ SO=@SO@
 CWD=$(PWD)
 #
-CUDA_PROLOG= \
+BDD_PROLOG= \
 	$(srcdir)/cuda.yap
 OBJS=cuda.o memory.o lista.o
@@ -62,16 +62,11 @@ memory.o: $(srcdir)/memory.cu $(srcdir)/pred.h
@DO_SECOND_LD@cuda.@SO@: $(OBJS)
@DO_SECOND_LD@	@CUDA_SHLIB_LD@ $(CUDA_LDFLAGS) -o cuda.@SO@ $(OBJS)
-install: all install-examples
+install: all
 	mkdir -p $(DESTDIR)$(SHAREDIR)
-	for h in $(CUDA_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
+	for h in $(BDD_PROLOG); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
 	$(INSTALL_PROGRAM) $(SOBJS) $(DESTDIR)$(YAPLIBDIR)
 install-examples:
 clean:
-	rm -f *.o *~ $(OBJS) *.BAK
+	rm -f *.o *~ $(OBJS) $(SOBJS) *.BAK
 distclean: clean
 	rm -f $(SOBJS) Makefile
--- a/packages/cuda/bpreds.cu
+++ b/packages/cuda/bpreds.cu
@@ -1,4 +1,113 @@
-__global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, int *res)
+#include <thrust/device_vector.h>
 #include <thrust/scan.h>
 #include <cstdarg>
 #include "pred.h"
 /*Returns the largest of the `count` int arguments that follow; the result
   is never below 0, so an empty or all-negative list yields 0*/
 int maximo(int count, ...)
 {
 	va_list args;
 	int best = 0;
 	va_start(args, count);
 	for(int remaining = count; remaining > 0; remaining--)
 	{
 		const int candidate = va_arg(args, int);
 		best = (candidate > best) ? candidate : best;
 	}
 	va_end(args);
 	return best;
 }
 /*
  * Marks the rows that satisfy every comparison predicate encoded in cons.
  *
  * One thread handles one row (id < rows). cons holds numc ints read as
  * triples {operator, operand1, operand2}; it is copied to dynamic shared
  * memory first, so the launch must provide room for the entries read below
  * (presumably numc * sizeof(int) with numc and nx multiples of 3 - TODO
  * confirm against the launcher).
  *
  * Triples in [nx, numc): the operator is stored with BPOFFSET added. A
  * negative operand k reads dop1[id * of1 - k - 1]; a non-negative operand k
  * reads dop2[id * of2 + k]. If all of them hold and res2 is not NULL,
  * res2[id] is set to 1.
  *
  * Triples in [0, nx): a negative operand stands for the constant -k; a
  * non-negative operand k reads dop2[id * of2 + k]. If these hold as well,
  * res[id] is set to 1.
  *
  * A thread returns at the first predicate that fails and leaves its output
  * entries untouched, so callers are expected to pre-initialise res/res2
  * (assumed zeroed - TODO confirm).
  */
 __global__ void bpreds(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	int x, rowact, rowact1, op1, op2;
 	/* Block-strided copy: every entry of cons is loaded even when numc is
 	   larger than the block size (a single guarded store per thread left the
 	   entries past blockDim.x uninitialised). */
 	for(x = threadIdx.x; x < numc; x += blockDim.x)
 		shared[x] = cons[x];
 	/* Reached by all threads: the early returns only happen afterwards. */
 	__syncthreads();
 	if(id < rows)
 	{
 		rowact1 = id * of1;
 		rowact = id * of2;
 		for(x = nx; x < numc; x += 3)
 		{
 			op1 = shared[x+1];
 			if(op1 < 0)
 				op1 = dop1[rowact1 - op1 - 1];
 			else
 				op1 = dop2[rowact + op1];
 			op2 = shared[x+2];
 			if(op2 < 0)
 				op2 = dop1[rowact1 - op2 - 1];
 			else
 				op2 = dop2[rowact + op2];
 			/* Each case bails out when its comparison does NOT hold. */
 			switch(shared[x] - BPOFFSET)
 			{
 				case SBG_EQ: if(op1 != op2)
 						return;
 				break;
 				case SBG_GT: if(op1 <= op2)
 						return;
 				break;
 				case SBG_LT: if(op1 >= op2)
 						return;
 				break;
 				case SBG_GE: if(op1 < op2)
 						return;
 				break;
 				case SBG_LE: if(op1 > op2)
 						return;
 				break;
 				case SBG_DF: if(op1 == op2)
 						return;
 			}
 		}
 		/* The first group of predicates holds for this row. */
 		if(res2 != NULL)
 			res2[id] = 1; 
 		for(x = 0; x < nx; x += 3)
 		{
 			op1 = shared[x+1];
 			if(op1 < 0)
 				op1 *= -1;
 			else
 				op1 = dop2[rowact + op1];
 			op2 = shared[x+2];
 			if(op2 < 0)
 				op2 *= -1;
 			else
 				op2 = dop2[rowact + op2];
 			/* Operators in this range are stored without the BPOFFSET bias. */
 			switch(shared[x])
 			{
 				case SBG_EQ: if(op1 != op2)
 						return;
 				break;
 				case SBG_GT: if(op1 <= op2)
 						return;
 				break;
 				case SBG_LT: if(op1 >= op2)
 						return;
 				break;
 				case SBG_GE: if(op1 < op2)
 						return;
 				break;
 				case SBG_LE: if(op1 > op2)
 						return;
 				break;
 				case SBG_DF: if(op1 == op2)
 						return;
 			}
 		}
 		/* Every predicate holds for this row. */
 		res[id] = 1;
 	}
 }
 /*Mark all rows that comply with the comparison predicates*/
 __global__ void bpredsnormal2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
@@ -8,7 +117,7 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
 	__syncthreads();
 	if(id < rows)
 	{
-		rowact = id * cols;
+		rowact = id * of1; 
 		for(x = 0; x < numc; x += 3)
 		{
 			op1 = shared[x+1];
@@ -23,21 +132,21 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
 				op2 = dop1[rowact + op2];
 			switch(shared[x])
 			{
-				case SBG_EQ:  if(op1 != op2)
+				case SBG_EQ: if(op1 != op2)
 						return;
-				  break;
+				break;
 				case SBG_GT: if(op1 <= op2)
 						return;
-				  break;
+				break;
 				case SBG_LT: if(op1 >= op2)
 						return;
-				  break;
+				break;
 				case SBG_GE: if(op1 < op2)
 						return;
-				  break;
+				break;
 				case SBG_LE: if(op1 > op2)
 						return;
-				  break;
+				break;
 				case SBG_DF: if(op1 == op2)
 						return;
 			}
@@ -46,98 +155,306 @@ __global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, i
 	}
 }
-int bpreds(int *dop1, int rows, int cols, int *bin, int3 numpreds, int **ret)
+/*Unmark all rows that do not comply with the comparison predicates*/
 __global__ void bpredsnormal(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
-	int *temp;
+ 	extern __shared__ int shared[];
-	int tmplen = rows + 1;
+	int id = blockIdx.x * blockDim.x + threadIdx.x;
-	int size = tmplen * sizeof(int);
+	int x, rowact, op1, op2;
-	reservar(&temp, size);
+	if(threadIdx.x < numc)
-#ifdef DEBUG_MEM
+		shared[threadIdx.x] = cons[threadIdx.x];
-	 cerr << "+ " << temp << " temp bpreds " << size << endl;
+	__syncthreads();
-#endif
+	if(id < rows)
 	cudaMemset(temp, 0, size);
 #if TIMER
 	cuda_stats.builtins++;
 #endif
 	int *dhead;
 	int predn = numpreds.x * 3;
 	int spredn = predn * sizeof(int);
 	int sproj = numpreds.z * sizeof(int);
 	int hsize;
 	if(predn > numpreds.z)
 		hsize = spredn;
 	else
 		hsize = sproj;
 	reservar(&dhead, hsize);
 #ifdef DEBUG_MEM
 	cerr << "+ " << dhead << " dhead  " << hsize << endl;
 #endif
 	cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
 	int blockllen = rows / 1024 + 1;
 	int numthreads = 1024;
 	/*int x;
 	cout << "arraypreds = ";
 	for(x = 0; x < predn; x++)
 		cout << bin[x] << " ";
 	cout << endl;
 	cout << "temptable = ";
 	for(x = 0; x < numpreds.z; x++)
 		cout << bin[x+predn] << " ";
 	cout << endl; 
 	int y;
 	int *hop1 = (int *)malloc(numpreds.y * rows * sizeof(int));
 	cudaMemcpy(hop1, dop1, numpreds.y * rows * sizeof(int), cudaMemcpyDeviceToHost);
 	for(x = 0; x < rows; x++)
 	{
-		for(y = 0; y < numpreds.y; y++)
+		if(res[id] == 0)
-			cout << hop1[x * numpreds.y + y] << " ";
+			return;
-		cout << endl;
+		rowact = id * of1; 
 		for(x = 0; x < numc; x += 3)
 		{
 			op1 = shared[x+1];
 			if(op1 < 0)
 				op1 *= -1;
 			else
 				op1 = dop1[rowact + op1];
 			op2 = shared[x+2];
 			if(op2 < 0)
 				op2 *= -1;
 			else
 				op2 = dop1[rowact + op2];
 			switch(shared[x])
 			{
 				case SBG_EQ: if(op1 != op2)
 					     {
 						res[id] = 0;
 						return;
 					     }
 				break;
 				case SBG_GT: if(op1 <= op2)
 					     {
 						res[id] = 0;
 						return;
 					     }
 				break;
 				case SBG_LT: if(op1 >= op2)
 					     {
 						res[id] = 0;
 						return;
 					     }
 				break;
 				case SBG_GE: if(op1 < op2)
 					     {
 						res[id] = 0;
 						return;
 					     }
 				break;
 				case SBG_LE: if(op1 > op2)
 					     {
 						res[id] = 0;
 						return;
 					     }
 				break;
 				case SBG_DF: if(op1 == op2)
 					     {
 						res[id] = 0;
 						return;
 					     }
 			}
 		}
 	}
 	free(hop1);*/
 	predicates<<<blockllen, numthreads, spredn>>>(dop1, rows, numpreds.y, dhead, predn, temp + 1);
 	/*int y;
 	int *hop1 = (int *)malloc((rows + 1) * sizeof(int));
 	cudaMemcpy(hop1, temp, (rows + 1) * sizeof(int), cudaMemcpyDeviceToHost);
 	for(x = 0; x < (rows + 1); x++)
 		cout << hop1[x] << " ";
 	cout << endl;
 	free(hop1);*/
 	thrust::device_ptr<int> res;
 	res = thrust::device_pointer_cast(temp);
 	thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
 	int num = res[rows];
 	if(num == 0)
 		return 0;
 	int *fres;
 	reservar(&fres, num * sproj);
 #ifdef DEBUG_MEM
 	cerr << "+ " << fres << " fres  " << num * sproj << endl;
 #endif
 	cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
 	llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
 	/*int y;
 	int *hop1 = (int *)malloc(numpreds.z * num * sizeof(int));
 	cudaMemcpy(hop1, fres, numpreds.z * num * sizeof(int), cudaMemcpyDeviceToHost);
 	for(x = 0; x < num; x++)
 	{
 		for(y = 0; y < numpreds.z; y++)
 			cout << hop1[x * numpreds.z + y] << " ";
 		cout << endl;
 	}
 	free(hop1);*/
 	liberar(dhead, hsize);
 	liberar(temp, size);
 	liberar(dop1, rows * cols * sizeof(int));
 	*ret = fres;
 	return num;
 }
 /*Disjunctive counterpart of the two-table predicate check.
 'cons' holds 'numc' ints that are read as (operator, operand1, operand2) triples:
   - triples in [nx, numc): the operator is stored with BPOFFSET added; a negative operand k
     reads dop1[id * of1 - k - 1], a non-negative operand k reads dop2[id * of2 + k].
     As soon as one of these triples holds, res2[id] is set to 1 and the group is abandoned.
   - triples in [0, nx): a negative operand is a constant (its negation), a non-negative
     operand k reads dop2[id * of2 + k]. As soon as one of these triples holds, res[id] is
     set to 1 and the thread finishes.
 res2 is written without a NULL check, so it must be valid whenever nx < numc.
 The kernel indexes 'shared' up to numc ints, so the launch must supply at least that much
 dynamic shared memory.*/
 __global__ void bpredsOR(int *dop1, int *dop2, int rows, int of1, int of2, int *cons, int numc, int nx, int *res, int *res2)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	int x, rowact, rowact1, op1, op2;
 	bool sat;

 	/*Block-stride copy: every entry of 'cons' is staged even when numc is larger than the
 	number of threads in the block (a plain 'threadIdx.x < numc' test would leave the tail unset)*/
 	for(x = threadIdx.x; x < numc; x += blockDim.x)
 		shared[x] = cons[x];
 	__syncthreads();
 	if(id < rows)
 	{
 		rowact1 = id * of1;
 		rowact = id * of2;
 		/*First group: operands may come from either table*/
 		for(x = nx; x < numc; x += 3)
 		{
 			op1 = shared[x+1];
 			if(op1 < 0)
 				op1 = dop1[rowact1 - op1 - 1];
 			else
 				op1 = dop2[rowact + op1];
 			op2 = shared[x+2];
 			if(op2 < 0)
 				op2 = dop1[rowact1 - op2 - 1];
 			else
 				op2 = dop2[rowact + op2];
 			/*Unknown operators never satisfy the disjunction*/
 			sat = false;
 			switch(shared[x] - BPOFFSET)
 			{
 				case SBG_EQ: sat = (op1 == op2); break;
 				case SBG_GT: sat = (op1 > op2); break;
 				case SBG_LT: sat = (op1 < op2); break;
 				case SBG_GE: sat = (op1 >= op2); break;
 				case SBG_LE: sat = (op1 <= op2); break;
 				case SBG_DF: sat = (op1 != op2); break;
 			}
 			if(sat)
 			{
 				res2[id] = 1;
 				/*One satisfied predicate is enough; the second group is still evaluated below*/
 				break;
 			}
 		}
 		/*Second group: operands are constants or columns of dop2*/
 		for(x = 0; x < nx; x += 3)
 		{
 			op1 = shared[x+1];
 			if(op1 < 0)
 				op1 *= -1;
 			else
 				op1 = dop2[rowact + op1];
 			op2 = shared[x+2];
 			if(op2 < 0)
 				op2 *= -1;
 			else
 				op2 = dop2[rowact + op2];
 			sat = false;
 			switch(shared[x])
 			{
 				case SBG_EQ: sat = (op1 == op2); break;
 				case SBG_GT: sat = (op1 > op2); break;
 				case SBG_LT: sat = (op1 < op2); break;
 				case SBG_GE: sat = (op1 >= op2); break;
 				case SBG_LE: sat = (op1 <= op2); break;
 				case SBG_DF: sat = (op1 != op2); break;
 			}
 			if(sat)
 			{
 				res[id] = 1;
 				return;
 			}
 		}
 	}
 }
 /*Mark all rows that comply with the comparison predicates using disjunctions (i.e. a row is
 marked if it complies with at least one predicate).
 'cons' holds 'numc' ints that are read as (operator, operand1, operand2) triples. A negative
 operand is a constant (its negation); a non-negative operand k reads dop1[id * of1 + k].
 res[id] is set to 1 for the first predicate that holds; rows that satisfy none are left untouched.
 The kernel indexes 'shared' up to numc ints, so the launch must supply at least that much
 dynamic shared memory.*/
 __global__ void bpredsorlogic2(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	int x, rowact, op1, op2;
 	bool sat;

 	/*Block-stride copy: every entry of 'cons' is staged even when numc is larger than the
 	number of threads in the block (a plain 'threadIdx.x < numc' test would leave the tail unset)*/
 	for(x = threadIdx.x; x < numc; x += blockDim.x)
 		shared[x] = cons[x];
 	__syncthreads();
 	if(id < rows)
 	{
 		rowact = id * of1;
 		for(x = 0; x < numc; x += 3)
 		{
 			op1 = shared[x+1];
 			if(op1 < 0)
 				op1 *= -1;
 			else
 				op1 = dop1[rowact + op1];
 			op2 = shared[x+2];
 			if(op2 < 0)
 				op2 *= -1;
 			else
 				op2 = dop1[rowact + op2];
 			/*Unknown operators never satisfy the disjunction*/
 			sat = false;
 			switch(shared[x])
 			{
 				case SBG_EQ: sat = (op1 == op2); break;
 				case SBG_GT: sat = (op1 > op2); break;
 				case SBG_LT: sat = (op1 < op2); break;
 				case SBG_GE: sat = (op1 >= op2); break;
 				case SBG_LE: sat = (op1 <= op2); break;
 				case SBG_DF: sat = (op1 != op2); break;
 			}
 			if(sat)
 			{
 				res[id] = 1;
 				return;
 			}
 		}
 	}
 }
 /*Unmark all rows that do not comply with the comparison predicates using disjunctions (i.e. a
 row is unmarked only if it complies with none of the predicates).
 'cons' holds 'numc' ints that are read as (operator, operand1, operand2) triples. A negative
 operand is a constant (its negation); a non-negative operand k reads dop1[id * of1 + k].
 Rows whose res[id] is already 0 are skipped. A row keeps its mark as soon as one predicate
 holds; otherwise res[id] is reset to 0.
 The kernel indexes 'shared' up to numc ints, so the launch must supply at least that much
 dynamic shared memory.*/
 __global__ void bpredsorlogic(int *dop1, int rows, int of1, int *cons, int numc, int *res)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	int x, rowact, op1, op2;
 	bool sat;

 	/*Block-stride copy: every entry of 'cons' is staged even when numc is larger than the
 	number of threads in the block (a plain 'threadIdx.x < numc' test would leave the tail unset)*/
 	for(x = threadIdx.x; x < numc; x += blockDim.x)
 		shared[x] = cons[x];
 	__syncthreads();
 	if(id < rows)
 	{
 		if(res[id] == 0)
 			return;
 		rowact = id * of1;
 		for(x = 0; x < numc; x += 3)
 		{
 			op1 = shared[x+1];
 			if(op1 < 0)
 				op1 *= -1;
 			else
 				op1 = dop1[rowact + op1];
 			op2 = shared[x+2];
 			if(op2 < 0)
 				op2 *= -1;
 			else
 				op2 = dop1[rowact + op2];
 			/*Unknown operators never satisfy the disjunction*/
 			sat = false;
 			switch(shared[x])
 			{
 				case SBG_EQ: sat = (op1 == op2); break;
 				case SBG_GT: sat = (op1 > op2); break;
 				case SBG_LT: sat = (op1 < op2); break;
 				case SBG_GE: sat = (op1 >= op2); break;
 				case SBG_LE: sat = (op1 <= op2); break;
 				case SBG_DF: sat = (op1 != op2); break;
 			}
 			/*One satisfied predicate keeps the row marked*/
 			if(sat)
 				return;
 		}
 		res[id] = 0;
 	}
 }
--- a/packages/cuda/cuda.c
+++ b/packages/cuda/cuda.c
@@ -6,19 +6,25 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <inttypes.h>
 #include "pred.h"
 #define MAXARG 100
 YAP_Atom AtomEq,
  AtomGt,
  AtomLt,
  AtomGe,
  AtomLe,
-  AtomDf;
+  AtomDf,
  AtomNt;
-predicate *facts[100]; /*Temporary solution to maintain facts and rules*/
+predicate *facts[MAXARG]; /*Temporary solution to maintain facts and rules*/
-predicate *rules[100];
+predicate *rules[MAXARG];
 int32_t cf = 0, cr = 0;
 char names[1024];
 // initialize CUDA system
 void Cuda_Initialize( void );
@@ -39,6 +45,19 @@ void init_cuda( void );
 //#define DEBUG_INTERFACE 1
 #ifdef ROCKIT
 static int32_t query[100];
 static int32_t qcont = 0;
 static int cuda_init_query(void)
 {
 	int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG1));
 	query[qcont] = pname;
 	qcont++;
 	query[qcont] = 0;
 	return TRUE;
 }
 #endif
 #if DEBUG_INTERFACE
 static void
 dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
@@ -83,8 +102,18 @@ int32_t Cuda_NewFacts(predicate *pe)
 #if DEBUG_INTERFACE
  dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
 #endif
 #ifdef ROCKIT
  if(cf >= 0)
  {
  	facts[cf] = pe;
 	cf++;
  }
 #else
  facts[cf] = pe;
  cf++;
 #endif
  return TRUE;
 }
@@ -115,7 +144,7 @@ int32_t Cuda_Erase(predicate *pe)
  return TRUE;
 }
-static YAP_Bool
+static int
 load_facts( void ) {
  int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
@@ -164,15 +193,18 @@ load_facts( void ) {
 static int currentFact = 0;
 static predicate *currentPred = NULL;
-static YAP_Bool
+static int
 cuda_init_facts( void ) {
  int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
-  int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
+  int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
  int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
  int32_t pname = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG3));
  predicate *pred;
 	strcat(names, YAP_AtomName(YAP_AtomOfTerm(YAP_ARG3)));
 	strcat(names, " ");
  if (!mat)
    return FALSE;
  if (YAP_IsVarTerm( YAP_ARG4)) {
@@ -198,14 +230,16 @@ cuda_init_facts( void ) {
  }
 }
-static YAP_Bool
+static int
 cuda_load_fact( void ) {
  YAP_Term th = YAP_ARG1;
-  int i, j;
+  int i = currentFact;
 #if defined(DATALOG) || defined(TUFFY)
  YAP_Term th = YAP_ARG1;
  int ncols = currentPred->num_columns;
  int j;
  int *mat = currentPred->address_host_table;
  i = currentFact;
  for (j = 0; j < ncols; j++) {
    YAP_Term ta = YAP_ArgOfTerm(j+1, th);
    if (YAP_IsAtomTerm(ta)) {
@@ -214,6 +248,8 @@ cuda_load_fact( void ) {
      mat[i*ncols+j] = YAP_IntOfTerm(ta);
    }
  }
 #endif
  i++;
  if (i == currentPred->num_rows) {
    Cuda_NewFacts(currentPred);
@@ -225,21 +261,26 @@ cuda_load_fact( void ) {
  return TRUE;
 }
-static YAP_Bool
+static int
 load_rule( void ) {
  // maximum of 2K symbols per rule, should be enough for ILP
-  int32_t vec[2048], *ptr = vec, *nvec;
+  int32_t vec[2048], *ptr = vec, *nvec, neg[2048];
  // qK different variables;
  YAP_Term vars[1024];
-  int32_t nvars = 0;
+  int32_t nvars = 0, x;
  int32_t ngoals = YAP_IntOfTerm(YAP_ARG1);   /* gives the number of goals */
  int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
  YAP_Term t3 = YAP_ARG3;
-  int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
+	YAP_Atom name = YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3)));
  int32_t pname = YAP_AtomToInt(name);
 	const char *strname = YAP_AtomName(name);
  predicate *pred;
  int32_t cont = 0;
  memset(neg, 0x0, 2048 * sizeof(int32_t));
  while(YAP_IsPairTerm(t3)) {
-    int32_t j = 0;
+    int32_t j = 0, m;
    YAP_Term th = YAP_HeadOfTerm(t3);
    YAP_Functor f = YAP_FunctorOfTerm( th );
    int32_t n = YAP_ArityOfFunctor( f ); 
@@ -257,8 +298,17 @@ load_rule( void ) {
      *ptr++ = SBG_LE;
    else if (at == AtomDf)
      *ptr++ = SBG_DF;
    else if (at == AtomNt)
 	{
      		neg[cont] = 1;
 		cont++;
 	}
    else
-      *ptr++ = YAP_AtomToInt( at );
+	{
      		*ptr++ = YAP_AtomToInt( at );
 		cont++;
 	}
    for (j = 0; j < n; j++) {
      YAP_Term ta = YAP_ArgOfTerm(j+1, th);
@@ -277,6 +327,34 @@ load_rule( void ) {
 	}
      } else if (YAP_IsAtomTerm(ta))  {
 	*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
      } else if (YAP_IsApplTerm(ta))  {
 	f = YAP_FunctorOfTerm( ta );
 	at = YAP_NameOfFunctor( f );
 	m = YAP_ArityOfFunctor( f );
 	*ptr++ = YAP_AtomToInt( at );
 	for (x = 0; x < m; x++) {
      		YAP_Term ta2 = YAP_ArgOfTerm(x+1, ta);
      		if (YAP_IsVarTerm(ta2)) {
 			int32_t k;
 			for (k = 0; k < nvars; k++) {
 	  			if (vars[k] == ta2) {
 	    				*ptr++ = k+1;
 	    				break;
 	  			}
 			}
 			if (k == nvars) {
 	  			vars[k] = ta2;
 	  			*ptr++ = k+1;
 	  			nvars++;
 			}
      		} else if (YAP_IsAtomTerm(ta2))  {
 			*ptr++ = -YAP_AtomToInt(YAP_AtomOfTerm(ta));
      		} else {
 			*ptr++ = -YAP_IntOfTerm(ta);
      		}
    	}
      } else {
 	*ptr++ = -YAP_IntOfTerm(ta);
      }
@@ -296,53 +374,136 @@ load_rule( void ) {
  pred->num_rows = ngoals;
  pred->num_columns = ncols;
  pred->is_fact = FALSE;
 	x = (strlen(strname) + 1) * sizeof(char);
 	pred->predname = (char *)malloc(x);
 	memcpy(pred->predname, strname, x); 
  nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
  memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
  pred->address_host_table =  nvec;
  pred->negatives = (int32_t *)malloc(sizeof(int32_t) * cont);
  memcpy(pred->negatives, neg, sizeof(int32_t) * cont);
  Cuda_NewRule( pred );
  return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
 }
-static YAP_Bool
+static int
 cuda_erase( void )
 {
  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
  return Cuda_Erase( ptr );
 }
-static YAP_Bool
+void setQuery(YAP_Term t1, int32_t **res)
 {
 	int32_t *query = (int32_t *)malloc(MAXARG * sizeof(int32_t));
 	int32_t x, y = 0, *itr;
 	predicate *ptr = NULL;
 	if(YAP_IsPairTerm(t1))
 	{
 		while(YAP_IsPairTerm(t1))
 		{
 			ptr = (predicate *)YAP_IntOfTerm(YAP_HeadOfTerm(t1));
 			query[y] = ptr->name;
 			itr = ptr->address_host_table;
 			x = 2;
 			while(itr[x] != 0)
 				x++;
 			query[y+1] = itr[x+1];
 			t1 = YAP_TailOfTerm(t1);
 			y+=2;
 		}
 	}
 	else
 	{
 		ptr = (predicate *)YAP_IntOfTerm(t1);
 		query[y] = ptr->name;
 		itr = ptr->address_host_table;
 		x = 2;
 		while(itr[x] != 0)
 			x++;
 		query[y+1] = itr[x+1];
 		y += 2;
 	}
 	query[y] = -1;
 	query[y+1] = -1;
 	*res = query;
 }
 static int
 cuda_eval( void )
 {
  int32_t *mat;
 #if defined(DATALOG) || defined(TUFFY)
 	int32_t *query = NULL;
 	setQuery(YAP_ARG1, &query);
 #endif
 	int32_t finalDR = YAP_IntOfTerm(YAP_ARG3);
  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, names, finalDR);
 #ifdef TUFFY
 	cf = 0;
 #endif
 #ifdef ROCKIT
 	if(cf > 0)
 		cf *= -1;
 #endif
 #if defined(TUFFY) || defined(ROCKIT)
 	cr = 0;
 	names[0] = '\0';
 	return FALSE;
 #else
  int32_t i;
  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
  int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
  int32_t ncols = ptr->num_columns;
  YAP_Term out = YAP_TermNil();
  YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
  YAP_Term vec[256];
-  int32_t i;
+
 	YAP_Atom at;
  if (n < 0)
    return FALSE;
  for (i=0; i<n; i++) {
    int32_t ni = ((n-1)-i)*ncols, j;
 	printf("%s(", YAP_AtomName(YAP_IntToAtom(ptr->name)));
    for (j=0; j<ncols; j++) {
      vec[j] = YAP_MkIntTerm(mat[ni+j]);
 	at = YAP_IntToAtom(mat[ni+j]);
 	if(at != NULL)
 		printf("%s", YAP_AtomName(at));
 	else
 		printf("%d", mat[ni+j]);	
 	if(j < (ncols - 1))
 		printf(",");
    }
    out = YAP_MkPairTerm(YAP_MkApplTerm( f, ncols, vec ), out);
 	printf(")\n");
  }
  if (n > 0)
    free( mat );
  return YAP_Unify(YAP_ARG2, out);
 #endif
 }
-static YAP_Bool
+static int
 cuda_coverage( void )
 {
  int32_t *mat;
-  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
+
-  int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
+#if defined(DATALOG) || defined(TUFFY)
-  int32_t ncols = ptr->num_columns;
+	int32_t *query = NULL;
 	setQuery(YAP_ARG1, &query);
 #endif
  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
  int32_t post = YAP_AtomToInt(YAP_AtomOfTerm(YAP_ARG2));
  int32_t i = n/2, min = 0, max = n-1;
  int32_t t0, t1;
@@ -384,11 +545,16 @@ cuda_coverage( void )
  } while ( TRUE );
 }
-static YAP_Bool cuda_count( void )
+static int cuda_count( void )
 {
  int32_t *mat;
-  predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
+
-  int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
+#if defined(DATALOG) || defined(TUFFY)
 	int32_t *query = NULL;
 	setQuery(YAP_ARG1, &query);
 #endif
  int32_t n = Cuda_Eval(facts, cf, rules, cr, query, & mat, 0, 0);
  if (n < 0)
    return FALSE;
@@ -396,7 +562,7 @@ static YAP_Bool cuda_count( void )
  return YAP_Unify(YAP_ARG2, YAP_MkIntTerm(n));
 }
-static YAP_Bool cuda_statistics( void )
+static int cuda_statistics( void )
 {
  Cuda_Statistics();
  return TRUE;
@@ -416,14 +582,20 @@ init_cuda(void)
  AtomGe = YAP_LookupAtom(">=");
  AtomLe = YAP_LookupAtom("=<");
  AtomDf = YAP_LookupAtom("\\=");
  AtomNt = YAP_LookupAtom("not");
  YAP_UserCPredicate("load_facts", load_facts, 4);
  YAP_UserCPredicate("cuda_init_facts", cuda_init_facts, 4);
  YAP_UserCPredicate("cuda_load_fact", cuda_load_fact, 1);
  YAP_UserCPredicate("load_rule", load_rule, 4);
  YAP_UserCPredicate("cuda_erase", cuda_erase, 1);
-  YAP_UserCPredicate("cuda_eval", cuda_eval, 2);
+  YAP_UserCPredicate("cuda_eval", cuda_eval, 3);
  YAP_UserCPredicate("cuda_coverage", cuda_coverage, 4);
  YAP_UserCPredicate("cuda_count", cuda_count, 2);
  YAP_UserCPredicate("cuda_statistics", cuda_statistics, 0);
 #ifdef ROCKIT
  YAP_UserCPredicate("cuda_init_query", cuda_init_query, 1);
 #endif
 }
--- a/packages/cuda/cuda.yap
+++ b/packages/cuda/cuda.yap
@@ -2,10 +2,11 @@
 		 cuda_inline/2,
 		 cuda_rule/2,
 		 cuda_erase/1,
-		 cuda_eval/2,
+		 cuda_eval/3,
 		 cuda_coverage/4,
 		 cuda_statistics/0,
-		 cuda_count/2]).
+		 cuda_count/2,
 		 cuda_query/1]).
 tell_warning :-
 	print_message(warning,functionality(cuda)).
@@ -40,7 +41,7 @@ count_answers(G, N) :-
 cuda_rule((Head :- Body) , IdRules) :-
 	body_to_list( Body, L, [], 1, N),
-	functor(Head, _Na, Ar),
+	functor(Head, Na, Ar),
 	load_rule( N, Ar, [Head|L], IdRules ).
@@ -54,3 +55,5 @@ body_to_list( B, NL, L, N0, N) :-
 body_to_list( B, [B|L], L, N0, N) :-
 	N is N0+1.
 cuda_query(Call) :-
 	cuda_init_query(Call).
--- a/packages/cuda/lista.cu
+++ b/packages/cuda/lista.cu
--- a/packages/cuda/lista.h
+++ b/packages/cuda/lista.h
@@ -25,8 +25,11 @@ typedef struct auxiliar{
 	int *numselfj;
 	int **wherejoin;
 	int *numjoin;
-	int3 num_bpreds;
+	int totalpreds;
-	int *builtin;
+	int **preds;
 	int2 *numpreds;
 	int *negatives;
 	char *rulename;
 	int gen_act;
 	int gen_ant;
 }rulenode;
--- a/packages/cuda/memory.cu
+++ b/packages/cuda/memory.cu
@@ -5,63 +5,101 @@
 #include <thrust/device_vector.h>
 #include "lista.h"
 #include "memory.h"
 #include "pred.h"
 #define MAX_REC 200
 #define HALF_REC (MAX_REC / 2)
 #define MAX_FIX_POINTS 100
 unsigned int avmem;
 memnode temp_storage[MAX_REC];
 /*List used to store information (address, size, etc.) about facts and rule results loaded in the GPU*/
 list<memnode> GPUmem;
 /*List used to store information about rule results offloaded from the GPU to the CPU*/
 list<memnode> CPUmem;
 /*Ordering predicate for the rule list: r1 is placed before r2 when r1's name is the greater one*/
 bool comparer(const rulenode &r1, const rulenode &r2)
 {
 	const bool r1first = r1.name > r2.name;
 	return r1first;
 }
 /*Search predicate on memory nodes: true when r1 belongs to an earlier iteration than r2*/
 bool compareiteration(const memnode &r1, const memnode &r2)
 {
 	const bool earlier = r1.iteration < r2.iteration;
 	return earlier;
 }
 /*Search predicate on memory nodes: true when r1's name is greater than r2's (descending name order)*/
 bool comparename(const memnode &r1, const memnode &r2)
 {
 	const bool r1first = r1.name > r2.name;
 	return r1first;
 }
-void calcular_mem(int dev)
+/*Linear search of 'name' fact*/
 {
 	cudaDeviceProp p;
 	cudaGetDeviceProperties(&p, dev);
 	avmem = p.totalGlobalMem;
 	temp_storage[0].dev_address = NULL;
 	temp_storage[0].size = 0;
 	temp_storage[HALF_REC].dev_address = NULL;
 	temp_storage[HALF_REC].size = 0;
 	//cout << "Initial memory available " << avmem << endl;
 }
 template<class InputIterator>
 InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
 {
 	while(first!=last) 
 	{
-		if(first->name == name) return first;
+		if(first->name == name && first->isrule == 0) return first;
 			++first;
 	}
 	return last;
 }
-list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum)
+/*Finds all results of rule 'name' in iteration 'itr' in both CPU and GPU memory. Every result found is removed from its respective list*/
 list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
 {
-	int x = 1, sum = 0;
+	int x = 0, sum = 0;
 	memnode temp;
-
+	list<memnode>::iterator i;
 	temp.name = name;
 	temp.iteration = itr;
 	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
 	while(rec.first != rec.second)
-	{		
+	{
-		
+		if(rec.first->name == name && rec.first->isrule == 1)
-		//cout << "itr = " << itr << " rec.first = " << rec.first->name << endl;	
+		{
-		
+			temp_storage[x] = *rec.first;
 			rec.first = GPUmem.erase(rec.first);
 			sum += temp_storage[x].rows;
 			x++;
 		}	
 		else
 			rec.first++;
 	}
 	*gpunum = x;
 	temp.name = name;
 	temp.isrule = 1;
 	i = GPUmem.insert(rec.first, temp);
 	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
 	while(rec.first != rec.second)
 	{				
 		if(rec.first->name == name && rec.first->isrule == 1)
 		{
 			temp_storage[x] = *rec.first;
 			rec.first = CPUmem.erase(rec.first);
 			sum += temp_storage[x].rows;
 			x++;
 		}	
 		else
 			rec.first++;
 	}
 	*totalrows = sum;
 	*cpunum = x;
 	return i;
 }
 list<memnode>::iterator buscarpornombrecpu(int name, int itr, int *totalrows, int *gpunum, int *cpunum)
 {
 	int x = 0, sum = 0;
 	memnode temp;
 	list<memnode>::iterator i;
 	temp.iteration = itr;
 	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
 	while(rec.first != rec.second)
 	{				
 		if(rec.first->name == name)
 		{
 			temp_storage[x] = *rec.first;
@@ -72,25 +110,14 @@ list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *
 		else
 			rec.first++;
 	}
-	//if(x > 1)
+
 	rec.first = GPUmem.insert(rec.first, temp);
 	*totalrows = sum;
 	*gpunum = x;
-	return rec.first;
+	temp.name = name;
-}
+	temp.isrule = 1;
-
+	rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
 int buscarpornombrecpu(int name, int itr, int *totalrows)
 {
 	int x = HALF_REC + 1, sum = 0;
 	memnode temp;
 	temp.iteration = itr;
 	pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
 	/*if(rec.first != rec.second)
 		cout << "bscnomcpu = " << rec.first->name << " " << rec.first->iteration << endl;*/
 	while(rec.first != rec.second)
-	{
+	{				
 		if(rec.first->name == name)
 		{
 			temp_storage[x] = *rec.first;
@@ -101,18 +128,24 @@ int buscarpornombrecpu(int name, int itr, int *totalrows)
 		else
 			rec.first++;
 	}
-	*totalrows += sum;
+	i = CPUmem.insert(rec.first, temp);
-	return x;
+	*totalrows = sum;
 	*cpunum = x;
 	return i;
 }
 /*Removes the least recently used memory block from GPU memory, sending it to CPU memory if it's a rule result. 
 If there are no used memory blocks in the GPU and we still don't have enough memory, the program exits with error*/
 void limpiar(const char s[], size_t sz)
 {
 	list<memnode>::iterator ini;
 	memnode temp;
 	size_t free, total;
 	if(GPUmem.size() == 0)
 	{
-		cerr << s << ": not enough GPU memory: have " << avmem << ", need " << sz << " bytes." << endl;
+		cudaMemGetInfo(&free,&total);
 		cerr << s << ": not enough GPU memory: have " << free << " of " << total << ", need " << sz << " bytes." << endl;
 		exit(1);
 	}		
@@ -122,80 +155,32 @@ void limpiar(const char s[], size_t sz)
 		temp = *ini;
 		temp.dev_address = (int *)malloc(ini->size);
 		cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
-		CPUmem.push_back(temp);
+		list<memnode>::iterator pos = lower_bound(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
 		CPUmem.insert(pos, temp);
 	}
-	liberar(ini->dev_address, ini->size);
+	cudaFree(ini->dev_address);
 	GPUmem.erase(ini);
 }
-void limpiartodo(int *p1, int *p2)
+/*Allocs 'size' amount of bytes in GPU memory. If not enough memory is available, removes least recently used memory blocks until 
 enough space is available*/
 void reservar(int **ptr, size_t size)
 {
-	list<memnode>::iterator ini;
+	size_t free, total;
 	memnode temp;
 	int cont = 0;
 	if(p1 != NULL)
 		cont++;	
 	if(p2 != NULL)
 		cont++;
 	ini = GPUmem.begin();
 	/*cout << "ANTES" << endl;
 	mostrar_memoria();
 	mostrar_memcpu();
 	cout << "FIN ANTES" << endl;*/
 	//cout << "mem = " << GPUmem.size() << " " << avmem << endl;
 	while(GPUmem.size() > cont)
 	{
 		if(ini->dev_address == p1 || ini->dev_address == p2)
 		{
 			ini++;
 			continue;
 		}
 		if(ini->isrule)
 		{
 			temp = *ini; 
 			temp.dev_address = (int *)malloc(ini->size);
 			cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
 			CPUmem.push_back(temp);
 		}
 		liberar(ini->dev_address, temp.size);
 		ini = GPUmem.erase(ini);
 	}
 	/*cout << "DESPUES" << endl;
 	mostrar_memoria();
 	mostrar_memcpu();
 	cout << "FIN DESPUES" << endl;*/
 	//cout << "memfinal = " << GPUmem.size() << " " << avmem << endl;
 }
 /*Frees the device buffer 'ptr' with cudaFree and credits 'size' bytes back to the
 available-memory counter 'avmem'. With DEBUG_MEM defined the release is logged to cerr.*/
 void liberar(int *ptr, int size)
 {
 	cudaFree(ptr);
 #ifdef DEBUG_MEM
 	cerr << "- " << ptr << " " << size << endl;
 #endif
 	avmem = avmem + size;
 }
 void reservar(int **ptr, int size)
 {
  //size_t free, total;
  //cudaMemGetInfo(      &free, &total	 );
  //	cerr << "? " << free << " " << size << endl;
        if (size == 0) { 
                *ptr = NULL; 
                return;
        }
-	while(avmem < size)
+
 	cudaMemGetInfo(&free, &total);
 	while(free < size)
 	{
 		cout << "Se limpio memoria " << free << " " << total << endl;
 		limpiar("not enough memory", size);
 		cudaMemGetInfo(&free, &total);
 	}
 	while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
 		limpiar("Error in memory allocation", size);
 	if (! *ptr ) {
@@ -205,11 +190,9 @@ void reservar(int **ptr, int size)
 	  cerr << "Exiting CUDA...." << endl;
 	  exit(1);
 	}
 	avmem -= size;
 	// cout << " " << avmem << endl;
 }
 /*Creates a new entry in the GPU memory list*/
 void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
 {
 	memnode temp;
@@ -222,6 +205,19 @@ void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
 	GPUmem.push_back(temp);
 }
 /*Creates a new entry at the end of the CPU memory list.
 The entry records the owner 'name', the buffer 'ptr', its row count, its size in bytes
 (rows * num_columns ints), the iteration 'itr' and the 'rule' flag.*/
 void registrarcpu(int name, int num_columns, int *ptr, int rows, int itr, int rule)
 {
 	memnode entry;

 	/*Identification of the block*/
 	entry.name = name;
 	entry.iteration = itr;
 	entry.isrule = rule;
 	/*Location and extent of the data*/
 	entry.dev_address = ptr;
 	entry.rows = rows;
 	entry.size = rows * num_columns * sizeof(int);
 	CPUmem.push_back(entry);
 }
 /*Updates the information of an element in a list*/
 template<class InputIterator>
 void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
 {
@@ -230,6 +226,7 @@ void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
 	i->size = rows * num_columns * sizeof(int);
 }
 /*Count the total number of rows generated by rule 'name' in iteration 'iter'*/
 int numrows(int name, int itr)
 {
 	int sum = 0;
@@ -252,16 +249,17 @@ int numrows(int name, int itr)
 	return sum;
 }
 	extern "C" void * YAP_IntToAtom(int);
 	extern  "C" char * YAP_AtomName(void *);
-
+/*Loads facts or rule results in GPU memory. If a fact is already in GPU memory, its pointer is simply returned. Otherwise, 
 memory is reserved and the fact is loaded. Rule results are loaded based on the current iteration 'itr' and both GPU and 
 CPU memories are searched for all instances of said results. The instances are combined into a single one in GPU memory.*/
 int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
 {
 	int numgpu, numcpu, totalrows = 0;
 	int *temp, x;
-	int size, itrant;
+	int size, itrant, inc = 0;
 	list<memnode>::iterator i;
 	memnode fact;
@@ -279,9 +277,6 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
 		}
 		size = num_rows * num_columns * sizeof(int);
 		reservar(&temp, size);
 #ifdef DEBUG_MEM
 		cerr << "+ " << temp << " temp  " << size << endl;
 #endif
 		cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
 		registrar(name, num_columns, temp, num_rows, itr, 0);
 		*ptr = temp;
@@ -290,28 +285,25 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
 	if(itr > 0)
 	{
 		itrant = itr - 1;
-		i = buscarpornombre(name, itrant, &totalrows, &numgpu);
+		i = buscarpornombre(name, itrant, &totalrows, &numgpu, &numcpu);
-		numcpu = buscarpornombrecpu(name, itrant, &totalrows);
+		if((numgpu == 1) && (numcpu == 1))
 		if((numgpu == 2) && (numcpu == (HALF_REC + 1)))
 		{
-			actualizar(num_columns, temp_storage[1].dev_address, temp_storage[1].rows, i);
+			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
-			*ptr = temp_storage[1].dev_address;
+			*ptr = temp_storage[0].dev_address;
-			return temp_storage[1].rows;
+			return temp_storage[0].rows;
 		}
 		size = totalrows * num_columns * sizeof(int);
 		reservar(&temp, size);
-#ifdef DEBUG_MEM
+		for(x = 0; x < numgpu; x++)
 		cerr << "+ " << temp << " temp 2  " << size << endl;
 #endif
 		for(x = 1; x < numgpu; x++)
 		{
-			cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
+			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
-			liberar(temp_storage[x].dev_address, temp_storage[x].size);
+			inc += temp_storage[x].size / sizeof(int);
 			cudaFree(temp_storage[x].dev_address);
 		}
-		for(x = HALF_REC + 1; x < numcpu; x++)
+		for(; x < numcpu; x++)
 		{
-			cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
+			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
 			inc += temp_storage[x].size / sizeof(int);
 			free(temp_storage[x].dev_address);
 		}
 		actualizar(num_columns, temp, totalrows, i);
@@ -321,9 +313,54 @@ int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_ho
 	return 0;
 }
 int cargarcpu(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
 {
 	int numgpu, numcpu, totalrows = 0;
 	int *temp, x;
 	int size, itrant, inc = 0;
 	list<memnode>::iterator i;
 	if(is_fact)
 	{
 		*ptr = address_host_table;
 		return num_rows;
 	}
 	if(itr > 0)
 	{
 		itrant = itr - 1;
 		i = buscarpornombrecpu(name, itrant, &totalrows, &numgpu, &numcpu);
 		if((numgpu == 0) && (numcpu == 1))
 		{
 			actualizar(num_columns, temp_storage[0].dev_address, temp_storage[0].rows, i);
 			*ptr = temp_storage[0].dev_address;
 			return temp_storage[0].rows;
 		}
 		size = totalrows * num_columns * sizeof(int);
 		temp = (int *)malloc(size);
 		for(x = 0; x < numgpu; x++)
 		{
 			cudaMemcpyAsync(temp + inc, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToHost);
 			inc += temp_storage[x].size / sizeof(int);
 			cudaFree(temp_storage[x].dev_address);
 		}
 		for(; x < numcpu; x++)
 		{
 			memcpy(temp + inc, temp_storage[x].dev_address, temp_storage[x].size);
 			inc += temp_storage[x].size / sizeof(int);
 			free(temp_storage[x].dev_address);
 		}
 		actualizar(num_columns, temp, totalrows, i);
 		*ptr = temp;
 		return totalrows;
 	}
 	return 0;
 }
 /*Loads all results of rule 'name' from both GPU and CPU memories into the GPU*/
 int cargafinal(int name, int cols, int **ptr)
 {
-	int *temp, *ini, cont = 0;
+	int *temp, *ini, cont = 0, numg = 0, numc = 0;
 	memnode bus;
 	bus.name = name;
 	GPUmem.sort(comparename);
@@ -335,6 +372,7 @@ int cargafinal(int name, int cols, int **ptr)
 	while(pos != endg && pos->name == name)
 	{
 		cont += pos->rows;
 		numg++;
 		pos++;
 	}
 	pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
@@ -342,15 +380,41 @@ int cargafinal(int name, int cols, int **ptr)
 	while(pos != endc && pos->name == name)
 	{
 		cont += pos->rows;
 		numc++;
 		pos++;
 	}
 	reservar(&temp, cont * cols * sizeof(int));
 #ifdef DEBUG_MEM
 	cerr << "+ " << temp << " temp 3 " << cont * cols * sizeof(int) << endl;
 #endif
 	ini = temp;	
 	if(numg == 0 && numc == 0)
 		return 0;
 	if(numg == 1 && numc == 0) 
 	{
 		pos = gpu;
 		*ptr = pos->dev_address;
 		cont = pos->rows;
 		GPUmem.erase(pos);
 		#ifdef TUFFY
 		return -cont;
 		#else
 		return cont;
 		#endif
 	}
 	if(numg == 0 && numc == 1)
 	{
 		pos = cpu;
 		cont = pos->rows;
 		#ifdef TUFFY
 		reservar(&temp, pos->size);
 		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
 		*ptr = temp;
 		#else
 		*ptr = pos->dev_address;
 		#endif
 		CPUmem.erase(pos);
 		return -cont;
 	}
 	reservar(&temp, cont * cols * sizeof(int));
 	ini = temp;
 	pos = gpu;
 	while(pos != endg && pos->name == name)
 	{
@@ -365,23 +429,13 @@ int cargafinal(int name, int cols, int **ptr)
 		temp += pos->size / sizeof(int);
 		pos++;
 	}
 	/*int x, y;
 	int *hop1 = (int *)malloc(cont * cols * sizeof(int));
 	cudaMemcpy(hop1, ini, cont * cols * sizeof(int), cudaMemcpyDeviceToHost);
 	cout << "select finala" << endl;
 	for(x = 0; x < cont; x++)
 	{
 		for(y = 0; y < cols; y++)
 			cout << hop1[x * cols + y] << " ";
 		cout << endl;
 	}
 	cout << "select finala" << endl;*/
 	*ptr = ini;
 	return cont;
 }
 /*Compares the results of the current iteration against the results of older iterations. 
 Used to avoid infinite computations when the result is not a single fixed-point, but an 
 orbit of points.*/
 bool generadas(int name, int filas, int cols, int itr)
 {
 	int r1, r2, x, fin;
@@ -401,46 +455,26 @@ bool generadas(int name, int filas, int cols, int itr)
 			thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
 			r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
 			thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);
 			/*int y;
 			int *a = (int *)malloc(r1 * cols * sizeof(int));
 			cudaMemcpy(a, dop1, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
 			for(x = 0; x < r1; x++)
 			{
 				for(y = 0; y < cols; y++)
 					cout << a[x * cols + y] << " ";
 			}
 			cout << endl;
 			cudaMemcpy(a, dop2, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
 			for(x = 0; x < r1; x++)
 			{
 				for(y = 0; y < cols; y++)
 					cout << a[x * cols + y] << " ";
 			}
 			cout << endl;
 			free(a);*/
 			if(thrust::equal(pt1, pt1 + r1, pt2) == true)
 				return true;
 		}
 	}
 	return false;
 }
 void mostrar_memoria()
 {
-	int x;
+	unsigned int x;
 	list<memnode>::iterator i = GPUmem.begin();
 	cout << "Memoria inicio GPU" << endl;
 	for(x = 0; x < GPUmem.size(); x++, i++)
-		cout << i->name << " " << i->iteration << " " << i->size << endl;
+		cout << i->name << " " << i->iteration << " " << i->isrule << " " << i->rows << " " << i->size << endl;
 	cout << "Memoria fin GPU" << endl;
 }
 void mostrar_memcpu()
 {
-	int x;
+	unsigned int x;
 	list<memnode>::iterator i = CPUmem.begin();
 	cout << "Memoria inicio CPU" << endl;
 	for(x = 0; x < CPUmem.size(); x++, i++)
@@ -448,53 +482,7 @@ void mostrar_memcpu()
 	cout << "Memoria fin CPU" << endl;
 }
-void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator last)
+/*Clear all rule results from both GPU and CPU memory*/
 {
 	GPUmem.sort(comparename);
 	CPUmem.sort(comparename);
 	list<memnode>::iterator gpu = GPUmem.begin();
 	list<memnode>::iterator cpu = CPUmem.begin();
 	int x, y, of, cols;
 	int *temp, cont = 0;
 	while(first != last)
 	{
 		while(first->name == gpu->name)
 		{
 			temp = (int *)malloc(gpu->size);
 			cudaMemcpy(temp, gpu->dev_address, gpu->size, cudaMemcpyDeviceToHost);
 			cols = gpu->size / (gpu->rows * sizeof(int));
 			cont += gpu->rows;
 			for(x = 0, of = 0; x < gpu->rows; x++)
 			{
 				for(y = 0; y < cols; y++, of++)
 					cout << temp[of] << " ";
 				cout << endl;
 			}
 			cudaFree(gpu->dev_address);
 #ifdef DEBUG_MEM
 			cerr << "- " << gpu->dev_address << " gpu->dev_address" << endl;
 #endif
 			free(temp);
 			gpu++;
 		}
 		while(first->name == cpu->name)
 		{
 			cols = cpu->size / (cpu->rows * sizeof(int));
 			cont += cpu->rows;
 			for(x = 0, of = 0; x < cpu->rows; x++)
 			{
 				for(y = 0; y < cols; y++, of++)
 					cout << cpu->dev_address[of] << " ";
 				cout << endl;
 			}
 			free(cpu->dev_address);
 			cpu++;
 		}
 		first++;
 	}
 	cout << cont << endl;
 }
 void clear_memory()
 {
 	list<memnode>::iterator ini;
@@ -503,15 +491,13 @@ void clear_memory()
 	fin = GPUmem.end();
 	while(ini != fin)
 	{
-	  if (ini->isrule) {
+		if(ini->isrule)
-	    cudaFree(ini->dev_address);
+		{
-#ifdef DEBUG_MEM
+			cudaFree(ini->dev_address);
-	    cerr << "- " << ini->dev_address << " ini->dev_address" << endl;
+			ini = GPUmem.erase(ini);
-#endif
+		}
-	    ini = GPUmem.erase(ini);
+		else
-	  } else {
+			ini++;
 	    ini++;
 	  }
 	}
 	ini = CPUmem.begin();
 	fin = CPUmem.end();
@@ -522,3 +508,68 @@ void clear_memory()
 	}
 	CPUmem.clear();
 }
 /*Clear everything from both GPU and CPU memory*/
 void clear_memory_all()
 {
 	list<memnode>::iterator ini;
 	list<memnode>::iterator fin;
       	ini = GPUmem.begin();
 	fin = GPUmem.end();
 	while(ini != fin)
 	{
 		cudaFree(ini->dev_address);
 		ini++;
 	}
 	GPUmem.clear();
 	ini = CPUmem.begin();
 	fin = CPUmem.end();
 	while(ini != fin)
 	{
 		free(ini->dev_address);
 		ini++;
 	}
 	CPUmem.clear();
 }
 /*Remove all instances of fact 'name' from both CPU and GPU memories*/
 void liberar(int name)
 {
 	list<memnode>::iterator i;
 	memnode fact;
 	i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
 	if(i != GPUmem.end())
 	{
 		fact = *i;
 		GPUmem.erase(i);
 		cudaFree(fact.dev_address);
 	}
 	i = buscarhecho(CPUmem.begin(), CPUmem.end(), name);
 	if(i != CPUmem.end())
 	{
 		fact = *i;
 		CPUmem.erase(i);
 		free(fact.dev_address);
 	}
 }
 /*Add all rows in 'dop1' to the fact 'name' by creating a new array capable of holding both.
 name: identifier of the fact, looked up in GPUmem with buscarhecho.
 dop1: rows to append; it is copied with cudaMemcpyDeviceToDevice, so it is treated as a
       device pointer.
 cols: number of columns of both the stored fact and 'dop1'.
 rows: number of rows in 'dop1'.
 If the fact has no entry in GPUmem nothing is done. Otherwise a new block is reserved, the
 old rows and then the new rows are copied into it, the old list entry is replaced by one
 registered for the new block, and the old block is released with cudaFree.*/
 void sumar(int name, int *dop1, int cols, int rows)
 {
 	list<memnode>::iterator i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
 	if(i == GPUmem.end())
 		return;

 	memnode fact = *i;
 	int newrows = rows + fact.rows;
 	/*Element counts are widened to size_t before multiplying so that large tables do not
 	overflow int arithmetic when the byte sizes are computed.*/
 	size_t offset = (size_t)fact.rows * cols;
 	size_t added = (size_t)rows * cols;
 	int *res;

 	reservar(&res, (size_t)newrows * cols * sizeof(int));
 	cudaMemcpyAsync(res, fact.dev_address, offset * sizeof(int), cudaMemcpyDeviceToDevice);
 	GPUmem.erase(i);
 	registrar(name, cols, res, newrows, 0, 0);
 	cudaMemcpyAsync(res + offset, dop1, added * sizeof(int), cudaMemcpyDeviceToDevice);
 	/*The old block is only released after both copies have been issued.*/
 	cudaFree(fact.dev_address);
 }
--- a/packages/cuda/memory.h
+++ b/packages/cuda/memory.h
@@ -1,26 +1,27 @@
 #ifndef _MEMORY_H_
 #define _MEMORY_H_
 //#include <thrust/device_vector.h>
 #include <list>
 #include <vector>
 #include "lista.h"
 using namespace std;
 //using namespace thrust;
-void calcular_mem(int);
+bool comparer(const rulenode&, const rulenode&);
 void liberar(int*, int);
 void limpiar(const char [], size_t);
 void limpiartodo(int*, int*);
 int cargar(int, int, int, int, int*, int**, int);
 int cargarcpu(int, int, int, int, int*, int**, int);
 int cargafinal(int, int, int**);
-void reservar(int**, int);
+void reservar(int**, size_t);
 void registrar(int, int, int*, int, int, int);
 void registrarcpu(int, int, int*, int, int, int);
 bool generadas(int, int, int, int);
 void sumar(int, int*, int, int);
 void liberar(int);
 void mostrar_memoria(void);
 void mostrar_memcpu(void);
 void clear_memory(void);
-void resultados(vector<rulenode>::iterator, vector<rulenode>::iterator);
+void clear_memory_all(void);
 #endif
--- a/packages/cuda/pred.h
+++ b/packages/cuda/pred.h
@@ -9,11 +9,17 @@ typedef struct Nodo{
 	int num_columns;
 	int is_fact;
 	int *address_host_table;
 	int *negatives;
 	char *predname;
 	double *weight;
 }gpunode;
 typedef gpunode predicate;
-// #define TIMER 1
+//#define TIMER 1
 #define DATALOG 1
 #define NUM_T 4
 #define INISIZE 1000000
 #if TIMER
 typedef struct Stats{
@@ -27,6 +33,8 @@ typedef struct Stats{
 extern statinfo cuda_stats;
 #endif
 /*Constants used to mark comparison predicates*/
 #define BPOFFSET (-6)
 #define SBG_EQ  (-1)
 #define SBG_GT  (-2)
 #define SBG_LT  (-3)
@@ -34,6 +42,6 @@ extern statinfo cuda_stats;
 #define SBG_LE  (-5)
 #define SBG_DF  (-6)
-int Cuda_Eval(predicate**, int, predicate**, int, predicate*, int**);
+int Cuda_Eval(predicate**, int, predicate**, int, int*, int**, char*, int);
 void  Cuda_Statistics( void );
 #endif
--- a/packages/cuda/selectproyect.cu
+++ b/packages/cuda/selectproyect.cu
@@ -1,10 +1,11 @@
 #include <thrust/device_vector.h>
 //#include <thrust/device_ptr.h>
 #include <thrust/scan.h>
 #include <stdlib.h>
 #include "memory.h"
 #include "bpreds.h"
-__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
+/*Mark all rows that comply with the selections*/
 __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
@@ -24,14 +25,14 @@ __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *
 		res[id] = 1;
 	}
 }
-
+/*If we already have an array of marks (perhaps because the selfjoin was applied first), 
-__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
+we unmark any rows that do not comply with the selections*/
 __global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res)
 {
- 	extern __shared__ int shared[];
+	extern __shared__ int shared[];
    	int *spos = &shared[numc];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	int x, rowact, posact;
-	if(threadIdx.x < (numc * 2))
+	if(threadIdx.x < numc)
 		shared[threadIdx.x] = cons[threadIdx.x];
 	__syncthreads();
 	if(id < rows)
@@ -39,10 +40,10 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
 		if(res[id] == 0)
 			return;
 		rowact = id * cols;
-		for(x = 0; x < numc; x++)
+		for(x = 0; x < numc; x += 2)
 		{
-			posact = rowact + spos[x];
+			posact = rowact + shared[x];
-			if(dop1[posact] != shared[x])
+			if(dop1[posact] != shared[x+1])
 			{
 				res[id] = 0;
 				return;
@@ -51,6 +52,7 @@ __global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int
 	}
 }
 /*Unmark all rows that do not comply with the selfjoins.*/
 __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
 {
 	extern __shared__ int shared[];
@@ -66,12 +68,12 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
 		pos = id * cols;
 		for(x = 0; x < cont; x++)
 		{
-			temp = shared[x];
+			temp = dop1[pos+shared[x]];
 			y = x + 1;
 			temp2 = shared[y];
 			while(temp2 > -1)
 			{
-				if(dop1[temp+pos] != dop1[temp2+pos])
+				if(temp != dop1[temp2+pos])
 				{
 					res[id] = 0;
 					return;
@@ -84,6 +86,7 @@ __global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, in
 	}
 }
 /*Mark all rows that comply with the selfjoins*/
 __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
 {
 	extern __shared__ int shared[];
@@ -97,12 +100,12 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
 		pos = id * cols;
 		for(x = 0; x < cont; x++)
 		{
-			temp = shared[x];
+			temp = dop1[pos+shared[x]];
 			y = x + 1;
 			temp2 = shared[y];
 			while(temp2 > -1)
 			{
-				if(dop1[temp+pos] != dop1[temp2+pos])
+				if(temp != dop1[temp2+pos])
 					return;
 				y++;
 				temp2 = shared[y];
@@ -113,6 +116,7 @@ __global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, i
 	}
 }
 /*Project all columns found in 'dhead' to a new array 'res'*/
 __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
 {
 	extern __shared__ int shared[];
@@ -130,76 +134,31 @@ __global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize,
 	}
 }
 /*Project all columns found in 'dhead' using only the rows marked as valid (i.e. those that complied with 
 selections, selfjoins, etc.). The array 'temp' holds the result of the prefix sum of said marks.*/
 __global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
 {
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	int pos, posr, x;
-	if(threadIdx.x < cols)
+	if(threadIdx.x < hsize)
 		shared[threadIdx.x] = dhead[threadIdx.x];
 	__syncthreads();
 	if(id < rows)
 	{		
-		posr = temp[id+1];
+		posr = temp[id];
-		if(temp[id] != posr && posr > 0)
+		if(temp[id+1] != posr)
 		{
 			pos = id * cols;
-			posr = (posr - 1) * hsize;			
+			posr *= hsize;			
 			for(x = 0; x < hsize; x++, posr++)
 				res[posr] = dop1[pos+shared[x]];
 		}
 	}
 }
-/*__global__ void removedup()
+/*Performs selections, selfjoins and comparison predicates when the rule has a single normal predicate.*/
-{
+int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *preds, int numpreds, int *project, int **ret, int ANDlogic)
 	extern __shared__ int shared[];
 	int id = blockIdx.x * blockDim.x + threadIdx.x;
 	if(threadIdx.x < cols)
 		shared[threadIdx.x] = dhead[threadIdx.x];
 	if(id < rows)
 	{
 	}
 }*/
 /*Binary functor combining two values:
   r2 > 0  -> r1 + r2 when r1 > -1, abs(r1) + r2 otherwise;
   r2 <= 0 -> -r1 when r1 > -1, r1 unchanged otherwise.*/
 template<typename T> /*to library*/
 struct suma : public binary_function<T,T,T>
 {
 	__host__ __device__ 
 	T operator()(const T &r1, const T &r2)
 	{
 		if(r2 > 0)
 		{
 			if(r1 > -1)
 				return r1 + r2;
 			return abs(r1) + r2;
 		}
 		if(r1 > -1)
 			return -r1;
 		return r1;
 	}
 };
 /*Returns the largest of the three integers a, b and c.*/
 int mayor(int a, int b, int c)
 {
 	int top = (a > b) ? a : b;
 	return (top > c) ? top : c;
 }
 int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
 {
 	int *fres = NULL, *temp = NULL;
 	int *dhead = NULL, tmplen;
@@ -209,30 +168,27 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 #if TIMER
 	cuda_stats.selects++;
 #endif
-	int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
+
 	int head_bytes = maximo(4, numselect, numselfj, numpreds, head_size) * sizeof(int);
 	reservar(&dhead, head_bytes);
 #ifdef DEBUG_MEM
 	cerr << "+ " << dhead << " dhead  " << head_bytes << endl;
 #endif
 	int blockllen = rows / 1024 + 1;
 	int numthreads = 1024;
 	//int numthreads = 32;
 	int blockllen = rows / numthreads + 1;
 	#ifdef ROCKIT
 		ANDlogic = 1;
 	#endif
 	//removerep(dop1, rows, cols, dhead,) 
 	if(numselect > 0)
 	{		
 		tmplen = rows + 1;
 		size2 = tmplen * sizeof(int);
 		reservar(&temp, size2);
 #ifdef DEBUG_MEM
 		cerr << "+ " << temp << " temp  select " << size2 << endl;
 #endif
 		cudaMemset(temp, 0, size2);
 		size = numselect * sizeof(int);
 		cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
-		marcar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
+		marcar2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);
 		if(numselfj > 0)
 		{
@@ -241,6 +197,16 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 			samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
 		}
 		if(numpreds > 0)
 		{
 			size = numpreds * sizeof(int);
 			cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
 			if(ANDlogic)
 				bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
 			else
 				bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
 		}
 		res = thrust::device_pointer_cast(temp);
 		thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
 		num = res[rows];
@@ -249,13 +215,10 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 		size = head_size * sizeof(int);
 		reservar(&fres, num * size);
 #ifdef DEBUG_MEM
 		cerr << "+ " << fres << " fres select  " << num*size << endl;
 #endif
 		cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
 		llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
-		liberar(dhead, head_bytes);
+		cudaFree(dhead);
-		liberar(temp, size2);
+		cudaFree(temp);
 		*ret = fres;
 		return num;
 	}
@@ -266,15 +229,22 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 			tmplen = rows + 1;
 			size2 = tmplen * sizeof(int);
 			reservar(&temp, size2);
 #ifdef DEBUG_MEM
 			cerr << "+ " << temp << " temp select  " << size2 << endl;
 #endif
 			cudaMemset(temp, 0, size2);
 			size = numselfj * sizeof(int);
 			cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
 			samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
 			if(numpreds > 0)
 			{
 				size = numpreds * sizeof(int);
 				cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
 				if(ANDlogic)
 					bpredsnormal<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
 				else
 					bpredsorlogic<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
 			}
 			res = thrust::device_pointer_cast(temp);
 			thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
 			num = res[rows];
@@ -283,28 +253,54 @@ int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int
 			size = head_size * sizeof(int);
 			reservar(&fres, num * size);
 #ifdef DEBUG_MEM
 			cerr << "+ " << fres << " fres select again  " << num*size << endl;
 #endif
 			cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
 			llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
-			liberar(dhead, head_bytes);
+			cudaFree(dhead);
-			liberar(temp, size2);
+			cudaFree(temp);
 			*ret = fres;
 			return num;
 		}
 		else
 		{
-			size = head_size * sizeof(int);
+			if(numpreds > 0)
-			reservar(&fres, rows * size);
+			{
-#ifdef DEBUG_MEM
+				tmplen = rows + 1;
-			cerr << "+ " << fres << " fres select third  " << rows*size << endl;
+				size2 = tmplen * sizeof(int);
-#endif
+				reservar(&temp, size2);
-			cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
+				cudaMemset(temp, 0, size2);		
-			proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
+				size = numpreds * sizeof(int);
-			liberar(dhead, head_bytes);
+				cudaMemcpy(dhead, preds, size, cudaMemcpyHostToDevice);
-			*ret = fres;
+
-			return rows;
+				if(ANDlogic)
 					bpredsnormal2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);					
 				else
 					bpredsorlogic2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numpreds, temp + 1);
 				res = thrust::device_pointer_cast(temp);
 				thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
 				num = res[rows];
 				if(num == 0)
 					return 0;
 				size = head_size * sizeof(int);
 				reservar(&fres, num * size);
 				cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
 				llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
 				cudaFree(dhead);
 				cudaFree(temp);
 				*ret = fres;
 				return num;
 			}
 			else
 			{
 				size = head_size * sizeof(int);
 				reservar(&fres, rows * size);
 				cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
 				proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
 				cudaFree(dhead);
 				*ret = fres;
 				return rows;
 			}
 		}
 	}
 }
--- a/packages/cuda/treeb.cu
+++ b/packages/cuda/treeb.cu
--- a/packages/cuda/union2.cu
+++ b/packages/cuda/union2.cu
--- a/packages/python/python.c
+++ b/packages/python/python.c
@@ -2158,12 +2158,14 @@ static foreign_t init_python(void) {
  char **argv;
  term_t t = PL_new_term_ref();
  YAP_Argv(&argv);
  if (argv) {
 #if PY_MAJOR_VERSION < 3
-  Py_SetProgramName(argv[0]);
+    Py_SetProgramName(argv[0]);
 #else
-  wchar_t *buf = Py_DecodeLocale(argv[0], NULL);
+    wchar_t *buf = Py_DecodeLocale(argv[0], NULL);
-  Py_SetProgramName(buf);
+    Py_SetProgramName(buf);
 #endif
  }
  Py_Initialize();
  py_Main = PyImport_AddModule("__main__");
  py_Builtin = PyImport_AddModule("__builtin__");
`@@ -279,4 +279,5 @@ static void RestoreWorker(int wid USES_REGS) {`




	`}`	`}`