yap-6.3/library/dialect/swi/os/pl-read.c


#include "pl-ctype.h"
#include "pl-dtoa.h"
#include "pl-incl.h"
#include "pl-umap.c" /* Unicode map */
#include "pl-utf8.h"

#include "pl-read.h" /* read structure */

/**
 *  @defgroup ReadTerm Read Term from Streams
 *  @ingroup  InputOutput
 * @{
 */

static bool isStringStream(IOSTREAM *s) {
  return s->functions == &Sstringfunctions;
}

void init_read_data(ReadData _PL_rd, IOSTREAM *in ARG_LD) {
  CACHE_REGS
  memset(_PL_rd, 0, sizeof(*_PL_rd)); /* optimise! */

  _PL_rd->magic = RD_MAGIC;
  _PL_rd->varnames = 0;
  _PL_rd->module = Yap_GetModuleEntry(CurrentModule);
  _PL_rd->exception = 0;
  _PL_rd->stream = in;
  _PL_rd->has_exception = 0;
  _PL_rd->module = MODULE_parse;
  _PL_rd->flags = _PL_rd->module->flags; /* change for options! */
  _PL_rd->styleCheck = LOCAL_debugstatus.styleCheck;
  _PL_rd->on_error = AtomError;
  _PL_rd->backquoted_string = truePrologFlag(PLFLAG_BACKQUOTED_STRING);
}

void free_read_data(ReadData _PL_rd) {}

static int read_term(term_t t, ReadData _PL_rd ARG_LD) {
  return Yap_read_term(t, rb.stream, _PL_rd);
}

static void addUTF8Buffer(Buffer b, int c);

static void addUTF8Buffer(Buffer b, int c) {
  if (c >= 0x80) {
    char buf[6];
    char *p, *end;

    end = utf8_put_char(buf, c);
    for (p = buf; p < end; p++) {
      addBuffer(b, *p & 0xff, char);
    }
  } else {
    addBuffer(b, c, char);
  }
}

/*******************************
 *     UNICODE CLASSIFIERS      *
 *******************************/

#define CharTypeW(c, t, w)                                                     \
  ((unsigned)(c) <= 0xff ? (_PL_char_types[(unsigned)(c)] t)                   \
                         : (uflagsW(c) & (w)))

#define PlBlankW(c) CharTypeW(c, == SP, U_SEPARATOR)
#define PlUpperW(c) CharTypeW(c, == UC, U_UPPERCASE)
#define PlIdStartW(c)                                                          \
  (c <= 0xff ? (isLower(c) || isUpper(c) || c == '_') : uflagsW(c) & U_ID_START)
#define PlIdContW(c) CharTypeW(c, >= UC, U_ID_CONTINUE)
#define PlSymbolW(c) CharTypeW(c, == SY, U_SYMBOL)
#define PlPunctW(c) CharTypeW(c, == PU, 0)
#define PlSoloW(c) CharTypeW(c, == SO, U_OTHER)
#define PlInvalidW(c) (uflagsW(c) == 0)

int f_is_prolog_var_start(wint_t c) {
  return PlIdStartW(c) && (PlUpperW(c) || c == '_');
}

int f_is_prolog_atom_start(wint_t c) { return PlIdStartW(c) != 0; }

int f_is_prolog_identifier_continue(wint_t c) {
  return PlIdContW(c) || c == '_';
}

int f_is_prolog_symbol(wint_t c) { return PlSymbolW(c) != 0; }

int unicode_separator(pl_wchar_t c) { return PlBlankW(c); }

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   FALSE	return false
   TRUE	redo
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int reportReadError(ReadData rd) {
  if (rd->on_error == ATOM_error)
    return PL_raise_exception(rd->exception);
  if (rd->on_error != ATOM_quiet)
    printMessage(ATOM_error, PL_TERM, rd->exception);
  PL_clear_exception();

  if (rd->on_error == ATOM_dec10)
    return TRUE;

  return FALSE;
}

/* static int */
/* reportSingletons(ReadData rd, singletons, Atom amod, Atom aname, UInt arity)
 */
/* {  */
/*   printMessage(ATOM_warning, PL_FUNCTOR_CHARS,  */
/* 	       "singletons", 2, */
/* 	       PL_TERM, singletons,  */
/* 	       PL_TERM, mod, */
/* 	       PL_FUNCTOR_divide2, */
/* 	       PL_ATOM, name, */
/* 	       PL_INT, arity); */

/*   return FALSE; */
/* } */

/********************************
 *           RAW READING         *
 *********************************/

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   Scan the input, give prompts when necessary and return a char *  holding
   a  stripped  version of the next term.  Contiguous white space is mapped
   on a single space, block and % ... \n comment  is  deleted.   Memory  is
   claimed automatically en enlarged if necessary.

   (char *) NULL is returned on a syntax error.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

#define syntaxError(what, rd)                                                  \
  {                                                                            \
    errorWarning(what, 0, rd);                                                 \
    fail;                                                                      \
  }

static term_t makeErrorTerm(const char *id_str, term_t id_term,
                            ReadData _PL_rd) {
  GET_LD
  term_t ex, loc = 0; /* keep compiler happy */
  unsigned char const *s, *ll = NULL;
  int rc = TRUE;

  if (!(ex = PL_new_term_ref()) || !(loc = PL_new_term_ref()))
    rc = FALSE;

  if (rc && !id_term) {
    if (!(id_term = PL_new_term_ref()) || !PL_put_atom_chars(id_term, id_str))
      rc = FALSE;
  }

  if (rc)
    rc = PL_unify_term(ex, PL_FUNCTOR, FUNCTOR_error2, PL_FUNCTOR,
                       FUNCTOR_syntax_error1, PL_TERM, id_term, PL_TERM, loc);

  source_char_no += last_token_start - rdbase;
  for (s = rdbase; s < last_token_start; s++) {
    if (*s == '\n') {
      source_line_no++;
      ll = s + 1;
    }
  }

  if (ll) {
    int lp = 0;

    for (s = ll; s < last_token_start; s++) {
      switch (*s) {
      case '\b':
        if (lp > 0)
          lp--;
        break;
      case '\t':
        lp |= 7;
      default:
        lp++;
      }
    }

    source_line_pos = lp;
  }

  if (rc) {
    if (ReadingSource) /* reading a file */
    {
      rc = PL_unify_term(loc, PL_FUNCTOR, FUNCTOR_file4, PL_ATOM,
                         source_file_name, PL_INT, source_line_no, PL_INT,
                         source_line_pos, PL_INT64, source_char_no);
    } else if (isStringStream(rb.stream)) {
      size_t pos;

      pos = utf8_strlen((char *)rdbase, last_token_start - rdbase);

      rc = PL_unify_term(loc, PL_FUNCTOR, FUNCTOR_string2, PL_UTF8_STRING,
                         rdbase, PL_INT, (int)pos);
    } else /* any stream */
    {
      term_t stream;

      if (!(stream = PL_new_term_ref()) ||
          !PL_unify_stream_or_alias(stream, rb.stream) ||
          !PL_unify_term(loc, PL_FUNCTOR, FUNCTOR_stream4, PL_TERM, stream,
                         PL_INT, source_line_no, PL_INT, source_line_pos,
                         PL_INT64, source_char_no))
        rc = FALSE;
    }
  }

  return (rc ? ex : (term_t)0);
}

static bool errorWarning(const char *id_str, term_t id_term, ReadData _PL_rd) {
  GET_LD
  term_t ex;

  LD->exception.processing = TRUE; /* allow using spare stack */

  ex = makeErrorTerm(id_str, id_term, _PL_rd);

  if (_PL_rd) {
    _PL_rd->has_exception = TRUE;
    if (ex)
      PL_put_term(_PL_rd->exception, ex);
    else
      PL_put_term(_PL_rd->exception, exception_term);
  } else {
    if (ex)
      PL_raise_exception(ex);
  }

  fail;
}

static void clearBuffer(ReadData _PL_rd) {
  if (rb.size == 0) {
    rb.base = rb.fast;
    rb.size = sizeof(rb.fast);
  }
  rb.end = rb.base + rb.size;
  rdbase = rb.here = rb.base;

  _PL_rd->posp = rdbase;
  _PL_rd->posi = 0;
}

static void growToBuffer(int c, ReadData _PL_rd) {
  if (rb.base == rb.fast) /* intptr_t clause: jump to use malloc() */
  {
    rb.base = PL_malloc(FASTBUFFERSIZE * 2);
    memmove(rb.base, rb.fast, FASTBUFFERSIZE);
  } else
    rb.base = PL_realloc(rb.base, rb.size * 2);

  DEBUG(8, Sdprintf("Reallocated read buffer at %ld\n", (intptr_t)rb.base));
  _PL_rd->posp = rdbase = rb.base;
  rb.here = rb.base + rb.size;
  rb.size *= 2;
  rb.end = rb.base + rb.size;
  _PL_rd->posi = 0;

  *rb.here++ = c;
}

static inline void addByteToBuffer(int c, ReadData _PL_rd) {
  c &= 0xff;

  if (rb.here >= rb.end)
    growToBuffer(c, _PL_rd);
  else
    *rb.here++ = c;
}

static void addToBuffer(int c, ReadData _PL_rd) {
  if (c <= 0x7f) {
    addByteToBuffer(c, _PL_rd);
  } else {
    char buf[10];
    char *s, *e;

    e = utf8_put_char(buf, c);
    for (s = buf; s < e; s++)
      addByteToBuffer(*s, _PL_rd);
  }
}

#if __YAP_PROLOG__
void Yap_setCurrentSourceLocation(void *rd) {
  GET_LD
  setCurrentSourceLocation(rd PASS_LD);
}
#endif

static inline int getchr__(ReadData _PL_rd) {
  int c = Sgetcode(rb.stream);

  if (!_PL_rd->char_conversion_table || c < 0 || c >= 256)
    return c;

  return _PL_rd->char_conversion_table[c];
}

#define getchr() getchr__(_PL_rd)
#define getchrq() Sgetcode(rb.stream)

#define ensure_space(c)                                                        \
  {                                                                            \
    if (something_read && (c == '\n' || !isBlank(rb.here[-1])))                \
      addToBuffer(c, _PL_rd);                                                  \
  }
#define set_start_line                                                         \
  {                                                                            \
    if (!something_read) {                                                     \
      setCurrentSourceLocation(_PL_rd PASS_LD);                                \
      something_read++;                                                        \
    }                                                                          \
  }

#ifdef O_QUASIQUOTATIONS
/** '$qq_open'(+QQRange, -Stream) is det.

    Opens a quasi-quoted memory range.

    @arg QQRange is a term '$quasi_quotation'(ReadData, Start, Length)
    @arg Stream  is a UTF-8 encoded string, whose position indication
    reflects the location in the real file.
*/

static PRED_IMPL("$qq_open", 2, qq_open, 0) {
  PRED_LD

  if (PL_is_functor(A1, FUNCTOR_dquasi_quotation3)) {
    void *ptr;
    char *start;
    size_t len;
    term_t arg = PL_new_term_ref();
    IOSTREAM *s;

    if (PL_get_arg(1, A1, arg) && PL_get_pointer_ex(arg, &ptr) &&
        PL_get_arg(2, A1, arg) && PL_get_intptr(arg, (intptr_t *)&start) &&
        PL_get_arg(3, A1, arg) &&
        PL_get_intptr(arg, (intptr_t *)&len)) { // source_location pos;
      if ((s = Sopenmem(&start, &len, "r")))
        s->encoding = ENC_UTF8;

      return PL_unify_stream(A2, s);
    }
  } else
    PL_type_error("read_context", A1);

  return FALSE;
}

static int parse_quasi_quotations(ReadData _PL_rd ARG_LD) {
  if (_PL_rd->qq_tail) {
    term_t av;
    int rc;

    if (!PL_unify_nil(_PL_rd->qq_tail))
      return FALSE;

    if (!_PL_rd->quasi_quotations) {
      if ((av = PL_new_term_refs(2)) && PL_put_term(av + 0, _PL_rd->qq) &&
#if __YAP_PROLOG__
          PL_put_atom(av + 1, YAP_SWIAtomFromAtom(_PL_rd->module->AtomOfME)) &&
#else
          PL_put_atom(av + 1, _PL_rd->module->name) &&
#endif
          PL_cons_functor_v(av, FUNCTOR_dparse_quasi_quotations2, av)) {
        term_t ex;
        rc = callProlog(MODULE_system, av + 0, PL_Q_CATCH_EXCEPTION, &ex);
        if (rc)
          return TRUE;
        _PL_rd->exception = ex;
        _PL_rd->has_exception = TRUE;
      }
      return FALSE;
    } else
      return TRUE;
  } else if (_PL_rd->quasi_quotations) /* user option, but no quotes */
  {
    return PL_unify_nil(_PL_rd->quasi_quotations);
  } else
    return TRUE;
}

#endif /*O_QUASIQUOTATIONS*/

#define rawSyntaxError(what)                                                   \
  {                                                                            \
    addToBuffer(EOS, _PL_rd);                                                  \
    rdbase = rb.base, last_token_start = rb.here - 1;                          \
    syntaxError(what, _PL_rd);                                                 \
  }

static int raw_read_quoted(int q, ReadData _PL_rd) {
  int newlines = 0;
  int c;

  addToBuffer(q, _PL_rd);
  while ((c = getchrq()) != EOF && c != q) {
    if (c == '\\' && DO_CHARESCAPE) {
      int base;
      addToBuffer(c, _PL_rd);

      switch ((c = getchrq())) {
      case EOF:
        goto eofinstr;
      case 'u': /* \uXXXX */
      case 'U': /* \UXXXXXXXX */
        addToBuffer(c, _PL_rd);
        continue;
      case 'x': /* \xNN\ */
        addToBuffer(c, _PL_rd);
        c = getchrq();
        if (c == EOF)
          goto eofinstr;
        if (digitValue(16, c) >= 0) {
          base = 16;
          addToBuffer(c, _PL_rd);

        xdigits:
          c = getchrq();
          while (digitValue(base, c) >= 0) {
            addToBuffer(c, _PL_rd);
            c = getchrq();
          }
        }
        if (c == EOF)
          goto eofinstr;
        addToBuffer(c, _PL_rd);
        if (c == q)
          return TRUE;
        continue;
      default:
        addToBuffer(c, _PL_rd);
        if (digitValue(8, c) >= 0) /* \NNN\ */
        {
          base = 8;
          goto xdigits;
        } else if (c == '\n') /* \<newline> */
        {
          c = getchrq();
          if (c == EOF)
            goto eofinstr;
          addToBuffer(c, _PL_rd);
          if (c == q)
            return TRUE;
        }
        continue; /* \symbolic-control-char */
      }
    } else if (c == '\n' && newlines++ > MAXNEWLINES &&
               (_PL_rd->styleCheck & LONGATOM_CHECK)) {
      rawSyntaxError("long_string");
    }
    addToBuffer(c, _PL_rd);
  }
  if (c == EOF) {
  eofinstr:
    rawSyntaxError("end_of_file_in_string");
  }
  addToBuffer(c, _PL_rd);

  return TRUE;
}

static int add_comment(Buffer b, IOPOS *pos, ReadData _PL_rd ARG_LD) {
  term_t head = PL_new_term_ref();

  assert(_PL_rd->comments);
  if (!PL_unify_list(_PL_rd->comments, head, _PL_rd->comments))
    return FALSE;
  if (pos) {
    if (!PL_unify_term(head, PL_FUNCTOR, FUNCTOR_minus2, PL_FUNCTOR,
                       FUNCTOR_stream_position4, PL_INT64, pos->charno, PL_INT,
                       pos->lineno, PL_INT, pos->linepos, PL_INT, 0,
                       PL_UTF8_STRING, baseBuffer(b, char)))
      return FALSE;
  } else {
    if (!PL_unify_term(head, PL_FUNCTOR, FUNCTOR_minus2, ATOM_minus,
                       PL_UTF8_STRING, baseBuffer(b, char)))
      return FALSE;
  }

  PL_reset_term_refs(head);
  return TRUE;
}

static void setErrorLocation(IOPOS *pos, ReadData _PL_rd) {
  if (pos) {
    GET_LD

    source_char_no = pos->charno;
    source_line_pos = pos->linepos;
    source_line_no = pos->lineno;
  }
  rb.here = rb.base + 1; /* see rawSyntaxError() */
}

static unsigned char *raw_read2(ReadData _PL_rd ARG_LD) {
  int c;
  bool something_read = FALSE;
  bool dotseen = FALSE;
  IOPOS pbuf; /* comment start */
  IOPOS *pos;

  clearBuffer(_PL_rd); /* clear input buffer */
  _PL_rd->strictness = truePrologFlag(PLFLAG_ISO);
  source_line_no = -1;

  for (;;) {
    c = getchr();

  handle_c:
    switch (c) {
    case EOF:
      if (isStringStream(rb.stream)) /* do not require '. ' when */
      {
        addToBuffer(' ', _PL_rd); /* reading from a string */
        addToBuffer('.', _PL_rd);
        addToBuffer(' ', _PL_rd);
        addToBuffer(EOS, _PL_rd);
        return rb.base;
      }
      if (something_read) {
        if (dotseen) /* term.<EOF> */
        {
          if (rb.here - rb.base == 1)
            rawSyntaxError("end_of_clause");
          ensure_space(' ');
          addToBuffer(EOS, _PL_rd);
          return rb.base;
        }
        rawSyntaxError("end_of_file");
      }
      if (Sfpasteof(rb.stream)) {
        term_t stream;

        LD->exception.processing = TRUE;
        stream = PL_new_term_ref();
        PL_unify_stream_or_alias(stream, rb.stream);
        PL_error(NULL, 0, NULL, ERR_PERMISSION, ATOM_input,
                 ATOM_past_end_of_stream, stream);
        return NULL;
      }
      set_start_line;
      strcpy((char *)rb.base, "end_of_file. ");
      rb.here = rb.base + 14;
      return rb.base;
    case '/':
      if (rb.stream->position) {
        pbuf = *rb.stream->position;
        pbuf.charno--;
        pbuf.linepos--;
        pos = &pbuf;
      } else
        pos = NULL;

      c = getchr();
      if (c == '*') {
        int last;
        int level = 1;
        union {
          tmp_buffer ctmpbuf;
          buffer tmpbuf;
        } u;
        Buffer cbuf;

        if (_PL_rd->comments) {
          initBuffer(&u.ctmpbuf);
          cbuf = &u.tmpbuf;
          addUTF8Buffer(cbuf, '/');
          addUTF8Buffer(cbuf, '*');
        } else {
          cbuf = NULL;
        }

        if ((last = getchr()) == EOF) {
          if (cbuf)
            discardBuffer(cbuf);
          setErrorLocation(pos, _PL_rd);
          rawSyntaxError("end_of_file_in_block_comment");
        }
        if (cbuf)
          addUTF8Buffer(cbuf, last);

        if (something_read) {
          addToBuffer(' ', _PL_rd); /* positions */
          addToBuffer(' ', _PL_rd);
          addToBuffer(last == '\n' ? last : ' ', _PL_rd);
        }

        for (;;) {
          c = getchr();

          if (cbuf)
            addUTF8Buffer(cbuf, c);

          switch (c) {
          case EOF:
            if (cbuf)
              discardBuffer(cbuf);
            setErrorLocation(pos, _PL_rd);
            rawSyntaxError("end_of_file_in_block_comment");
#ifndef __YAP_PROLOG__
          /* YAP does not support comment levels in original scanner */
          case '*':
            if (last == '/')
              level++;
            break;
#endif
          case '/':
            if (last == '*' && (--level == 0 || _PL_rd->strictness)) {
              if (cbuf) {
                addUTF8Buffer(cbuf, EOS);
                if (!add_comment(cbuf, pos, _PL_rd PASS_LD)) {
                  discardBuffer(cbuf);
                  return FALSE;
                }
                discardBuffer(cbuf);
              }
              c = ' ';
              goto handle_c;
            }
            break;
          }
          if (something_read)
            addToBuffer(c == '\n' ? c : ' ', _PL_rd);
          last = c;
        }
      } else {
        set_start_line;
        addToBuffer('/', _PL_rd);
        if (isSymbolW(c)) {
          while (c != EOF && isSymbolW(c) &&
                 !(c == '`' && _PL_rd->backquoted_string)) {
            addToBuffer(c, _PL_rd);
            c = getchr();
          }
        }
        dotseen = FALSE;
        goto handle_c;
      }
    case '%':
      if (something_read)
        addToBuffer(' ', _PL_rd);
      if (_PL_rd->comments) {
        union {
          tmp_buffer ctmpbuf;
          buffer uctmpbuf;
        } u;
        Buffer cbuf;

        if (rb.stream->position) {
          pbuf = *rb.stream->position;
          pbuf.charno--;
          pbuf.linepos--;
          pos = &pbuf;
        } else
          pos = NULL;

        initBuffer(&u.ctmpbuf);
        cbuf = (Buffer)&u.uctmpbuf;
        addUTF8Buffer(cbuf, '%');

        for (;;) {
          while ((c = getchr()) != EOF && c != '\n') {
            addUTF8Buffer(cbuf, c);
            if (something_read) /* record positions */
              addToBuffer(' ', _PL_rd);
          }
          if (c == '\n') {
            int c2 = Speekcode(rb.stream);

            if (c2 == '%') {
              if (something_read) {
                addToBuffer(c, _PL_rd);
                addToBuffer(' ', _PL_rd);
              }
              addUTF8Buffer(cbuf, c);
              c = Sgetcode(rb.stream);
              assert(c == c2);
              addUTF8Buffer(cbuf, c);
              continue;
            }
          }
          break;
        }
        addUTF8Buffer(cbuf, EOS);
        if (!add_comment(cbuf, pos, _PL_rd PASS_LD)) {
          discardBuffer(cbuf);
          return FALSE;
        }
        discardBuffer(cbuf);
      } else {
        while ((c = getchr()) != EOF && c != '\n') {
          if (something_read) /* record positions */
            addToBuffer(' ', _PL_rd);
        }
      }
      goto handle_c; /* is the newline */
    case '\'':
      if (rb.here > rb.base && isDigit(rb.here[-1])) {
        cucharp bs = &rb.here[-1];

        if (bs > rb.base && isDigit(bs[-1]))
          bs--;
        if (bs > rb.base && isSign(bs[-1]))
          bs--;

        if (bs == rb.base || !PlIdContW(bs[-1])) {
          int base;

          if (isSign(bs[0]))
            bs++;
          base = atoi((char *)bs);

          if (base <= 36) {
            if (base == 0) /* 0'<c> */
            {
              addToBuffer(c, _PL_rd);
              {
                if ((c = getchr()) != EOF) {
                  addToBuffer(c, _PL_rd);
                  if (c == '\\') /* 0'\<c> */
                  {
                    if ((c = getchr()) != EOF)
                      addToBuffer(c, _PL_rd);
                  } else if (c == '\'') /* 0'' */
                  {
                    if ((c = getchr()) != EOF) {
                      if (c == '\'')
                        addToBuffer(c, _PL_rd);
                      else
                        goto handle_c;
                    }
                  }
                  break;
                }
                rawSyntaxError("end_of_file");
              }
            } else {
              int c2 = Speekcode(rb.stream);

              if (c2 != EOF) {
                if (digitValue(base, c2) >= 0) {
                  addToBuffer(c, _PL_rd);
                  c = Sgetcode(rb.stream);
                  addToBuffer(c, _PL_rd);
                  dotseen = FALSE;
                  break;
                }
                goto sqatom;
              }
              rawSyntaxError("end_of_file");
            }
          }
        }
      }

    sqatom:
      set_start_line;
      if (!raw_read_quoted(c, _PL_rd))
        fail;
      dotseen = FALSE;
      break;
    case '"':
      set_start_line;
      if (!raw_read_quoted(c, _PL_rd))
        fail;
      dotseen = FALSE;
      break;
    case '.':
      addToBuffer(c, _PL_rd);
      set_start_line;
      dotseen++;
      c = getchr();
      if (isSymbolW(c)) {
        while (c != EOF && isSymbolW(c) &&
               !(c == '`' && _PL_rd->backquoted_string)) {
          addToBuffer(c, _PL_rd);
          c = getchr();
        }
        dotseen = FALSE;
      }
      goto handle_c;
    case '`':
      if (_PL_rd->backquoted_string) {
        set_start_line;
        if (!raw_read_quoted(c, _PL_rd))
          fail;
        dotseen = FALSE;
        break;
      }
    /*FALLTHROUGH*/
    default:
      if (c < 0xff) {
        switch (_PL_char_types[c]) {
        case SP:
        case CT:
        blank:
          if (dotseen) {
            if (rb.here - rb.base == 1)
              rawSyntaxError("end_of_clause");
            ensure_space(c);
            addToBuffer(EOS, _PL_rd);
            return rb.base;
          }
          do {
            if (something_read) /* positions, \0 --> ' ' */
              addToBuffer(c ? c : ' ', _PL_rd);
            else
              ensure_space(c);
            c = getchr();
          } while (c != EOF && PlBlankW(c));
          goto handle_c;
        case SY:
          set_start_line;
          do {
            addToBuffer(c, _PL_rd);
            c = getchr();
            if (c == '`' && _PL_rd->backquoted_string)
              break;
          } while (c != EOF && c <= 0xff && isSymbol(c));
          /* TBD: wide symbols? */
          dotseen = FALSE;
          goto handle_c;
        case LC:
        case UC:
          set_start_line;
          do {
            addToBuffer(c, _PL_rd);
            c = getchr();
          } while (c != EOF && PlIdContW(c));
          dotseen = FALSE;
          goto handle_c;
        default:
          addToBuffer(c, _PL_rd);
          dotseen = FALSE;
          set_start_line;
        }
      } else /* > 255 */
      {
        if (PlIdStartW(c)) {
          set_start_line;
          do {
            addToBuffer(c, _PL_rd);
            c = getchr();
          } while (c != EOF && PlIdContW(c));
          dotseen = FALSE;
          goto handle_c;
        } else if (PlBlankW(c)) {
          goto blank;
        } else {
          addToBuffer(c, _PL_rd);
          dotseen = FALSE;
          set_start_line;
        }
      }
    }
  }
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   Raw reading returns a string in  UTF-8   notation  of the a Prolog term.
   Comment inside the term is  replaced  by   spaces  or  newline to ensure
   proper reconstruction of source locations. Comment   before  the term is
   skipped.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static unsigned char *raw_read(ReadData _PL_rd, unsigned char **endp ARG_LD) {
  unsigned char *s;

  if ((rb.stream->flags & SIO_ISATTY) && Sfileno(rb.stream) >= 0) {
    ttybuf tab;

    PushTty(rb.stream, &tab, TTY_SAVE); /* make sure tty is sane */
    PopTty(rb.stream, &ttytab, FALSE);
    s = raw_read2(_PL_rd PASS_LD);
    PopTty(rb.stream, &tab, TRUE);
  } else {
    s = raw_read2(_PL_rd PASS_LD);
  }

  if (endp)
    *endp = _PL_rd->_rb.here;

  return s;
}

static void callCommentHook(term_t comments, term_t tpos, term_t term) {
  GET_LD
  fid_t fid;
  term_t av;

  if ((fid = PL_open_foreign_frame()) && (av = PL_new_term_refs(3))) {
    qid_t qid;

    PL_put_term(av + 0, comments);
    PL_put_term(av + 1, tpos);
    PL_put_term(av + 2, term);

    if ((qid = PL_open_query(NULL, PL_Q_NODEBUG | PL_Q_CATCH_EXCEPTION,
                             (predicate_t)PredCommentHook, av))) {
      term_t ex;

      if (!PL_next_solution(qid) && (ex = PL_exception(qid)))
        printMessage(ATOM_error, PL_TERM, ex);

      PL_close_query(qid);
    }
    PL_discard_foreign_frame(fid);
  }
}

/********************************
 *       PROLOG CONNECTION       *
 *********************************/

static unsigned char *backSkipUTF8(unsigned const char *start,
                                   unsigned const char *end, int *chr) {
  const unsigned char *s;

  for (s = end - 1; s > start && (*s & 0x80); s--)
    ;
  utf8_get_char((char *)s, chr);

  return (unsigned char *)s;
}

static unsigned char *backSkipBlanks(const unsigned char *start,
                                     const unsigned char *end) {
  const unsigned char *s;

  for (; end > start; end = s) {
    unsigned char *e;
    int chr;

    for (s = end - 1; s > start && ISUTF8_CB(*s); s--)
      ;
    e = (unsigned char *)utf8_get_char((char *)s, &chr);
    assert(e == end);
    if (!PlBlankW(chr))
      return (unsigned char *)end;
  }

  return (unsigned char *)start;
}

static inline ucharp skipSpaces(cucharp in) {
  int chr;
  ucharp s;

  for (; *in; in = s) {
    s = utf8_get_uchar(in, &chr);

    if (!PlBlankW(chr))
      return (ucharp)in;
  }

  return (ucharp)in;
}

word pl_raw_read2(term_t from, term_t term) {
  GET_LD
  unsigned char *s, *e, *t2, *top;
  read_data rd;
  word rval;
  IOSTREAM *in;
  int chr;
  PL_chars_t txt;

  if (!getTextInputStream(from, &in))
    fail;

  init_read_data(&rd, in PASS_LD);
  if (!(s = raw_read(&rd, &e PASS_LD))) {
    rval = PL_raise_exception(rd.exception);
    goto out;
  }

  /* strip the input from blanks */
  top = backSkipBlanks(s, e - 1);
  t2 = backSkipUTF8(s, top, &chr);
  if (chr == '.')
    top = backSkipBlanks(s, t2);
  /* watch for "0' ." */
  if (top < e && top - 2 >= s && top[-1] == '\'' && top[-2] == '0')
    top++;
  *top = EOS;
  s = skipSpaces(s);

  txt.text.t = (char *)s;
  txt.length = top - s;
  txt.storage = PL_CHARS_HEAP;
  txt.encoding = ENC_UTF8;
  txt.canonical = FALSE;

  rval = PL_unify_text(term, 0, &txt, PL_ATOM);
  LD->read_varnames = rd.varnames;

out:
  free_read_data(&rd);
  if (Sferror(in))
    return streamStatus(in);
  else
    PL_release_stream(in);

  return rval;
}

static int unify_read_term_position(term_t tpos ARG_LD) {
  if (tpos && source_line_no > 0) {
    return PL_unify_term(tpos, PL_FUNCTOR, FUNCTOR_stream_position4, PL_INT64,
                         source_char_no, PL_INT, source_line_no, PL_INT,
                         source_line_pos, PL_INT64, source_byte_no);
  } else {
    return TRUE;
  }
}

static const opt_spec read_clause_options[] = {
    {ATOM_variable_names, OPT_TERM},
    {ATOM_term_position, OPT_TERM},
    {ATOM_subterm_positions, OPT_TERM},
    {ATOM_process_comment, OPT_BOOL},
    {ATOM_comments, OPT_TERM},
    {ATOM_syntax_errors, OPT_ATOM},
    {NULL_ATOM, 0}};

/** read_clause(+Stream:stream, -Clause:clause, +Options:list)

    Like read_term/3, but uses current compiler options.

    Options:
    * variable_names(-Names)
    * process_comment(+Boolean)
    * comments(-List)
    * syntax_errors(+Atom)
    * term_position(-Position)
    * subterm_positions(-Layout)
    */
static int read_clause(IOSTREAM *s, term_t term, term_t options ARG_LD) {
  read_data rd;
  int rval;
  fid_t fid;
  term_t tpos = 0;
  term_t comments = 0;
  term_t opt_comments = 0;
  int process_comment;
  atom_t syntax_errors = ATOM_dec10;

  {
    OPCODE ophook = PredCommentHook->OpcodeOfPred;
    if (ophook == UNDEF_OPCODE || ophook == FAIL_OPCODE)
      process_comment = FALSE;
    else
      process_comment = TRUE;
  }
  if (!(fid = PL_open_foreign_frame()))
    return FALSE;

retry:
  init_read_data(&rd, s PASS_LD);

  if (options &&
      !scan_options(options, 0, ATOM_read_option, read_clause_options,
                    &rd.varnames, &tpos, &rd.subtpos, &process_comment,
                    &opt_comments, &syntax_errors)) {
    PL_close_foreign_frame(fid);
    return FALSE;
  }

  if (opt_comments) {
    comments = PL_new_term_ref();
  } else if (process_comment) {
    if (!tpos)
      tpos = PL_new_term_ref();
    comments = PL_new_term_ref();
  }

  REGS_FROM_LD
  rd.module = Yap_GetModuleEntry(LOCAL_SourceModule);
  if (comments)
    rd.comments = PL_copy_term_ref(comments);
  rd.on_error = syntax_errors;
  rd.singles = rd.styleCheck & SINGLETON_CHECK ? 1 : 0;
  if ((rval = read_term(term, &rd PASS_LD)) &&
      (!tpos || (rval = unify_read_term_position(tpos PASS_LD)))) {
    PredEntry *ap;
    LD->read_varnames = rd.varnames;

    if (rd.singles) {
      // warning, singletons([X=_A],f(X,Y,Z), pos).
      printMessage(ATOM_warning, PL_FUNCTOR_CHARS, "singletons", 3, PL_TERM,
                   rd.singles, PL_TERM, term, PL_TERM, tpos);
    }
    ap = Yap_PredFromClause(Yap_GetFromSlot(term) PASS_REGS);
    if (rd.styleCheck & (DISCONTIGUOUS_STYLE | MULTIPLE_CHECK) && ap != NULL) {
      if (rd.styleCheck & (DISCONTIGUOUS_STYLE) &&
          Yap_discontiguous(ap PASS_REGS)) {
        printMessage(ATOM_warning, PL_FUNCTOR_CHARS, "discontiguous", 2,
                     PL_TERM, term, PL_TERM, tpos);
      }
      if (rd.styleCheck & (MULTIPLE_CHECK) && Yap_multiple(ap PASS_REGS)) {
        printMessage(ATOM_warning, PL_FUNCTOR_CHARS, "multiple", 3, PL_TERM,
                     term, PL_TERM, tpos, PL_ATOM,
                     YAP_SWIAtomFromAtom(ap->src.OwnerFile));
      }
    }
    if (rd.comments && (rval = PL_unify_nil(rd.comments))) {
      if (opt_comments)
        rval = PL_unify(opt_comments, comments);
      else if (!PL_get_nil(comments))
        callCommentHook(comments, tpos, term);
    }
  } else {
    if (rd.has_exception && reportReadError(&rd)) {
      PL_rewind_foreign_frame(fid);
      free_read_data(&rd);
      goto retry;
    }
  }
  free_read_data(&rd);

  return rval;
}

static PRED_IMPL("read_clause", 3, read_clause, 0) {
  PRED_LD
  int rc;
  IOSTREAM *s;

  if (!getTextInputStream(A1, &s))
    return FALSE;
  rc = read_clause(s, A2, A3 PASS_LD);
  if (Sferror(s))
    return streamStatus(s);
  else
    PL_release_stream(s);

  return rc;
}

word pl_raw_read(term_t term) { return pl_raw_read2(0, term); }

static const opt_spec read_term_options[] = {
    {ATOM_variable_names, OPT_TERM},
    {ATOM_variables, OPT_TERM},
    {ATOM_singletons, OPT_TERM},
    {ATOM_term_position, OPT_TERM},
    //  		{ ATOM_subterm_positions, OPT_TERM },
    {ATOM_character_escapes, OPT_BOOL},
    {ATOM_double_quotes, OPT_ATOM},
    {ATOM_module, OPT_ATOM},
    {ATOM_syntax_errors, OPT_ATOM},
    {ATOM_backquoted_string, OPT_BOOL},
    {ATOM_comments, OPT_TERM},
    {ATOM_process_comment, OPT_BOOL},
#ifdef O_QUASIQUOTATIONS
    {ATOM_quasi_quotations, OPT_TERM},
#endif
    {ATOM_cycles, OPT_BOOL},
    {NULL_ATOM, 0}};

static foreign_t read_term_from_stream(IOSTREAM *s, term_t term,
                                       term_t options ARG_LD) {
  term_t tpos = 0;
  term_t comments = 0;
  term_t opt_comments = 0;
  int process_comment;
  int rval;
  atom_t w;
  read_data rd;
  int charescapes = -1;
  atom_t dq = NULL_ATOM;
  atom_t mname = NULL_ATOM;
  fid_t fid = PL_open_foreign_frame();

  if (!fid)
    return FALSE;
retry:
  init_read_data(&rd, s PASS_LD);

  if (!scan_options(options, 0, ATOM_read_option, read_term_options,
                    &rd.varnames, &rd.variables, &rd.singles, &tpos,
                    //		&rd.subtpos,
                    &charescapes, &dq, &mname, &rd.on_error,
                    &rd.backquoted_string, &opt_comments, &process_comment,
#ifdef O_QUASIQUOTATIONS
                    &rd.quasi_quotations,
#endif
                    &rd.cycles)) {
    PL_discard_foreign_frame(fid);
    free_read_data(&rd);
    return FALSE;
  }

  // yap specific, do not call process comment if undefined
  if (process_comment) {
    OPCODE ophook = PredCommentHook->OpcodeOfPred;
    if (ophook == UNDEF_OPCODE || ophook == FAIL_OPCODE)
      process_comment = FALSE;
  }

  if (opt_comments) {
    comments = PL_new_term_ref();
  } else if (process_comment) {
    if (!tpos)
      tpos = PL_new_term_ref();
    comments = PL_new_term_ref();
  }

  if (mname) {
    rd.module = lookupModule(mname);
    rd.flags = rd.module->flags;
  }

  if (charescapes != -1) {
    if (charescapes)
      set(&rd, M_CHARESCAPE);
    else
      clear(&rd, M_CHARESCAPE);
  }
  if (dq) {
    if (!setDoubleQuotes(dq, &rd.flags))
      return FALSE;
  }
  if (rd.singles && PL_get_atom(rd.singles, &w) && w == ATOM_warning)
    rd.singles = 1;

  if (comments)
    rd.comments = PL_copy_term_ref(comments);

  rval = read_term(term, &rd PASS_LD);
  if (Sferror(s)) {
    free_read_data(&rd);
    return FALSE;
  }
  LD->read_varnames = rd.varnames;
#ifdef O_QUASIQUOTATIONS
  if (rval)
    rval = parse_quasi_quotations(&rd PASS_LD);
#endif
  if (rval) {
    if (tpos)
      rval = unify_read_term_position(tpos PASS_LD);
    if (rval) {
      if (opt_comments)
        rval = PL_unify(opt_comments, comments);
      else if (comments && !PL_get_nil(comments))
        callCommentHook(comments, tpos, term);
    }
  } else {
    if (rd.has_exception && reportReadError(&rd)) {
      PL_rewind_foreign_frame(fid);
      free_read_data(&rd);
      goto retry;
    }
  }
  free_read_data(&rd);

  return rval;
}

/** @pred  read_term(+ _Stream_,- _T_,+ _Options_) is iso

    Reads term  _T_ from stream  _Stream_ with execution controlled by the
    same options as read_term/2.


*/
static PRED_IMPL("read_term", 3, read_term, PL_FA_ISO) {
  PRED_LD
  IOSTREAM *s;

  if (getTextInputStream(A1, &s)) {
    if (read_term_from_stream(s, A2, A3 PASS_LD))
      return PL_release_stream(s);
    if (Sferror(s))
      return streamStatus(s);
    PL_release_stream(s);
    return FALSE;
  }

  return FALSE;
}

/** read_term(-Term, +Options) is det.
 */

/** @pred read_term(- _T_,+ _Options_) is iso


    Reads term  _T_ from the current input stream with execution
    controlled by the following options:

    + comments(- _Comments_)

    Unify _Comments_ with a list of string terms including comments before
    and within the term.

    + module( + _Module_)

    Read term using _Module_ as source module.

    + quasi_quotations(-List)

    Unify _List_ with the quasi-quotations present in the term.

    + term_position(- _Position_)

    Unify  _Position_ with a term describing the position of the stream
    at the start of parse. Use stream_position_data/3 to obtain extra
    information.

    + singletons(- _Names_)

    Unify  _Names_ with a list of the form  _Name=Var_, where
    _Name_ is the name of a non-anonymous singleton variable in the
    original term, and `Var` is the variable's representation in
    YAP.
    The variables occur in left-to-right traversal order.

    + syntax_errors(+ _Val_)

    Control action to be taken after syntax errors. See yap_flag/2
    for detailed information.

    + variables(- _Names_)

    Unify  _Names_ with a list of the form  _Name=Var_, where  _Name_ is
    the name of a non-anonymous variable in the original term, and  _Var_
    is the variable's representation in YAP.
    The variables occur in left-to-right traversal order.


*/
static PRED_IMPL("read_term", 2, read_term, PL_FA_ISO) {
  PRED_LD
  IOSTREAM *s;

  if (getTextInputStream(0, &s)) {
    if (read_term_from_stream(s, A1, A2 PASS_LD))
      return PL_release_stream(s);
    if (Sferror(s))
      return streamStatus(s);
    PL_release_stream(s);
    return FALSE;
  }

  return FALSE;
}

/*******************************
 *	   TERM <->ATOM		*
 *******************************/

static int atom_to_term(term_t atom, term_t term, term_t bindings) {
  GET_LD
  PL_chars_t txt;

  if (!bindings && PL_is_variable(atom)) /* term_to_atom(+, -) */
  {
    char buf[1024];
    size_t bufsize = sizeof(buf);
    int rval;
    char *s = buf;
    IOSTREAM *stream;
    PL_chars_t txt;

    stream = Sopenmem(&s, &bufsize, "w");
    stream->encoding = ENC_UTF8;
    PL_write_term(stream, term, 1200, PL_WRT_QUOTED);
    Sflush(stream);

    txt.text.t = s;
    txt.length = bufsize;
    txt.storage = PL_CHARS_HEAP;
    txt.encoding = ENC_UTF8;
    txt.canonical = FALSE;
    rval = PL_unify_text(atom, 0, &txt, PL_ATOM);

    Sclose(stream);
    if (s != buf)
      Sfree(s);

    return rval;
  }

  if (PL_get_text(atom, &txt, CVT_ALL | CVT_EXCEPTION)) {
    GET_LD
    read_data rd;
    int rval;
    IOSTREAM *stream;
    source_location oldsrc = LD->read_source;

    stream = Sopen_text(&txt, "r");

    init_read_data(&rd, stream PASS_LD);
    if (bindings && (PL_is_variable(bindings) || PL_is_list(bindings)))
      rd.varnames = bindings;
    else if (bindings)
      return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_list, bindings);

    if (!(rval = read_term(term, &rd PASS_LD)) && rd.has_exception)
      rval = PL_raise_exception(rd.exception);
    free_read_data(&rd);
    Sclose(stream);
    LD->read_source = oldsrc;

    //   getchar();
    return rval;
  }

  fail;
}

Term Yap_CharsToTerm(const char *s, size_t *lenp, Term *bindingsp) {
  GET_LD;
  read_data rd;
  int rval;
  IOSTREAM *stream;
  source_location oldsrc = LD->read_source;

  stream = Sopen_string(0, (char *)s, strlen(s), "r");

  init_read_data(&rd, stream PASS_LD);
  rd.varnames = bindings;
  term_t tt = Yap_NewSlots(1);

  if (!(rval = read_term(tt, &rd PASS_LD)) && rd.has_exception) {
    rval = PL_raise_exception(rd.exception);
    return 0L;
  }
  free_read_data(&rd);
  Sclose(stream);
  LD->read_source = oldsrc;

  //   getchar();
  return Yap_GetFromSlot(tt);
}

/** @pred atom_to_term(+ _Atom_, - _Term_, - _Bindings_)


    Use  _Atom_ as input to read_term/2 using the option `variable_names` and
   return the read term in  _Term_ and the variable bindings in  _Bindings_.
   _Bindings_ is a list of `Name = Var` couples, thus providing access to the
   actual variable names. See also read_term/2. If Atom has no valid syntax, a
   syntax_error exception is raised.


*/
static PRED_IMPL("atom_to_term", 3, atom_to_term, 0) {
  return atom_to_term(A1, A2, A3);
}

static PRED_IMPL("term_to_atom", 2, term_to_atom, 0) {
  return atom_to_term(A2, A1, 0);
}

static PRED_IMPL("$context_variables", 1, context_variables, 0) {
  CACHE_REGS
  if (LOCAL_VarNames == (CELL)0)
    return Yap_unify(TermNil, ARG1);
  return Yap_unify(LOCAL_VarNames, ARG1);
}

static PRED_IMPL("$set_source", 2, set_source, 0) {
  GET_LD
  atom_t at;
  term_t a = PL_new_term_ref();

  if (!PL_get_atom(A1, &at))
    return FALSE;
  source_file_name = at;
  if (!PL_get_arg(1, A2, a) || !PL_get_int64(a, &source_char_no) ||
      !PL_get_arg(2, A2, a) || !PL_get_long(a, &source_line_no) ||
      !PL_get_arg(3, A2, a) || !PL_get_long(a, &source_line_pos) ||
      !PL_get_arg(4, A2, a) || !PL_get_int64(a, &source_byte_no)) {
    return FALSE;
  }
  return TRUE;
}

int PL_chars_to_term(const char *s, term_t t) {
  GET_LD
  read_data rd;
  int rval;
  IOSTREAM *stream = Sopen_string(NULL, (char *)s, -1, "r");
  source_location oldsrc = LD->read_source;

  init_read_data(&rd, stream PASS_LD);
  PL_put_variable(t);
  if (!(rval = read_term(t, &rd PASS_LD)) && rd.has_exception)
    PL_put_term(t, rd.exception);
  LOCAL_VarNames = rd.varnames;
  free_read_data(&rd);
  Sclose(stream);
  LD->read_source = oldsrc;

  return rval;
}

/*******************************
 *      PUBLISH PREDICATES	*
 *******************************/

BeginPredDefs(read) PRED_DEF("read_term", 3, read_term, PL_FA_ISO)
    PRED_DEF("read_term", 2, read_term, PL_FA_ISO)
        PRED_DEF("read_clause", 3, read_clause, 0)
            PRED_DEF("atom_to_term", 3, atom_to_term, 0)
                PRED_DEF("term_to_atom", 2, term_to_atom, 0)
                    PRED_DEF("$context_variables", 1, context_variables, 0)
                        PRED_DEF("$set_source", 2, set_source, 0)
#ifdef O_QUASIQUOTATIONS
                            PRED_DEF("$qq_open", 2, qq_open, 0)
#endif
                                EndPredDefs

    //! @}