#include "pl-incl.h"
#include "pl-ctype.h"
#include "pl-utf8.h"
#include "pl-dtoa.h"
#include "pl-umap.c"			/* Unicode map */

#include "pl-read.h"			/* read structure */

/**
 *  @defgroup ReadTerm Read Term from Streams
 *  @ingroup  InputOutput
 * @{
 */

static bool
isStringStream(IOSTREAM *s)
{ return s->functions == &Sstringfunctions;
}



void
init_read_data(ReadData _PL_rd, IOSTREAM *in ARG_LD)
{  CACHE_REGS
    memset(_PL_rd, 0, sizeof(*_PL_rd));	/* optimise! */

  _PL_rd->magic = RD_MAGIC;
  _PL_rd->varnames = 0;
  _PL_rd->module = Yap_GetModuleEntry(CurrentModule);
  _PL_rd->exception = 0;
  _PL_rd->stream = in;
  _PL_rd->has_exception = 0;
  _PL_rd->module = MODULE_parse;
  _PL_rd->flags  = _PL_rd->module->flags; /* change for options! */
  _PL_rd->styleCheck = LOCAL_debugstatus.styleCheck;
  _PL_rd->on_error = AtomError;
  _PL_rd->backquoted_string = truePrologFlag(PLFLAG_BACKQUOTED_STRING);
}

void
free_read_data(ReadData _PL_rd)
{
}

static int
read_term(term_t t, ReadData _PL_rd ARG_LD)
{
  return Yap_read_term(t, rb.stream, _PL_rd);
}


static void	  addUTF8Buffer(Buffer b, int c);

static void
addUTF8Buffer(Buffer b, int c)
{ if ( c >= 0x80 )
    { char buf[6];
      char *p, *end;

      end = utf8_put_char(buf, c);
      for(p=buf; p<end; p++)
	{ addBuffer(b, *p&0xff, char);
	}
    } else
    { addBuffer(b, c, char);
    }
}

/*******************************
 *     UNICODE CLASSIFIERS      *
 *******************************/

#define CharTypeW(c, t, w)					\
  ((unsigned)(c) <= 0xff ? (_PL_char_types[(unsigned)(c)] t)	\
   : (uflagsW(c) & (w)))

#define PlBlankW(c)     CharTypeW(c, == SP, U_SEPARATOR)
#define PlUpperW(c)     CharTypeW(c, == UC, U_UPPERCASE)
#define PlIdStartW(c)   (c <= 0xff ? (isLower(c)||isUpper(c)||c=='_')	\
			 : uflagsW(c) & U_ID_START)
#define PlIdContW(c)    CharTypeW(c, >= UC, U_ID_CONTINUE)
#define PlSymbolW(c)    CharTypeW(c, == SY, U_SYMBOL)
#define PlPunctW(c)     CharTypeW(c, == PU, 0)
#define PlSoloW(c)      CharTypeW(c, == SO, U_OTHER)
#define PlInvalidW(c)   (uflagsW(c) == 0)

int
f_is_prolog_var_start(wint_t c)
{ return PlIdStartW(c) && (PlUpperW(c) || c == '_');
}

int
f_is_prolog_atom_start(wint_t c)
{ return PlIdStartW(c) != 0;
}

int
f_is_prolog_identifier_continue(wint_t c)
{ return PlIdContW(c) || c == '_';
}

int
f_is_prolog_symbol(wint_t c)
{ return PlSymbolW(c) != 0;
}

int
unicode_separator(pl_wchar_t c)
{ return PlBlankW(c);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   FALSE	return false
   TRUE	redo
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
reportReadError(ReadData rd)
{ if ( rd->on_error == ATOM_error )
    return PL_raise_exception(rd->exception);
  if ( rd->on_error != ATOM_quiet )
    printMessage(ATOM_error, PL_TERM, rd->exception);
  PL_clear_exception();

  if ( rd->on_error == ATOM_dec10 )
    return TRUE;

  return FALSE;
}


/* static int */
/* reportSingletons(ReadData rd, singletons, Atom amod, Atom aname, UInt arity) */
/* {  */
/*   printMessage(ATOM_warning, PL_FUNCTOR_CHARS,  */
/* 	       "singletons", 2, */
/* 	       PL_TERM, singletons,  */
/* 	       PL_TERM, mod, */
/* 	       PL_FUNCTOR_divide2, */
/* 	       PL_ATOM, name, */
/* 	       PL_INT, arity); */

/*   return FALSE; */
/* } */


/********************************
 *           RAW READING         *
 *********************************/


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   Scan the input, give prompts when necessary and return a char *  holding
   a  stripped  version of the next term.  Contiguous white space is mapped
   on a single space, block and % ... \n comment  is  deleted.   Memory  is
   claimed automatically en enlarged if necessary.

   (char *) NULL is returned on a syntax error.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

#define syntaxError(what, rd) { errorWarning(what, 0, rd); fail; }

static term_t
makeErrorTerm(const char *id_str, term_t id_term, ReadData _PL_rd)
{ GET_LD
    term_t ex, loc=0;			/* keep compiler happy */
  unsigned char const *s, *ll = NULL;
  int rc = TRUE;

  if ( !(ex = PL_new_term_ref()) ||
       !(loc = PL_new_term_ref()) )
    rc = FALSE;

  if ( rc && !id_term )
    { if ( !(id_term=PL_new_term_ref()) ||
	   !PL_put_atom_chars(id_term, id_str) )
	rc = FALSE;
    }

  if ( rc )
    rc = PL_unify_term(ex,
		       PL_FUNCTOR, FUNCTOR_error2,
		       PL_FUNCTOR, FUNCTOR_syntax_error1,
		       PL_TERM, id_term,
		       PL_TERM, loc);

  source_char_no += last_token_start - rdbase;
  for(s=rdbase; s<last_token_start; s++)
    { if ( *s == '\n' )
	{ source_line_no++;
	  ll = s+1;
	}
    }

  if ( ll )
    { int lp = 0;

      for(s = ll; s<last_token_start; s++)
	{ switch(*s)
	    { case '\b':
		if ( lp > 0 ) lp--;
		break;
	    case '\t':
	      lp |= 7;
	    default:
	      lp++;
	    }
	}

      source_line_pos = lp;
    }

  if ( rc )
    { if ( ReadingSource )			/* reading a file */
	{ rc = PL_unify_term(loc,
			     PL_FUNCTOR, FUNCTOR_file4,
			     PL_ATOM, source_file_name,
			     PL_INT, source_line_no,
			     PL_INT, source_line_pos,
			     PL_INT64, source_char_no);
	} else if ( isStringStream(rb.stream) )
	{ size_t pos;

	  pos = utf8_strlen((char *)rdbase, last_token_start-rdbase);

	  rc = PL_unify_term(loc,
			     PL_FUNCTOR, FUNCTOR_string2,
			     PL_UTF8_STRING, rdbase,
			     PL_INT, (int)pos);
	} else				/* any stream */
	{ term_t stream;

	  if ( !(stream=PL_new_term_ref()) ||
	       !PL_unify_stream_or_alias(stream, rb.stream) ||
	       !PL_unify_term(loc,
			      PL_FUNCTOR, FUNCTOR_stream4,
			      PL_TERM, stream,
			      PL_INT, source_line_no,
			      PL_INT, source_line_pos,
			      PL_INT64, source_char_no) )
	    rc = FALSE;
	}
    }

  return (rc ? ex : (term_t)0);
}



static bool
errorWarning(const char *id_str, term_t id_term, ReadData _PL_rd)
{ GET_LD
    term_t ex;

  LD->exception.processing = TRUE;	/* allow using spare stack */

  ex = makeErrorTerm(id_str, id_term, _PL_rd);

  if ( _PL_rd )
    { _PL_rd->has_exception = TRUE;
      if ( ex )
	PL_put_term(_PL_rd->exception, ex);
      else
	PL_put_term(_PL_rd->exception, exception_term);
    } else
    { if ( ex )
	PL_raise_exception(ex);
    }

  fail;
}



static void
clearBuffer(ReadData _PL_rd)
{ if (rb.size == 0)
    { rb.base = rb.fast;
      rb.size = sizeof(rb.fast);
    }
  rb.end = rb.base + rb.size;
  rdbase = rb.here = rb.base;

  _PL_rd->posp = rdbase;
  _PL_rd->posi = 0;
}


static void
growToBuffer(int c, ReadData _PL_rd)
{ if ( rb.base == rb.fast )		/* intptr_t clause: jump to use malloc() */
    { rb.base = PL_malloc(FASTBUFFERSIZE * 2);
      memcpy(rb.base, rb.fast, FASTBUFFERSIZE);
    } else
    rb.base = PL_realloc(rb.base, rb.size*2);

  DEBUG(8, Sdprintf("Reallocated read buffer at %ld\n", (intptr_t) rb.base));
  _PL_rd->posp = rdbase = rb.base;
  rb.here = rb.base + rb.size;
  rb.size *= 2;
  rb.end  = rb.base + rb.size;
  _PL_rd->posi = 0;

  *rb.here++ = c;
}


static inline void
addByteToBuffer(int c, ReadData _PL_rd)
{ c &= 0xff;

  if ( rb.here >= rb.end )
    growToBuffer(c, _PL_rd);
  else
    *rb.here++ = c;
}


static void
addToBuffer(int c, ReadData _PL_rd)
{ if ( c <= 0x7f )
    { addByteToBuffer(c, _PL_rd);
    } else
    { char buf[10];
      char *s, *e;

      e = utf8_put_char(buf, c);
      for(s=buf; s<e; s++)
	addByteToBuffer(*s, _PL_rd);
    }
}



#if __YAP_PROLOG__
void
Yap_setCurrentSourceLocation( void *rd )
{
  GET_LD
    setCurrentSourceLocation(rd PASS_LD);
}
#endif


static inline int
getchr__(ReadData _PL_rd)
{ int c = Sgetcode(rb.stream);

  if ( !_PL_rd->char_conversion_table || c < 0 || c >= 256 )
    return c;

  return _PL_rd->char_conversion_table[c];
}


#define getchr()  getchr__(_PL_rd)
#define getchrq() Sgetcode(rb.stream)

#define ensure_space(c) { if ( something_read &&			\
			       (c == '\n' || !isBlank(rb.here[-1])) )	\
      addToBuffer(c, _PL_rd);						\
  }
#define set_start_line { if ( !something_read )		\
      { setCurrentSourceLocation(_PL_rd PASS_LD);	\
	something_read++;				\
      }							\
  }





#ifdef O_QUASIQUOTATIONS
/** '$qq_open'(+QQRange, -Stream) is det.

    Opens a quasi-quoted memory range.

    @arg QQRange is a term '$quasi_quotation'(ReadData, Start, Length)
    @arg Stream  is a UTF-8 encoded string, whose position indication
    reflects the location in the real file.
*/

static
PRED_IMPL("$qq_open", 2, qq_open, 0)
{ PRED_LD

    if ( PL_is_functor(A1, FUNCTOR_dquasi_quotation3) )
      { void *ptr;
	char * start;
	size_t len;
	term_t arg = PL_new_term_ref();
	IOSTREAM *s;

	if ( PL_get_arg(1, A1, arg) && PL_get_pointer_ex(arg, &ptr) &&
	     PL_get_arg(2, A1, arg) && PL_get_intptr(arg, (intptr_t *)&start) &&
	     PL_get_arg(3, A1, arg) && PL_get_intptr(arg, (intptr_t *)&len) )
	  {  //source_location pos;
	    if ( (s=Sopenmem(&start, &len, "r")) )
	      s->encoding = ENC_UTF8;

	    return PL_unify_stream(A2, s);
	  }
      } else
      PL_type_error("read_context", A1);

  return FALSE;
}


static int
parse_quasi_quotations(ReadData _PL_rd ARG_LD)
{ if ( _PL_rd->qq_tail )
    { term_t av;
      int rc;

      if ( !PL_unify_nil(_PL_rd->qq_tail) )
	return FALSE;

      if ( !_PL_rd->quasi_quotations )
	{ if ( (av = PL_new_term_refs(2)) &&
	       PL_put_term(av+0, _PL_rd->qq) &&
#if __YAP_PROLOG__
	       PL_put_atom(av+1, YAP_SWIAtomFromAtom(_PL_rd->module->AtomOfME)) &&
#else
	       PL_put_atom(av+1, _PL_rd->module->name) &&
#endif
	       PL_cons_functor_v(av, FUNCTOR_dparse_quasi_quotations2, av) )
	    { term_t ex;
	      rc = callProlog(MODULE_system, av+0, PL_Q_CATCH_EXCEPTION, &ex);
	      if ( rc )
		return TRUE;
	      _PL_rd->exception = ex;
	      _PL_rd->has_exception = TRUE;
	    }
	  return FALSE;
	} else
	return TRUE;
    } else if ( _PL_rd->quasi_quotations )	/* user option, but no quotes */
    { return PL_unify_nil(_PL_rd->quasi_quotations);
    } else
    return TRUE;
}


#endif /*O_QUASIQUOTATIONS*/




#define rawSyntaxError(what) { addToBuffer(EOS, _PL_rd);	\
    rdbase = rb.base, last_token_start = rb.here-1;		\
    syntaxError(what, _PL_rd);					\
  }

static int
raw_read_quoted(int q, ReadData _PL_rd)
{ int newlines = 0;
  int c;

  addToBuffer(q, _PL_rd);
  while((c=getchrq()) != EOF && c != q)
    { if ( c == '\\' && DO_CHARESCAPE )
	{ int base;
	  addToBuffer(c, _PL_rd);

	  switch( (c=getchrq()) )
	    { case EOF:
		goto eofinstr;
	    case 'u':			/* \uXXXX */
	    case 'U':			/* \UXXXXXXXX */
	      addToBuffer(c, _PL_rd);
	      continue;
	    case 'x':			/* \xNN\ */
	      addToBuffer(c, _PL_rd);
	      c = getchrq();
	      if ( c == EOF )
		goto eofinstr;
	      if ( digitValue(16, c) >= 0 )
		{ base = 16;
		  addToBuffer(c, _PL_rd);

		xdigits:
		  c = getchrq();
		  while( digitValue(base, c) >= 0 )
		    { addToBuffer(c, _PL_rd);
		      c = getchrq();
		    }
		}
	      if ( c == EOF )
		goto eofinstr;
	      addToBuffer(c, _PL_rd);
	      if ( c == q )
		return TRUE;
	      continue;
	    default:
	      addToBuffer(c, _PL_rd);
	      if ( digitValue(8, c) >= 0 )	/* \NNN\ */
		{ base = 8;
		  goto xdigits;
		} else if ( c == '\n' )	/* \<newline> */
		{ c = getchrq();
		  if ( c == EOF )
		    goto eofinstr;
		  addToBuffer(c, _PL_rd);
		  if ( c == q )
		    return TRUE;
		}
	      continue;			/* \symbolic-control-char */
	    }
	} else if (c == '\n' &&
		   newlines++ > MAXNEWLINES &&
		   (_PL_rd->styleCheck & LONGATOM_CHECK))
	{ rawSyntaxError("long_string");
	}
      addToBuffer(c, _PL_rd);
    }
  if (c == EOF)
    { eofinstr:
      rawSyntaxError("end_of_file_in_string");
    }
  addToBuffer(c, _PL_rd);

  return TRUE;
}


static int
add_comment(Buffer b, IOPOS *pos, ReadData _PL_rd ARG_LD)
{ term_t head = PL_new_term_ref();

  assert(_PL_rd->comments);
  if ( !PL_unify_list(_PL_rd->comments, head, _PL_rd->comments) )
    return FALSE;
  if ( pos )
    { if ( !PL_unify_term(head,
			  PL_FUNCTOR, FUNCTOR_minus2,
			  PL_FUNCTOR, FUNCTOR_stream_position4,
			  PL_INT64, pos->charno,
			  PL_INT, pos->lineno,
			  PL_INT, pos->linepos,
			  PL_INT, 0,
			  PL_UTF8_STRING, baseBuffer(b, char)) )
	return FALSE;
    } else
    { if ( !PL_unify_term(head,
			  PL_FUNCTOR, FUNCTOR_minus2,
			  ATOM_minus,
			  PL_UTF8_STRING, baseBuffer(b, char)) )
	return FALSE;
    }

  PL_reset_term_refs(head);
  return TRUE;
}


static void
setErrorLocation(IOPOS *pos, ReadData _PL_rd)
{ if ( pos )
    { GET_LD

	source_char_no = pos->charno;
      source_line_pos = pos->linepos;
      source_line_no = pos->lineno;
    }
  rb.here = rb.base+1;			/* see rawSyntaxError() */
}


static unsigned char *
raw_read2(ReadData _PL_rd ARG_LD)
{ int c;
  bool something_read = FALSE;
  bool dotseen = FALSE;
  IOPOS pbuf;					/* comment start */
  IOPOS *pos;

  clearBuffer(_PL_rd);				/* clear input buffer */
  _PL_rd->strictness = truePrologFlag(PLFLAG_ISO);
  source_line_no = -1;

  for(;;)
    { c = getchr();

    handle_c:
      switch(c)
	{ case EOF:
	    if ( isStringStream(rb.stream) ) /* do not require '. ' when */
	      { addToBuffer(' ', _PL_rd);     /* reading from a string */
		addToBuffer('.', _PL_rd);
		addToBuffer(' ', _PL_rd);
		addToBuffer(EOS, _PL_rd);
		return rb.base;
	      }
	    if (something_read)
	      { if ( dotseen )		/* term.<EOF> */
		  { if ( rb.here - rb.base == 1 )
		      rawSyntaxError("end_of_clause");
		    ensure_space(' ');
		    addToBuffer(EOS, _PL_rd);
		    return rb.base;
		  }
		rawSyntaxError("end_of_file");
	      }
	    if ( Sfpasteof(rb.stream) )
	      { term_t stream;

		LD->exception.processing = TRUE;
		stream = PL_new_term_ref();
		PL_unify_stream_or_alias(stream, rb.stream);
		PL_error(NULL, 0, NULL, ERR_PERMISSION,
			 ATOM_input, ATOM_past_end_of_stream, stream);
		return NULL;
	      }
	    set_start_line;
	    strcpy((char *)rb.base, "end_of_file. ");
	    rb.here = rb.base + 14;
	    return rb.base;
	case '/': if ( rb.stream->position )
	      { pbuf = *rb.stream->position;
		pbuf.charno--;
		pbuf.linepos--;
		pos = &pbuf;
	      } else
	      pos = NULL;

	  c = getchr();
	  if ( c == '*' )
	    { int last;
	      int level = 1;
	      union {
		tmp_buffer ctmpbuf;
		buffer tmpbuf;
	      } u;
	      Buffer cbuf;

	      if ( _PL_rd->comments )
		{ initBuffer(&u.ctmpbuf);
		  cbuf = &u.tmpbuf;
		  addUTF8Buffer(cbuf, '/');
		  addUTF8Buffer(cbuf, '*');
		} else
		{ cbuf = NULL;
		}

	      if ((last = getchr()) == EOF)
		{ if ( cbuf )
		    discardBuffer(cbuf);
		  setErrorLocation(pos, _PL_rd);
		  rawSyntaxError("end_of_file_in_block_comment");
		}
	      if ( cbuf )
		addUTF8Buffer(cbuf, last);

	      if ( something_read )
		{ addToBuffer(' ', _PL_rd);	/* positions */
		  addToBuffer(' ', _PL_rd);
		  addToBuffer(last == '\n' ? last : ' ', _PL_rd);
		}

	      for(;;)
		{ c = getchr();

		  if ( cbuf )
		    addUTF8Buffer(cbuf, c);

		  switch( c )
		    { case EOF:
			if ( cbuf )
			  discardBuffer(cbuf);
			setErrorLocation(pos, _PL_rd);
			rawSyntaxError("end_of_file_in_block_comment");
#ifndef __YAP_PROLOG__
			/* YAP does not support comment levels in original scanner */
		    case '*':
		      if ( last == '/' )
			level++;
		      break;
#endif
		    case '/':
		      if ( last == '*' &&
			   (--level == 0 || _PL_rd->strictness) )
			{ if ( cbuf )
			    { addUTF8Buffer(cbuf, EOS);
			      if ( !add_comment(cbuf, pos, _PL_rd PASS_LD) )
				{ discardBuffer(cbuf);
				  return FALSE;
				}
			      discardBuffer(cbuf);
			    }
			  c = ' ';
			  goto handle_c;
			}
		      break;
		    }
		  if ( something_read )
		    addToBuffer(c == '\n' ? c : ' ', _PL_rd);
		  last = c;
		}
	    } else
	    { set_start_line;
	      addToBuffer('/', _PL_rd);
	      if ( isSymbolW(c) )
		{ while( c != EOF && isSymbolW(c) &&
			 !(c == '`' && _PL_rd->backquoted_string) )
		    { addToBuffer(c, _PL_rd);
		      c = getchr();
		    }
		}
	      dotseen = FALSE;
	      goto handle_c;
	    }
	case '%': if ( something_read )
	    addToBuffer(' ', _PL_rd);
	  if ( _PL_rd->comments )
	    { union {
	      tmp_buffer ctmpbuf;
	      buffer uctmpbuf;
	    } u;
	      Buffer cbuf;

	      if ( rb.stream->position )
		{ pbuf = *rb.stream->position;
		  pbuf.charno--;
		  pbuf.linepos--;
		  pos = &pbuf;
		} else
		pos = NULL;

	      initBuffer(&u.ctmpbuf);
	      cbuf = (Buffer)&u.uctmpbuf;
	      addUTF8Buffer(cbuf, '%');

	      for(;;)
		{ while((c=getchr()) != EOF && c != '\n')
		    { addUTF8Buffer(cbuf, c);
		      if ( something_read )		/* record positions */
			addToBuffer(' ', _PL_rd);
		    }
		  if ( c == '\n' )
		    { int c2 = Speekcode(rb.stream);

		      if ( c2 == '%' )
			{ if ( something_read )
			    { addToBuffer(c, _PL_rd);
			      addToBuffer(' ', _PL_rd);
			    }
			  addUTF8Buffer(cbuf, c);
			  c = Sgetcode(rb.stream);
			  assert(c==c2);
			  addUTF8Buffer(cbuf, c);
			  continue;
			}
		    }
		  break;
		}
	      addUTF8Buffer(cbuf, EOS);
	      if ( !add_comment(cbuf, pos, _PL_rd PASS_LD) )
		{ discardBuffer(cbuf);
		  return FALSE;
		}
	      discardBuffer(cbuf);
	    } else
	    { while((c=getchr()) != EOF && c != '\n')
		{ if ( something_read )		/* record positions */
		    addToBuffer(' ', _PL_rd);
		}
	    }
	  goto handle_c;		/* is the newline */
	case '\'': if ( rb.here > rb.base && isDigit(rb.here[-1]) )
	    { cucharp bs = &rb.here[-1];

	      if ( bs > rb.base && isDigit(bs[-1]) )
		bs--;
	      if ( bs > rb.base && isSign(bs[-1]) )
		bs--;

	      if ( bs == rb.base || !PlIdContW(bs[-1]) )
		{ int base;

		  if ( isSign(bs[0]) )
		    bs++;
		  base = atoi((char*)bs);

		  if ( base <= 36 )
		    { if ( base == 0 )			/* 0'<c> */
			{ addToBuffer(c, _PL_rd);
			  { if ( (c=getchr()) != EOF )
			      { addToBuffer(c, _PL_rd);
				if ( c == '\\' ) 		/* 0'\<c> */
				  { if ( (c=getchr()) != EOF )
				      addToBuffer(c, _PL_rd);
				  } else if ( c == '\'' ) 	/* 0'' */
				  { if ( (c=getchr()) != EOF )
				      { if ( c == '\'' )
					  addToBuffer(c, _PL_rd);
					else
					  goto handle_c;
				      }
				  }
				break;
			      }
			    rawSyntaxError("end_of_file");
			  }
			} else
			{ int c2 = Speekcode(rb.stream);

			  if ( c2 != EOF )
			    { if ( digitValue(base, c2) >= 0 )
				{ addToBuffer(c, _PL_rd);
				  c = Sgetcode(rb.stream);
				  addToBuffer(c, _PL_rd);
				  dotseen = FALSE;
				  break;
				}
			      goto sqatom;
			    }
			  rawSyntaxError("end_of_file");
			}
		    }
		}
	    }

	sqatom:
	  set_start_line;
	  if ( !raw_read_quoted(c, _PL_rd) )
	    fail;
	  dotseen = FALSE;
	  break;
	case '"':	set_start_line;
	  if ( !raw_read_quoted(c, _PL_rd) )
	    fail;
	  dotseen = FALSE;
	  break;
	case '.': addToBuffer(c, _PL_rd);
	  set_start_line;
	  dotseen++;
	  c = getchr();
	  if ( isSymbolW(c) )
	    { while( c != EOF && isSymbolW(c) &&
		     !(c == '`' && _PL_rd->backquoted_string) )
		{ addToBuffer(c, _PL_rd);
		  c = getchr();
		}
	      dotseen = FALSE;
	    }
	  goto handle_c;
	case '`': if ( _PL_rd->backquoted_string )
	    { set_start_line;
	      if ( !raw_read_quoted(c, _PL_rd) )
		fail;
	      dotseen = FALSE;
	      break;
	    }
	  /*FALLTHROUGH*/
	default:	if ( c < 0xff )
	    { switch(_PL_char_types[c])
		{ case SP:
		case CT:
		blank:
		  if ( dotseen )
		    { if ( rb.here - rb.base == 1 )
			rawSyntaxError("end_of_clause");
		      ensure_space(c);
		      addToBuffer(EOS, _PL_rd);
		      return rb.base;
		    }
		  do
		    { if ( something_read ) /* positions, \0 --> ' ' */
			addToBuffer(c ? c : ' ', _PL_rd);
		      else
			ensure_space(c);
		      c = getchr();
		    } while( c != EOF && PlBlankW(c) );
		  goto handle_c;
		case SY:
		  set_start_line;
		  do
		    { addToBuffer(c, _PL_rd);
		      c = getchr();
		      if ( c == '`' && _PL_rd->backquoted_string )
			break;
		    } while( c != EOF && c <= 0xff && isSymbol(c) );
		  /* TBD: wide symbols? */
		  dotseen = FALSE;
		  goto handle_c;
		case LC:
		case UC:
		  set_start_line;
		  do
		    { addToBuffer(c, _PL_rd);
		      c = getchr();
		    } while( c != EOF && PlIdContW(c) );
		  dotseen = FALSE;
		  goto handle_c;
		default:
		  addToBuffer(c, _PL_rd);
		  dotseen = FALSE;
		  set_start_line;
		}
	    } else			/* > 255 */
	    { if ( PlIdStartW(c) )
		{ set_start_line;
		  do
		    { addToBuffer(c, _PL_rd);
		      c = getchr();
		    } while( c != EOF && PlIdContW(c) );
		  dotseen = FALSE;
		  goto handle_c;
		} else if ( PlBlankW(c) )
		{ goto blank;
		} else
		{ addToBuffer(c, _PL_rd);
		  dotseen = FALSE;
		  set_start_line;
		}
	    }
	}
    }
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   Raw reading returns a string in  UTF-8   notation  of the a Prolog term.
   Comment inside the term is  replaced  by   spaces  or  newline to ensure
   proper reconstruction of source locations. Comment   before  the term is
   skipped.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static unsigned char *
raw_read(ReadData _PL_rd, unsigned char **endp ARG_LD)
{ unsigned char *s;

  if ( (rb.stream->flags & SIO_ISATTY) && Sfileno(rb.stream) >= 0 )
    { ttybuf tab;

      PushTty(rb.stream, &tab, TTY_SAVE);		/* make sure tty is sane */
      PopTty(rb.stream, &ttytab, FALSE);
      s = raw_read2(_PL_rd PASS_LD);
      PopTty(rb.stream, &tab, TRUE);
    } else
    { s = raw_read2(_PL_rd PASS_LD);
    }

  if ( endp )
    *endp = _PL_rd->_rb.here;

  return s;
}

static void
callCommentHook(term_t comments, term_t tpos, term_t term)
{ GET_LD
    fid_t fid;
  term_t av;

  if ( (fid = PL_open_foreign_frame()) &&
       (av = PL_new_term_refs(3)) )
    { qid_t qid;

      PL_put_term(av+0, comments);
      PL_put_term(av+1, tpos);
      PL_put_term(av+2, term);

      if ( (qid = PL_open_query(NULL, PL_Q_NODEBUG|PL_Q_CATCH_EXCEPTION,
				(predicate_t)PredCommentHook, av)) )
	{ term_t ex;

	  if ( !PL_next_solution(qid) && (ex=PL_exception(qid)) )
	    printMessage(ATOM_error, PL_TERM, ex);

	  PL_close_query(qid);
	}
      PL_discard_foreign_frame(fid);
    }
}



/********************************
 *       PROLOG CONNECTION       *
 *********************************/

static unsigned char *
backSkipUTF8(unsigned const char *start, unsigned const char *end, int *chr)
{ const unsigned char *s;

  for(s=end-1 ; s>start && ( *s&0x80 ); s--)
    ;
  utf8_get_char((char*)s, chr);

  return (unsigned char *)s;
}


static unsigned char *
backSkipBlanks(const unsigned char *start, const unsigned char *end)
{ const unsigned char *s;

  for( ; end > start; end = s)
    { unsigned char *e;
      int chr;

      for(s=end-1 ; s>start && ISUTF8_CB(*s); s--)
	;
      e = (unsigned char*)utf8_get_char((char*)s, &chr);
      assert(e == end);
      if ( !PlBlankW(chr) )
	return (unsigned char*)end;
    }

  return (unsigned char *)start;
}

static inline ucharp
skipSpaces(cucharp in)
{ int chr;
  ucharp s;

  for( ; *in; in=s)
    { s = utf8_get_uchar(in, &chr);

      if ( !PlBlankW(chr) )
	return (ucharp)in;
    }

  return (ucharp)in;
}



word
pl_raw_read2(term_t from, term_t term)
{ GET_LD
    unsigned char *s, *e, *t2, *top;
  read_data rd;
  word rval;
  IOSTREAM *in;
  int chr;
  PL_chars_t txt;

  if ( !getTextInputStream(from, &in) )
    fail;

  init_read_data(&rd, in PASS_LD);
  if ( !(s = raw_read(&rd, &e PASS_LD)) )
    { rval = PL_raise_exception(rd.exception);
      goto out;
    }

  /* strip the input from blanks */
  top = backSkipBlanks(s, e-1);
  t2 = backSkipUTF8(s, top, &chr);
  if ( chr == '.' )
    top = backSkipBlanks(s, t2);
  /* watch for "0' ." */
  if ( top < e && top-2 >= s && top[-1] == '\'' && top[-2] == '0' )
    top++;
  *top = EOS;
  s = skipSpaces(s);

  txt.text.t    = (char*)s;
  txt.length    = top-s;
  txt.storage   = PL_CHARS_HEAP;
  txt.encoding  = ENC_UTF8;
  txt.canonical = FALSE;

  rval = PL_unify_text(term, 0, &txt, PL_ATOM);
  LD->read_varnames = rd.varnames;

 out:
  free_read_data(&rd);
  if ( Sferror(in) )
    return streamStatus(in);
  else
    PL_release_stream(in);

  return rval;
}


static int
unify_read_term_position(term_t tpos ARG_LD)
{ if ( tpos && source_line_no > 0 )
    { return PL_unify_term(tpos,
			   PL_FUNCTOR, FUNCTOR_stream_position4,
			   PL_INT64, source_char_no,
			   PL_INT, source_line_no,
			   PL_INT, source_line_pos,
			   PL_INT64, source_byte_no);
    } else
    { return TRUE;
    }
}

static const opt_spec read_clause_options[] =
  { { ATOM_variable_names,    OPT_TERM },
    { ATOM_term_position,     OPT_TERM },
    { ATOM_subterm_positions, OPT_TERM },
    { ATOM_process_comment,   OPT_BOOL },
    { ATOM_comments,          OPT_TERM },
    { ATOM_syntax_errors,     OPT_ATOM },
    { NULL_ATOM,              0 }
  };


/** read_clause(+Stream:stream, -Clause:clause, +Options:list)

    Like read_term/3, but uses current compiler options.

    Options:
    * variable_names(-Names)
    * process_comment(+Boolean)
    * comments(-List)
    * syntax_errors(+Atom)
    * term_position(-Position)
    * subterm_positions(-Layout)
    */
static int
read_clause(IOSTREAM *s, term_t term, term_t options ARG_LD)
{
  read_data rd;
  int rval;
  fid_t fid;
  term_t tpos = 0;
  term_t comments = 0;
  term_t opt_comments = 0;
  int process_comment;
  atom_t syntax_errors = ATOM_dec10;

  {
    OPCODE ophook = PredCommentHook->OpcodeOfPred;
    if (ophook == UNDEF_OPCODE || ophook == FAIL_OPCODE)
      process_comment = FALSE;
    else
      process_comment = TRUE;
  }
  if ( !(fid=PL_open_foreign_frame()) )
    return FALSE;

 retry:
  init_read_data(&rd, s PASS_LD);

  if ( options &&
       !scan_options(options, 0, ATOM_read_option, read_clause_options,
		     &rd.varnames,
		     &tpos,
		     &rd.subtpos,
		     &process_comment,
		     &opt_comments,
		     &syntax_errors) )
    { PL_close_foreign_frame(fid);
      return FALSE;
    }

  if ( opt_comments )
    { comments = PL_new_term_ref();
    } else if ( process_comment )
    { if ( !tpos )
	tpos = PL_new_term_ref();
      comments = PL_new_term_ref();
    }

  REGS_FROM_LD
    rd.module = Yap_GetModuleEntry( LOCAL_SourceModule );
  if ( comments )
    rd.comments = PL_copy_term_ref(comments);
  rd.on_error = syntax_errors;
  rd.singles = rd.styleCheck & SINGLETON_CHECK ? 1 : 0;
  if ( (rval=read_term(term, &rd PASS_LD)) &&
       (!tpos || (rval=unify_read_term_position(tpos PASS_LD))) )
    {
      PredEntry *ap;
      LD->read_varnames = rd.varnames;

      if (rd.singles) {
	// warning, singletons([X=_A],f(X,Y,Z), pos).
	printMessage(ATOM_warning,
		     PL_FUNCTOR_CHARS, "singletons", 3,
		     PL_TERM, rd.singles,
		     PL_TERM, term,
		     PL_TERM, tpos );
      }
      ap = Yap_PredFromClause( Yap_GetFromSlot(term)  PASS_REGS);
      if (rd.styleCheck & (DISCONTIGUOUS_STYLE|MULTIPLE_CHECK) && ap != NULL ) {
	if ( rd.styleCheck & (DISCONTIGUOUS_STYLE) && Yap_discontiguous( ap  PASS_REGS) ) {
	  printMessage(ATOM_warning,
		       PL_FUNCTOR_CHARS, "discontiguous", 2,
		       PL_TERM, term,
		       PL_TERM, tpos );
	}
	if (  rd.styleCheck & (MULTIPLE_CHECK) &&  Yap_multiple( ap  PASS_REGS) ) {
	  printMessage(ATOM_warning,
		       PL_FUNCTOR_CHARS, "multiple", 3,
		       PL_TERM, term,
		       PL_TERM, tpos,
		       PL_ATOM, YAP_SWIAtomFromAtom(ap->src.OwnerFile) );
	}
      }
      if ( rd.comments &&
	   (rval = PL_unify_nil(rd.comments)) )
	{ if ( opt_comments )
	    rval = PL_unify(opt_comments, comments);
	  else if ( !PL_get_nil(comments) )
	    callCommentHook(comments, tpos, term);
	}
    } else
    { if ( rd.has_exception && reportReadError(&rd) )
	{ PL_rewind_foreign_frame(fid);
	  free_read_data(&rd);
	  goto retry;
	}
    }
  free_read_data(&rd);

  return rval;
}


static
PRED_IMPL("read_clause", 3, read_clause, 0)
{ PRED_LD
    int rc;
  IOSTREAM *s;

  if ( !getTextInputStream(A1, &s) )
    return FALSE;
  rc = read_clause(s, A2, A3 PASS_LD);
  if ( Sferror(s) )
    return streamStatus(s);
  else
    PL_release_stream(s);

  return rc;
}


word
pl_raw_read(term_t term)
{ return pl_raw_read2(0, term);
}


static const opt_spec read_term_options[] =
  { { ATOM_variable_names,    OPT_TERM },
    { ATOM_variables,         OPT_TERM },
    { ATOM_singletons,        OPT_TERM },
    { ATOM_term_position,     OPT_TERM },
    //  		{ ATOM_subterm_positions, OPT_TERM },
    { ATOM_character_escapes, OPT_BOOL },
    { ATOM_double_quotes,	    OPT_ATOM },
    { ATOM_module,	    OPT_ATOM },
    { ATOM_syntax_errors,     OPT_ATOM },
    { ATOM_backquoted_string, OPT_BOOL },
    { ATOM_comments,	    OPT_TERM },
    { ATOM_process_comment,   OPT_BOOL },
#ifdef O_QUASIQUOTATIONS
    { ATOM_quasi_quotations,  OPT_TERM },
#endif
    { ATOM_cycles,	    OPT_BOOL },
    { NULL_ATOM,		    0 }
  };


static foreign_t
read_term_from_stream(IOSTREAM *s, term_t term, term_t options ARG_LD)
{ term_t tpos = 0;
  term_t comments = 0;
  term_t opt_comments = 0;
  int process_comment;
  int rval;
  atom_t w;
  read_data rd;
  int charescapes = -1;
  atom_t dq = NULL_ATOM;
  atom_t mname = NULL_ATOM;
  fid_t fid = PL_open_foreign_frame();

  if (!fid)
    return FALSE;
 retry:
  init_read_data(&rd, s PASS_LD);

  if ( !scan_options(options, 0, ATOM_read_option, read_term_options,
		     &rd.varnames,
		     &rd.variables,
		     &rd.singles,
		     &tpos,
		     //		&rd.subtpos,
		     &charescapes,
		     &dq,
		     &mname,
		     &rd.on_error,
		     &rd.backquoted_string,
		     &opt_comments,
		     &process_comment,
#ifdef O_QUASIQUOTATIONS
		     &rd.quasi_quotations,
#endif
		     &rd.cycles) ) {
    PL_discard_foreign_frame(fid);
    free_read_data(&rd);
    return FALSE;
  }

  // yap specific, do not call process comment if undefined
  if (process_comment) {
    OPCODE ophook = PredCommentHook->OpcodeOfPred;
    if (ophook == UNDEF_OPCODE || ophook == FAIL_OPCODE)
      process_comment = FALSE;
  }

  if ( opt_comments )
    { comments = PL_new_term_ref();
    } else if ( process_comment )
    { if ( !tpos )
	tpos = PL_new_term_ref();
      comments = PL_new_term_ref();
    }

  if ( mname )
    { rd.module = lookupModule(mname);
      rd.flags  = rd.module->flags;
    }

  if ( charescapes != -1 )
    { if ( charescapes )
	set(&rd, M_CHARESCAPE);
      else
	clear(&rd, M_CHARESCAPE);
    }
  if ( dq )
    { if ( !setDoubleQuotes(dq, &rd.flags) )
	return FALSE;
    }
  if ( rd.singles && PL_get_atom(rd.singles, &w) && w == ATOM_warning)
    rd.singles = 1;

  if ( comments )
    rd.comments = PL_copy_term_ref(comments);

  rval = read_term(term, &rd PASS_LD);
  if ( Sferror(s) ) {
    free_read_data(&rd);
    return FALSE;
  }
  LD->read_varnames = rd.varnames;
#ifdef O_QUASIQUOTATIONS
  if ( rval )
    rval = parse_quasi_quotations(&rd PASS_LD);
#endif
  if ( rval )
    { if ( tpos )
	rval = unify_read_term_position(tpos PASS_LD);
      if (rval) {
	if ( opt_comments )
	  rval = PL_unify(opt_comments, comments);
	else if (comments && !PL_get_nil(comments) )
	  callCommentHook(comments, tpos, term);
      }
    } else {
    if ( rd.has_exception && reportReadError(&rd) )
      { PL_rewind_foreign_frame(fid);
	free_read_data(&rd);
	goto retry;
      }
  }
  free_read_data(&rd);

  return rval;
}


/** @pred  read_term(+ _Stream_,- _T_,+ _Options_) is iso

    Reads term  _T_ from stream  _Stream_ with execution controlled by the
    same options as read_term/2.


*/
static
PRED_IMPL("read_term", 3, read_term, PL_FA_ISO)
{ PRED_LD
    IOSTREAM *s;

  if ( getTextInputStream(A1, &s) )
    { if ( read_term_from_stream(s, A2, A3 PASS_LD) )
	return PL_release_stream(s);
      if ( Sferror(s) )
	return streamStatus(s);
      PL_release_stream(s);
      return FALSE;
    }

  return FALSE;
}

/** read_term(-Term, +Options) is det.
 */


/** @pred read_term(- _T_,+ _Options_) is iso


    Reads term  _T_ from the current input stream with execution
    controlled by the following options:

    + comments(- _Comments_)

    Unify _Comments_ with a list of string terms including comments before
    and within the term.

    + module( + _Module_)

    Read term using _Module_ as source module.

    + quasi_quotations(-List)

    Unify _List_ with the quasi-quotations present in the term.

    + term_position(- _Position_)

    Unify  _Position_ with a term describing the position of the stream
    at the start of parse. Use stream_position_data/3 to obtain extra
    information.

    + singletons(- _Names_)

    Unify  _Names_ with a list of the form  _Name=Var_, where
    _Name_ is the name of a non-anonymous singleton variable in the
    original term, and `Var` is the variable's representation in
    YAP.
    The variables occur in left-to-right traversal order.

    + syntax_errors(+ _Val_)

    Control action to be taken after syntax errors. See yap_flag/2
    for detailed information.

    + variables(- _Names_)

    Unify  _Names_ with a list of the form  _Name=Var_, where  _Name_ is
    the name of a non-anonymous variable in the original term, and  _Var_
    is the variable's representation in YAP.
    The variables occur in left-to-right traversal order.


*/
static
PRED_IMPL("read_term", 2, read_term, PL_FA_ISO)
{ PRED_LD
    IOSTREAM *s;

  if ( getTextInputStream(0, &s) )
    { if ( read_term_from_stream(s, A1, A2 PASS_LD) )
	return PL_release_stream(s);
      if ( Sferror(s) )
	return streamStatus(s);
      PL_release_stream(s);
      return FALSE;
    }

  return FALSE;
}


/*******************************
 *	   TERM <->ATOM		*
 *******************************/

static int
atom_to_term(term_t atom, term_t term, term_t bindings)
{ GET_LD
    PL_chars_t txt;

  if ( !bindings && PL_is_variable(atom) ) /* term_to_atom(+, -) */
    { char buf[1024];
      size_t bufsize = sizeof(buf);
      int rval;
      char *s = buf;
      IOSTREAM *stream;
      PL_chars_t txt;

      stream = Sopenmem(&s, &bufsize, "w");
      stream->encoding = ENC_UTF8;
      PL_write_term(stream, term, 1200, PL_WRT_QUOTED);
      Sflush(stream);

      txt.text.t = s;
      txt.length = bufsize;
      txt.storage = PL_CHARS_HEAP;
      txt.encoding = ENC_UTF8;
      txt.canonical = FALSE;
      rval = PL_unify_text(atom, 0, &txt, PL_ATOM);

      Sclose(stream);
      if ( s != buf )
	Sfree(s);

      return rval;
    }

  if ( PL_get_text(atom, &txt, CVT_ALL|CVT_EXCEPTION) )
    { GET_LD
	read_data rd;
      int rval;
      IOSTREAM *stream;
      source_location oldsrc = LD->read_source;

      stream = Sopen_text(&txt, "r");

      init_read_data(&rd, stream PASS_LD);
      if ( bindings && (PL_is_variable(bindings) || PL_is_list(bindings)) )
	rd.varnames = bindings;
      else if ( bindings )
	return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_list, bindings);

      if ( !(rval = read_term(term, &rd PASS_LD)) && rd.has_exception )
	rval = PL_raise_exception(rd.exception);
      free_read_data(&rd);
      Sclose(stream);
      LD->read_source = oldsrc;

      //   getchar();
      return rval;
    }

  fail;
}

Term
Yap_StringToTerm(const char *s, size_t *lenp, term_t bindings)
{ GET_LD;
  read_data rd;
  int rval;
  IOSTREAM *stream;
  source_location oldsrc = LD->read_source;

  stream = Sopen_string(0, (char *)s, strlen( s ),  "r");

  init_read_data(&rd, stream PASS_LD);
  rd.varnames = bindings;
  term_t tt = Yap_NewSlots(1);

  if ( !(rval = read_term(tt, &rd PASS_LD)) && rd.has_exception ) {
    rval = PL_raise_exception(rd.exception);
    return 0L;
  }
  free_read_data(&rd);
  Sclose(stream);
  LD->read_source = oldsrc;

  //   getchar();
  return Yap_GetFromSlot( tt);
}

/** @pred atom_to_term(+ _Atom_, - _Term_, - _Bindings_)


    Use  _Atom_ as input to read_term/2 using the option `variable_names` and return the read term in  _Term_ and the variable bindings in  _Bindings_.  _Bindings_ is a list of `Name = Var` couples, thus providing access to the actual variable names. See also read_term/2. If Atom has no valid syntax, a syntax_error exception is raised.


*/
static
PRED_IMPL("atom_to_term", 3, atom_to_term, 0)
{ return atom_to_term(A1, A2, A3);
}


static
PRED_IMPL("term_to_atom", 2, term_to_atom, 0)
{ return atom_to_term(A2, A1, 0);
}

static
PRED_IMPL("$context_variables", 1, context_variables, 0)
{   CACHE_REGS
    if ( LOCAL_VarNames == (CELL)0 )
      return Yap_unify( TermNil, ARG1);
  return Yap_unify( LOCAL_VarNames, ARG1);
}

static
PRED_IMPL("$set_source", 2, set_source, 0)
{
  GET_LD
    atom_t at;
  term_t a = PL_new_term_ref();

  if (!PL_get_atom(A1, &at))
    return FALSE;
  source_file_name = at;
  if (!PL_get_arg(1, A2, a) || !PL_get_int64(a, &source_char_no) ||
      !PL_get_arg(2, A2, a) || !PL_get_long(a, &source_line_no) ||
      !PL_get_arg(3, A2, a) || !PL_get_long(a, &source_line_pos) ||
      !PL_get_arg(4, A2, a) || !PL_get_int64(a, &source_byte_no) ) {
    return FALSE;
  }
  return TRUE;
}

int
PL_chars_to_term(const char *s, term_t t)
{ GET_LD
    read_data rd;
  int rval;
  IOSTREAM *stream = Sopen_string(NULL, (char *)s, -1, "r");
  source_location oldsrc = LD->read_source;

  init_read_data(&rd, stream PASS_LD);
  PL_put_variable(t);
  if ( !(rval = read_term(t, &rd PASS_LD)) && rd.has_exception )
    PL_put_term(t, rd.exception);
  LOCAL_VarNames = rd.varnames;
  free_read_data(&rd);
  Sclose(stream);
  LD->read_source = oldsrc;

  return rval;
}

/*******************************
 *      PUBLISH PREDICATES	*
 *******************************/

BeginPredDefs(read)
PRED_DEF("read_term",		  3, read_term,		  PL_FA_ISO)
PRED_DEF("read_term",		  2, read_term,		  PL_FA_ISO)
PRED_DEF("read_clause",         3, read_clause,         0)
PRED_DEF("atom_to_term", 3, atom_to_term, 0)
PRED_DEF("term_to_atom", 2, term_to_atom, 0)
PRED_DEF("$context_variables", 1, context_variables, 0)
PRED_DEF("$set_source",  2, set_source, 0)
#ifdef O_QUASIQUOTATIONS
PRED_DEF("$qq_open",            2, qq_open,             0)
#endif
EndPredDefs

//! @}