This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
Vítor Santos Costa b3cc23ce64 IO patches
simplify error handling
use get and inject
use wide support in OS
be stricter in checkin streams and arguments
2015-10-08 02:23:45 +01:00

1641 lines
37 KiB
C

#include "pl-incl.h"
#include "pl-ctype.h"
#include "pl-utf8.h"
#include "pl-dtoa.h"
#include "pl-umap.c" /* Unicode map */
#include "pl-read.h" /* read structure */
/**
* @defgroup ReadTerm Read Term from Streams
* @ingroup InputOutput
* @{
*/
static bool
isStringStream(IOSTREAM *s)
{ return s->functions == &Sstringfunctions;
}
void
init_read_data(ReadData _PL_rd, IOSTREAM *in ARG_LD)
{ CACHE_REGS
memset(_PL_rd, 0, sizeof(*_PL_rd)); /* optimise! */
_PL_rd->magic = RD_MAGIC;
_PL_rd->varnames = 0;
_PL_rd->module = Yap_GetModuleEntry(CurrentModule);
_PL_rd->exception = 0;
_PL_rd->stream = in;
_PL_rd->has_exception = 0;
_PL_rd->module = MODULE_parse;
_PL_rd->flags = _PL_rd->module->flags; /* change for options! */
_PL_rd->styleCheck = LOCAL_debugstatus.styleCheck;
_PL_rd->on_error = AtomError;
_PL_rd->backquoted_string = truePrologFlag(PLFLAG_BACKQUOTED_STRING);
}
void
free_read_data(ReadData _PL_rd)
{
}
static int
read_term(term_t t, ReadData _PL_rd ARG_LD)
{
return Yap_read_term(t, rb.stream, _PL_rd);
}
static void addUTF8Buffer(Buffer b, int c);
static void
addUTF8Buffer(Buffer b, int c)
{ if ( c >= 0x80 )
{ char buf[6];
char *p, *end;
end = utf8_put_char(buf, c);
for(p=buf; p<end; p++)
{ addBuffer(b, *p&0xff, char);
}
} else
{ addBuffer(b, c, char);
}
}
/*******************************
* UNICODE CLASSIFIERS *
*******************************/
#define CharTypeW(c, t, w) \
((unsigned)(c) <= 0xff ? (_PL_char_types[(unsigned)(c)] t) \
: (uflagsW(c) & (w)))
#define PlBlankW(c) CharTypeW(c, == SP, U_SEPARATOR)
#define PlUpperW(c) CharTypeW(c, == UC, U_UPPERCASE)
#define PlIdStartW(c) (c <= 0xff ? (isLower(c)||isUpper(c)||c=='_') \
: uflagsW(c) & U_ID_START)
#define PlIdContW(c) CharTypeW(c, >= UC, U_ID_CONTINUE)
#define PlSymbolW(c) CharTypeW(c, == SY, U_SYMBOL)
#define PlPunctW(c) CharTypeW(c, == PU, 0)
#define PlSoloW(c) CharTypeW(c, == SO, U_OTHER)
#define PlInvalidW(c) (uflagsW(c) == 0)
int
f_is_prolog_var_start(wint_t c)
{ return PlIdStartW(c) && (PlUpperW(c) || c == '_');
}
int
f_is_prolog_atom_start(wint_t c)
{ return PlIdStartW(c) != 0;
}
int
f_is_prolog_identifier_continue(wint_t c)
{ return PlIdContW(c) || c == '_';
}
int
f_is_prolog_symbol(wint_t c)
{ return PlSymbolW(c) != 0;
}
int
unicode_separator(pl_wchar_t c)
{ return PlBlankW(c);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FALSE return false
TRUE redo
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
reportReadError(ReadData rd)
{ if ( rd->on_error == ATOM_error )
return PL_raise_exception(rd->exception);
if ( rd->on_error != ATOM_quiet )
printMessage(ATOM_error, PL_TERM, rd->exception);
PL_clear_exception();
if ( rd->on_error == ATOM_dec10 )
return TRUE;
return FALSE;
}
/* static int */
/* reportSingletons(ReadData rd, singletons, Atom amod, Atom aname, UInt arity) */
/* { */
/* printMessage(ATOM_warning, PL_FUNCTOR_CHARS, */
/* "singletons", 2, */
/* PL_TERM, singletons, */
/* PL_TERM, mod, */
/* PL_FUNCTOR_divide2, */
/* PL_ATOM, name, */
/* PL_INT, arity); */
/* return FALSE; */
/* } */
/********************************
* RAW READING *
*********************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Scan the input, give prompts when necessary and return a char * holding
a stripped version of the next term. Contiguous white space is mapped
on a single space, block and % ... \n comment is deleted. Memory is
claimed automatically en enlarged if necessary.
(char *) NULL is returned on a syntax error.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#define syntaxError(what, rd) { errorWarning(what, 0, rd); fail; }
static term_t
makeErrorTerm(const char *id_str, term_t id_term, ReadData _PL_rd)
{ GET_LD
term_t ex, loc=0; /* keep compiler happy */
unsigned char const *s, *ll = NULL;
int rc = TRUE;
if ( !(ex = PL_new_term_ref()) ||
!(loc = PL_new_term_ref()) )
rc = FALSE;
if ( rc && !id_term )
{ if ( !(id_term=PL_new_term_ref()) ||
!PL_put_atom_chars(id_term, id_str) )
rc = FALSE;
}
if ( rc )
rc = PL_unify_term(ex,
PL_FUNCTOR, FUNCTOR_error2,
PL_FUNCTOR, FUNCTOR_syntax_error1,
PL_TERM, id_term,
PL_TERM, loc);
source_char_no += last_token_start - rdbase;
for(s=rdbase; s<last_token_start; s++)
{ if ( *s == '\n' )
{ source_line_no++;
ll = s+1;
}
}
if ( ll )
{ int lp = 0;
for(s = ll; s<last_token_start; s++)
{ switch(*s)
{ case '\b':
if ( lp > 0 ) lp--;
break;
case '\t':
lp |= 7;
default:
lp++;
}
}
source_line_pos = lp;
}
if ( rc )
{ if ( ReadingSource ) /* reading a file */
{ rc = PL_unify_term(loc,
PL_FUNCTOR, FUNCTOR_file4,
PL_ATOM, source_file_name,
PL_INT, source_line_no,
PL_INT, source_line_pos,
PL_INT64, source_char_no);
} else if ( isStringStream(rb.stream) )
{ size_t pos;
pos = utf8_strlen((char *)rdbase, last_token_start-rdbase);
rc = PL_unify_term(loc,
PL_FUNCTOR, FUNCTOR_string2,
PL_UTF8_STRING, rdbase,
PL_INT, (int)pos);
} else /* any stream */
{ term_t stream;
if ( !(stream=PL_new_term_ref()) ||
!PL_unify_stream_or_alias(stream, rb.stream) ||
!PL_unify_term(loc,
PL_FUNCTOR, FUNCTOR_stream4,
PL_TERM, stream,
PL_INT, source_line_no,
PL_INT, source_line_pos,
PL_INT64, source_char_no) )
rc = FALSE;
}
}
return (rc ? ex : (term_t)0);
}
static bool
errorWarning(const char *id_str, term_t id_term, ReadData _PL_rd)
{ GET_LD
term_t ex;
LD->exception.processing = TRUE; /* allow using spare stack */
ex = makeErrorTerm(id_str, id_term, _PL_rd);
if ( _PL_rd )
{ _PL_rd->has_exception = TRUE;
if ( ex )
PL_put_term(_PL_rd->exception, ex);
else
PL_put_term(_PL_rd->exception, exception_term);
} else
{ if ( ex )
PL_raise_exception(ex);
}
fail;
}
static void
clearBuffer(ReadData _PL_rd)
{ if (rb.size == 0)
{ rb.base = rb.fast;
rb.size = sizeof(rb.fast);
}
rb.end = rb.base + rb.size;
rdbase = rb.here = rb.base;
_PL_rd->posp = rdbase;
_PL_rd->posi = 0;
}
static void
growToBuffer(int c, ReadData _PL_rd)
{ if ( rb.base == rb.fast ) /* intptr_t clause: jump to use malloc() */
{ rb.base = PL_malloc(FASTBUFFERSIZE * 2);
memcpy(rb.base, rb.fast, FASTBUFFERSIZE);
} else
rb.base = PL_realloc(rb.base, rb.size*2);
DEBUG(8, Sdprintf("Reallocated read buffer at %ld\n", (intptr_t) rb.base));
_PL_rd->posp = rdbase = rb.base;
rb.here = rb.base + rb.size;
rb.size *= 2;
rb.end = rb.base + rb.size;
_PL_rd->posi = 0;
*rb.here++ = c;
}
static inline void
addByteToBuffer(int c, ReadData _PL_rd)
{ c &= 0xff;
if ( rb.here >= rb.end )
growToBuffer(c, _PL_rd);
else
*rb.here++ = c;
}
static void
addToBuffer(int c, ReadData _PL_rd)
{ if ( c <= 0x7f )
{ addByteToBuffer(c, _PL_rd);
} else
{ char buf[10];
char *s, *e;
e = utf8_put_char(buf, c);
for(s=buf; s<e; s++)
addByteToBuffer(*s, _PL_rd);
}
}
#if __YAP_PROLOG__
void
Yap_setCurrentSourceLocation( void *rd )
{
GET_LD
setCurrentSourceLocation(rd PASS_LD);
}
#endif
static inline int
getchr__(ReadData _PL_rd)
{ int c = Sgetcode(rb.stream);
if ( !_PL_rd->char_conversion_table || c < 0 || c >= 256 )
return c;
return _PL_rd->char_conversion_table[c];
}
#define getchr() getchr__(_PL_rd)
#define getchrq() Sgetcode(rb.stream)
#define ensure_space(c) { if ( something_read && \
(c == '\n' || !isBlank(rb.here[-1])) ) \
addToBuffer(c, _PL_rd); \
}
#define set_start_line { if ( !something_read ) \
{ setCurrentSourceLocation(_PL_rd PASS_LD); \
something_read++; \
} \
}
#ifdef O_QUASIQUOTATIONS
/** '$qq_open'(+QQRange, -Stream) is det.
Opens a quasi-quoted memory range.
@arg QQRange is a term '$quasi_quotation'(ReadData, Start, Length)
@arg Stream is a UTF-8 encoded string, whose position indication
reflects the location in the real file.
*/
static
PRED_IMPL("$qq_open", 2, qq_open, 0)
{ PRED_LD
if ( PL_is_functor(A1, FUNCTOR_dquasi_quotation3) )
{ void *ptr;
char * start;
size_t len;
term_t arg = PL_new_term_ref();
IOSTREAM *s;
if ( PL_get_arg(1, A1, arg) && PL_get_pointer_ex(arg, &ptr) &&
PL_get_arg(2, A1, arg) && PL_get_intptr(arg, (intptr_t *)&start) &&
PL_get_arg(3, A1, arg) && PL_get_intptr(arg, (intptr_t *)&len) )
{ //source_location pos;
if ( (s=Sopenmem(&start, &len, "r")) )
s->encoding = ENC_UTF8;
return PL_unify_stream(A2, s);
}
} else
PL_type_error("read_context", A1);
return FALSE;
}
static int
parse_quasi_quotations(ReadData _PL_rd ARG_LD)
{ if ( _PL_rd->qq_tail )
{ term_t av;
int rc;
if ( !PL_unify_nil(_PL_rd->qq_tail) )
return FALSE;
if ( !_PL_rd->quasi_quotations )
{ if ( (av = PL_new_term_refs(2)) &&
PL_put_term(av+0, _PL_rd->qq) &&
#if __YAP_PROLOG__
PL_put_atom(av+1, YAP_SWIAtomFromAtom(_PL_rd->module->AtomOfME)) &&
#else
PL_put_atom(av+1, _PL_rd->module->name) &&
#endif
PL_cons_functor_v(av, FUNCTOR_dparse_quasi_quotations2, av) )
{ term_t ex;
rc = callProlog(MODULE_system, av+0, PL_Q_CATCH_EXCEPTION, &ex);
if ( rc )
return TRUE;
_PL_rd->exception = ex;
_PL_rd->has_exception = TRUE;
}
return FALSE;
} else
return TRUE;
} else if ( _PL_rd->quasi_quotations ) /* user option, but no quotes */
{ return PL_unify_nil(_PL_rd->quasi_quotations);
} else
return TRUE;
}
#endif /*O_QUASIQUOTATIONS*/
#define rawSyntaxError(what) { addToBuffer(EOS, _PL_rd); \
rdbase = rb.base, last_token_start = rb.here-1; \
syntaxError(what, _PL_rd); \
}
static int
raw_read_quoted(int q, ReadData _PL_rd)
{ int newlines = 0;
int c;
addToBuffer(q, _PL_rd);
while((c=getchrq()) != EOF && c != q)
{ if ( c == '\\' && DO_CHARESCAPE )
{ int base;
addToBuffer(c, _PL_rd);
switch( (c=getchrq()) )
{ case EOF:
goto eofinstr;
case 'u': /* \uXXXX */
case 'U': /* \UXXXXXXXX */
addToBuffer(c, _PL_rd);
continue;
case 'x': /* \xNN\ */
addToBuffer(c, _PL_rd);
c = getchrq();
if ( c == EOF )
goto eofinstr;
if ( digitValue(16, c) >= 0 )
{ base = 16;
addToBuffer(c, _PL_rd);
xdigits:
c = getchrq();
while( digitValue(base, c) >= 0 )
{ addToBuffer(c, _PL_rd);
c = getchrq();
}
}
if ( c == EOF )
goto eofinstr;
addToBuffer(c, _PL_rd);
if ( c == q )
return TRUE;
continue;
default:
addToBuffer(c, _PL_rd);
if ( digitValue(8, c) >= 0 ) /* \NNN\ */
{ base = 8;
goto xdigits;
} else if ( c == '\n' ) /* \<newline> */
{ c = getchrq();
if ( c == EOF )
goto eofinstr;
addToBuffer(c, _PL_rd);
if ( c == q )
return TRUE;
}
continue; /* \symbolic-control-char */
}
} else if (c == '\n' &&
newlines++ > MAXNEWLINES &&
(_PL_rd->styleCheck & LONGATOM_CHECK))
{ rawSyntaxError("long_string");
}
addToBuffer(c, _PL_rd);
}
if (c == EOF)
{ eofinstr:
rawSyntaxError("end_of_file_in_string");
}
addToBuffer(c, _PL_rd);
return TRUE;
}
static int
add_comment(Buffer b, IOPOS *pos, ReadData _PL_rd ARG_LD)
{ term_t head = PL_new_term_ref();
assert(_PL_rd->comments);
if ( !PL_unify_list(_PL_rd->comments, head, _PL_rd->comments) )
return FALSE;
if ( pos )
{ if ( !PL_unify_term(head,
PL_FUNCTOR, FUNCTOR_minus2,
PL_FUNCTOR, FUNCTOR_stream_position4,
PL_INT64, pos->charno,
PL_INT, pos->lineno,
PL_INT, pos->linepos,
PL_INT, 0,
PL_UTF8_STRING, baseBuffer(b, char)) )
return FALSE;
} else
{ if ( !PL_unify_term(head,
PL_FUNCTOR, FUNCTOR_minus2,
ATOM_minus,
PL_UTF8_STRING, baseBuffer(b, char)) )
return FALSE;
}
PL_reset_term_refs(head);
return TRUE;
}
static void
setErrorLocation(IOPOS *pos, ReadData _PL_rd)
{ if ( pos )
{ GET_LD
source_char_no = pos->charno;
source_line_pos = pos->linepos;
source_line_no = pos->lineno;
}
rb.here = rb.base+1; /* see rawSyntaxError() */
}
static unsigned char *
raw_read2(ReadData _PL_rd ARG_LD)
{ int c;
bool something_read = FALSE;
bool dotseen = FALSE;
IOPOS pbuf; /* comment start */
IOPOS *pos;
clearBuffer(_PL_rd); /* clear input buffer */
_PL_rd->strictness = truePrologFlag(PLFLAG_ISO);
source_line_no = -1;
for(;;)
{ c = getchr();
handle_c:
switch(c)
{ case EOF:
if ( isStringStream(rb.stream) ) /* do not require '. ' when */
{ addToBuffer(' ', _PL_rd); /* reading from a string */
addToBuffer('.', _PL_rd);
addToBuffer(' ', _PL_rd);
addToBuffer(EOS, _PL_rd);
return rb.base;
}
if (something_read)
{ if ( dotseen ) /* term.<EOF> */
{ if ( rb.here - rb.base == 1 )
rawSyntaxError("end_of_clause");
ensure_space(' ');
addToBuffer(EOS, _PL_rd);
return rb.base;
}
rawSyntaxError("end_of_file");
}
if ( Sfpasteof(rb.stream) )
{ term_t stream;
LD->exception.processing = TRUE;
stream = PL_new_term_ref();
PL_unify_stream_or_alias(stream, rb.stream);
PL_error(NULL, 0, NULL, ERR_PERMISSION,
ATOM_input, ATOM_past_end_of_stream, stream);
return NULL;
}
set_start_line;
strcpy((char *)rb.base, "end_of_file. ");
rb.here = rb.base + 14;
return rb.base;
case '/': if ( rb.stream->position )
{ pbuf = *rb.stream->position;
pbuf.charno--;
pbuf.linepos--;
pos = &pbuf;
} else
pos = NULL;
c = getchr();
if ( c == '*' )
{ int last;
int level = 1;
union {
tmp_buffer ctmpbuf;
buffer tmpbuf;
} u;
Buffer cbuf;
if ( _PL_rd->comments )
{ initBuffer(&u.ctmpbuf);
cbuf = &u.tmpbuf;
addUTF8Buffer(cbuf, '/');
addUTF8Buffer(cbuf, '*');
} else
{ cbuf = NULL;
}
if ((last = getchr()) == EOF)
{ if ( cbuf )
discardBuffer(cbuf);
setErrorLocation(pos, _PL_rd);
rawSyntaxError("end_of_file_in_block_comment");
}
if ( cbuf )
addUTF8Buffer(cbuf, last);
if ( something_read )
{ addToBuffer(' ', _PL_rd); /* positions */
addToBuffer(' ', _PL_rd);
addToBuffer(last == '\n' ? last : ' ', _PL_rd);
}
for(;;)
{ c = getchr();
if ( cbuf )
addUTF8Buffer(cbuf, c);
switch( c )
{ case EOF:
if ( cbuf )
discardBuffer(cbuf);
setErrorLocation(pos, _PL_rd);
rawSyntaxError("end_of_file_in_block_comment");
#ifndef __YAP_PROLOG__
/* YAP does not support comment levels in original scanner */
case '*':
if ( last == '/' )
level++;
break;
#endif
case '/':
if ( last == '*' &&
(--level == 0 || _PL_rd->strictness) )
{ if ( cbuf )
{ addUTF8Buffer(cbuf, EOS);
if ( !add_comment(cbuf, pos, _PL_rd PASS_LD) )
{ discardBuffer(cbuf);
return FALSE;
}
discardBuffer(cbuf);
}
c = ' ';
goto handle_c;
}
break;
}
if ( something_read )
addToBuffer(c == '\n' ? c : ' ', _PL_rd);
last = c;
}
} else
{ set_start_line;
addToBuffer('/', _PL_rd);
if ( isSymbolW(c) )
{ while( c != EOF && isSymbolW(c) &&
!(c == '`' && _PL_rd->backquoted_string) )
{ addToBuffer(c, _PL_rd);
c = getchr();
}
}
dotseen = FALSE;
goto handle_c;
}
case '%': if ( something_read )
addToBuffer(' ', _PL_rd);
if ( _PL_rd->comments )
{ union {
tmp_buffer ctmpbuf;
buffer uctmpbuf;
} u;
Buffer cbuf;
if ( rb.stream->position )
{ pbuf = *rb.stream->position;
pbuf.charno--;
pbuf.linepos--;
pos = &pbuf;
} else
pos = NULL;
initBuffer(&u.ctmpbuf);
cbuf = (Buffer)&u.uctmpbuf;
addUTF8Buffer(cbuf, '%');
for(;;)
{ while((c=getchr()) != EOF && c != '\n')
{ addUTF8Buffer(cbuf, c);
if ( something_read ) /* record positions */
addToBuffer(' ', _PL_rd);
}
if ( c == '\n' )
{ int c2 = Speekcode(rb.stream);
if ( c2 == '%' )
{ if ( something_read )
{ addToBuffer(c, _PL_rd);
addToBuffer(' ', _PL_rd);
}
addUTF8Buffer(cbuf, c);
c = Sgetcode(rb.stream);
assert(c==c2);
addUTF8Buffer(cbuf, c);
continue;
}
}
break;
}
addUTF8Buffer(cbuf, EOS);
if ( !add_comment(cbuf, pos, _PL_rd PASS_LD) )
{ discardBuffer(cbuf);
return FALSE;
}
discardBuffer(cbuf);
} else
{ while((c=getchr()) != EOF && c != '\n')
{ if ( something_read ) /* record positions */
addToBuffer(' ', _PL_rd);
}
}
goto handle_c; /* is the newline */
case '\'': if ( rb.here > rb.base && isDigit(rb.here[-1]) )
{ cucharp bs = &rb.here[-1];
if ( bs > rb.base && isDigit(bs[-1]) )
bs--;
if ( bs > rb.base && isSign(bs[-1]) )
bs--;
if ( bs == rb.base || !PlIdContW(bs[-1]) )
{ int base;
if ( isSign(bs[0]) )
bs++;
base = atoi((char*)bs);
if ( base <= 36 )
{ if ( base == 0 ) /* 0'<c> */
{ addToBuffer(c, _PL_rd);
{ if ( (c=getchr()) != EOF )
{ addToBuffer(c, _PL_rd);
if ( c == '\\' ) /* 0'\<c> */
{ if ( (c=getchr()) != EOF )
addToBuffer(c, _PL_rd);
} else if ( c == '\'' ) /* 0'' */
{ if ( (c=getchr()) != EOF )
{ if ( c == '\'' )
addToBuffer(c, _PL_rd);
else
goto handle_c;
}
}
break;
}
rawSyntaxError("end_of_file");
}
} else
{ int c2 = Speekcode(rb.stream);
if ( c2 != EOF )
{ if ( digitValue(base, c2) >= 0 )
{ addToBuffer(c, _PL_rd);
c = Sgetcode(rb.stream);
addToBuffer(c, _PL_rd);
dotseen = FALSE;
break;
}
goto sqatom;
}
rawSyntaxError("end_of_file");
}
}
}
}
sqatom:
set_start_line;
if ( !raw_read_quoted(c, _PL_rd) )
fail;
dotseen = FALSE;
break;
case '"': set_start_line;
if ( !raw_read_quoted(c, _PL_rd) )
fail;
dotseen = FALSE;
break;
case '.': addToBuffer(c, _PL_rd);
set_start_line;
dotseen++;
c = getchr();
if ( isSymbolW(c) )
{ while( c != EOF && isSymbolW(c) &&
!(c == '`' && _PL_rd->backquoted_string) )
{ addToBuffer(c, _PL_rd);
c = getchr();
}
dotseen = FALSE;
}
goto handle_c;
case '`': if ( _PL_rd->backquoted_string )
{ set_start_line;
if ( !raw_read_quoted(c, _PL_rd) )
fail;
dotseen = FALSE;
break;
}
/*FALLTHROUGH*/
default: if ( c < 0xff )
{ switch(_PL_char_types[c])
{ case SP:
case CT:
blank:
if ( dotseen )
{ if ( rb.here - rb.base == 1 )
rawSyntaxError("end_of_clause");
ensure_space(c);
addToBuffer(EOS, _PL_rd);
return rb.base;
}
do
{ if ( something_read ) /* positions, \0 --> ' ' */
addToBuffer(c ? c : ' ', _PL_rd);
else
ensure_space(c);
c = getchr();
} while( c != EOF && PlBlankW(c) );
goto handle_c;
case SY:
set_start_line;
do
{ addToBuffer(c, _PL_rd);
c = getchr();
if ( c == '`' && _PL_rd->backquoted_string )
break;
} while( c != EOF && c <= 0xff && isSymbol(c) );
/* TBD: wide symbols? */
dotseen = FALSE;
goto handle_c;
case LC:
case UC:
set_start_line;
do
{ addToBuffer(c, _PL_rd);
c = getchr();
} while( c != EOF && PlIdContW(c) );
dotseen = FALSE;
goto handle_c;
default:
addToBuffer(c, _PL_rd);
dotseen = FALSE;
set_start_line;
}
} else /* > 255 */
{ if ( PlIdStartW(c) )
{ set_start_line;
do
{ addToBuffer(c, _PL_rd);
c = getchr();
} while( c != EOF && PlIdContW(c) );
dotseen = FALSE;
goto handle_c;
} else if ( PlBlankW(c) )
{ goto blank;
} else
{ addToBuffer(c, _PL_rd);
dotseen = FALSE;
set_start_line;
}
}
}
}
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Raw reading returns a string in UTF-8 notation of the a Prolog term.
Comment inside the term is replaced by spaces or newline to ensure
proper reconstruction of source locations. Comment before the term is
skipped.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static unsigned char *
raw_read(ReadData _PL_rd, unsigned char **endp ARG_LD)
{ unsigned char *s;
if ( (rb.stream->flags & SIO_ISATTY) && Sfileno(rb.stream) >= 0 )
{ ttybuf tab;
PushTty(rb.stream, &tab, TTY_SAVE); /* make sure tty is sane */
PopTty(rb.stream, &ttytab, FALSE);
s = raw_read2(_PL_rd PASS_LD);
PopTty(rb.stream, &tab, TRUE);
} else
{ s = raw_read2(_PL_rd PASS_LD);
}
if ( endp )
*endp = _PL_rd->_rb.here;
return s;
}
static void
callCommentHook(term_t comments, term_t tpos, term_t term)
{ GET_LD
fid_t fid;
term_t av;
if ( (fid = PL_open_foreign_frame()) &&
(av = PL_new_term_refs(3)) )
{ qid_t qid;
PL_put_term(av+0, comments);
PL_put_term(av+1, tpos);
PL_put_term(av+2, term);
if ( (qid = PL_open_query(NULL, PL_Q_NODEBUG|PL_Q_CATCH_EXCEPTION,
(predicate_t)PredCommentHook, av)) )
{ term_t ex;
if ( !PL_next_solution(qid) && (ex=PL_exception(qid)) )
printMessage(ATOM_error, PL_TERM, ex);
PL_close_query(qid);
}
PL_discard_foreign_frame(fid);
}
}
/********************************
* PROLOG CONNECTION *
*********************************/
static unsigned char *
backSkipUTF8(unsigned const char *start, unsigned const char *end, int *chr)
{ const unsigned char *s;
for(s=end-1 ; s>start && ( *s&0x80 ); s--)
;
utf8_get_char((char*)s, chr);
return (unsigned char *)s;
}
static unsigned char *
backSkipBlanks(const unsigned char *start, const unsigned char *end)
{ const unsigned char *s;
for( ; end > start; end = s)
{ unsigned char *e;
int chr;
for(s=end-1 ; s>start && ISUTF8_CB(*s); s--)
;
e = (unsigned char*)utf8_get_char((char*)s, &chr);
assert(e == end);
if ( !PlBlankW(chr) )
return (unsigned char*)end;
}
return (unsigned char *)start;
}
static inline ucharp
skipSpaces(cucharp in)
{ int chr;
ucharp s;
for( ; *in; in=s)
{ s = utf8_get_uchar(in, &chr);
if ( !PlBlankW(chr) )
return (ucharp)in;
}
return (ucharp)in;
}
word
pl_raw_read2(term_t from, term_t term)
{ GET_LD
unsigned char *s, *e, *t2, *top;
read_data rd;
word rval;
IOSTREAM *in;
int chr;
PL_chars_t txt;
if ( !getTextInputStream(from, &in) )
fail;
init_read_data(&rd, in PASS_LD);
if ( !(s = raw_read(&rd, &e PASS_LD)) )
{ rval = PL_raise_exception(rd.exception);
goto out;
}
/* strip the input from blanks */
top = backSkipBlanks(s, e-1);
t2 = backSkipUTF8(s, top, &chr);
if ( chr == '.' )
top = backSkipBlanks(s, t2);
/* watch for "0' ." */
if ( top < e && top-2 >= s && top[-1] == '\'' && top[-2] == '0' )
top++;
*top = EOS;
s = skipSpaces(s);
txt.text.t = (char*)s;
txt.length = top-s;
txt.storage = PL_CHARS_HEAP;
txt.encoding = ENC_UTF8;
txt.canonical = FALSE;
rval = PL_unify_text(term, 0, &txt, PL_ATOM);
LD->read_varnames = rd.varnames;
out:
free_read_data(&rd);
if ( Sferror(in) )
return streamStatus(in);
else
PL_release_stream(in);
return rval;
}
static int
unify_read_term_position(term_t tpos ARG_LD)
{ if ( tpos && source_line_no > 0 )
{ return PL_unify_term(tpos,
PL_FUNCTOR, FUNCTOR_stream_position4,
PL_INT64, source_char_no,
PL_INT, source_line_no,
PL_INT, source_line_pos,
PL_INT64, source_byte_no);
} else
{ return TRUE;
}
}
static const opt_spec read_clause_options[] =
{ { ATOM_variable_names, OPT_TERM },
{ ATOM_term_position, OPT_TERM },
{ ATOM_subterm_positions, OPT_TERM },
{ ATOM_process_comment, OPT_BOOL },
{ ATOM_comments, OPT_TERM },
{ ATOM_syntax_errors, OPT_ATOM },
{ NULL_ATOM, 0 }
};
/** read_clause(+Stream:stream, -Clause:clause, +Options:list)
Like read_term/3, but uses current compiler options.
Options:
* variable_names(-Names)
* process_comment(+Boolean)
* comments(-List)
* syntax_errors(+Atom)
* term_position(-Position)
* subterm_positions(-Layout)
*/
static int
read_clause(IOSTREAM *s, term_t term, term_t options ARG_LD)
{
read_data rd;
int rval;
fid_t fid;
term_t tpos = 0;
term_t comments = 0;
term_t opt_comments = 0;
int process_comment;
atom_t syntax_errors = ATOM_dec10;
{
OPCODE ophook = PredCommentHook->OpcodeOfPred;
if (ophook == UNDEF_OPCODE || ophook == FAIL_OPCODE)
process_comment = FALSE;
else
process_comment = TRUE;
}
if ( !(fid=PL_open_foreign_frame()) )
return FALSE;
retry:
init_read_data(&rd, s PASS_LD);
if ( options &&
!scan_options(options, 0, ATOM_read_option, read_clause_options,
&rd.varnames,
&tpos,
&rd.subtpos,
&process_comment,
&opt_comments,
&syntax_errors) )
{ PL_close_foreign_frame(fid);
return FALSE;
}
if ( opt_comments )
{ comments = PL_new_term_ref();
} else if ( process_comment )
{ if ( !tpos )
tpos = PL_new_term_ref();
comments = PL_new_term_ref();
}
REGS_FROM_LD
rd.module = Yap_GetModuleEntry( LOCAL_SourceModule );
if ( comments )
rd.comments = PL_copy_term_ref(comments);
rd.on_error = syntax_errors;
rd.singles = rd.styleCheck & SINGLETON_CHECK ? 1 : 0;
if ( (rval=read_term(term, &rd PASS_LD)) &&
(!tpos || (rval=unify_read_term_position(tpos PASS_LD))) )
{
PredEntry *ap;
LD->read_varnames = rd.varnames;
if (rd.singles) {
// warning, singletons([X=_A],f(X,Y,Z), pos).
printMessage(ATOM_warning,
PL_FUNCTOR_CHARS, "singletons", 3,
PL_TERM, rd.singles,
PL_TERM, term,
PL_TERM, tpos );
}
ap = Yap_PredFromClause( Yap_GetFromSlot(term) PASS_REGS);
if (rd.styleCheck & (DISCONTIGUOUS_STYLE|MULTIPLE_CHECK) && ap != NULL ) {
if ( rd.styleCheck & (DISCONTIGUOUS_STYLE) && Yap_discontiguous( ap PASS_REGS) ) {
printMessage(ATOM_warning,
PL_FUNCTOR_CHARS, "discontiguous", 2,
PL_TERM, term,
PL_TERM, tpos );
}
if ( rd.styleCheck & (MULTIPLE_CHECK) && Yap_multiple( ap PASS_REGS) ) {
printMessage(ATOM_warning,
PL_FUNCTOR_CHARS, "multiple", 3,
PL_TERM, term,
PL_TERM, tpos,
PL_ATOM, YAP_SWIAtomFromAtom(ap->src.OwnerFile) );
}
}
if ( rd.comments &&
(rval = PL_unify_nil(rd.comments)) )
{ if ( opt_comments )
rval = PL_unify(opt_comments, comments);
else if ( !PL_get_nil(comments) )
callCommentHook(comments, tpos, term);
}
} else
{ if ( rd.has_exception && reportReadError(&rd) )
{ PL_rewind_foreign_frame(fid);
free_read_data(&rd);
goto retry;
}
}
free_read_data(&rd);
return rval;
}
static
PRED_IMPL("read_clause", 3, read_clause, 0)
{ PRED_LD
int rc;
IOSTREAM *s;
if ( !getTextInputStream(A1, &s) )
return FALSE;
rc = read_clause(s, A2, A3 PASS_LD);
if ( Sferror(s) )
return streamStatus(s);
else
PL_release_stream(s);
return rc;
}
word
pl_raw_read(term_t term)
{ return pl_raw_read2(0, term);
}
static const opt_spec read_term_options[] =
{ { ATOM_variable_names, OPT_TERM },
{ ATOM_variables, OPT_TERM },
{ ATOM_singletons, OPT_TERM },
{ ATOM_term_position, OPT_TERM },
// { ATOM_subterm_positions, OPT_TERM },
{ ATOM_character_escapes, OPT_BOOL },
{ ATOM_double_quotes, OPT_ATOM },
{ ATOM_module, OPT_ATOM },
{ ATOM_syntax_errors, OPT_ATOM },
{ ATOM_backquoted_string, OPT_BOOL },
{ ATOM_comments, OPT_TERM },
{ ATOM_process_comment, OPT_BOOL },
#ifdef O_QUASIQUOTATIONS
{ ATOM_quasi_quotations, OPT_TERM },
#endif
{ ATOM_cycles, OPT_BOOL },
{ NULL_ATOM, 0 }
};
static foreign_t
read_term_from_stream(IOSTREAM *s, term_t term, term_t options ARG_LD)
{ term_t tpos = 0;
term_t comments = 0;
term_t opt_comments = 0;
int process_comment;
int rval;
atom_t w;
read_data rd;
int charescapes = -1;
atom_t dq = NULL_ATOM;
atom_t mname = NULL_ATOM;
fid_t fid = PL_open_foreign_frame();
if (!fid)
return FALSE;
retry:
init_read_data(&rd, s PASS_LD);
if ( !scan_options(options, 0, ATOM_read_option, read_term_options,
&rd.varnames,
&rd.variables,
&rd.singles,
&tpos,
// &rd.subtpos,
&charescapes,
&dq,
&mname,
&rd.on_error,
&rd.backquoted_string,
&opt_comments,
&process_comment,
#ifdef O_QUASIQUOTATIONS
&rd.quasi_quotations,
#endif
&rd.cycles) ) {
PL_discard_foreign_frame(fid);
free_read_data(&rd);
return FALSE;
}
// yap specific, do not call process comment if undefined
if (process_comment) {
OPCODE ophook = PredCommentHook->OpcodeOfPred;
if (ophook == UNDEF_OPCODE || ophook == FAIL_OPCODE)
process_comment = FALSE;
}
if ( opt_comments )
{ comments = PL_new_term_ref();
} else if ( process_comment )
{ if ( !tpos )
tpos = PL_new_term_ref();
comments = PL_new_term_ref();
}
if ( mname )
{ rd.module = lookupModule(mname);
rd.flags = rd.module->flags;
}
if ( charescapes != -1 )
{ if ( charescapes )
set(&rd, M_CHARESCAPE);
else
clear(&rd, M_CHARESCAPE);
}
if ( dq )
{ if ( !setDoubleQuotes(dq, &rd.flags) )
return FALSE;
}
if ( rd.singles && PL_get_atom(rd.singles, &w) && w == ATOM_warning)
rd.singles = 1;
if ( comments )
rd.comments = PL_copy_term_ref(comments);
rval = read_term(term, &rd PASS_LD);
if ( Sferror(s) ) {
free_read_data(&rd);
return FALSE;
}
LD->read_varnames = rd.varnames;
#ifdef O_QUASIQUOTATIONS
if ( rval )
rval = parse_quasi_quotations(&rd PASS_LD);
#endif
if ( rval )
{ if ( tpos )
rval = unify_read_term_position(tpos PASS_LD);
if (rval) {
if ( opt_comments )
rval = PL_unify(opt_comments, comments);
else if (comments && !PL_get_nil(comments) )
callCommentHook(comments, tpos, term);
}
} else {
if ( rd.has_exception && reportReadError(&rd) )
{ PL_rewind_foreign_frame(fid);
free_read_data(&rd);
goto retry;
}
}
free_read_data(&rd);
return rval;
}
/** @pred read_term(+ _Stream_,- _T_,+ _Options_) is iso
Reads term _T_ from stream _Stream_ with execution controlled by the
same options as read_term/2.
*/
static
PRED_IMPL("read_term", 3, read_term, PL_FA_ISO)
{ PRED_LD
IOSTREAM *s;
if ( getTextInputStream(A1, &s) )
{ if ( read_term_from_stream(s, A2, A3 PASS_LD) )
return PL_release_stream(s);
if ( Sferror(s) )
return streamStatus(s);
PL_release_stream(s);
return FALSE;
}
return FALSE;
}
/** read_term(-Term, +Options) is det.
*/
/** @pred read_term(- _T_,+ _Options_) is iso
Reads term _T_ from the current input stream with execution
controlled by the following options:
+ comments(- _Comments_)
Unify _Comments_ with a list of string terms including comments before
and within the term.
+ module( + _Module_)
Read term using _Module_ as source module.
+ quasi_quotations(-List)
Unify _List_ with the quasi-quotations present in the term.
+ term_position(- _Position_)
Unify _Position_ with a term describing the position of the stream
at the start of parse. Use stream_position_data/3 to obtain extra
information.
+ singletons(- _Names_)
Unify _Names_ with a list of the form _Name=Var_, where
_Name_ is the name of a non-anonymous singleton variable in the
original term, and `Var` is the variable's representation in
YAP.
The variables occur in left-to-right traversal order.
+ syntax_errors(+ _Val_)
Control action to be taken after syntax errors. See yap_flag/2
for detailed information.
+ variables(- _Names_)
Unify _Names_ with a list of the form _Name=Var_, where _Name_ is
the name of a non-anonymous variable in the original term, and _Var_
is the variable's representation in YAP.
The variables occur in left-to-right traversal order.
*/
static
PRED_IMPL("read_term", 2, read_term, PL_FA_ISO)
{ PRED_LD
IOSTREAM *s;
if ( getTextInputStream(0, &s) )
{ if ( read_term_from_stream(s, A1, A2 PASS_LD) )
return PL_release_stream(s);
if ( Sferror(s) )
return streamStatus(s);
PL_release_stream(s);
return FALSE;
}
return FALSE;
}
/*******************************
* TERM <->ATOM *
*******************************/
static int
atom_to_term(term_t atom, term_t term, term_t bindings)
{ GET_LD
PL_chars_t txt;
if ( !bindings && PL_is_variable(atom) ) /* term_to_atom(+, -) */
{ char buf[1024];
size_t bufsize = sizeof(buf);
int rval;
char *s = buf;
IOSTREAM *stream;
PL_chars_t txt;
stream = Sopenmem(&s, &bufsize, "w");
stream->encoding = ENC_UTF8;
PL_write_term(stream, term, 1200, PL_WRT_QUOTED);
Sflush(stream);
txt.text.t = s;
txt.length = bufsize;
txt.storage = PL_CHARS_HEAP;
txt.encoding = ENC_UTF8;
txt.canonical = FALSE;
rval = PL_unify_text(atom, 0, &txt, PL_ATOM);
Sclose(stream);
if ( s != buf )
Sfree(s);
return rval;
}
if ( PL_get_text(atom, &txt, CVT_ALL|CVT_EXCEPTION) )
{ GET_LD
read_data rd;
int rval;
IOSTREAM *stream;
source_location oldsrc = LD->read_source;
stream = Sopen_text(&txt, "r");
init_read_data(&rd, stream PASS_LD);
if ( bindings && (PL_is_variable(bindings) || PL_is_list(bindings)) )
rd.varnames = bindings;
else if ( bindings )
return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_list, bindings);
if ( !(rval = read_term(term, &rd PASS_LD)) && rd.has_exception )
rval = PL_raise_exception(rd.exception);
free_read_data(&rd);
Sclose(stream);
LD->read_source = oldsrc;
// getchar();
return rval;
}
fail;
}
Term
Yap_StringToTerm(const char *s, size_t *lenp, term_t bindings)
{ GET_LD;
read_data rd;
int rval;
IOSTREAM *stream;
source_location oldsrc = LD->read_source;
stream = Sopen_string(0, (char *)s, strlen( s ), "r");
init_read_data(&rd, stream PASS_LD);
rd.varnames = bindings;
term_t tt = Yap_NewSlots(1);
if ( !(rval = read_term(tt, &rd PASS_LD)) && rd.has_exception ) {
rval = PL_raise_exception(rd.exception);
return 0L;
}
free_read_data(&rd);
Sclose(stream);
LD->read_source = oldsrc;
// getchar();
return Yap_GetFromSlot( tt);
}
/** @pred atom_to_term(+ _Atom_, - _Term_, - _Bindings_)
Use _Atom_ as input to read_term/2 using the option `variable_names` and return the read term in _Term_ and the variable bindings in _Bindings_. _Bindings_ is a list of `Name = Var` couples, thus providing access to the actual variable names. See also read_term/2. If Atom has no valid syntax, a syntax_error exception is raised.
*/
static
PRED_IMPL("atom_to_term", 3, atom_to_term, 0)
{ return atom_to_term(A1, A2, A3);
}
static
PRED_IMPL("term_to_atom", 2, term_to_atom, 0)
{ return atom_to_term(A2, A1, 0);
}
static
PRED_IMPL("$context_variables", 1, context_variables, 0)
{ CACHE_REGS
if ( LOCAL_VarNames == (CELL)0 )
return Yap_unify( TermNil, ARG1);
return Yap_unify( LOCAL_VarNames, ARG1);
}
static
PRED_IMPL("$set_source", 2, set_source, 0)
{
GET_LD
atom_t at;
term_t a = PL_new_term_ref();
if (!PL_get_atom(A1, &at))
return FALSE;
source_file_name = at;
if (!PL_get_arg(1, A2, a) || !PL_get_int64(a, &source_char_no) ||
!PL_get_arg(2, A2, a) || !PL_get_long(a, &source_line_no) ||
!PL_get_arg(3, A2, a) || !PL_get_long(a, &source_line_pos) ||
!PL_get_arg(4, A2, a) || !PL_get_int64(a, &source_byte_no) ) {
return FALSE;
}
return TRUE;
}
int
PL_chars_to_term(const char *s, term_t t)
{ GET_LD
read_data rd;
int rval;
IOSTREAM *stream = Sopen_string(NULL, (char *)s, -1, "r");
source_location oldsrc = LD->read_source;
init_read_data(&rd, stream PASS_LD);
PL_put_variable(t);
if ( !(rval = read_term(t, &rd PASS_LD)) && rd.has_exception )
PL_put_term(t, rd.exception);
LOCAL_VarNames = rd.varnames;
free_read_data(&rd);
Sclose(stream);
LD->read_source = oldsrc;
return rval;
}
/*******************************
* PUBLISH PREDICATES *
*******************************/
BeginPredDefs(read)
PRED_DEF("read_term", 3, read_term, PL_FA_ISO)
PRED_DEF("read_term", 2, read_term, PL_FA_ISO)
PRED_DEF("read_clause", 3, read_clause, 0)
PRED_DEF("atom_to_term", 3, atom_to_term, 0)
PRED_DEF("term_to_atom", 2, term_to_atom, 0)
PRED_DEF("$context_variables", 1, context_variables, 0)
PRED_DEF("$set_source", 2, set_source, 0)
#ifdef O_QUASIQUOTATIONS
PRED_DEF("$qq_open", 2, qq_open, 0)
#endif
EndPredDefs
//! @}