#include "pl-incl.h" #include "pl-ctype.h" #include "pl-utf8.h" #include "pl-dtoa.h" #include "pl-umap.c" /* Unicode map */ #include "pl-read.h" /* read structure */ /** * @defgroup ReadTerm Read Term from Streams * @ingroup InputOutput * @{ */ static bool isStringStream(IOSTREAM *s) { return s->functions == &Sstringfunctions; } void init_read_data(ReadData _PL_rd, IOSTREAM *in ARG_LD) { CACHE_REGS memset(_PL_rd, 0, sizeof(*_PL_rd)); /* optimise! */ _PL_rd->magic = RD_MAGIC; _PL_rd->varnames = 0; _PL_rd->module = Yap_GetModuleEntry(CurrentModule); _PL_rd->exception = PL_new_term_ref(); rb.stream = in; _PL_rd->has_exception = 0; _PL_rd->module = MODULE_parse; _PL_rd->flags = _PL_rd->module->flags; /* change for options! */ _PL_rd->styleCheck = debugstatus.styleCheck; _PL_rd->on_error = ATOM_error; _PL_rd->backquoted_string = truePrologFlag(PLFLAG_BACKQUOTED_STRING); } void free_read_data(ReadData _PL_rd) { } static int read_term(term_t t, ReadData _PL_rd ARG_LD) { return Yap_read_term(t, rb.stream, _PL_rd); } static void addUTF8Buffer(Buffer b, int c); static void addUTF8Buffer(Buffer b, int c) { if ( c >= 0x80 ) { char buf[6]; char *p, *end; end = utf8_put_char(buf, c); for(p=buf; p= UC, U_ID_CONTINUE) #define PlSymbolW(c) CharTypeW(c, == SY, U_SYMBOL) #define PlPunctW(c) CharTypeW(c, == PU, 0) #define PlSoloW(c) CharTypeW(c, == SO, U_OTHER) #define PlInvalidW(c) (uflagsW(c) == 0) int f_is_prolog_var_start(wint_t c) { return PlIdStartW(c) && (PlUpperW(c) || c == '_'); } int f_is_prolog_atom_start(wint_t c) { return PlIdStartW(c) != 0; } int f_is_prolog_identifier_continue(wint_t c) { return PlIdContW(c) || c == '_'; } int f_is_prolog_symbol(wint_t c) { return PlSymbolW(c) != 0; } int unicode_separator(pl_wchar_t c) { return PlBlankW(c); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - FALSE return false TRUE redo - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int reportReadError(ReadData rd) { if ( rd->on_error == ATOM_error ) return PL_raise_exception(rd->exception); if ( rd->on_error != ATOM_quiet ) printMessage(ATOM_error, PL_TERM, rd->exception); PL_clear_exception(); if ( rd->on_error == ATOM_dec10 ) return TRUE; return FALSE; } /* static int */ /* reportSingletons(ReadData rd, singletons, Atom amod, Atom aname, UInt arity) */ /* { */ /* printMessage(ATOM_warning, PL_FUNCTOR_CHARS, */ /* "singletons", 2, */ /* PL_TERM, singletons, */ /* PL_TERM, mod, */ /* PL_FUNCTOR_divide2, */ /* PL_ATOM, name, */ /* PL_INT, arity); */ /* return FALSE; */ /* } */ /******************************** * RAW READING * *********************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Scan the input, give prompts when necessary and return a char * holding a stripped version of the next term. Contiguous white space is mapped on a single space, block and % ... \n comment is deleted. Memory is claimed automatically en enlarged if necessary. (char *) NULL is returned on a syntax error. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #define syntaxError(what, rd) { errorWarning(what, 0, rd); fail; } static term_t makeErrorTerm(const char *id_str, term_t id_term, ReadData _PL_rd) { GET_LD term_t ex, loc=0; /* keep compiler happy */ unsigned char const *s, *ll = NULL; int rc = TRUE; if ( !(ex = PL_new_term_ref()) || !(loc = PL_new_term_ref()) ) rc = FALSE; if ( rc && !id_term ) { if ( !(id_term=PL_new_term_ref()) || !PL_put_atom_chars(id_term, id_str) ) rc = FALSE; } if ( rc ) rc = PL_unify_term(ex, PL_FUNCTOR, FUNCTOR_error2, PL_FUNCTOR, FUNCTOR_syntax_error1, PL_TERM, id_term, PL_TERM, loc); source_char_no += last_token_start - rdbase; for(s=rdbase; s 0 ) lp--; break; case '\t': lp |= 7; default: lp++; } } source_line_pos = lp; } if ( rc ) { if ( ReadingSource ) /* reading a file */ { rc = PL_unify_term(loc, PL_FUNCTOR, FUNCTOR_file4, PL_ATOM, source_file_name, PL_INT, source_line_no, PL_INT, source_line_pos, PL_INT64, source_char_no); } else if ( isStringStream(rb.stream) ) { size_t pos; pos = utf8_strlen((char *)rdbase, last_token_start-rdbase); rc = PL_unify_term(loc, PL_FUNCTOR, FUNCTOR_string2, PL_UTF8_STRING, rdbase, PL_INT, (int)pos); } else /* any stream */ { term_t stream; if ( !(stream=PL_new_term_ref()) || !PL_unify_stream_or_alias(stream, rb.stream) || !PL_unify_term(loc, PL_FUNCTOR, FUNCTOR_stream4, PL_TERM, stream, PL_INT, source_line_no, PL_INT, source_line_pos, PL_INT64, source_char_no) ) rc = FALSE; } } return (rc ? ex : (term_t)0); } static bool errorWarning(const char *id_str, term_t id_term, ReadData _PL_rd) { GET_LD term_t ex; LD->exception.processing = TRUE; /* allow using spare stack */ ex = makeErrorTerm(id_str, id_term, _PL_rd); if ( _PL_rd ) { _PL_rd->has_exception = TRUE; if ( ex ) PL_put_term(_PL_rd->exception, ex); else PL_put_term(_PL_rd->exception, exception_term); } else { if ( ex ) PL_raise_exception(ex); } fail; } static void clearBuffer(ReadData _PL_rd) { if (rb.size == 0) { rb.base = rb.fast; rb.size = sizeof(rb.fast); } rb.end = rb.base + rb.size; rdbase = rb.here = rb.base; _PL_rd->posp = rdbase; _PL_rd->posi = 0; } static void growToBuffer(int c, ReadData _PL_rd) { if ( rb.base == rb.fast ) /* intptr_t clause: jump to use malloc() */ { rb.base = PL_malloc(FASTBUFFERSIZE * 2); memcpy(rb.base, rb.fast, FASTBUFFERSIZE); } else rb.base = PL_realloc(rb.base, rb.size*2); DEBUG(8, Sdprintf("Reallocated read buffer at %ld\n", (intptr_t) rb.base)); _PL_rd->posp = rdbase = rb.base; rb.here = rb.base + rb.size; rb.size *= 2; rb.end = rb.base + rb.size; _PL_rd->posi = 0; *rb.here++ = c; } static inline void addByteToBuffer(int c, ReadData _PL_rd) { c &= 0xff; if ( rb.here >= rb.end ) growToBuffer(c, _PL_rd); else *rb.here++ = c; } static void addToBuffer(int c, ReadData _PL_rd) { if ( c <= 0x7f ) { addByteToBuffer(c, _PL_rd); } else { char buf[10]; char *s, *e; e = utf8_put_char(buf, c); for(s=buf; schar_conversion_table || c < 0 || c >= 256 ) return c; return _PL_rd->char_conversion_table[c]; } #define getchr() getchr__(_PL_rd) #define getchrq() Sgetcode(rb.stream) #define ensure_space(c) { if ( something_read && \ (c == '\n' || !isBlank(rb.here[-1])) ) \ addToBuffer(c, _PL_rd); \ } #define set_start_line { if ( !something_read ) \ { setCurrentSourceLocation(_PL_rd PASS_LD); \ something_read++; \ } \ } #ifdef O_QUASIQUOTATIONS /** '$qq_open'(+QQRange, -Stream) is det. Opens a quasi-quoted memory range. @arg QQRange is a term '$quasi_quotation'(ReadData, Start, Length) @arg Stream is a UTF-8 encoded string, whose position indication reflects the location in the real file. */ static PRED_IMPL("$qq_open", 2, qq_open, 0) { PRED_LD if ( PL_is_functor(A1, FUNCTOR_dquasi_quotation3) ) { void *ptr; char * start; size_t len; term_t arg = PL_new_term_ref(); IOSTREAM *s; if ( PL_get_arg(1, A1, arg) && PL_get_pointer_ex(arg, &ptr) && PL_get_arg(2, A1, arg) && PL_get_intptr(arg, (intptr_t *)&start) && PL_get_arg(3, A1, arg) && PL_get_intptr(arg, (intptr_t *)&len) ) { //source_location pos; if ( (s=Sopenmem(&start, &len, "r")) ) s->encoding = ENC_UTF8; return PL_unify_stream(A2, s); } } else PL_type_error("read_context", A1); return FALSE; } static int parse_quasi_quotations(ReadData _PL_rd ARG_LD) { if ( _PL_rd->qq_tail ) { term_t av; int rc; if ( !PL_unify_nil(_PL_rd->qq_tail) ) return FALSE; if ( !_PL_rd->quasi_quotations ) { if ( (av = PL_new_term_refs(2)) && PL_put_term(av+0, _PL_rd->qq) && #if __YAP_PROLOG__ PL_put_atom(av+1, YAP_SWIAtomFromAtom(_PL_rd->module->AtomOfME)) && #else PL_put_atom(av+1, _PL_rd->module->name) && #endif PL_cons_functor_v(av, FUNCTOR_dparse_quasi_quotations2, av) ) { term_t ex; rc = callProlog(MODULE_system, av+0, PL_Q_CATCH_EXCEPTION, &ex); if ( rc ) return TRUE; _PL_rd->exception = ex; _PL_rd->has_exception = TRUE; } return FALSE; } else return TRUE; } else if ( _PL_rd->quasi_quotations ) /* user option, but no quotes */ { return PL_unify_nil(_PL_rd->quasi_quotations); } else return TRUE; } #endif /*O_QUASIQUOTATIONS*/ #define rawSyntaxError(what) { addToBuffer(EOS, _PL_rd); \ rdbase = rb.base, last_token_start = rb.here-1; \ syntaxError(what, _PL_rd); \ } static int raw_read_quoted(int q, ReadData _PL_rd) { int newlines = 0; int c; addToBuffer(q, _PL_rd); while((c=getchrq()) != EOF && c != q) { if ( c == '\\' && DO_CHARESCAPE ) { int base; addToBuffer(c, _PL_rd); switch( (c=getchrq()) ) { case EOF: goto eofinstr; case 'u': /* \uXXXX */ case 'U': /* \UXXXXXXXX */ addToBuffer(c, _PL_rd); continue; case 'x': /* \xNN\ */ addToBuffer(c, _PL_rd); c = getchrq(); if ( c == EOF ) goto eofinstr; if ( digitValue(16, c) >= 0 ) { base = 16; addToBuffer(c, _PL_rd); xdigits: c = getchrq(); while( digitValue(base, c) >= 0 ) { addToBuffer(c, _PL_rd); c = getchrq(); } } if ( c == EOF ) goto eofinstr; addToBuffer(c, _PL_rd); if ( c == q ) return TRUE; continue; default: addToBuffer(c, _PL_rd); if ( digitValue(8, c) >= 0 ) /* \NNN\ */ { base = 8; goto xdigits; } else if ( c == '\n' ) /* \ */ { c = getchrq(); if ( c == EOF ) goto eofinstr; addToBuffer(c, _PL_rd); if ( c == q ) return TRUE; } continue; /* \symbolic-control-char */ } } else if (c == '\n' && newlines++ > MAXNEWLINES && (_PL_rd->styleCheck & LONGATOM_CHECK)) { rawSyntaxError("long_string"); } addToBuffer(c, _PL_rd); } if (c == EOF) { eofinstr: rawSyntaxError("end_of_file_in_string"); } addToBuffer(c, _PL_rd); return TRUE; } static int add_comment(Buffer b, IOPOS *pos, ReadData _PL_rd ARG_LD) { term_t head = PL_new_term_ref(); assert(_PL_rd->comments); if ( !PL_unify_list(_PL_rd->comments, head, _PL_rd->comments) ) return FALSE; if ( pos ) { if ( !PL_unify_term(head, PL_FUNCTOR, FUNCTOR_minus2, PL_FUNCTOR, FUNCTOR_stream_position4, PL_INT64, pos->charno, PL_INT, pos->lineno, PL_INT, pos->linepos, PL_INT, 0, PL_UTF8_STRING, baseBuffer(b, char)) ) return FALSE; } else { if ( !PL_unify_term(head, PL_FUNCTOR, FUNCTOR_minus2, ATOM_minus, PL_UTF8_STRING, baseBuffer(b, char)) ) return FALSE; } PL_reset_term_refs(head); return TRUE; } static void setErrorLocation(IOPOS *pos, ReadData _PL_rd) { if ( pos ) { GET_LD source_char_no = pos->charno; source_line_pos = pos->linepos; source_line_no = pos->lineno; } rb.here = rb.base+1; /* see rawSyntaxError() */ } static unsigned char * raw_read2(ReadData _PL_rd ARG_LD) { int c; bool something_read = FALSE; bool dotseen = FALSE; IOPOS pbuf; /* comment start */ IOPOS *pos; clearBuffer(_PL_rd); /* clear input buffer */ _PL_rd->strictness = truePrologFlag(PLFLAG_ISO); source_line_no = -1; for(;;) { c = getchr(); handle_c: switch(c) { case EOF: if ( isStringStream(rb.stream) ) /* do not require '. ' when */ { addToBuffer(' ', _PL_rd); /* reading from a string */ addToBuffer('.', _PL_rd); addToBuffer(' ', _PL_rd); addToBuffer(EOS, _PL_rd); return rb.base; } if (something_read) { if ( dotseen ) /* term. */ { if ( rb.here - rb.base == 1 ) rawSyntaxError("end_of_clause"); ensure_space(' '); addToBuffer(EOS, _PL_rd); return rb.base; } rawSyntaxError("end_of_file"); } if ( Sfpasteof(rb.stream) ) { term_t stream; LD->exception.processing = TRUE; stream = PL_new_term_ref(); PL_unify_stream_or_alias(stream, rb.stream); PL_error(NULL, 0, NULL, ERR_PERMISSION, ATOM_input, ATOM_past_end_of_stream, stream); return NULL; } set_start_line; strcpy((char *)rb.base, "end_of_file. "); rb.here = rb.base + 14; return rb.base; case '/': if ( rb.stream->position ) { pbuf = *rb.stream->position; pbuf.charno--; pbuf.linepos--; pos = &pbuf; } else pos = NULL; c = getchr(); if ( c == '*' ) { int last; int level = 1; union { tmp_buffer ctmpbuf; buffer tmpbuf; } u; Buffer cbuf; if ( _PL_rd->comments ) { initBuffer(&u.ctmpbuf); cbuf = &u.tmpbuf; addUTF8Buffer(cbuf, '/'); addUTF8Buffer(cbuf, '*'); } else { cbuf = NULL; } if ((last = getchr()) == EOF) { if ( cbuf ) discardBuffer(cbuf); setErrorLocation(pos, _PL_rd); rawSyntaxError("end_of_file_in_block_comment"); } if ( cbuf ) addUTF8Buffer(cbuf, last); if ( something_read ) { addToBuffer(' ', _PL_rd); /* positions */ addToBuffer(' ', _PL_rd); addToBuffer(last == '\n' ? last : ' ', _PL_rd); } for(;;) { c = getchr(); if ( cbuf ) addUTF8Buffer(cbuf, c); switch( c ) { case EOF: if ( cbuf ) discardBuffer(cbuf); setErrorLocation(pos, _PL_rd); rawSyntaxError("end_of_file_in_block_comment"); #ifndef __YAP_PROLOG__ /* YAP does not support comment levels in original scanner */ case '*': if ( last == '/' ) level++; break; #endif case '/': if ( last == '*' && (--level == 0 || _PL_rd->strictness) ) { if ( cbuf ) { addUTF8Buffer(cbuf, EOS); if ( !add_comment(cbuf, pos, _PL_rd PASS_LD) ) { discardBuffer(cbuf); return FALSE; } discardBuffer(cbuf); } c = ' '; goto handle_c; } break; } if ( something_read ) addToBuffer(c == '\n' ? c : ' ', _PL_rd); last = c; } } else { set_start_line; addToBuffer('/', _PL_rd); if ( isSymbolW(c) ) { while( c != EOF && isSymbolW(c) && !(c == '`' && _PL_rd->backquoted_string) ) { addToBuffer(c, _PL_rd); c = getchr(); } } dotseen = FALSE; goto handle_c; } case '%': if ( something_read ) addToBuffer(' ', _PL_rd); if ( _PL_rd->comments ) { union { tmp_buffer ctmpbuf; buffer uctmpbuf; } u; Buffer cbuf; if ( rb.stream->position ) { pbuf = *rb.stream->position; pbuf.charno--; pbuf.linepos--; pos = &pbuf; } else pos = NULL; initBuffer(&u.ctmpbuf); cbuf = (Buffer)&u.uctmpbuf; addUTF8Buffer(cbuf, '%'); for(;;) { while((c=getchr()) != EOF && c != '\n') { addUTF8Buffer(cbuf, c); if ( something_read ) /* record positions */ addToBuffer(' ', _PL_rd); } if ( c == '\n' ) { int c2 = Speekcode(rb.stream); if ( c2 == '%' ) { if ( something_read ) { addToBuffer(c, _PL_rd); addToBuffer(' ', _PL_rd); } addUTF8Buffer(cbuf, c); c = Sgetcode(rb.stream); assert(c==c2); addUTF8Buffer(cbuf, c); continue; } } break; } addUTF8Buffer(cbuf, EOS); if ( !add_comment(cbuf, pos, _PL_rd PASS_LD) ) { discardBuffer(cbuf); return FALSE; } discardBuffer(cbuf); } else { while((c=getchr()) != EOF && c != '\n') { if ( something_read ) /* record positions */ addToBuffer(' ', _PL_rd); } } goto handle_c; /* is the newline */ case '\'': if ( rb.here > rb.base && isDigit(rb.here[-1]) ) { cucharp bs = &rb.here[-1]; if ( bs > rb.base && isDigit(bs[-1]) ) bs--; if ( bs > rb.base && isSign(bs[-1]) ) bs--; if ( bs == rb.base || !PlIdContW(bs[-1]) ) { int base; if ( isSign(bs[0]) ) bs++; base = atoi((char*)bs); if ( base <= 36 ) { if ( base == 0 ) /* 0' */ { addToBuffer(c, _PL_rd); { if ( (c=getchr()) != EOF ) { addToBuffer(c, _PL_rd); if ( c == '\\' ) /* 0'\ */ { if ( (c=getchr()) != EOF ) addToBuffer(c, _PL_rd); } else if ( c == '\'' ) /* 0'' */ { if ( (c=getchr()) != EOF ) { if ( c == '\'' ) addToBuffer(c, _PL_rd); else goto handle_c; } } break; } rawSyntaxError("end_of_file"); } } else { int c2 = Speekcode(rb.stream); if ( c2 != EOF ) { if ( digitValue(base, c2) >= 0 ) { addToBuffer(c, _PL_rd); c = Sgetcode(rb.stream); addToBuffer(c, _PL_rd); dotseen = FALSE; break; } goto sqatom; } rawSyntaxError("end_of_file"); } } } } sqatom: set_start_line; if ( !raw_read_quoted(c, _PL_rd) ) fail; dotseen = FALSE; break; case '"': set_start_line; if ( !raw_read_quoted(c, _PL_rd) ) fail; dotseen = FALSE; break; case '.': addToBuffer(c, _PL_rd); set_start_line; dotseen++; c = getchr(); if ( isSymbolW(c) ) { while( c != EOF && isSymbolW(c) && !(c == '`' && _PL_rd->backquoted_string) ) { addToBuffer(c, _PL_rd); c = getchr(); } dotseen = FALSE; } goto handle_c; case '`': if ( _PL_rd->backquoted_string ) { set_start_line; if ( !raw_read_quoted(c, _PL_rd) ) fail; dotseen = FALSE; break; } /*FALLTHROUGH*/ default: if ( c < 0xff ) { switch(_PL_char_types[c]) { case SP: case CT: blank: if ( dotseen ) { if ( rb.here - rb.base == 1 ) rawSyntaxError("end_of_clause"); ensure_space(c); addToBuffer(EOS, _PL_rd); return rb.base; } do { if ( something_read ) /* positions, \0 --> ' ' */ addToBuffer(c ? c : ' ', _PL_rd); else ensure_space(c); c = getchr(); } while( c != EOF && PlBlankW(c) ); goto handle_c; case SY: set_start_line; do { addToBuffer(c, _PL_rd); c = getchr(); if ( c == '`' && _PL_rd->backquoted_string ) break; } while( c != EOF && c <= 0xff && isSymbol(c) ); /* TBD: wide symbols? */ dotseen = FALSE; goto handle_c; case LC: case UC: set_start_line; do { addToBuffer(c, _PL_rd); c = getchr(); } while( c != EOF && PlIdContW(c) ); dotseen = FALSE; goto handle_c; default: addToBuffer(c, _PL_rd); dotseen = FALSE; set_start_line; } } else /* > 255 */ { if ( PlIdStartW(c) ) { set_start_line; do { addToBuffer(c, _PL_rd); c = getchr(); } while( c != EOF && PlIdContW(c) ); dotseen = FALSE; goto handle_c; } else if ( PlBlankW(c) ) { goto blank; } else { addToBuffer(c, _PL_rd); dotseen = FALSE; set_start_line; } } } } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Raw reading returns a string in UTF-8 notation of the a Prolog term. Comment inside the term is replaced by spaces or newline to ensure proper reconstruction of source locations. Comment before the term is skipped. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static unsigned char * raw_read(ReadData _PL_rd, unsigned char **endp ARG_LD) { unsigned char *s; if ( (rb.stream->flags & SIO_ISATTY) && Sfileno(rb.stream) >= 0 ) { ttybuf tab; PushTty(rb.stream, &tab, TTY_SAVE); /* make sure tty is sane */ PopTty(rb.stream, &ttytab, FALSE); s = raw_read2(_PL_rd PASS_LD); PopTty(rb.stream, &tab, TRUE); } else { s = raw_read2(_PL_rd PASS_LD); } if ( endp ) *endp = _PL_rd->_rb.here; return s; } static void callCommentHook(term_t comments, term_t tpos, term_t term) { GET_LD fid_t fid; term_t av; if ( (fid = PL_open_foreign_frame()) && (av = PL_new_term_refs(3)) ) { qid_t qid; PL_put_term(av+0, comments); PL_put_term(av+1, tpos); PL_put_term(av+2, term); if ( (qid = PL_open_query(NULL, PL_Q_NODEBUG|PL_Q_CATCH_EXCEPTION, (predicate_t)PredCommentHook, av)) ) { term_t ex; if ( !PL_next_solution(qid) && (ex=PL_exception(qid)) ) printMessage(ATOM_error, PL_TERM, ex); PL_close_query(qid); } PL_discard_foreign_frame(fid); } } /******************************** * PROLOG CONNECTION * *********************************/ static unsigned char * backSkipUTF8(unsigned const char *start, unsigned const char *end, int *chr) { const unsigned char *s; for(s=end-1 ; s>start && ( *s&0x80 ); s--) ; utf8_get_char((char*)s, chr); return (unsigned char *)s; } static unsigned char * backSkipBlanks(const unsigned char *start, const unsigned char *end) { const unsigned char *s; for( ; end > start; end = s) { unsigned char *e; int chr; for(s=end-1 ; s>start && ISUTF8_CB(*s); s--) ; e = (unsigned char*)utf8_get_char((char*)s, &chr); assert(e == end); if ( !PlBlankW(chr) ) return (unsigned char*)end; } return (unsigned char *)start; } static inline ucharp skipSpaces(cucharp in) { int chr; ucharp s; for( ; *in; in=s) { s = utf8_get_uchar(in, &chr); if ( !PlBlankW(chr) ) return (ucharp)in; } return (ucharp)in; } word pl_raw_read2(term_t from, term_t term) { GET_LD unsigned char *s, *e, *t2, *top; read_data rd; word rval; IOSTREAM *in; int chr; PL_chars_t txt; if ( !getTextInputStream(from, &in) ) fail; init_read_data(&rd, in PASS_LD); if ( !(s = raw_read(&rd, &e PASS_LD)) ) { rval = PL_raise_exception(rd.exception); goto out; } /* strip the input from blanks */ top = backSkipBlanks(s, e-1); t2 = backSkipUTF8(s, top, &chr); if ( chr == '.' ) top = backSkipBlanks(s, t2); /* watch for "0' ." */ if ( top < e && top-2 >= s && top[-1] == '\'' && top[-2] == '0' ) top++; *top = EOS; s = skipSpaces(s); txt.text.t = (char*)s; txt.length = top-s; txt.storage = PL_CHARS_HEAP; txt.encoding = ENC_UTF8; txt.canonical = FALSE; rval = PL_unify_text(term, 0, &txt, PL_ATOM); LD->read_varnames = rd.varnames; out: free_read_data(&rd); if ( Sferror(in) ) return streamStatus(in); else PL_release_stream(in); return rval; } static int unify_read_term_position(term_t tpos ARG_LD) { if ( tpos && source_line_no > 0 ) { return PL_unify_term(tpos, PL_FUNCTOR, FUNCTOR_stream_position4, PL_INT64, source_char_no, PL_INT, source_line_no, PL_INT, source_line_pos, PL_INT64, source_byte_no); } else { return TRUE; } } static const opt_spec read_clause_options[] = { { ATOM_variable_names, OPT_TERM }, { ATOM_term_position, OPT_TERM }, { ATOM_subterm_positions, OPT_TERM }, { ATOM_process_comment, OPT_BOOL }, { ATOM_comments, OPT_TERM }, { ATOM_syntax_errors, OPT_ATOM }, { NULL_ATOM, 0 } }; /** read_clause(+Stream:stream, -Clause:clause, +Options:list) Like read_term/3, but uses current compiler options. Options: * variable_names(-Names) * process_comment(+Boolean) * comments(-List) * syntax_errors(+Atom) * term_position(-Position) * subterm_positions(-Layout) */ static int read_clause(IOSTREAM *s, term_t term, term_t options ARG_LD) { read_data rd; int rval; fid_t fid; term_t tpos = 0; term_t comments = 0; term_t opt_comments = 0; int process_comment; atom_t syntax_errors = ATOM_dec10; { OPCODE ophook = PredCommentHook->OpcodeOfPred; if (ophook == UNDEF_OPCODE || ophook == FAIL_OPCODE) process_comment = FALSE; else process_comment = TRUE; } if ( !(fid=PL_open_foreign_frame()) ) return FALSE; retry: init_read_data(&rd, s PASS_LD); if ( options && !scan_options(options, 0, ATOM_read_option, read_clause_options, &rd.varnames, &tpos, &rd.subtpos, &process_comment, &opt_comments, &syntax_errors) ) { PL_close_foreign_frame(fid); return FALSE; } if ( opt_comments ) { comments = PL_new_term_ref(); } else if ( process_comment ) { if ( !tpos ) tpos = PL_new_term_ref(); comments = PL_new_term_ref(); } REGS_FROM_LD rd.module = Yap_GetModuleEntry( LOCAL_SourceModule ); if ( comments ) rd.comments = PL_copy_term_ref(comments); rd.on_error = syntax_errors; rd.singles = rd.styleCheck & SINGLETON_CHECK ? 1 : 0; if ( (rval=read_term(term, &rd PASS_LD)) && (!tpos || (rval=unify_read_term_position(tpos PASS_LD))) ) { PredEntry *ap; LD->read_varnames = rd.varnames; if (rd.singles) { // warning, singletons([X=_A],f(X,Y,Z), pos). printMessage(ATOM_warning, PL_FUNCTOR_CHARS, "singletons", 3, PL_TERM, rd.singles, PL_TERM, term, PL_TERM, tpos ); } ap = Yap_PredFromClause( Yap_GetFromSlot(term) PASS_REGS); if (rd.styleCheck & (DISCONTIGUOUS_STYLE|MULTIPLE_CHECK) && ap != NULL ) { if ( rd.styleCheck & (DISCONTIGUOUS_STYLE) && Yap_discontiguous( ap PASS_REGS) ) { printMessage(ATOM_warning, PL_FUNCTOR_CHARS, "discontiguous", 2, PL_TERM, term, PL_TERM, tpos ); } if ( rd.styleCheck & (MULTIPLE_CHECK) && Yap_multiple( ap PASS_REGS) ) { printMessage(ATOM_warning, PL_FUNCTOR_CHARS, "multiple", 3, PL_TERM, term, PL_TERM, tpos, PL_ATOM, YAP_SWIAtomFromAtom(ap->src.OwnerFile) ); } } if ( rd.comments && (rval = PL_unify_nil(rd.comments)) ) { if ( opt_comments ) rval = PL_unify(opt_comments, comments); else if ( !PL_get_nil(comments) ) callCommentHook(comments, tpos, term); } } else { if ( rd.has_exception && reportReadError(&rd) ) { PL_rewind_foreign_frame(fid); free_read_data(&rd); goto retry; } } free_read_data(&rd); return rval; } static PRED_IMPL("read_clause", 3, read_clause, 0) { PRED_LD int rc; IOSTREAM *s; if ( !getTextInputStream(A1, &s) ) return FALSE; rc = read_clause(s, A2, A3 PASS_LD); if ( Sferror(s) ) return streamStatus(s); else PL_release_stream(s); return rc; } word pl_raw_read(term_t term) { return pl_raw_read2(0, term); } static const opt_spec read_term_options[] = { { ATOM_variable_names, OPT_TERM }, { ATOM_variables, OPT_TERM }, { ATOM_singletons, OPT_TERM }, { ATOM_term_position, OPT_TERM }, // { ATOM_subterm_positions, OPT_TERM }, { ATOM_character_escapes, OPT_BOOL }, { ATOM_double_quotes, OPT_ATOM }, { ATOM_module, OPT_ATOM }, { ATOM_syntax_errors, OPT_ATOM }, { ATOM_backquoted_string, OPT_BOOL }, { ATOM_comments, OPT_TERM }, { ATOM_process_comment, OPT_BOOL }, #ifdef O_QUASIQUOTATIONS { ATOM_quasi_quotations, OPT_TERM }, #endif { ATOM_cycles, OPT_BOOL }, { NULL_ATOM, 0 } }; static foreign_t read_term_from_stream(IOSTREAM *s, term_t term, term_t options ARG_LD) { term_t tpos = 0; term_t comments = 0; term_t opt_comments = 0; int process_comment; int rval; atom_t w; read_data rd; int charescapes = -1; atom_t dq = NULL_ATOM; atom_t mname = NULL_ATOM; fid_t fid = PL_open_foreign_frame(); if (!fid) return FALSE; retry: init_read_data(&rd, s PASS_LD); if ( !scan_options(options, 0, ATOM_read_option, read_term_options, &rd.varnames, &rd.variables, &rd.singles, &tpos, // &rd.subtpos, &charescapes, &dq, &mname, &rd.on_error, &rd.backquoted_string, &opt_comments, &process_comment, #ifdef O_QUASIQUOTATIONS &rd.quasi_quotations, #endif &rd.cycles) ) { PL_discard_foreign_frame(fid); free_read_data(&rd); return FALSE; } // yap specific, do not call process comment if undefined if (process_comment) { OPCODE ophook = PredCommentHook->OpcodeOfPred; if (ophook == UNDEF_OPCODE || ophook == FAIL_OPCODE) process_comment = FALSE; } if ( opt_comments ) { comments = PL_new_term_ref(); } else if ( process_comment ) { if ( !tpos ) tpos = PL_new_term_ref(); comments = PL_new_term_ref(); } if ( mname ) { rd.module = lookupModule(mname); rd.flags = rd.module->flags; } if ( charescapes != -1 ) { if ( charescapes ) set(&rd, M_CHARESCAPE); else clear(&rd, M_CHARESCAPE); } if ( dq ) { if ( !setDoubleQuotes(dq, &rd.flags) ) return FALSE; } if ( rd.singles && PL_get_atom(rd.singles, &w) && w == ATOM_warning) rd.singles = 1; if ( comments ) rd.comments = PL_copy_term_ref(comments); rval = read_term(term, &rd PASS_LD); if ( Sferror(s) ) { free_read_data(&rd); return FALSE; } LD->read_varnames = rd.varnames; #ifdef O_QUASIQUOTATIONS if ( rval ) rval = parse_quasi_quotations(&rd PASS_LD); #endif if ( rval ) { if ( tpos ) rval = unify_read_term_position(tpos PASS_LD); if (rval) { if ( opt_comments ) rval = PL_unify(opt_comments, comments); else if (comments && !PL_get_nil(comments) ) callCommentHook(comments, tpos, term); } } else { if ( rd.has_exception && reportReadError(&rd) ) { PL_rewind_foreign_frame(fid); free_read_data(&rd); goto retry; } } free_read_data(&rd); return rval; } /** @pred read_term(+ _Stream_,- _T_,+ _Options_) is iso Reads term _T_ from stream _Stream_ with execution controlled by the same options as read_term/2. */ static PRED_IMPL("read_term", 3, read_term, PL_FA_ISO) { PRED_LD IOSTREAM *s; if ( getTextInputStream(A1, &s) ) { if ( read_term_from_stream(s, A2, A3 PASS_LD) ) return PL_release_stream(s); if ( Sferror(s) ) return streamStatus(s); PL_release_stream(s); return FALSE; } return FALSE; } /** read_term(-Term, +Options) is det. */ /** @pred read_term(- _T_,+ _Options_) is iso Reads term _T_ from the current input stream with execution controlled by the following options: + comments(- _Comments_) Unify _Comments_ with a list of string terms including comments before and within the term. + module( + _Module_) Read term using _Module_ as source module. + quasi_quotations(-List) Unify _List_ with the quasi-quotations present in the term. + term_position(- _Position_) Unify _Position_ with a term describing the position of the stream at the start of parse. Use stream_position_data/3 to obtain extra information. + singletons(- _Names_) Unify _Names_ with a list of the form _Name=Var_, where _Name_ is the name of a non-anonymous singleton variable in the original term, and `Var` is the variable's representation in YAP. The variables occur in left-to-right traversal order. + syntax_errors(+ _Val_) Control action to be taken after syntax errors. See yap_flag/2 for detailed information. + variables(- _Names_) Unify _Names_ with a list of the form _Name=Var_, where _Name_ is the name of a non-anonymous variable in the original term, and _Var_ is the variable's representation in YAP. The variables occur in left-to-right traversal order. */ static PRED_IMPL("read_term", 2, read_term, PL_FA_ISO) { PRED_LD IOSTREAM *s; if ( getTextInputStream(0, &s) ) { if ( read_term_from_stream(s, A1, A2 PASS_LD) ) return PL_release_stream(s); if ( Sferror(s) ) return streamStatus(s); PL_release_stream(s); return FALSE; } return FALSE; } /******************************* * TERM <->ATOM * *******************************/ static int atom_to_term(term_t atom, term_t term, term_t bindings) { GET_LD PL_chars_t txt; if ( !bindings && PL_is_variable(atom) ) /* term_to_atom(+, -) */ { char buf[1024]; size_t bufsize = sizeof(buf); int rval; char *s = buf; IOSTREAM *stream; PL_chars_t txt; stream = Sopenmem(&s, &bufsize, "w"); stream->encoding = ENC_UTF8; PL_write_term(stream, term, 1200, PL_WRT_QUOTED); Sflush(stream); txt.text.t = s; txt.length = bufsize; txt.storage = PL_CHARS_HEAP; txt.encoding = ENC_UTF8; txt.canonical = FALSE; rval = PL_unify_text(atom, 0, &txt, PL_ATOM); Sclose(stream); if ( s != buf ) Sfree(s); return rval; } if ( PL_get_text(atom, &txt, CVT_ALL|CVT_EXCEPTION) ) { GET_LD read_data rd; int rval; IOSTREAM *stream; source_location oldsrc = LD->read_source; stream = Sopen_text(&txt, "r"); init_read_data(&rd, stream PASS_LD); if ( bindings && (PL_is_variable(bindings) || PL_is_list(bindings)) ) rd.varnames = bindings; else if ( bindings ) return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_list, bindings); if ( !(rval = read_term(term, &rd PASS_LD)) && rd.has_exception ) rval = PL_raise_exception(rd.exception); free_read_data(&rd); Sclose(stream); LD->read_source = oldsrc; // getchar(); return rval; } fail; } Term Yap_StringToTerm(const char *s, size_t len, term_t bindings) { GET_LD; read_data rd; int rval; IOSTREAM *stream; source_location oldsrc = LD->read_source; stream = Sopen_string(0, (char *)s, strlen( s ), "r"); init_read_data(&rd, stream PASS_LD); rd.varnames = bindings; term_t tt = Yap_NewSlots(1); if ( !(rval = read_term(tt, &rd PASS_LD)) && rd.has_exception ) { rval = PL_raise_exception(rd.exception); return 0L; } free_read_data(&rd); Sclose(stream); LD->read_source = oldsrc; // getchar(); return Yap_GetFromSlot( tt); } /** @pred atom_to_term(+ _Atom_, - _Term_, - _Bindings_) Use _Atom_ as input to read_term/2 using the option `variable_names` and return the read term in _Term_ and the variable bindings in _Bindings_. _Bindings_ is a list of `Name = Var` couples, thus providing access to the actual variable names. See also read_term/2. If Atom has no valid syntax, a syntax_error exception is raised. */ static PRED_IMPL("atom_to_term", 3, atom_to_term, 0) { return atom_to_term(A1, A2, A3); } static PRED_IMPL("term_to_atom", 2, term_to_atom, 0) { return atom_to_term(A2, A1, 0); } static PRED_IMPL("$context_variables", 1, context_variables, 0) { CACHE_REGS if ( LOCAL_VarNames == (CELL)0 ) return Yap_unify( TermNil, ARG1); return Yap_unify( LOCAL_VarNames, ARG1); } static PRED_IMPL("$set_source", 2, set_source, 0) { GET_LD atom_t at; term_t a = PL_new_term_ref(); if (!PL_get_atom(A1, &at)) return FALSE; source_file_name = at; if (!PL_get_arg(1, A2, a) || !PL_get_int64(a, &source_char_no) || !PL_get_arg(2, A2, a) || !PL_get_long(a, &source_line_no) || !PL_get_arg(3, A2, a) || !PL_get_long(a, &source_line_pos) || !PL_get_arg(4, A2, a) || !PL_get_int64(a, &source_byte_no) ) { return FALSE; } return TRUE; } int PL_chars_to_term(const char *s, term_t t) { GET_LD read_data rd; int rval; IOSTREAM *stream = Sopen_string(NULL, (char *)s, -1, "r"); source_location oldsrc = LD->read_source; init_read_data(&rd, stream PASS_LD); PL_put_variable(t); if ( !(rval = read_term(t, &rd PASS_LD)) && rd.has_exception ) PL_put_term(t, rd.exception); LOCAL_VarNames = rd.varnames; free_read_data(&rd); Sclose(stream); LD->read_source = oldsrc; return rval; } /******************************* * PUBLISH PREDICATES * *******************************/ BeginPredDefs(read) PRED_DEF("read_term", 3, read_term, PL_FA_ISO) PRED_DEF("read_term", 2, read_term, PL_FA_ISO) PRED_DEF("read_clause", 3, read_clause, 0) PRED_DEF("atom_to_term", 3, atom_to_term, 0) PRED_DEF("term_to_atom", 2, term_to_atom, 0) PRED_DEF("$context_variables", 1, context_variables, 0) PRED_DEF("$set_source", 2, set_source, 0) #ifdef O_QUASIQUOTATIONS PRED_DEF("$qq_open", 2, qq_open, 0) #endif EndPredDefs //! @}