This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/C/scanner.c

1381 lines
35 KiB
C
Raw Blame History

/*************************************************************************
* *
* YAP Prolog *
* *
* Yap Prolog was developed at NCCUP - Universidade do Porto *
* *
* Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003 *
* *
**************************************************************************
* *
* File: %W% %G% *
* Last rev: 22-1-03 *
* mods: *
* comments: Prolog's scanner *
* *
*************************************************************************/
/*
* Description:
*
* This module produces a list of tokens for use by the parser. The calling
* program should supply a routine int nextch(charpos) int *charpos; which,
* when called should produce the next char or -1 if none availlable. The
* scanner will stop producing tokens when it either finds an end of file
* (-1) or a token consisting of just '.' followed by a blank or control
* char. Scanner errors will be signalled by the scanner exiting with a non-
* zero ErrorMsg and ErrorPos. Note that, even in this case, the scanner
* will try to find the end of the term. A function char
* *AllocScannerMemory(nbytes) should be supplied for allocating (temporary)
* space for strings and for the table of prolog variables occurring in the
* term.
*
*/
#include "Yap.h"
#include "Yatom.h"
#include "YapHeap.h"
#include "SWI-Stream.h"
#include "yapio.h"
#include "alloc.h"
#include "eval.h"
#if _MSC_VER || defined(__MINGW32__)
#if HAVE_FINITE==1
#undef HAVE_FINITE
#endif
#include <windows.h>
#endif
#include "iopreds.h"
#if HAVE_STRING_H
#include <string.h>
#endif
#if HAVE_WCTYPE_H
#include <wctype.h>
#endif
/* You just can't trust some machines */
#define my_isxdigit(C,SU,SL) (chtype(C) == NU || (C >= 'A' && \
C <= (SU)) || (C >= 'a' && C <= (SL)))
#define my_isupper(C) ( C >= 'A' && C <= 'Z' )
#define my_islower(C) ( C >= 'a' && C <= 'z' )
STATIC_PROTO(Term float_send, (char *, int));
STATIC_PROTO(Term get_num, (int *, int *, IOSTREAM *,char *,UInt,int));
/* token table with some help from Richard O'Keefe's PD scanner */
static char chtype0[NUMBER_OF_CHARS+1] =
{
EF,
/* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
/* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
/* sp ! " # $ % & ' ( ) * + , - . / */
BS, SL, DC, SY, LC, CC, SY, QT, BK, BK, SY, SY, BK, SY, SY, SY,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, SY, SL, SY, SY, SY, SY,
/* @ A B C D E F G H I J K L M N O */
SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
/* P Q R S T U V W X Y Z [ \ ] ^ _ */
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, BK, SY, BK, SY, UL,
/* ` a b c d e f g h i j k l m n o */
SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
/* p q r s t u v w x y z { | } ~ del */
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, BK, BK, BK, SY, BS,
/* 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 */
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
/* 144 145 <20> 147 148 149 150 151 152 153 154 155 156 157 158 159 */
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
BS, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SY, SY, SY,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
SY, SY, LC, LC, SY, SY, SY, SY, SY, LC, LC, SY, SY, SY, SY, SY,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
#ifdef vms
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, LC,
#else
UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC,
#endif
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> cannot write the last three because of lcc */
#ifdef vms
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC
#else
LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC
#endif
};
char *Yap_chtype = chtype0+1;
int
Yap_wide_chtype(Int ch) {
#if HAVE_WCTYPE_H
if (iswalnum(ch)) {
if (iswlower(ch)) return LC;
if (iswdigit(ch)) return NU;
return UC;
}
if (iswpunct(ch)) return SY;
#endif
return BS;
}
static inline int
getchr__(IOSTREAM *inp)
{ int c = Sgetcode(inp);
if ( !CharConversionTable || c < 0 || c >= 256 )
return c;
return CharConversionTable[c];
}
#define getchr(inp) getchr__(inp)
#define getchrq(inp) Sgetcode(inp)
EXTERN inline int
GetCurInpPos (IOSTREAM *inp_stream)
{
return inp_stream->posbuf.lineno;
}
/* in case there is an overflow */
typedef struct scanner_extra_alloc {
struct scanner_extra_alloc *next;
void *filler;
} ScannerExtraBlock;
static char *
AllocScannerMemory(unsigned int size)
{
CACHE_REGS
char *AuxSpScan;
AuxSpScan = LOCAL_ScannerStack;
size = AdjustSize(size);
if (LOCAL_ScannerExtraBlocks) {
struct scanner_extra_alloc *ptr;
if (!(ptr = (struct scanner_extra_alloc *)malloc(size+sizeof(ScannerExtraBlock)))) {
return NULL;
}
ptr->next = LOCAL_ScannerExtraBlocks;
LOCAL_ScannerExtraBlocks = ptr;
return (char *)(ptr+1);
} else if (LOCAL_TrailTop <= AuxSpScan+size) {
UInt alloc_size = sizeof(CELL) * K16;
if (size > alloc_size)
alloc_size = size;
if(!Yap_growtrail(alloc_size, TRUE)) {
struct scanner_extra_alloc *ptr;
if (!(ptr = (struct scanner_extra_alloc *)malloc(size+sizeof(ScannerExtraBlock)))) {
return NULL;
}
ptr->next = LOCAL_ScannerExtraBlocks;
LOCAL_ScannerExtraBlocks = ptr;
return (char *)(ptr+1);
}
}
LOCAL_ScannerStack = AuxSpScan+size;
return AuxSpScan;
}
static void
PopScannerMemory(char *block, unsigned int size)
{
CACHE_REGS
if (block == LOCAL_ScannerStack-size) {
LOCAL_ScannerStack -= size;
} else if (block == (char *)(LOCAL_ScannerExtraBlocks+1)) {
struct scanner_extra_alloc *ptr = LOCAL_ScannerExtraBlocks;
LOCAL_ScannerExtraBlocks = ptr->next;
free(ptr);
}
}
char *
Yap_AllocScannerMemory(unsigned int size)
{
/* I assume memory has been initialised */
return AllocScannerMemory(size);
}
extern double atof(const char *);
static Term
float_send(char *s, int sign)
{
Float f = (Float)atof(s);
#if HAVE_FINITE
if (yap_flags[LANGUAGE_MODE_FLAG] == 1) { /* iso */
if (!finite(f)) {
CACHE_REGS
LOCAL_ErrorMessage = "Float overflow while scanning";
return(MkEvalFl(0.0));
}
}
#endif
return (MkEvalFl(f*sign));
}
/* we have an overflow at s */
static Term
read_int_overflow(const char *s, Int base, Int val, int sign)
{
#ifdef USE_GMP
/* try to scan it as a bignum */
mpz_t new;
Term t;
mpz_init_set_str (new, s, base);
if (sign < 0)
mpz_neg(new, new);
t = Yap_MkBigIntTerm(new);
mpz_clear(new);
return t;
#else
/* try to scan it as a float */
return MkIntegerTerm(val);
#endif
}
static int
send_error_message(char s[])
{
CACHE_REGS
LOCAL_ErrorMessage = s;
return 0;
}
static wchar_t
read_quoted_char(int *scan_nextp, IOSTREAM *inp_stream)
{
int ch;
/* escape sequence */
restart:
ch = getchrq(inp_stream);
switch (ch) {
case 10:
do {
ch = getchrq(inp_stream);
if (ch == '\\') goto restart;
if (chtype(ch) != BS || ch == 10) {
return ch;
}
} while (TRUE);
case 'a':
return '\a';
case 'b':
return '\b';
case 'c':
if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
return send_error_message("invalid escape sequence \\c");
} else {
/* sicstus */
ch = getchrq(inp_stream);
if (chtype(ch) == SL) {
goto restart;
} else {
return 'c';
}
}
case 'd':
return 127;
case 'e':
return '\x1B'; /* <ESC>, a.k.a. \e */
case 'f':
return '\f';
case 'n':
return '\n';
case 'r':
return '\r';
case 's': /* space */
if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
return send_error_message("invalid escape sequence \\s");
} else
return ' ';
case 't':
return '\t';
case 'u':
{
int i;
wchar_t wc='\0';
for (i=0; i< 4; i++) {
ch = getchrq(inp_stream);
if (ch>='0' && ch <= '9') {
wc += (ch-'0')<<((3-i)*4);
} else if (ch>='a' && ch <= 'f') {
wc += ((ch-'a')+10)<<((3-i)*4);
} else if (ch>='A' && ch <= 'F') {
wc += ((ch-'A')+10)<<((3-i)*4);
} else {
return send_error_message("invalid escape sequence");
}
}
return wc;
}
case 'U':
{
int i;
wchar_t wc='\0';
for (i=0; i< 8; i++) {
ch = getchrq(inp_stream);
if (ch>='0' && ch <= '9') {
wc += (ch-'0')<<((7-i)*4);
} else if (ch>='a' && ch <= 'f') {
wc += ((ch-'a')+10)<<((7-i)*4);
} else if (ch>='A' && ch <= 'F') {
wc += ((ch-'A')+10)<<((7-i)*4);
} else {
return send_error_message("invalid escape sequence");
}
}
return wc;
}
case 'v':
return '\v';
case 'z': /* Prolog end-of-file */
if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
return send_error_message("invalid escape sequence \\z");
} else
return -1;
case '\\':
return '\\';
case '\'':
return '\'';
case '"':
return '"';
case '`':
return '`';
case '^':
if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
return send_error_message("invalid escape sequence");
} else {
ch = getchrq(inp_stream);
if (ch == '?') {/* delete character */
return 127;
} else if (ch >= 'a' && ch < 'z') {/* hexa */
return ch - 'a';
} else if (ch >= 'A' && ch < 'Z') {/* hexa */
return ch - 'A';
} else {
return '^';
}
}
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
/* character in octal: maximum of 3 digits, terminates with \ */
/* follow ISO */
if (TRUE || yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
unsigned char so_far = ch-'0';
ch = getchrq(inp_stream);
if (ch >= '0' && ch < '8') {/* octal */
so_far = so_far*8+(ch-'0');
ch = getchrq(inp_stream);
if (ch >= '0' && ch < '8') { /* octal */
so_far = so_far*8+(ch-'0');
ch = getchrq(inp_stream);
if (ch != '\\') {
return send_error_message("invalid octal escape sequence");
}
return so_far;
} else if (ch == '\\') {
return so_far;
} else {
return send_error_message("invalid octal escape sequence");
}
} else if (ch == '\\') {
return so_far;
} else {
return send_error_message("invalid octal escape sequence");
}
} else {
/* sicstus */
unsigned char so_far = ch-'0';
ch = getchrq(inp_stream);
if (ch >= '0' && ch < '8') {/* octal */
so_far = so_far*8+(ch-'0');
ch = getchrq(inp_stream);
if (ch >= '0' && ch < '8') { /* octal */
return so_far*8+(ch-'0');
} else {
*scan_nextp = FALSE;
return so_far;
}
} else {
*scan_nextp = FALSE;
return so_far;
}
}
case 'x':
/* hexadecimal character (YAP allows empty hexadecimal */
if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
unsigned char so_far = 0;
ch = getchrq(inp_stream);
if (my_isxdigit(ch,'f','F')) {/* hexa */
so_far = so_far * 16 + (chtype(ch) == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
ch = getchrq(inp_stream);
if (my_isxdigit(ch,'f','F')) { /* hexa */
so_far = so_far * 16 + (chtype(ch) == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
ch = getchrq(inp_stream);
if (ch == '\\') {
return so_far;
} else {
return send_error_message("invalid hexadecimal escape sequence");
}
} else if (ch == '\\') {
return so_far;
} else {
return send_error_message("invalid hexadecimal escape sequence");
}
} else if (ch == '\\') {
return so_far;
} else {
return send_error_message("invalid hexadecimal escape sequence");
}
} else {
/* sicstus mode */
unsigned char so_far = 0;
ch = getchrq(inp_stream);
so_far = (chtype(ch) == NU ? ch - '0' :
my_isupper(ch) ? ch - 'A' + 10 :
my_islower(ch) ? ch - 'a' +10 : 0);
ch = getchrq(inp_stream);
return so_far*16 + (chtype(ch) == NU ? ch - '0' :
my_isupper(ch) ? ch - 'A' +10 :
my_islower(ch) ? ch - 'a' + 10 : 0);
}
default:
/* accept sequence. Note that the ISO standard does not
consider this sequence legal, whereas SICStus would
eat up the escape sequence. */
if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
return send_error_message("invalid escape sequence");
} else {
/* sicstus */
if (chtype(ch) == SL) {
goto restart;
} else {
return ch;
}
}
}
}
static int
num_send_error_message(char s[])
{
CACHE_REGS
LOCAL_ErrorMessage = s;
return TermNil;
}
/* reads a number, either integer or float */
static Term
get_num(int *chp, int *chbuffp, IOSTREAM *inp_stream, char *s, UInt max_size, int sign)
{
char *sp = s;
int ch = *chp;
Int val = 0L, base = ch - '0';
int might_be_float = TRUE, has_overflow = FALSE;
*sp++ = ch;
ch = getchr(inp_stream);
/*
* because of things like 00'2, 03'2 and even better 12'2, I need to
* do this (have mercy)
*/
if (chtype(ch) == NU) {
*sp++ = ch;
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
base = 10 * base + ch - '0';
ch = getchr(inp_stream);
}
if (ch == '\'') {
if (base > 36) {
return num_send_error_message("Admissible bases are 0..36");
}
might_be_float = FALSE;
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = ch;
ch = getchr(inp_stream);
if (base == 0) {
wchar_t ascii = ch;
int scan_extra = TRUE;
if (ch == '\\' &&
yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
ascii = read_quoted_char(&scan_extra, inp_stream);
}
/* a quick way to represent ASCII */
if (scan_extra)
*chp = getchr(inp_stream);
if (sign == -1) {
return MkIntegerTerm(-ascii);
}
return MkIntegerTerm(ascii);
} else if (base >= 10 && base <= 36) {
int upper_case = 'A' - 11 + base;
int lower_case = 'a' - 11 + base;
while (my_isxdigit(ch, upper_case, lower_case)) {
Int oval = val;
int chval = (chtype(ch) == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = ch;
val = oval * base + chval;
if (oval != (val-chval)/base) /* overflow */
has_overflow = (has_overflow || TRUE);
ch = getchr(inp_stream);
}
}
} else if (ch == 'x' && base == 0) {
might_be_float = FALSE;
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = ch;
ch = getchr(inp_stream);
while (my_isxdigit(ch, 'F', 'f')) {
Int oval = val;
int chval = (chtype(ch) == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = ch;
val = val * 16 + chval;
if (oval != (val-chval)/16) /* overflow */
has_overflow = TRUE;
ch = getchr(inp_stream);
}
*chp = ch;
}
else if (ch == 'o' && base == 0) {
might_be_float = FALSE;
base = 8;
ch = getchr(inp_stream);
} else if (ch == 'b' && base == 0) {
might_be_float = FALSE;
base = 2;
ch = getchr(inp_stream);
} else {
val = base;
base = 10;
}
while (chtype(ch) == NU) {
Int oval = val;
if (!(val == 0 && ch == '0') || has_overflow) {
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = ch;
}
if (ch - '0' >= base) {
if (sign == -1)
return MkIntegerTerm(-val);
return MkIntegerTerm(val);
}
val = val * base + ch - '0';
if (val/base != oval || val -oval*base != ch-'0') /* overflow */
has_overflow = TRUE;
ch = getchr(inp_stream);
}
if (might_be_float && ( ch == '.' || ch == 'e' || ch == 'E')) {
if (yap_flags[STRICT_ISO_FLAG] && (ch == 'e' || ch == 'E')) {
return num_send_error_message("Float format not allowed in ISO mode");
}
if (ch == '.') {
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = '.';
if (chtype(ch = getchr(inp_stream)) != NU) {
*chbuffp = '.';
*chp = ch;
*--sp = '\0';
if (has_overflow)
return read_int_overflow(s,base,val,sign);
if (sign == -1)
return MkIntegerTerm(-val);
return MkIntegerTerm(val);
}
do {
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = ch;
}
while (chtype(ch = getchr(inp_stream)) == NU);
}
if (ch == 'e' || ch == 'E') {
char *sp0 = sp;
char cbuff = ch;
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = ch;
ch = getchr(inp_stream);
if (ch == '-') {
cbuff = '-';
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = '-';
ch = getchr(inp_stream);
} else if (ch == '+') {
cbuff = '+';
ch = getchr(inp_stream);
}
if (chtype(ch) != NU) {
/* error */
char *sp;
*chp = ch;
*chbuffp = cbuff;
*sp0 = '\0';
for (sp = s; sp < sp0; sp++) {
if (*sp == '.')
return float_send(s,sign);
}
return MkIntegerTerm(sign*val);
}
do {
if (--max_size == 0) {
return num_send_error_message("Number Too Long");
}
*sp++ = ch;
} while (chtype(ch = getchr(inp_stream)) == NU);
}
*sp = '\0';
*chp = ch;
return float_send(s,sign);
} else if (has_overflow) {
*sp = '\0';
/* skip base */
*chp = ch;
if (s[0] == '0' && s[1] == 'x')
return read_int_overflow(s+2,16,val,sign);
else if (s[0] == '0' && s[1] == 'o')
return read_int_overflow(s+2,8,val,sign);
else if (s[0] == '0' && s[1] == 'b')
return read_int_overflow(s+2,2,val,sign);
if (s[1] == '\'')
return read_int_overflow(s+2,base,val,sign);
if (s[2] == '\'')
return read_int_overflow(s+3,base,val,sign);
return read_int_overflow(s,base,val,sign);
} else {
*chp = ch;
return MkIntegerTerm(val*sign);
}
}
/* given a function getchr scan until we either find the number
or end of file */
Term
Yap_scan_num(IOSTREAM *inp)
{
CACHE_REGS
Term out;
int sign = 1;
int ch, cherr;
char *ptr;
LOCAL_ErrorMessage = NULL;
LOCAL_ScannerStack = (char *)TR;
LOCAL_ScannerExtraBlocks = NULL;
if (!(ptr = AllocScannerMemory(4096))) {
LOCAL_ErrorMessage = "Trail Overflow";
LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR;
return TermNil;
}
ch = getchr(inp);
while (chtype(ch) == BS) {
ch = getchr(inp);
}
if (ch == '-') {
sign = -1;
ch = getchr(inp);
} else if (ch == '+') {
ch = getchr(inp);
}
if (chtype(ch) != NU) {
Yap_clean_tokenizer(NULL, NULL, NULL, 0L);
return TermNil;
}
cherr = '\0';
if (ASP-H < 1024)
return TermNil;
out = get_num(&ch, &cherr, inp, ptr, 4096, sign); /* */
PopScannerMemory(ptr, 4096);
Yap_clean_tokenizer(NULL, NULL, NULL, 0L);
if (LOCAL_ErrorMessage != NULL || ch != -1 || cherr)
return TermNil;
return out;
}
#define CHECK_SPACE() \
if (ASP-H < 1024) { \
LOCAL_ErrorMessage = "Stack Overflow"; \
LOCAL_Error_TYPE = OUT_OF_STACK_ERROR; \
LOCAL_Error_Size = 0L; \
if (p) \
p->Tok = Ord(kind = eot_tok); \
/* serious error now */ \
return l; \
}
static void
open_comment(int ch, IOSTREAM *inp_stream USES_REGS) {
CELL *h0 = H;
H += 5;
h0[0] = AbsAppl(h0+2);
h0[1] = TermNil;
if (!LOCAL_CommentsTail) {
/* first comment */
LOCAL_Comments = AbsPair(h0);
} else {
/* extra comment */
*LOCAL_CommentsTail = AbsPair(h0);
}
LOCAL_CommentsTail = h0+1;
h0 += 2;
h0[0] = (CELL)FunctorMinus;
h0[1] = Yap_StreamPosition(inp_stream);
h0[2] = TermNil;
LOCAL_CommentsNextChar = h0+2;
LOCAL_CommentsBuff = (wchar_t *)malloc(1024*sizeof(wchar_t));
LOCAL_CommentsBuffLim = 1024;
LOCAL_CommentsBuff[0] = ch;
LOCAL_CommentsBuffPos = 1;
}
static void
extend_comment(int ch USES_REGS) {
LOCAL_CommentsBuff[LOCAL_CommentsBuffPos] = ch;
LOCAL_CommentsBuffPos++;
if (LOCAL_CommentsBuffPos == LOCAL_CommentsBuffLim-1) {
LOCAL_CommentsBuff = (wchar_t *)realloc(LOCAL_CommentsBuff,sizeof(wchar_t)*(LOCAL_CommentsBuffLim+4096));
LOCAL_CommentsBuffLim += 4096;
}
}
static void
close_comment( USES_REGS1 ) {
LOCAL_CommentsBuff[LOCAL_CommentsBuffPos] = '\0';
*LOCAL_CommentsNextChar = Yap_MkBlobWideStringTerm(LOCAL_CommentsBuff, LOCAL_CommentsBuffPos);
free(LOCAL_CommentsBuff);
LOCAL_CommentsBuff = NULL;
LOCAL_CommentsBuffLim = 0;
}
static wchar_t *
ch_to_wide(char *base, char *charp)
{
CACHE_REGS
int n = charp-base, i;
wchar_t *nb = (wchar_t *)base;
if ((nb+n) + 1024 > (wchar_t *)AuxSp) {
LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
LOCAL_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
return NULL;
}
for (i=n; i > 0; i--) {
nb[i-1] = (unsigned char)base[i-1];
}
return nb+n;
}
#define add_ch_to_buff(ch) \
if (wcharp) { *wcharp++ = (ch); charp = (char *)wcharp; } \
else { \
if (ch > MAX_ISO_LATIN1 && !wcharp) { \
/* does not fit in ISO-LATIN */ \
wcharp = ch_to_wide(TokImage, charp); \
if (!wcharp) goto huge_var_error; \
*wcharp++ = (ch); charp = (char *)wcharp; \
} else *charp++ = ch; \
}
TokEntry *
Yap_tokenizer(IOSTREAM *inp_stream, int store_comments, Term *tposp)
{
CACHE_REGS
TokEntry *t, *l, *p;
enum TokenKinds kind;
int solo_flag = TRUE;
int ch;
wchar_t *wcharp;
LOCAL_ErrorMessage = NULL;
LOCAL_Error_Size = 0;
LOCAL_VarTable = NULL;
LOCAL_AnonVarTable = NULL;
LOCAL_ScannerStack = (char *)TR;
LOCAL_ScannerExtraBlocks = NULL;
l = NULL;
p = NULL; /* Just to make lint happy */
ch = getchr(inp_stream);
while (chtype(ch) == BS) {
ch = getchr(inp_stream);
}
*tposp = Yap_StreamPosition(inp_stream);
LOCAL_StartLine = inp_stream->posbuf.lineno;
do {
wchar_t och;
int quote, isvar;
char *charp, *mp;
unsigned int len;
char *TokImage = NULL;
t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry));
t->TokNext = NULL;
if (t == NULL) {
LOCAL_ErrorMessage = "Trail Overflow";
LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR;
if (p)
p->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
}
if (!l)
l = t;
else
p->TokNext = t;
p = t;
restart:
while (chtype(ch) == BS) {
ch = getchr(inp_stream);
}
t->TokPos = GetCurInpPos(inp_stream);
switch (chtype(ch)) {
case CC:
if (store_comments) {
CHECK_SPACE();
open_comment(ch, inp_stream PASS_REGS);
continue_comment:
while ((ch = getchr(inp_stream)) != 10 && chtype(ch) != EF) {
CHECK_SPACE();
extend_comment(ch PASS_REGS);
}
CHECK_SPACE();
extend_comment(ch PASS_REGS);
if (chtype(ch) != EF) {
ch = getchr(inp_stream);
if (chtype(ch) == CC) {
extend_comment(ch PASS_REGS);
goto continue_comment;
}
}
close_comment( PASS_REGS1 );
} else {
while ((ch = getchr(inp_stream)) != 10 && chtype(ch) != EF);
}
if (chtype(ch) != EF) {
/* blank space */
if (t == l) {
/* we found a comment before reading characters */
while (chtype(ch) == BS) {
ch = getchr(inp_stream);
}
CHECK_SPACE();
*tposp = Yap_StreamPosition(inp_stream);
}
goto restart;
} else {
t->Tok = Ord(kind = eot_tok);
}
break;
case UC:
case UL:
case LC:
och = ch;
ch = getchr(inp_stream);
scan_name:
TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE;
charp = TokImage;
wcharp = NULL;
isvar = (chtype(och) != LC);
add_ch_to_buff(och);
for (; chtype(ch) <= NU; ch = getchr(inp_stream)) {
if (charp == (char *)AuxSp-1024) {
huge_var_error:
/* huge atom or variable, we are in trouble */
LOCAL_ErrorMessage = "Code Space Overflow due to huge atom";
LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
if (p)
p->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
}
add_ch_to_buff(ch);
}
while (ch == '\'' && isvar && yap_flags[VARS_CAN_HAVE_QUOTE_FLAG]) {
if (charp == (char *)AuxSp-1024) {
goto huge_var_error;
}
add_ch_to_buff(ch);
ch = getchr(inp_stream);
}
add_ch_to_buff('\0');
if (!isvar) {
Atom ae;
/* don't do this in iso */
if (wcharp) {
ae = Yap_LookupWideAtom((wchar_t *)TokImage);
} else {
ae = Yap_LookupAtom(TokImage);
}
if (ae == NIL) {
LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR;
LOCAL_ErrorMessage = "Code Space Overflow";
if (p)
t->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
}
t->TokInfo = Unsigned(ae);
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
if (ch == '(')
solo_flag = FALSE;
t->Tok = Ord(kind = Name_tok);
} else {
t->TokInfo = Unsigned(Yap_LookupVar(TokImage));
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
t->Tok = Ord(kind = Var_tok);
}
break;
case NU:
{
int cherr;
int cha = ch;
char *ptr;
cherr = 0;
if (!(ptr = AllocScannerMemory(4096))) {
LOCAL_ErrorMessage = "Trail Overflow";
LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR;
if (p)
t->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
}
CHECK_SPACE();
if ((t->TokInfo = get_num(&cha,&cherr,inp_stream,ptr,4096,1)) == 0L) {
if (p)
p->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
}
PopScannerMemory(ptr, 4096);
ch = cha;
if (cherr) {
TokEntry *e;
t->Tok = Number_tok;
t->TokPos = GetCurInpPos(inp_stream);
e = (TokEntry *) AllocScannerMemory(sizeof(TokEntry));
if (e == NULL) {
LOCAL_ErrorMessage = "Trail Overflow";
LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR;
if (p)
p->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
} else {
e->TokNext = NULL;
}
t->TokNext = e;
t = e;
p = e;
switch (cherr) {
case 'e':
case 'E':
och = cherr;
goto scan_name;
break;
case '=':
case '_':
/* handle error while parsing a float */
{
TokEntry *e2;
t->Tok = Ord(Var_tok);
t->TokInfo = Unsigned(Yap_LookupVar("E"));
t->TokPos = GetCurInpPos(inp_stream);
e2 = (TokEntry *) AllocScannerMemory(sizeof(TokEntry));
if (e2 == NULL) {
LOCAL_ErrorMessage = "Trail Overflow";
LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR;
if (p)
p->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
} else {
e2->TokNext = NULL;
}
t->TokNext = e2;
t = e2;
p = e2;
if (cherr == '=')
och = '+';
else
och = '-';
}
goto enter_symbol;
case '+':
case '-':
/* handle error while parsing a float */
{
TokEntry *e2;
t->Tok = Name_tok;
if (ch == '(')
solo_flag = FALSE;
t->TokInfo = Unsigned(AtomE);
t->TokPos = GetCurInpPos(inp_stream);
e2 = (TokEntry *) AllocScannerMemory(sizeof(TokEntry));
if (e2 == NULL) {
LOCAL_ErrorMessage = "Trail Overflow";
LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR;
t->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
} else {
e2->TokNext = NULL;
}
t->TokNext = e2;
t = e2;
p = e2;
}
default:
och = cherr;
goto enter_symbol;
}
} else {
t->Tok = Ord(kind = Number_tok);
}
}
break;
case QT:
case DC:
TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE;
charp = TokImage;
quote = ch;
len = 0;
ch = getchrq(inp_stream);
wcharp = NULL;
while (TRUE) {
if (charp + 1024 > (char *)AuxSp) {
LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
LOCAL_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
break;
}
if (ch == 10 && yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
/* in ISO a new line terminates a string */
LOCAL_ErrorMessage = "layout character \n inside quotes";
break;
}
if (ch == quote) {
ch = getchrq(inp_stream);
if (ch != quote)
break;
add_ch_to_buff(ch);
ch = getchrq(inp_stream);
} else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
int scan_next = TRUE;
ch = read_quoted_char(&scan_next, inp_stream);
add_ch_to_buff(ch);
if (scan_next) {
ch = getchrq(inp_stream);
}
} else if (chtype(ch) == EF && ch <= MAX_ISO_LATIN1) {
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
t->Tok = Ord(kind = eot_tok);
break;
} else {
add_ch_to_buff(ch);
ch = getchrq(inp_stream);
}
++len;
if (charp > (char *)AuxSp - 1024) {
/* Not enough space to read in the string. */
LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
LOCAL_ErrorMessage = "not enough space to read in string or quoted atom";
/* serious error now */
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
t->Tok = Ord(kind = eot_tok);
return l;
}
}
if (wcharp) {
*wcharp = '\0';
} else {
*charp = '\0';
}
if (quote == '"') {
if (wcharp) {
mp = AllocScannerMemory(sizeof(wchar_t)*(len+1));
} else {
mp = AllocScannerMemory(len + 1);
}
if (mp == NULL) {
LOCAL_ErrorMessage = "not enough heap space to read in string or quoted atom";
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
t->Tok = Ord(kind = eot_tok);
return l;
}
if (wcharp)
wcscpy((wchar_t *)mp,(wchar_t *)TokImage);
else
strcpy(mp, TokImage);
t->TokInfo = Unsigned(mp);
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
if (wcharp) {
t->Tok = Ord(kind = WString_tok);
} else {
t->Tok = Ord(kind = String_tok);
}
} else {
if (wcharp) {
t->TokInfo = Unsigned(Yap_LookupWideAtom((wchar_t *)TokImage));
} else {
t->TokInfo = Unsigned(Yap_LookupAtom(TokImage));
}
if (!(t->TokInfo)) {
LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR;
LOCAL_ErrorMessage = "Code Space Overflow";
if (p)
t->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
}
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
t->Tok = Ord(kind = Name_tok);
if (ch == '(')
solo_flag = FALSE;
}
break;
case SY:
och = ch;
ch = getchr(inp_stream);
if (och == '/' && ch == '*') {
if (store_comments) {
CHECK_SPACE();
open_comment('/', inp_stream PASS_REGS);
while ((och != '*' || ch != '/') && chtype(ch) != EF) {
och = ch;
CHECK_SPACE();
extend_comment(ch PASS_REGS);
ch = getchr(inp_stream);
}
if (chtype(ch) != EF) {
CHECK_SPACE();
extend_comment(ch PASS_REGS);
}
close_comment( PASS_REGS1 );
} else {
while ((och != '*' || ch != '/') && chtype(ch) != EF) {
och = ch;
ch = getchr(inp_stream);
}
}
if (chtype(ch) == EF) {
t->Tok = Ord(kind = eot_tok);
} else {
/* leave comments */
ch = getchr(inp_stream);
if (t == l) {
/* we found a comment before reading characters */
while (chtype(ch) == BS) {
ch = getchr(inp_stream);
}
CHECK_SPACE();
*tposp = Yap_StreamPosition(inp_stream);
}
}
goto restart;
}
enter_symbol:
if (och == '.' && (chtype(ch) == BS || chtype(ch) == EF
|| chtype(ch) == CC)) {
if (chtype(ch) == CC)
while ((ch = getchr(inp_stream)) != 10 && chtype(ch) != EF);
t->Tok = Ord(kind = eot_tok);
} else {
TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE;
charp = TokImage;
*charp++ = och;
for (; chtype(ch) == SY; ch = getchr(inp_stream))
*charp++ = ch;
*charp = '\0';
t->TokInfo = Unsigned(Yap_LookupAtom(TokImage));
if (t->TokInfo == (CELL)NIL) {
LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR;
LOCAL_ErrorMessage = "Code Space Overflow";
if (p)
t->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
}
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
t->Tok = Ord(kind = Name_tok);
if (ch == '(')
solo_flag = FALSE;
else
solo_flag = TRUE;
}
break;
case SL:
{
char chs[2];
chs[0] = ch;
chs[1] = '\0';
ch = getchr(inp_stream);
t->TokInfo = Unsigned(Yap_LookupAtom(chs));
t->Tok = Ord(kind = Name_tok);
if (ch == '(')
solo_flag = FALSE;
}
break;
case BK:
och = ch;
ch = getchr(inp_stream);
t->TokInfo = och;
if (t->TokInfo == '(' && !solo_flag) {
t->TokInfo = 'l';
solo_flag = TRUE;
} else if (och == '[') {
while (chtype(ch) == BS) { ch = getchr(inp_stream); };
if (ch == ']') {
t->TokInfo = Unsigned(AtomNil);
t->Tok = Ord(kind = Name_tok);
ch = getchr(inp_stream);
solo_flag = FALSE;
break;
}
} else if (och == '{') {
while (chtype(ch) == BS) { ch = getchr(inp_stream); };
if (ch == '}') {
t->TokInfo = Unsigned(AtomBraces);
t->Tok = Ord(kind = Name_tok);
ch = getchr(inp_stream);
solo_flag = FALSE;
break;
}
}
t->Tok = Ord(kind = Ponctuation_tok);
break;
case EF:
t->Tok = Ord(kind = eot_tok);
break;
default:
#ifdef DEBUG
fprintf(GLOBAL_stderr, "\n++++ token: wrong char type %c %d\n", ch, chtype(ch));
#endif
t->Tok = Ord(kind = eot_tok);
}
#ifdef DEBUG
if(GLOBAL_Option[2]) fprintf(GLOBAL_stderr,"[Token %d %ld]",Ord(kind),(unsigned long int)t->TokInfo);
#endif
if (LOCAL_ErrorMessage) {
/* insert an error token to inform the system of what happened */
TokEntry *e = (TokEntry *) AllocScannerMemory(sizeof(TokEntry));
if (e == NULL) {
LOCAL_ErrorMessage = "Trail Overflow";
LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR;
p->Tok = Ord(kind = eot_tok);
/* serious error now */
return l;
}
p->TokNext = e;
e->Tok = Error_tok;
e->TokInfo = MkAtomTerm(Yap_LookupAtom(LOCAL_ErrorMessage));
e->TokPos = GetCurInpPos(inp_stream);
e->TokNext = NULL;
LOCAL_ErrorMessage = NULL;
p = e;
}
} while (kind != eot_tok);
return (l);
}
void
Yap_clean_tokenizer(TokEntry *tokstart, VarEntry *vartable, VarEntry *anonvartable, Term commentable)
{
CACHE_REGS
struct scanner_extra_alloc *ptr = LOCAL_ScannerExtraBlocks;
while (ptr) {
struct scanner_extra_alloc *next = ptr->next;
free(ptr);
ptr = next;
}
LOCAL_Comments = TermNil;
LOCAL_CommentsNextChar = LOCAL_CommentsTail = NULL;
free(LOCAL_CommentsBuff);
LOCAL_CommentsBuff = NULL;
LOCAL_CommentsBuffLim = 0;
}