This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/C/scanner.c
2002-06-17 15:28:01 +00:00

1566 lines
36 KiB
C
Raw Blame History

/*************************************************************************
* *
* YAP Prolog *
* *
* Yap Prolog was developed at NCCUP - Universidade do Porto *
* *
* Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-1997 *
* *
**************************************************************************
* *
* File: %W% %G% *
* Last rev: *
* mods: *
* comments: Prolog's scanner *
* *
*************************************************************************/
#ifdef SCCS
static char SccsId[] = "@(#)scanner.c 1.2";
#endif
/*
* Description:
*
* This module produces a list of tokens for use by the parser. The calling
* program should supply a routine int nextch(charpos) int *charpos; which,
* when called should produce the next char or -1 if none availlable. The
* scanner will stop producing tokens when it either finds an end of file
* (-1) or a token consisting of just '.' followed by a blank or control
* char. Scanner errors will be signalled by the scanner exiting with a non-
* zero ErrorMsg and ErrorPos. Note that, even in this case, the scanner
* will try to find the end of the term. A function char
* *AllocScannerMemory(nbytes) should be supplied for allocating (temporary)
* space for strings and for the table of prolog variables occurring in the
* term.
*
*/
#include "Yap.h"
#include "Yatom.h"
#include "Heap.h"
#include "yapio.h"
#include "alloc.h"
#include "eval.h"
#include "iopreds.h"
#if HAVE_STRING_H
#include <string.h>
#endif
/* You just can't trust some machines */
#define my_isxdigit(C,SU,SL) (chtype[C] == NU || (C >= 'A' && \
C <= (SU)) || (C >= 'a' && C <= (SL)))
#define my_isupper(C) ( C >= 'A' && C <= 'Z' )
STATIC_PROTO(void my_ungetch, (void));
STATIC_PROTO(int my_getch, (void));
STATIC_PROTO(Term float_send, (char *));
STATIC_PROTO(Term get_num, (void));
STATIC_PROTO(enum TokenKinds token, (void));
/* token table with some help from Richard O'Keefe's PD scanner */
static char chtype0[NUMBER_OF_CHARS+1] =
{
EF,
/* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
/* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
/* sp ! " # $ % & ' ( ) * + , - . / */
BS, SL, DC, SY, LC, CC, SY, QT, BK, BK, SY, SY, BK, SY, SY, SY,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, SY, SL, SY, SY, SY, SY,
/* @ A B C D E F G H I J K L M N O */
SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
/* P Q R S T U V W X Y Z [ \ ] ^ _ */
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, BK, SY, BK, SY, UL,
/* ` a b c d e f g h i j k l m n o */
SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
/* p q r s t u v w x y z { | } ~ del */
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, BK, BK, BK, SY, BS,
/* 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 */
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
/* 144 145 <20> 147 148 149 150 151 152 153 154 155 156 157 158 159 */
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
BS, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SY, SY, SY,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
SY, SY, LC, LC, SY, SY, SY, SY, SY, LC, LC, SY, SY, SY, SY, SY,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
#ifdef vms
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, LC,
#else
UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC,
#endif
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> */
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
/* <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> cannot write the last three because of lcc */
#ifdef vms
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC
#else
LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC
#endif
};
char *chtype = chtype0+1;
int eot_before_eof = FALSE;
static int ch, chbuff, o_ch;
static char *TokImage;
static int BUF = FALSE;
static Int TokenPos;
static CELL TokenInfo;
static int (*Nextch) (int);
static int (*QuotedNextch) (int);
char *
AllocScannerMemory(unsigned int size)
{
char *AuxSpScan;
AuxSpScan = (char *)TR;
size = AdjustSize(size);
TR = (tr_fr_ptr)(AuxSpScan+size);
#if !OS_HANDLES_TR_OVERFLOW
if (Unsigned(TrailTop) == Unsigned(TR)) {
if(!growtrail (sizeof(CELL) * 16 * 1024L)) {
return(NULL);
}
}
#endif
return (AuxSpScan);
}
inline static void
my_ungetch(void)
{
chbuff = ch;
ch = o_ch;
BUF = TRUE;
}
inline static int
my_getch(void)
{
o_ch = ch;
if (BUF) {
BUF = FALSE;
ch = chbuff;
}
else {
ch = (*Nextch) (c_input_stream);
}
#ifdef DEBUG
if (Option[1])
YP_fprintf(YP_stderr, "[getch %c]", ch);
#endif
return(ch);
}
inline static int
my_get_quoted_ch(void)
{
o_ch = ch;
if (BUF) {
BUF = FALSE;
ch = chbuff;
}
else {
ch = (*QuotedNextch) (c_input_stream);
}
#ifdef DEBUG
if (Option[1])
YP_fprintf(YP_stderr, "[getch %c]",ch);
#endif
return (ch);
}
extern double atof(const char *);
static Term
float_send(char *s)
{
Float f = (Float)atof(s);
#if HAVE_FINITE
if (yap_flags[LANGUAGE_MODE_FLAG] == 1) { /* iso */
if (!finite(f)) {
ErrorMessage = "Float overflow while scanning";
return(MkEvalFl(0.0));
}
}
#endif
return (MkEvalFl(f));
}
/* we have an overflow at s */
static Term
read_int_overflow(const char *s, Int base, Int val)
{
#ifdef USE_GMP
/* try to scan it as a bignum */
MP_INT *new = PreAllocBigNum();
mpz_init_set_str (new, s, base);
return(MkBigIntTerm(new));
#else
/* try to scan it as a float */
return(MkIntegerTerm(val));
#endif
}
/* reads a number, either integer or float */
static Term
get_num(void)
{
char *s = (char *)TR, *sp = s;
Int val = 0, base = ch - '0';
int might_be_float = TRUE, has_overflow = FALSE;
*sp++ = ch;
my_getch();
/*
* because of things like 00'2, 03'2 and even better 12'2, I need to
* do this (have mercy)
*/
if (chtype[ch] == NU) {
*sp++ = ch;
base = 10 * base + ch - '0';
my_getch();
}
if (ch == '\'') {
if (base > 36) {
ErrorMessage = "Admissible bases are 0..36";
return (TermNil);
}
might_be_float = FALSE;
*sp++ = ch;
restart:
my_getch();
if (base == 0) {
Int ascii = ch;
if (ch == '\\' &&
yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
/* escape sequence */
ch = my_get_quoted_ch();
switch (ch) {
case 10:
goto restart;
case 'a':
ascii = '\a';
break;
case 'b':
ascii = '\b';
break;
case 'r':
ascii = '\r';
break;
case 'f':
ascii = '\f';
break;
case 't':
ascii = '\t';
break;
case 'n':
ascii = '\n';
break;
case 'v':
ascii = '\v';
break;
case '\\':
ascii = '\\';
break;
case '\'':
ascii = '\'';
break;
case '"':
ascii = '"';
break;
case '`':
ascii = '`';
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
/* character in octal: maximum of 3 digits, terminates with \ */
{
unsigned char so_far = ch-'0';
my_get_quoted_ch();
if (ch >= '0' && ch < '8') {/* octal */
so_far = so_far*8+(ch-'0');
my_get_quoted_ch();
if (ch >= '0' && ch < '8') { /* octal */
ascii = so_far*8+(ch-'0');
my_get_quoted_ch();
if (ch != '\\') {
ErrorMessage = "invalid octal escape sequence";
}
} else if (ch == '\\') {
ascii = so_far;
} else {
ErrorMessage = "invalid octal escape sequence";
}
} else if (ch == '\\') {
ascii = so_far;
} else {
ErrorMessage = "invalid octal escape sequence";
}
}
break;
case 'x':
/* hexadecimal character (YAP allows empty hexadecimal */
{
unsigned char so_far = 0;
my_get_quoted_ch();
if (my_isxdigit(ch,'f','F')) {/* hexa */
so_far = so_far * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
my_get_quoted_ch();
if (my_isxdigit(ch,'f','F')) { /* hexa */
ascii = so_far * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
my_get_quoted_ch();
if (ch != '\\') {
ErrorMessage = "invalid hexadecimal escape sequence";
}
} else if (ch == '\\') {
ascii = so_far;
} else {
ErrorMessage = "invalid hexadecimal escape sequence";
}
} else if (ch == '\\') {
ascii = so_far;
my_get_quoted_ch();
}
}
break;
default:
/* accept sequence. Note that the ISO standard does not
consider this sequence legal, whereas SICStus would
eat up the escape sequence. */
ErrorMessage = "invalid escape sequence";
}
}
/* a quick way to represent ASCII */
my_getch();
return (MkIntTerm(ascii));
}
else if (base >= 10 && base <= 36) {
int upper_case = 'A' - 11 + base;
int lower_case = 'a' - 11 + base;
while (my_isxdigit(ch, upper_case, lower_case)) {
Int oval = val;
*sp++ = ch;
val = val * base + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
if (oval >= val && oval != 0) /* overflow */
has_overflow = (has_overflow || TRUE);
my_getch();
}
}
}
else if ((ch == 'x' || ch == 'X') && base == 0) {
might_be_float = FALSE;
*sp++ = ch;
my_getch();
while (my_isxdigit(ch, 'F', 'f')) {
Int oval = val;
*sp++ = ch;
val = val * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
if (oval >= val && oval != 0) /* overflow */
has_overflow = (has_overflow || TRUE);
my_getch();
}
}
else if ((ch == 'o') && base == 0) {
might_be_float = FALSE;
base = 8;
*sp++ = ch;
my_getch();
}
else {
val = base;
base = 10;
}
while (chtype[ch] == NU) {
Int oval = val;
*sp++ = ch;
if (ch - '0' >= base)
return (MkIntegerTerm(val));
val = val * base + ch - '0';
if (oval >= val && oval != 0) /* overflow */
has_overflow = (has_overflow || TRUE);
my_getch();
}
if (might_be_float && (ch == '.' || ch == 'e' || ch == 'E')) {
if (ch == '.') {
*sp++ = '.';
if (chtype[my_getch()] != NU) {
my_ungetch();
*--sp = '\0';
if (has_overflow)
return(read_int_overflow(s,base,val));
return (MkIntegerTerm(val));
}
do
*sp++ = ch;
while (chtype[my_getch()] == NU);
}
if (ch == 'e' || ch == 'E') {
*sp++ = 'e';
my_getch();
if (ch == '-') {
*sp++ = ch;
my_getch();
}
else if (ch == '+')
my_getch();
if (chtype[ch] != NU) {
my_ungetch();
*--sp = '\0';
return (float_send(s));
}
do
*sp++ = ch;
while (chtype[my_getch()] == NU);
}
*sp = '\0';
return (float_send(s));
} else if (has_overflow) {
*sp = '\0';
/* skip base */
if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
return(read_int_overflow(s+2,16,val));
if (s[1] == '\'')
return(read_int_overflow(s+2,base,val));
if (s[2] == '\'')
return(read_int_overflow(s+3,base,val));
return(read_int_overflow(s,base,val));
} else
return (MkIntegerTerm(val));
}
/* given a function Nxtch scan until we either find the number
or end of file */
Term
scan_num(int (*Nxtch) (int))
{
Term out;
int sign = 1;
Nextch = Nxtch;
ErrorMessage = NULL;
ch = Nextch(c_input_stream);
if (ch == '-') {
sign = -1;
ch = Nextch(c_input_stream);
} else if (ch == '+') {
ch = Nextch(c_input_stream);
}
if (chtype[ch] != NU) {
return(TermNil);
}
out = get_num();
if (sign == -1) {
if (IsIntegerTerm(out))
out = MkIntegerTerm(-IntegerOfTerm(out));
else if (IsFloatTerm(out))
out = MkFloatTerm(-FloatOfTerm(out));
}
if (ErrorMessage != NULL || ch != -1)
return(TermNil);
return(out);
}
/* gets a token */
static enum TokenKinds
token(void)
{
int och, quote, isvar;
char *charp, *mp;
unsigned int len;
TokImage = ((AtomEntry *) ( PreAllocCodeSpace()))->StrOfAE;
charp = TokImage;
while (chtype[ch] == BS)
my_getch();
TokenPos = GetCurInpPos();
switch (chtype[ch]) {
case CC:
while (my_getch() != 10 && chtype[ch] != EF);
if (chtype[ch] != EF) {
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (token());
} else {
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (eot_tok);
}
case UC:
case UL:
case LC:
isvar = (chtype[ch] != LC);
*charp++ = ch;
for (my_getch(); chtype[ch] <= NU; my_getch())
*charp++ = ch;
*charp++ = '\0';
if (!isvar) {
/* don't do this in iso */
TokenInfo = Unsigned(LookupAtom(TokImage));
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (Name_tok);
}
else {
TokenInfo = Unsigned(LookupVar(TokImage));
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (Var_tok);
}
case NU:
TokenInfo = get_num();
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (Number_tok);
case QT:
case DC:
quote = ch;
len = 0;
my_get_quoted_ch();
while (1) {
if (charp + 1024 > (char *)AuxSp) {
ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
break;
}
if (ch == quote) {
my_get_quoted_ch();
if (ch != quote)
break;
*charp++ = ch;
my_get_quoted_ch();
} else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
/* escape sequence */
ch = my_get_quoted_ch();
switch (ch) {
case 10:
/* don't add characters */
printf("I am here\n");
my_get_quoted_ch();
break;
case 'a':
*charp++ = '\a';
my_get_quoted_ch();
break;
case 'b':
*charp++ = '\b';
my_get_quoted_ch();
break;
case 'r':
*charp++ = '\r';
my_get_quoted_ch();
break;
case 'f':
*charp++ = '\f';
my_get_quoted_ch();
break;
case 't':
*charp++ = '\t';
my_get_quoted_ch();
break;
case 'n':
*charp++ = '\n';
my_get_quoted_ch();
break;
case 'v':
*charp++ = '\v';
my_get_quoted_ch();
break;
case '\\':
*charp++ = '\\';
my_get_quoted_ch();
break;
case '\'':
*charp++ = '\'';
my_get_quoted_ch();
break;
case '"':
*charp++ = '"';
my_get_quoted_ch();
break;
case '`':
*charp++ = '`';
my_get_quoted_ch();
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
/* character in octal: maximum of 3 digits, terminates with \ */
{
unsigned char so_far = ch-'0';
my_get_quoted_ch();
if (ch >= '0' && ch < '8') {/* octal */
so_far = so_far*8+(ch-'0');
my_get_quoted_ch();
if (ch >= '0' && ch < '8') { /* octal */
*charp++ = so_far*8+(ch-'0');
my_get_quoted_ch();
if (ch != '\\') {
ErrorMessage = "invalid octal escape sequence";
} else {
my_get_quoted_ch();
}
} else if (ch == '\\') {
*charp++ = so_far;
my_get_quoted_ch();
} else {
ErrorMessage = "invalid octal escape sequence";
}
} else if (ch == '\\') {
*charp++ = so_far;
my_get_quoted_ch();
} else {
ErrorMessage = "invalid octal escape sequence";
}
}
break;
case 'x':
/* hexadecimal character (YAP allows empty hexadecimal */
{
unsigned char so_far = 0;
my_get_quoted_ch();
if (my_isxdigit(ch,'f','F')) {/* hexa */
so_far = so_far * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
my_get_quoted_ch();
if (my_isxdigit(ch,'f','F')) { /* hexa */
*charp++ = so_far * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
my_get_quoted_ch();
if (ch != '\\') {
ErrorMessage = "invalid hexadecimal escape sequence";
} else {
my_get_quoted_ch();
}
} else if (ch == '\\') {
*charp++ = so_far;
my_get_quoted_ch();
} else {
ErrorMessage = "invalid hexadecimal escape sequence";
}
} else if (ch == '\\') {
*charp++ = so_far;
my_get_quoted_ch();
} else {
ErrorMessage = "invalid hexadecimal escape sequence";
}
}
break;
default:
/* accept sequence. Note that the ISO standard does not
consider this sequence legal, whereas SICStus would
eat up the escape sequence. */
ErrorMessage = "invalid escape sequence";
}
} else if (chtype[ch] == EF) {
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (eot_tok);
} else {
*charp++ = ch;
my_get_quoted_ch();
}
++len;
if (charp > (char *)AuxSp - 1024) {
/* Not enough space to read in the string. */
ErrorMessage = "not enough heap space to read in string or quoted atom";
/* serious error now */
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return(eot_tok);
}
}
*charp = '\0';
if (quote == '"') {
mp = AllocScannerMemory(len + 1);
if (mp == NULL) {
ErrorMessage = "not enough stack space to read in string or quoted atom";
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return(eot_tok);
}
strcpy(mp, TokImage);
TokenInfo = Unsigned(mp);
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (String_tok);
}
else {
TokenInfo = Unsigned(LookupAtom(TokImage));
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (Name_tok);
}
case SY:
och = ch;
my_getch();
if (och == '/' && ch == '*') {
while ((och != '*' || ch != '/') && chtype[ch] != EF) {
och = ch;
my_getch();
}
if (chtype[ch] == EF) {
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (eot_tok);
}
my_getch();
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (token());
}
if (och == '.' && (chtype[ch] == BS || chtype[ch] == EF
|| chtype[ch] == CC)) {
eot_before_eof = TRUE;
if (chtype[ch] == CC)
while (my_getch() != 10 && chtype[ch] != EF);
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (eot_tok);
}
else {
*charp++ = och;
for (; chtype[ch] == SY; my_getch())
*charp++ = ch;
*charp = '\0';
TokenInfo = Unsigned(LookupAtom(TokImage));
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (Name_tok);
}
case SL:
*charp++ = ch;
*charp++ = '\0';
my_getch();
TokenInfo = Unsigned(LookupAtom(TokImage));
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (Name_tok);
case BK:
och = ch;
do {
my_getch();
} while (chtype[ch] == BS);
if (och == '[' && ch == ']') {
TokenInfo = Unsigned(AtomNil);
my_getch();
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (Name_tok);
}
else {
TokenInfo = och;
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (Ponctuation_tok);
}
case EF:
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (eot_tok);
#ifdef DEBUG
default:
YP_fprintf(YP_stderr, "\n++++ token: wrong char type %c %d\n", ch, chtype[ch]);
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (eot_tok);
#else
default:
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return (eot_tok); /* Just to make lint happy */
#endif
}
}
TokEntry *
tokenizer(int (*Nxtch) (int), int (*QuotedNxtch) (int))
{
TokEntry *t, *l, *p;
enum TokenKinds kind;
int solo_flag = TRUE;
ErrorMessage = NULL;
VarTable = NULL;
AnonVarTable = NULL;
Nextch = Nxtch;
QuotedNextch = QuotedNxtch;
eot_before_eof = FALSE;
l = NIL;
p = NIL; /* Just to make lint happy */
ch = ' ';
my_getch();
while (chtype[ch] == BS)
my_getch();
do {
t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry));
if (t == NULL) {
ErrorMessage = "not enough stack space to read in term";
if (p != NIL)
p->TokInfo = eot_tok;
/* serious error now */
return(l);
}
if (l == NIL)
l = t;
else
p->TokNext = t;
p = t;
if ((kind = token()) == Name_tok && ch == '(')
solo_flag = FALSE;
else if (kind == Ponctuation_tok && TokenInfo == '(' && !solo_flag) {
TokenInfo = 'l';
solo_flag = TRUE;
}
t->Tok = Ord(kind);
#ifdef DEBUG
if(Option[2]) YP_fprintf(YP_stderr,"[Token %d %ld]",Ord(kind),(unsigned long int)TokenInfo);
#endif
t->TokInfo = (Term) TokenInfo;
t->TokPos = TokenPos;
t->TokNext = NIL;
} while (kind != eot_tok);
return (l);
}
extern int PlFGetchar(void);
#if DEBUG
static inline int
debug_fgetch(void)
{
int ch = PlFGetchar();
if (Option[1])
YP_fprintf(YP_stderr, "[getch %c,%d]", ch,ch);
return (ch);
}
#define my_fgetch() (ch = debug_fgetch())
#else
#define my_fgetch() (ch = PlFGetchar())
#endif
TokEntry *
fast_tokenizer(void)
{
/* I hope, a compressed version of the last
* three files */
TokEntry *t, *l, *p;
enum TokenKinds kind;
register int ch, och;
int solo_flag = TRUE;
ErrorMessage = NULL;
VarTable = NULL;
AnonVarTable = NULL;
eot_before_eof = FALSE;
l = NIL;
p = NIL; /* Just to make lint happy */
my_fgetch();
while (chtype[ch] == BS)
my_fgetch();
if (chtype[ch] == EF)
return(NIL);
do {
t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry));
if (t == NULL) {
ErrorMessage = "not enough stack space to read in term";
if (p != NIL)
p->TokInfo = eot_tok;
/* serious error now */
return(l);
}
if (l == NIL)
l = t;
else
p->TokNext = t;
p = t;
/* old code for token() */
{
int quote, isvar;
char *charp, *mp;
unsigned int len;
get_tok:
charp = TokImage = ((AtomEntry *) ( PreAllocCodeSpace()))->StrOfAE;
while (chtype[ch] == BS)
my_fgetch();
TokenPos = GetCurInpPos();
switch (chtype[ch]) {
case CC:
while (my_fgetch() != 10 && chtype[ch] != EF);
if (chtype[ch] != EF) {
my_fgetch();
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
goto get_tok;
}
else
kind = eot_tok;
break;
case UC:
case UL:
case LC:
isvar = (chtype[ch] != LC);
*charp++ = ch;
inside_letters:
for (my_fgetch(); chtype[ch] <= NU; my_fgetch())
*charp++ = ch;
*charp++ = '\0';
if (!isvar) {
TokenInfo = Unsigned(LookupAtom(TokImage));
if (ch == '(')
solo_flag = FALSE;
kind = Name_tok;
}
else {
TokenInfo = Unsigned(LookupVar(TokImage));
kind = Var_tok;
}
break;
case NU:
{
char *sp = TokImage;
Int val = 0, base = ch - '0';
int might_be_float = TRUE, has_overflow = FALSE;
*sp++ = ch;
my_fgetch();
/*
* because of things like 00'2, 03'2
* and even better 12'2, I need to do
* this (have mercy)
*/
if (chtype[ch] == NU) {
*sp++ = ch;
base = 10 * base + ch - '0';
my_fgetch();
}
if (ch == '\'') {
might_be_float = FALSE;
*sp++ = ch;
restart:
my_fgetch();
if (base == 0) {
Int ascii = ch;
/*
* a quick way to
* represent ASCII
*/
if (ch == '\\' &&
yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
/* escape sequence */
ch = my_fgetch();
switch (ch) {
case 10:
goto restart;
case 'a':
ascii = '\a';
break;
case 'b':
ascii = '\b';
break;
case 'r':
ascii = '\r';
break;
case 'f':
ascii = '\f';
break;
case 't':
ascii = '\t';
break;
case 'n':
ascii = '\n';
break;
case 'v':
ascii = '\v';
break;
case '\\':
ascii = '\\';
break;
case '\'':
ascii = '\'';
break;
case '"':
ascii = '"';
break;
case '`':
ascii = '`';
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
/* character in octal: maximum of 3 digits, terminates with \ */
{
unsigned char so_far = ch-'0';
my_fgetch();
if (ch >= '0' && ch < '8') {/* octal */
so_far = so_far*8+(ch-'0');
my_fgetch();
if (ch >= '0' && ch < '8') { /* octal */
ascii = so_far*8+(ch-'0');
my_fgetch();
if (ch != '\\') {
ErrorMessage = "invalid octal escape sequence";
}
} else if (ch == '\\') {
ascii = so_far;
} else {
ErrorMessage = "invalid octal escape sequence";
}
} else if (ch == '\\') {
ascii = so_far;
} else {
ErrorMessage = "invalid octal escape sequence";
}
}
break;
case 'x':
/* hexadecimal character (YAP allows empty hexadecimal */
{
unsigned char so_far = 0;
my_fgetch();
if (my_isxdigit(ch,'f','F')) {/* hexa */
so_far = so_far * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
my_fgetch();
if (my_isxdigit(ch,'f','F')) { /* hexa */
ascii = so_far * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
my_fgetch();
if (ch != '\\') {
ErrorMessage = "invalid hexadecimal escape sequence";
}
} else if (ch == '\\') {
ascii = so_far;
} else {
ErrorMessage = "invalid hexadecimal escape sequence";
}
} else if (ch == '\\') {
ascii = so_far;
my_fgetch();
} else {
ErrorMessage = "invalid hexadecimal escape sequence";
}
}
break;
default:
/* accept sequence. Note that the ISO standard does not
consider this sequence legal, whereas SICStus would
eat up the escape sequence. */
ErrorMessage = "invalid escape sequence";
}
}
my_fgetch();
TokenInfo = (CELL) MkIntTerm(ascii);
goto end_of_read_number;
}
else if (base >= 10 && base <= 36) {
int upper_case = 'A' - 11 + base;
int lower_case = 'a' - 11 + base;
while (my_isxdigit(ch, upper_case, lower_case)) {
Int oval = val;
*sp++ = ch;
val = val * base + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
if (oval >= val && oval != 0) /* overflow */
has_overflow = (has_overflow || TRUE);
my_fgetch();
}
}
}
else if ((ch == 'x' || ch == 'X') && base == 0) {
might_be_float = FALSE;
*sp++ = ch;
my_fgetch();
while (my_isxdigit(ch, 'F', 'f')) {
Int oval = val;
*sp++ = ch;
val = val * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
if (oval >= val && oval != 0) /* overflow */
has_overflow = (has_overflow || TRUE);
my_fgetch();
}
}
else {
val = base;
base = 10;
}
while (chtype[ch] == NU) {
Int oval = val;
*sp++ = ch;
if (ch - '0' >= base) {
TokenInfo = (CELL) MkIntegerTerm(val);
goto end_of_read_number;
}
val = val * base + ch - '0';
if (oval >= val && oval != 0) /* overflow */
has_overflow = (has_overflow || TRUE);
my_fgetch();
}
if (might_be_float && (ch == '.' || ch == 'e' || ch == 'E')) {
if (ch == '.') {
*sp++ = '.';
if (chtype[my_fgetch()] != NU) {
/*
* first
* process
* the new
* token
*/
t->Tok = Ord(Number_tok);
#ifdef DEBUG
/*
* if(Option[2
* ])
* YP_fprintf(YP_stderr,"[To
* ken %d
* %d]",Ord(ki
* nd),TokenIn
* fo);
*/
#endif
if (has_overflow)
t->TokInfo = read_int_overflow(TokImage,base,val);
else
t->TokInfo = MkIntegerTerm(val);
t->TokPos = TokenPos;
t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry));
if (t == NULL) {
ErrorMessage = "not enough stack space to read in term";
if (p != NIL)
p->TokInfo = eot_tok;
/* serious error now */
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return(l);
}
if (l == NIL)
l = t;
else
p->TokNext = t;
p = t;
/*
* continue
* analysis
*/
och = '.';
goto inside_symbol;
}
do
*sp++ = ch;
while (chtype[my_fgetch()] == NU);
}
if (ch == 'e' || ch == 'E') {
*sp++ = 'e';
my_fgetch();
if (ch == '-') {
*sp++ = ch;
my_fgetch();
}
else if (ch == '+')
my_fgetch();
if (chtype[ch] != NU) {
/*
* first
* finish
* processing
*/
--sp;
och = *sp;
*sp = '\0';
/*
* first
* process
* the new
* token
*/
t->Tok = Ord(Number_tok);
t->TokPos = TokenPos;
t->TokInfo = float_send(TokImage);
t =
(TokEntry *) AllocScannerMemory(sizeof(TokEntry));
if (t == NULL) {
ErrorMessage = "not enough stack space to read in term";
if (p != NIL)
p->TokInfo = eot_tok;
/* serious error now */
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
return(l);
}
if (l == NIL)
l = t;
else
p->TokNext = t;
p = t;
/*
* now try to
* backtrack
* statically
*/
if (chtype[och] == SY)
goto inside_symbol;
else {
charp = TokImage;
isvar = (chtype[och] != LC);
*charp++ = och;
goto inside_letters;
}
}
do
*sp++ = ch;
while (chtype[my_fgetch()] == NU);
}
*sp = '\0';
TokenInfo = (CELL) float_send(TokImage);
goto end_of_read_number;
}
if (has_overflow) {
*sp = '\0';
/* skip base */
if (TokImage[0] == '0' && (TokImage[1] == 'x' || TokImage[1] == 'X'))
TokenInfo = read_int_overflow(TokImage+2,16,val);
else if (TokImage[1] == '\'')
TokenInfo = read_int_overflow(TokImage+2,base,val);
else if (TokImage[2] == '\'')
TokenInfo = read_int_overflow(TokImage+3,base,val);
else
TokenInfo = read_int_overflow(TokImage,base,val);
} else
TokenInfo = (CELL) MkIntegerTerm(val);
}
end_of_read_number:
kind = Number_tok;
break;
case QT:
case DC:
quote = ch;
len = 0;
my_fgetch();
while (1) {
if (charp + 1024 > (char *)AuxSp) {
ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
break;
}
if (ch == quote) {
my_fgetch();
if (ch != quote)
break;
*charp++ = ch;
my_fgetch();
} else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
/* escape sequence */
ch = my_fgetch();
switch (ch) {
case 10:
/* just skip */
my_fgetch();
break;
case 'a':
*charp++ = '\a';
my_fgetch();
break;
case 'b':
*charp++ = '\b';
my_fgetch();
break;
case 'r':
*charp++ = '\r';
my_fgetch();
break;
case 'f':
*charp++ = '\f';
my_fgetch();
break;
case 't':
*charp++ = '\t';
my_fgetch();
break;
case 'n':
*charp++ = '\n';
my_fgetch();
break;
case 'v':
*charp++ = '\v';
my_fgetch();
break;
case '\\':
*charp++ = '\\';
my_fgetch();
break;
case '\'':
*charp++ = '\'';
my_fgetch();
break;
case '"':
*charp++ = '"';
my_fgetch();
break;
case '`':
*charp++ = '`';
my_fgetch();
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
/* character in octal: maximum of 3 digits, terminates with \ */
{
unsigned char so_far = ch-'0';
my_fgetch();
if (ch >= '0' && ch < '8') {/* octal */
so_far = so_far*8+(ch-'0');
my_fgetch();
if (ch >= '0' && ch < '8') { /* octal */
*charp++ = so_far*8+(ch-'0');
my_fgetch();
if (ch != '\\') {
ErrorMessage = "invalid octal escape sequence";
} else {
my_fgetch();
}
} else if (ch == '\\') {
*charp++ = so_far;
my_fgetch();
} else {
ErrorMessage = "invalid octal escape sequence";
}
} else if (ch == '\\') {
*charp++ = so_far;
my_fgetch();
} else {
ErrorMessage = "invalid octal escape sequence";
}
}
break;
case 'x':
/* hexadecimal character (YAP allows empty hexadecimal */
{
unsigned char so_far = 0;
my_fgetch();
if (my_isxdigit(ch,'f','F')) {/* hexa */
so_far = so_far * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
my_fgetch();
if (my_isxdigit(ch,'f','F')) { /* hexa */
*charp++ = so_far * 16 + (chtype[ch] == NU ? ch - '0' :
(my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
my_fgetch();
if (ch != '\\') {
ErrorMessage = "invalid hexadecimal escape sequence";
} else {
my_fgetch();
}
} else if (ch == '\\') {
*charp++ = so_far;
my_fgetch();
} else {
ErrorMessage = "invalid hexadecimal escape sequence";
}
} else if (ch == '\\') {
*charp++ = so_far;
my_fgetch();
} else {
ErrorMessage = "invalid hexadecimal escape sequence";
}
}
break;
default:
/* accept sequence. Note that the ISO standard does not
consider this sequence legal, whereas SICStus would
eat up the escape sequence. */
ErrorMessage = "invalid escape sequence";
}
} else {
*charp++ = ch;
my_fgetch();
}
if (chtype[ch] == EF) {
kind = eot_tok;
break;
}
++len;
if (charp > (char *)AuxSp - 1024) {
/* Not enough space to read in the string. */
ErrorMessage = "not enough heap space to read in string or quoted atom";
/* serious error now */
kind = eot_tok;
}
}
*charp = '\0';
if (quote == '"') {
mp = AllocScannerMemory(len + 1);
if (mp == NULL) {
ErrorMessage = "not enough stack space to read in string or quoted atom";
/* serious error now */
kind = eot_tok;
}
strcpy(mp, TokImage);
TokenInfo = Unsigned(mp);
kind = String_tok;
}
else {
TokenInfo = Unsigned(LookupAtom(TokImage));
if (ch == '(')
solo_flag = FALSE;
kind = Name_tok;
}
break;
case SY:
och = ch;
my_fgetch();
inside_symbol:
if (och == '/' && ch == '*') {
while ((ch != '/' || och != '*') && chtype[ch] != EF) {
och = ch;
my_fgetch();
}
if (chtype[ch] == EF) {
kind = eot_tok;
break;
}
my_fgetch();
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
goto get_tok;
}
if (och == '.' && (chtype[ch] == BS || chtype[ch] == EF
|| chtype[ch] == CC)) {
eot_before_eof = TRUE;
if (chtype[ch] == CC)
while (my_fgetch() != 10 && chtype[ch] != EF);
kind = eot_tok;
}
else {
*charp++ = och;
for (; chtype[ch] == SY; my_fgetch())
*charp++ = ch;
*charp = '\0';
TokenInfo = Unsigned(LookupAtom(TokImage));
if (ch == '(')
solo_flag = FALSE;
kind = Name_tok;
}
break;
case SL:
*charp++ = ch;
*charp++ = '\0';
my_fgetch();
TokenInfo = Unsigned(LookupAtom(TokImage));
if (ch == '(')
solo_flag = FALSE;
kind = Name_tok;
break;
case BK:
och = ch;
do {
/* skip spaces to look for stuff such as [ ] */
my_fgetch();
} while (chtype[ch] == BS);
if (och == '[' && ch == ']') {
TokenInfo = Unsigned(AtomNil);
my_fgetch();
if (ch == '(')
solo_flag = FALSE;
kind = Name_tok;
}
else {
if (!solo_flag && och == '(') {
TokenInfo = 'l';
solo_flag = TRUE;
}
else
TokenInfo = och;
kind = Ponctuation_tok;
}
break;
case EF:
kind = eot_tok;
break;
#ifdef DEBUG
default:
YP_fprintf(YP_stderr, "\n++++ token: wrong char type %c %d\n", ch, chtype[ch]);
kind = eot_tok;
#else
default:
kind = eot_tok; /* Just to make lint happy */
#endif
}
}
t->Tok = Ord(kind);
#ifdef DEBUG
if(Option[2]) YP_fprintf(YP_stderr,"[Token %d %ld]\n",Ord(kind),(unsigned long int)TokenInfo);
#endif
t->TokInfo = (Term) TokenInfo;
t->TokPos = TokenPos;
t->TokNext = NIL;
ReleasePreAllocCodeSpace((CODEADDR)TokImage);
} while (kind != eot_tok);
return (l);
}