/************************************************************************* * * * YAP Prolog * * * * Yap Prolog was developed at NCCUP - Universidade do Porto * * * * Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-1997 * * * ************************************************************************** * * * File: %W% %G% * * Last rev: * * mods: * * comments: Prolog's scanner * * * *************************************************************************/ #ifdef SCCS static char SccsId[] = "@(#)scanner.c 1.2"; #endif /* * Description: * * This module produces a list of tokens for use by the parser. The calling * program should supply a routine int nextch(charpos) int *charpos; which, * when called should produce the next char or -1 if none availlable. The * scanner will stop producing tokens when it either finds an end of file * (-1) or a token consisting of just '.' followed by a blank or control * char. Scanner errors will be signalled by the scanner exiting with a non- * zero ErrorMsg and ErrorPos. Note that, even in this case, the scanner * will try to find the end of the term. A function char * *AllocScannerMemory(nbytes) should be supplied for allocating (temporary) * space for strings and for the table of prolog variables occurring in the * term. * */ #include "Yap.h" #include "Yatom.h" #include "Heap.h" #include "yapio.h" #include "alloc.h" #include "eval.h" #if _MSC_VER || defined(__MINGW32__) #if HAVE_FINITE==1 #undef HAVE_FINITE #endif #include #endif #include "iopreds.h" #if HAVE_STRING_H #include #endif /* You just can't trust some machines */ #define my_isxdigit(C,SU,SL) (chtype[C] == NU || (C >= 'A' && \ C <= (SU)) || (C >= 'a' && C <= (SL))) #define my_isupper(C) ( C >= 'A' && C <= 'Z' ) STATIC_PROTO(void my_ungetch, (void)); STATIC_PROTO(int my_getch, (void)); STATIC_PROTO(Term float_send, (char *)); STATIC_PROTO(Term get_num, (void)); STATIC_PROTO(enum TokenKinds token, (void)); /* token table with some help from Richard O'Keefe's PD scanner */ static char chtype0[NUMBER_OF_CHARS+1] = { EF, /* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */ BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, /* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */ BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, /* sp ! " # $ % & ' ( ) * + , - . / */ BS, SL, DC, SY, LC, CC, SY, QT, BK, BK, SY, SY, BK, SY, SY, SY, /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, SY, SL, SY, SY, SY, SY, /* @ A B C D E F G H I J K L M N O */ SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, /* P Q R S T U V W X Y Z [ \ ] ^ _ */ UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, BK, SY, BK, SY, UL, /* ` a b c d e f g h i j k l m n o */ SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, /* p q r s t u v w x y z { | } ~ del */ LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, BK, BK, BK, SY, BS, /* 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 */ BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, /* 144 145 ’ 147 148 149 150 151 152 153 154 155 156 157 158 159 */ BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, /*   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯ */ BS, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SY, SY, SY, /* ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ */ SY, SY, LC, LC, SY, SY, SY, SY, SY, LC, LC, SY, SY, SY, SY, SY, /* À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï */ UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, /* Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß */ #ifdef vms UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, LC, #else UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC, #endif /* à á â ã ä å æ ç è é ê ë ì í î ï */ LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, /* ð ñ ò ó ô õ ö ÷ ø ù ú û ü cannot write the last three because of lcc */ #ifdef vms LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC #else LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC #endif }; #define chtype (chtype0+1) char *Yap_chtype = chtype0+1; static int ch, chbuff, o_ch; static char *TokImage; static int BUF = FALSE; static Int TokenPos; static CELL TokenInfo; static int (*Nextch) (int); static int (*QuotedNextch) (int); static char * AllocScannerMemory(unsigned int size) { char *AuxSpScan; AuxSpScan = (char *)TR; size = AdjustSize(size); TR = (tr_fr_ptr)(AuxSpScan+size); #if !OS_HANDLES_TR_OVERFLOW if (Unsigned(Yap_TrailTop) == Unsigned(TR)) { if(!Yap_growtrail (sizeof(CELL) * 16 * 1024L)) { return(NULL); } } #endif return (AuxSpScan); } char * Yap_AllocScannerMemory(unsigned int size) { return AllocScannerMemory(size); } inline static void my_ungetch(void) { chbuff = ch; ch = o_ch; BUF = TRUE; } inline static int my_getch(void) { o_ch = ch; if (BUF) { BUF = FALSE; ch = chbuff; } else { ch = (*Nextch) (Yap_c_input_stream); } #ifdef DEBUG if (Yap_Option[1]) fprintf(Yap_stderr, "[getch %c]", ch); #endif return(ch); } inline static int my_get_quoted_ch(void) { o_ch = ch; if (BUF) { BUF = FALSE; ch = chbuff; } else { ch = (*QuotedNextch) (Yap_c_input_stream); } #ifdef DEBUG if (Yap_Option[1]) fprintf(Yap_stderr, "[getch %c]",ch); #endif return (ch); } extern double atof(const char *); static Term float_send(char *s) { Float f = (Float)atof(s); #if HAVE_FINITE if (yap_flags[LANGUAGE_MODE_FLAG] == 1) { /* iso */ if (!finite(f)) { Yap_ErrorMessage = "Float overflow while scanning"; return(MkEvalFl(0.0)); } } #endif return (MkEvalFl(f)); } /* we have an overflow at s */ static Term read_int_overflow(const char *s, Int base, Int val) { #ifdef USE_GMP /* try to scan it as a bignum */ MP_INT *new = Yap_PreAllocBigNum(); mpz_init_set_str (new, s, base); return(Yap_MkBigIntTerm(new)); #else /* try to scan it as a float */ return(MkIntegerTerm(val)); #endif } /* reads a number, either integer or float */ static Term get_num(void) { char *s = (char *)TR, *sp = s; Int val = 0, base = ch - '0'; int might_be_float = TRUE, has_overflow = FALSE; *sp++ = ch; my_getch(); /* * because of things like 00'2, 03'2 and even better 12'2, I need to * do this (have mercy) */ if (chtype[ch] == NU) { *sp++ = ch; base = 10 * base + ch - '0'; my_getch(); } if (ch == '\'') { if (base > 36) { Yap_ErrorMessage = "Admissible bases are 0..36"; return (TermNil); } might_be_float = FALSE; *sp++ = ch; restart: my_getch(); if (base == 0) { Int ascii = ch; if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) { /* escape sequence */ ch = my_get_quoted_ch(); switch (ch) { case 10: goto restart; case 'a': ascii = '\a'; break; case 'b': ascii = '\b'; break; case 'r': ascii = '\r'; break; case 'f': ascii = '\f'; break; case 't': ascii = '\t'; break; case 'n': ascii = '\n'; break; case 'v': ascii = '\v'; break; case '\\': ascii = '\\'; break; case '\'': ascii = '\''; break; case '"': ascii = '"'; break; case '`': ascii = '`'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* character in octal: maximum of 3 digits, terminates with \ */ { unsigned char so_far = ch-'0'; my_get_quoted_ch(); if (ch >= '0' && ch < '8') {/* octal */ so_far = so_far*8+(ch-'0'); my_get_quoted_ch(); if (ch >= '0' && ch < '8') { /* octal */ ascii = so_far*8+(ch-'0'); my_get_quoted_ch(); if (ch != '\\') { Yap_ErrorMessage = "invalid octal escape sequence"; } } else if (ch == '\\') { ascii = so_far; } else { Yap_ErrorMessage = "invalid octal escape sequence"; } } else if (ch == '\\') { ascii = so_far; } else { Yap_ErrorMessage = "invalid octal escape sequence"; } } break; case 'x': /* hexadecimal character (YAP allows empty hexadecimal */ { unsigned char so_far = 0; my_get_quoted_ch(); if (my_isxdigit(ch,'f','F')) {/* hexa */ so_far = so_far * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); my_get_quoted_ch(); if (my_isxdigit(ch,'f','F')) { /* hexa */ ascii = so_far * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); my_get_quoted_ch(); if (ch != '\\') { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } else if (ch == '\\') { ascii = so_far; } else { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } else if (ch == '\\') { ascii = so_far; my_get_quoted_ch(); } } break; default: /* accept sequence. Note that the ISO standard does not consider this sequence legal, whereas SICStus would eat up the escape sequence. */ Yap_ErrorMessage = "invalid escape sequence"; } } /* a quick way to represent ASCII */ my_getch(); return (MkIntTerm(ascii)); } else if (base >= 10 && base <= 36) { int upper_case = 'A' - 11 + base; int lower_case = 'a' - 11 + base; while (my_isxdigit(ch, upper_case, lower_case)) { Int oval = val; *sp++ = ch; val = val * base + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); if (oval >= val && oval != 0) /* overflow */ has_overflow = (has_overflow || TRUE); my_getch(); } } } else if ((ch == 'x' || ch == 'X') && base == 0) { might_be_float = FALSE; *sp++ = ch; my_getch(); while (my_isxdigit(ch, 'F', 'f')) { Int oval = val; *sp++ = ch; val = val * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); if (oval >= val && oval != 0) /* overflow */ has_overflow = (has_overflow || TRUE); my_getch(); } } else if ((ch == 'o') && base == 0) { might_be_float = FALSE; base = 8; *sp++ = ch; my_getch(); } else { val = base; base = 10; } while (chtype[ch] == NU) { Int oval = val; *sp++ = ch; if (ch - '0' >= base) return (MkIntegerTerm(val)); val = val * base + ch - '0'; if (oval >= val && oval != 0) /* overflow */ has_overflow = (has_overflow || TRUE); my_getch(); } if (might_be_float && (ch == '.' || ch == 'e' || ch == 'E')) { if (ch == '.') { *sp++ = '.'; if (chtype[my_getch()] != NU) { my_ungetch(); *--sp = '\0'; if (has_overflow) return(read_int_overflow(s,base,val)); return (MkIntegerTerm(val)); } do *sp++ = ch; while (chtype[my_getch()] == NU); } if (ch == 'e' || ch == 'E') { *sp++ = 'e'; my_getch(); if (ch == '-') { *sp++ = ch; my_getch(); } else if (ch == '+') my_getch(); if (chtype[ch] != NU) { my_ungetch(); *--sp = '\0'; return (float_send(s)); } do *sp++ = ch; while (chtype[my_getch()] == NU); } *sp = '\0'; return (float_send(s)); } else if (has_overflow) { *sp = '\0'; /* skip base */ if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) return(read_int_overflow(s+2,16,val)); if (s[1] == '\'') return(read_int_overflow(s+2,base,val)); if (s[2] == '\'') return(read_int_overflow(s+3,base,val)); return(read_int_overflow(s,base,val)); } else return (MkIntegerTerm(val)); } /* given a function Nxtch scan until we either find the number or end of file */ Term Yap_scan_num(int (*Nxtch) (int)) { Term out; int sign = 1; Nextch = Nxtch; Yap_ErrorMessage = NULL; ch = Nextch(Yap_c_input_stream); if (ch == '-') { sign = -1; ch = Nextch(Yap_c_input_stream); } else if (ch == '+') { ch = Nextch(Yap_c_input_stream); } if (chtype[ch] != NU) { return(TermNil); } out = get_num(); if (sign == -1) { if (IsIntegerTerm(out)) out = MkIntegerTerm(-IntegerOfTerm(out)); else if (IsFloatTerm(out)) out = MkFloatTerm(-FloatOfTerm(out)); } if (Yap_ErrorMessage != NULL || ch != -1) return(TermNil); return(out); } /* gets a token */ static enum TokenKinds token(void) { int och, quote, isvar; char *charp, *mp; unsigned int len; TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE; charp = TokImage; while (chtype[ch] == BS) my_getch(); TokenPos = GetCurInpPos(); switch (chtype[ch]) { case CC: while (my_getch() != 10 && chtype[ch] != EF); if (chtype[ch] != EF) { Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (token()); } else { Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (eot_tok); } case UC: case UL: case LC: isvar = (chtype[ch] != LC); *charp++ = ch; for (my_getch(); chtype[ch] <= NU; my_getch()) *charp++ = ch; *charp++ = '\0'; if (!isvar) { /* don't do this in iso */ TokenInfo = Unsigned(Yap_LookupAtom(TokImage)); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (Name_tok); } else { TokenInfo = Unsigned(Yap_LookupVar(TokImage)); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (Var_tok); } case NU: TokenInfo = get_num(); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (Number_tok); case QT: case DC: quote = ch; len = 0; my_get_quoted_ch(); while (1) { if (charp + 1024 > (char *)AuxSp) { Yap_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)"; break; } if (ch == 10 && yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { /* in ISO a new line terminates a string */ Yap_ErrorMessage = "layout character \n inside quotes"; break; } if (ch == quote) { my_get_quoted_ch(); if (ch != quote) break; *charp++ = ch; my_get_quoted_ch(); } else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) { /* escape sequence */ ch = my_get_quoted_ch(); switch (ch) { case 10: /* don't add characters */ my_get_quoted_ch(); break; case 'a': *charp++ = '\a'; my_get_quoted_ch(); break; case 'b': *charp++ = '\b'; my_get_quoted_ch(); break; case 'r': *charp++ = '\r'; my_get_quoted_ch(); break; case 'f': *charp++ = '\f'; my_get_quoted_ch(); break; case 't': *charp++ = '\t'; my_get_quoted_ch(); break; case 'n': *charp++ = '\n'; my_get_quoted_ch(); break; case 'v': *charp++ = '\v'; my_get_quoted_ch(); break; case '\\': *charp++ = '\\'; my_get_quoted_ch(); break; case '\'': *charp++ = '\''; my_get_quoted_ch(); break; case '"': *charp++ = '"'; my_get_quoted_ch(); break; case '`': *charp++ = '`'; my_get_quoted_ch(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* character in octal: maximum of 3 digits, terminates with \ */ { unsigned char so_far = ch-'0'; my_get_quoted_ch(); if (ch >= '0' && ch < '8') {/* octal */ so_far = so_far*8+(ch-'0'); my_get_quoted_ch(); if (ch >= '0' && ch < '8') { /* octal */ *charp++ = so_far*8+(ch-'0'); my_get_quoted_ch(); if (ch != '\\') { Yap_ErrorMessage = "invalid octal escape sequence"; } else { my_get_quoted_ch(); } } else if (ch == '\\') { *charp++ = so_far; my_get_quoted_ch(); } else { Yap_ErrorMessage = "invalid octal escape sequence"; } } else if (ch == '\\') { *charp++ = so_far; my_get_quoted_ch(); } else { Yap_ErrorMessage = "invalid octal escape sequence"; } } break; case 'x': /* hexadecimal character (YAP allows empty hexadecimal */ { unsigned char so_far = 0; my_get_quoted_ch(); if (my_isxdigit(ch,'f','F')) {/* hexa */ so_far = so_far * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); my_get_quoted_ch(); if (my_isxdigit(ch,'f','F')) { /* hexa */ *charp++ = so_far * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); my_get_quoted_ch(); if (ch != '\\') { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } else { my_get_quoted_ch(); } } else if (ch == '\\') { *charp++ = so_far; my_get_quoted_ch(); } else { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } else if (ch == '\\') { *charp++ = so_far; my_get_quoted_ch(); } else { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } break; default: /* accept sequence. Note that the ISO standard does not consider this sequence legal, whereas SICStus would eat up the escape sequence. */ Yap_ErrorMessage = "invalid escape sequence"; } } else if (chtype[ch] == EF) { Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (eot_tok); } else { *charp++ = ch; my_get_quoted_ch(); } ++len; if (charp > (char *)AuxSp - 1024) { /* Not enough space to read in the string. */ Yap_ErrorMessage = "not enough heap space to read in string or quoted atom"; /* serious error now */ Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return(eot_tok); } } *charp = '\0'; if (quote == '"') { mp = AllocScannerMemory(len + 1); if (mp == NULL) { Yap_ErrorMessage = "not enough stack space to read in string or quoted atom"; Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return(eot_tok); } strcpy(mp, TokImage); TokenInfo = Unsigned(mp); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (String_tok); } else { TokenInfo = Unsigned(Yap_LookupAtom(TokImage)); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (Name_tok); } case SY: och = ch; my_getch(); if (och == '/' && ch == '*') { while ((och != '*' || ch != '/') && chtype[ch] != EF) { och = ch; my_getch(); } if (chtype[ch] == EF) { Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (eot_tok); } my_getch(); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (token()); } if (och == '.' && (chtype[ch] == BS || chtype[ch] == EF || chtype[ch] == CC)) { Yap_eot_before_eof = TRUE; if (chtype[ch] == CC) while (my_getch() != 10 && chtype[ch] != EF); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (eot_tok); } else { *charp++ = och; for (; chtype[ch] == SY; my_getch()) *charp++ = ch; *charp = '\0'; TokenInfo = Unsigned(Yap_LookupAtom(TokImage)); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (Name_tok); } case SL: *charp++ = ch; *charp++ = '\0'; my_getch(); TokenInfo = Unsigned(Yap_LookupAtom(TokImage)); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (Name_tok); case BK: och = ch; do { my_getch(); } while (chtype[ch] == BS); if (och == '[' && ch == ']') { TokenInfo = Unsigned(AtomNil); my_getch(); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (Name_tok); } else { TokenInfo = och; Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (Ponctuation_tok); } case EF: Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (eot_tok); #ifdef DEBUG default: fprintf(Yap_stderr, "\n++++ token: wrong char type %c %d\n", ch, chtype[ch]); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (eot_tok); #else default: Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return (eot_tok); /* Just to make lint happy */ #endif } } TokEntry * Yap_tokenizer(int (*Nxtch) (int), int (*QuotedNxtch) (int)) { TokEntry *t, *l, *p; enum TokenKinds kind; int solo_flag = TRUE; Yap_ErrorMessage = NULL; Yap_VarTable = NULL; Yap_AnonVarTable = NULL; Nextch = Nxtch; QuotedNextch = QuotedNxtch; Yap_eot_before_eof = FALSE; l = NIL; p = NIL; /* Just to make lint happy */ ch = ' '; my_getch(); while (chtype[ch] == BS) { my_getch(); } do { t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (t == NULL) { Yap_ErrorMessage = "not enough stack space to read in term"; if (p != NIL) p->TokInfo = eot_tok; /* serious error now */ return(l); } if (l == NIL) l = t; else p->TokNext = t; p = t; if ((kind = token()) == Name_tok && ch == '(') solo_flag = FALSE; else if (kind == Ponctuation_tok && TokenInfo == '(' && !solo_flag) { TokenInfo = 'l'; solo_flag = TRUE; } t->Tok = Ord(kind); #ifdef DEBUG if(Yap_Option[2]) fprintf(Yap_stderr,"[Token %d %ld]",Ord(kind),(unsigned long int)TokenInfo); #endif t->TokInfo = (Term) TokenInfo; t->TokPos = TokenPos; t->TokNext = NIL; if (Yap_ErrorMessage) { /* insert an error token to inform the system on what happened */ TokEntry *e = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (e == NULL) { Yap_ErrorMessage = "not enough stack space to read in term"; p->TokInfo = eot_tok; /* serious error now */ return(l); } p->TokNext = e; e->Tok = Error_tok; e->TokInfo = MkAtomTerm(Yap_LookupAtom(Yap_ErrorMessage)); e->TokPos = TokenPos; e->TokNext = NIL; Yap_ErrorMessage = NULL; p = e; } } while (kind != eot_tok); return (l); } #if DEBUG static inline int debug_fgetch(void) { int ch = Yap_PlFGetchar(); if (Yap_Option[1]) fprintf(Yap_stderr, "[getch %c,%d]", ch,ch); return (ch); } #define my_fgetch() (ch = debug_fgetch()) #else #define my_fgetch() (ch = Yap_PlFGetchar()) #endif TokEntry * Yap_fast_tokenizer(void) { /* I hope, a compressed version of the last * three files */ TokEntry *t, *l, *p; enum TokenKinds kind; register int ch, och; int solo_flag = TRUE; Yap_ErrorMessage = NULL; Yap_VarTable = NULL; Yap_AnonVarTable = NULL; Yap_eot_before_eof = FALSE; l = NIL; p = NIL; /* Just to make lint happy */ my_fgetch(); while (chtype[ch] == BS) my_fgetch(); if (chtype[ch] == EF) return(NIL); do { t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (t == NULL) { Yap_ErrorMessage = "not enough stack space to read in term"; if (p != NIL) p->TokInfo = eot_tok; /* serious error now */ return(l); } if (l == NIL) l = t; else p->TokNext = t; p = t; /* old code for token() */ { int quote, isvar; char *charp, *mp; unsigned int len; get_tok: charp = TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE; while (chtype[ch] == BS) my_fgetch(); TokenPos = GetCurInpPos(); switch (chtype[ch]) { case CC: while (my_fgetch() != 10 && chtype[ch] != EF); if (chtype[ch] != EF) { my_fgetch(); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); goto get_tok; } else kind = eot_tok; break; case UC: case UL: case LC: isvar = (chtype[ch] != LC); *charp++ = ch; inside_letters: for (my_fgetch(); chtype[ch] <= NU; my_fgetch()) *charp++ = ch; *charp++ = '\0'; if (!isvar) { TokenInfo = Unsigned(Yap_LookupAtom(TokImage)); if (ch == '(') solo_flag = FALSE; kind = Name_tok; } else { TokenInfo = Unsigned(Yap_LookupVar(TokImage)); kind = Var_tok; } break; case NU: { char *sp = TokImage; Int val = 0, base = ch - '0'; int might_be_float = TRUE, has_overflow = FALSE; *sp++ = ch; my_fgetch(); /* * because of things like 00'2, 03'2 * and even better 12'2, I need to do * this (have mercy) */ if (chtype[ch] == NU) { *sp++ = ch; base = 10 * base + ch - '0'; my_fgetch(); } if (ch == '\'') { might_be_float = FALSE; *sp++ = ch; restart: my_fgetch(); if (base == 0) { Int ascii = ch; /* * a quick way to * represent ASCII */ if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) { /* escape sequence */ my_fgetch(); switch (ch) { case 10: goto restart; case 'a': ascii = '\a'; break; case 'b': ascii = '\b'; break; case 'r': ascii = '\r'; break; case 'f': ascii = '\f'; break; case 't': ascii = '\t'; break; case 'n': ascii = '\n'; break; case 'v': ascii = '\v'; break; case '\\': ascii = '\\'; break; case '\'': ascii = '\''; break; case '"': ascii = '"'; break; case '`': ascii = '`'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* character in octal: maximum of 3 digits, terminates with \ */ { unsigned char so_far = ch-'0'; my_fgetch(); if (ch >= '0' && ch < '8') {/* octal */ so_far = so_far*8+(ch-'0'); my_fgetch(); if (ch >= '0' && ch < '8') { /* octal */ ascii = so_far*8+(ch-'0'); my_fgetch(); if (ch != '\\') { Yap_ErrorMessage = "invalid octal escape sequence"; } } else if (ch == '\\') { ascii = so_far; } else { Yap_ErrorMessage = "invalid octal escape sequence"; } } else if (ch == '\\') { ascii = so_far; } else { Yap_ErrorMessage = "invalid octal escape sequence"; } } break; case 'x': /* hexadecimal character (YAP allows empty hexadecimal */ { unsigned char so_far = 0; my_fgetch(); if (my_isxdigit(ch,'f','F')) {/* hexa */ so_far = so_far * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); my_fgetch(); if (my_isxdigit(ch,'f','F')) { /* hexa */ ascii = so_far * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); my_fgetch(); if (ch != '\\') { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } else if (ch == '\\') { ascii = so_far; } else { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } else if (ch == '\\') { ascii = so_far; my_fgetch(); } else { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } break; default: /* accept sequence. Note that the ISO standard does not consider this sequence legal, whereas SICStus would eat up the escape sequence. */ Yap_ErrorMessage = "invalid escape sequence"; } } my_fgetch(); TokenInfo = (CELL) MkIntTerm(ascii); goto end_of_read_number; } else if (base >= 10 && base <= 36) { int upper_case = 'A' - 11 + base; int lower_case = 'a' - 11 + base; while (my_isxdigit(ch, upper_case, lower_case)) { Int oval = val; *sp++ = ch; val = val * base + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); if (oval >= val && oval != 0) /* overflow */ has_overflow = (has_overflow || TRUE); my_fgetch(); } } } else if ((ch == 'x' || ch == 'X') && base == 0) { might_be_float = FALSE; *sp++ = ch; my_fgetch(); while (my_isxdigit(ch, 'F', 'f')) { Int oval = val; *sp++ = ch; val = val * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); if (oval >= val && oval != 0) /* overflow */ has_overflow = (has_overflow || TRUE); my_fgetch(); } } else { val = base; base = 10; } while (chtype[ch] == NU) { Int oval = val; *sp++ = ch; if (ch - '0' >= base) { TokenInfo = (CELL) MkIntegerTerm(val); goto end_of_read_number; } val = val * base + ch - '0'; if (oval >= val && oval != 0) /* overflow */ has_overflow = (has_overflow || TRUE); my_fgetch(); } if (might_be_float && (ch == '.' || ch == 'e' || ch == 'E')) { if (ch == '.') { *sp++ = '.'; if (chtype[my_fgetch()] != NU) { /* * first * process * the new * token */ t->Tok = Ord(Number_tok); #ifdef DEBUG /* * if(Yap_Option[2 * ]) * fprintf(Yap_stderr,"[To * ken %d * %d]",Ord(ki * nd),TokenIn * fo); */ #endif if (has_overflow) t->TokInfo = read_int_overflow(TokImage,base,val); else t->TokInfo = MkIntegerTerm(val); t->TokPos = TokenPos; t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (t == NULL) { Yap_ErrorMessage = "not enough stack space to read in term"; if (p != NIL) p->TokInfo = eot_tok; /* serious error now */ Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return(l); } if (l == NIL) l = t; else p->TokNext = t; p = t; /* * continue * analysis */ och = '.'; goto inside_symbol; } do *sp++ = ch; while (chtype[my_fgetch()] == NU); } if (ch == 'e' || ch == 'E') { *sp++ = 'e'; my_fgetch(); if (ch == '-') { *sp++ = ch; my_fgetch(); } else if (ch == '+') my_fgetch(); if (chtype[ch] != NU) { /* * first * finish * processing */ --sp; och = *sp; *sp = '\0'; /* * first * process * the new * token */ t->Tok = Ord(Number_tok); t->TokPos = TokenPos; t->TokInfo = float_send(TokImage); t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (t == NULL) { Yap_ErrorMessage = "not enough stack space to read in term"; if (p != NIL) p->TokInfo = eot_tok; /* serious error now */ Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); return(l); } if (l == NIL) l = t; else p->TokNext = t; p = t; /* * now try to * backtrack * statically */ if (chtype[och] == SY) goto inside_symbol; else { charp = TokImage; isvar = (chtype[och] != LC); *charp++ = och; goto inside_letters; } } do *sp++ = ch; while (chtype[my_fgetch()] == NU); } *sp = '\0'; TokenInfo = (CELL) float_send(TokImage); goto end_of_read_number; } if (has_overflow) { *sp = '\0'; /* skip base */ if (TokImage[0] == '0' && (TokImage[1] == 'x' || TokImage[1] == 'X')) TokenInfo = read_int_overflow(TokImage+2,16,val); else if (TokImage[1] == '\'') TokenInfo = read_int_overflow(TokImage+2,base,val); else if (TokImage[2] == '\'') TokenInfo = read_int_overflow(TokImage+3,base,val); else TokenInfo = read_int_overflow(TokImage,base,val); } else TokenInfo = (CELL) MkIntegerTerm(val); } end_of_read_number: kind = Number_tok; break; case QT: case DC: quote = ch; len = 0; my_fgetch(); while (1) { if (charp + 1024 > (char *)AuxSp) { Yap_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)"; break; } if (ch == 10 && yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { /* in ISO a new line terminates a string */ Yap_ErrorMessage = "layout character \n inside quotes"; break; } if (ch == quote) { my_fgetch(); if (ch != quote) break; *charp++ = ch; my_fgetch(); } else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) { /* escape sequence */ my_fgetch(); switch (ch) { case 10: /* just skip */ my_fgetch(); break; case 'a': *charp++ = '\a'; my_fgetch(); break; case 'b': *charp++ = '\b'; my_fgetch(); break; case 'r': *charp++ = '\r'; my_fgetch(); break; case 'f': *charp++ = '\f'; my_fgetch(); break; case 't': *charp++ = '\t'; my_fgetch(); break; case 'n': *charp++ = '\n'; my_fgetch(); break; case 'v': *charp++ = '\v'; my_fgetch(); break; case '\\': *charp++ = '\\'; my_fgetch(); break; case '\'': *charp++ = '\''; my_fgetch(); break; case '"': *charp++ = '"'; my_fgetch(); break; case '`': *charp++ = '`'; my_fgetch(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* character in octal: maximum of 3 digits, terminates with \ */ { unsigned char so_far = ch-'0'; my_fgetch(); if (ch >= '0' && ch < '8') {/* octal */ so_far = so_far*8+(ch-'0'); my_fgetch(); if (ch >= '0' && ch < '8') { /* octal */ *charp++ = so_far*8+(ch-'0'); my_fgetch(); if (ch != '\\') { Yap_ErrorMessage = "invalid octal escape sequence"; } else { my_fgetch(); } } else if (ch == '\\') { *charp++ = so_far; my_fgetch(); } else { Yap_ErrorMessage = "invalid octal escape sequence"; } } else if (ch == '\\') { *charp++ = so_far; my_fgetch(); } else { Yap_ErrorMessage = "invalid octal escape sequence"; } } break; case 'x': /* hexadecimal character (YAP allows empty hexadecimal */ { unsigned char so_far = 0; my_fgetch(); if (my_isxdigit(ch,'f','F')) {/* hexa */ so_far = so_far * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); my_fgetch(); if (my_isxdigit(ch,'f','F')) { /* hexa */ *charp++ = so_far * 16 + (chtype[ch] == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); my_fgetch(); if (ch != '\\') { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } else { my_fgetch(); } } else if (ch == '\\') { *charp++ = so_far; my_fgetch(); } else { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } else if (ch == '\\') { *charp++ = so_far; my_fgetch(); } else { Yap_ErrorMessage = "invalid hexadecimal escape sequence"; } } break; default: /* accept sequence. Note that the ISO standard does not consider this sequence legal, whereas SICStus would eat up the escape sequence. */ Yap_ErrorMessage = "invalid escape sequence"; } } else { *charp++ = ch; my_fgetch(); } if (chtype[ch] == EF) { kind = eot_tok; break; } ++len; if (charp > (char *)AuxSp - 1024) { /* Not enough space to read in the string. */ Yap_ErrorMessage = "not enough heap space to read in string or quoted atom"; /* serious error now */ kind = eot_tok; } } *charp = '\0'; if (quote == '"') { mp = AllocScannerMemory(len + 1); if (mp == NULL) { Yap_ErrorMessage = "not enough stack space to read in string or quoted atom"; /* serious error now */ kind = eot_tok; } strcpy(mp, TokImage); TokenInfo = Unsigned(mp); kind = String_tok; } else { TokenInfo = Unsigned(Yap_LookupAtom(TokImage)); if (ch == '(') solo_flag = FALSE; kind = Name_tok; } break; case SY: och = ch; my_fgetch(); inside_symbol: if (och == '/' && ch == '*') { while ((ch != '/' || och != '*') && chtype[ch] != EF) { och = ch; my_fgetch(); } if (chtype[ch] == EF) { kind = eot_tok; break; } my_fgetch(); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); goto get_tok; } if (och == '.' && (chtype[ch] == BS || chtype[ch] == EF || chtype[ch] == CC)) { Yap_eot_before_eof = TRUE; if (chtype[ch] == CC) while (my_fgetch() != 10 && chtype[ch] != EF); kind = eot_tok; } else { *charp++ = och; for (; chtype[ch] == SY; my_fgetch()) *charp++ = ch; *charp = '\0'; TokenInfo = Unsigned(Yap_LookupAtom(TokImage)); if (ch == '(') solo_flag = FALSE; kind = Name_tok; } break; case SL: *charp++ = ch; *charp++ = '\0'; my_fgetch(); TokenInfo = Unsigned(Yap_LookupAtom(TokImage)); if (ch == '(') solo_flag = FALSE; kind = Name_tok; break; case BK: och = ch; do { /* skip spaces to look for stuff such as [ ] */ my_fgetch(); } while (chtype[ch] == BS); if (och == '[' && ch == ']') { TokenInfo = Unsigned(AtomNil); my_fgetch(); if (ch == '(') solo_flag = FALSE; kind = Name_tok; } else { if (!solo_flag && och == '(') { TokenInfo = 'l'; solo_flag = TRUE; } else TokenInfo = och; kind = Ponctuation_tok; } break; case EF: kind = eot_tok; break; #ifdef DEBUG default: fprintf(Yap_stderr, "\n++++ token: wrong char type %c %d\n", ch, chtype[ch]); kind = eot_tok; #else default: kind = eot_tok; /* Just to make lint happy */ #endif } } t->Tok = Ord(kind); #ifdef DEBUG if(Yap_Option[2]) fprintf(Yap_stderr,"[Token %d %ld]\n",Ord(kind),(unsigned long int)TokenInfo); #endif t->TokInfo = (Term) TokenInfo; t->TokPos = TokenPos; t->TokNext = NIL; Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); if (Yap_ErrorMessage) { /* insert an error token to inform the system on what happened */ TokEntry *e = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (e == NULL) { Yap_ErrorMessage = "not enough stack space to read in term"; p->TokInfo = eot_tok; /* serious error now */ return(l); } p->TokNext = e; e->Tok = Error_tok; e->TokInfo = MkAtomTerm(Yap_LookupAtom(Yap_ErrorMessage)); e->TokPos = TokenPos; e->TokNext = NIL; Yap_ErrorMessage = NULL; p = e; } } while (kind != eot_tok); return (l); }