/************************************************************************* * * * YAP Prolog * * * * Yap Prolog was developed at NCCUP - Universidade do Porto * * * * Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003 * * * ************************************************************************** * * * File: %W% %G% * * Last rev: 22-1-03 * * mods: * * comments: Prolog's scanner * * * *************************************************************************/ /* * Description: * * This module produces a list of tokens for use by the parser. The calling * program should supply a routine int nextch(charpos) int *charpos; which, * when called should produce the next char or -1 if none availlable. The * scanner will stop producing tokens when it either finds an end of file * (-1) or a token consisting of just '.' followed by a blank or control * char. Scanner errors will be signalled by the scanner exiting with a non- * zero ErrorMsg and ErrorPos. Note that, even in this case, the scanner * will try to find the end of the term. A function char * *AllocScannerMemory(nbytes) should be supplied for allocating (temporary) * space for strings and for the table of prolog variables occurring in the * term. * */ #include "Yap.h" #include "Yatom.h" #include "YapHeap.h" #include "SWI-Stream.h" #include "yapio.h" #include "alloc.h" #include "eval.h" #if _MSC_VER || defined(__MINGW32__) #if HAVE_FINITE==1 #undef HAVE_FINITE #endif #include #endif #include "iopreds.h" #if HAVE_STRING_H #include #endif #if HAVE_WCTYPE_H #include #endif /* You just can't trust some machines */ #define my_isxdigit(C,SU,SL) (chtype(C) == NU || (C >= 'A' && \ C <= (SU)) || (C >= 'a' && C <= (SL))) #define my_isupper(C) ( C >= 'A' && C <= 'Z' ) #define my_islower(C) ( C >= 'a' && C <= 'z' ) static Term float_send(char *, int); static Term get_num(int *, int *, IOSTREAM *,char *,UInt,int); /* token table with some help from Richard O'Keefe's PD scanner */ static char chtype0[NUMBER_OF_CHARS+1] = { EF, /* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */ BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, /* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */ BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, /* sp ! " # $ % & ' ( ) * + , - . / */ BS, SL, DC, SY, LC, CC, SY, QT, BK, BK, SY, SY, BK, SY, SY, SY, /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, SY, SL, SY, SY, SY, SY, /* @ A B C D E F G H I J K L M N O */ SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, /* P Q R S T U V W X Y Z [ \ ] ^ _ */ UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, BK, SY, BK, SY, UL, /* ` a b c d e f g h i j k l m n o */ SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, /* p q r s t u v w x y z { | } ~ del */ LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, BK, BK, BK, SY, BS, /* 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 */ BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, /* 144 145 ’ 147 148 149 150 151 152 153 154 155 156 157 158 159 */ BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, /*   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯ */ BS, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SY, SY, SY, /* ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ */ SY, SY, LC, LC, SY, SY, SY, SY, SY, LC, LC, SY, SY, SY, SY, SY, /* À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï */ UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, /* Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß */ #ifdef vms UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, LC, #else UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC, #endif /* à á â ã ä å æ ç è é ê ë ì í î ï */ LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, /* ð ñ ò ó ô õ ö ÷ ø ù ú û ü cannot write the last three because of lcc */ #ifdef vms LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC #else LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC #endif }; char *Yap_chtype = chtype0+1; int Yap_wide_chtype(Int ch) { #if HAVE_WCTYPE_H if (iswalnum(ch)) { if (iswlower(ch)) return LC; if (iswdigit(ch)) return NU; return UC; } if (iswpunct(ch)) return SY; #endif return BS; } static inline int getchr__(IOSTREAM *inp) { int c = Sgetcode(inp); if ( !CharConversionTable || c < 0 || c >= 256 ) return c; return CharConversionTable[c]; } #define getchr(inp) getchr__(inp) #define getchrq(inp) Sgetcode(inp) EXTERN inline int GetCurInpPos (IOSTREAM *inp_stream) { return inp_stream->posbuf.lineno; } /* in case there is an overflow */ typedef struct scanner_extra_alloc { struct scanner_extra_alloc *next; void *filler; } ScannerExtraBlock; static char * AllocScannerMemory(unsigned int size) { CACHE_REGS char *AuxSpScan; AuxSpScan = LOCAL_ScannerStack; size = AdjustSize(size); if (LOCAL_ScannerExtraBlocks) { struct scanner_extra_alloc *ptr; if (!(ptr = (struct scanner_extra_alloc *)malloc(size+sizeof(ScannerExtraBlock)))) { return NULL; } ptr->next = LOCAL_ScannerExtraBlocks; LOCAL_ScannerExtraBlocks = ptr; return (char *)(ptr+1); } else if (LOCAL_TrailTop <= AuxSpScan+size) { UInt alloc_size = sizeof(CELL) * K16; if (size > alloc_size) alloc_size = size; if(!Yap_growtrail(alloc_size, TRUE)) { struct scanner_extra_alloc *ptr; if (!(ptr = (struct scanner_extra_alloc *)malloc(size+sizeof(ScannerExtraBlock)))) { return NULL; } ptr->next = LOCAL_ScannerExtraBlocks; LOCAL_ScannerExtraBlocks = ptr; return (char *)(ptr+1); } } LOCAL_ScannerStack = AuxSpScan+size; return AuxSpScan; } static void PopScannerMemory(char *block, unsigned int size) { CACHE_REGS if (block == LOCAL_ScannerStack-size) { LOCAL_ScannerStack -= size; } else if (block == (char *)(LOCAL_ScannerExtraBlocks+1)) { struct scanner_extra_alloc *ptr = LOCAL_ScannerExtraBlocks; LOCAL_ScannerExtraBlocks = ptr->next; free(ptr); } } char * Yap_AllocScannerMemory(unsigned int size) { /* I assume memory has been initialised */ return AllocScannerMemory(size); } extern double atof(const char *); static Term float_send(char *s, int sign) { CACHE_REGS Float f = (Float)atof(s); #if HAVE_FINITE if (yap_flags[LANGUAGE_MODE_FLAG] == 1) { /* iso */ if (!finite(f)) { CACHE_REGS LOCAL_ErrorMessage = "Float overflow while scanning"; return(MkEvalFl(0.0)); } } #endif return (MkEvalFl(f*sign)); } /* we have an overflow at s */ static Term read_int_overflow(const char *s, Int base, Int val, int sign) { #ifdef USE_GMP /* try to scan it as a bignum */ mpz_t new; Term t; mpz_init_set_str (new, s, base); if (sign < 0) mpz_neg(new, new); t = Yap_MkBigIntTerm(new); mpz_clear(new); return t; #else /* try to scan it as a float */ return MkIntegerTerm(val); #endif } static int send_error_message(char s[]) { CACHE_REGS LOCAL_ErrorMessage = s; return 0; } static wchar_t read_quoted_char(int *scan_nextp, IOSTREAM *inp_stream) { int ch; /* escape sequence */ restart: ch = getchrq(inp_stream); switch (ch) { case 10: do { ch = getchrq(inp_stream); if (ch == '\\') goto restart; if (chtype(ch) != BS || ch == 10) { return ch; } } while (TRUE); case 'a': return '\a'; case 'b': return '\b'; case 'c': if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { return send_error_message("invalid escape sequence \\c"); } else { /* sicstus */ ch = getchrq(inp_stream); if (chtype(ch) == SL) { goto restart; } else { return 'c'; } } case 'd': return 127; case 'e': return '\x1B'; /* , a.k.a. \e */ case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 's': /* space */ if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { return send_error_message("invalid escape sequence \\s"); } else return ' '; case 't': return '\t'; case 'u': { int i; wchar_t wc='\0'; for (i=0; i< 4; i++) { ch = getchrq(inp_stream); if (ch>='0' && ch <= '9') { wc += (ch-'0')<<((3-i)*4); } else if (ch>='a' && ch <= 'f') { wc += ((ch-'a')+10)<<((3-i)*4); } else if (ch>='A' && ch <= 'F') { wc += ((ch-'A')+10)<<((3-i)*4); } else { return send_error_message("invalid escape sequence"); } } return wc; } case 'U': { int i; wchar_t wc='\0'; for (i=0; i< 8; i++) { ch = getchrq(inp_stream); if (ch>='0' && ch <= '9') { wc += (ch-'0')<<((7-i)*4); } else if (ch>='a' && ch <= 'f') { wc += ((ch-'a')+10)<<((7-i)*4); } else if (ch>='A' && ch <= 'F') { wc += ((ch-'A')+10)<<((7-i)*4); } else { return send_error_message("invalid escape sequence"); } } return wc; } case 'v': return '\v'; case 'z': /* Prolog end-of-file */ if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { return send_error_message("invalid escape sequence \\z"); } else return -1; case '\\': return '\\'; case '\'': return '\''; case '"': return '"'; case '`': return '`'; case '^': if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { return send_error_message("invalid escape sequence"); } else { ch = getchrq(inp_stream); if (ch == '?') {/* delete character */ return 127; } else if (ch >= 'a' && ch < 'z') {/* hexa */ return ch - 'a'; } else if (ch >= 'A' && ch < 'Z') {/* hexa */ return ch - 'A'; } else { return '^'; } } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* character in octal: maximum of 3 digits, terminates with \ */ /* follow ISO */ if (TRUE || yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { unsigned char so_far = ch-'0'; ch = getchrq(inp_stream); if (ch >= '0' && ch < '8') {/* octal */ so_far = so_far*8+(ch-'0'); ch = getchrq(inp_stream); if (ch >= '0' && ch < '8') { /* octal */ so_far = so_far*8+(ch-'0'); ch = getchrq(inp_stream); if (ch != '\\') { return send_error_message("invalid octal escape sequence"); } return so_far; } else if (ch == '\\') { return so_far; } else { return send_error_message("invalid octal escape sequence"); } } else if (ch == '\\') { return so_far; } else { return send_error_message("invalid octal escape sequence"); } } else { /* sicstus */ unsigned char so_far = ch-'0'; ch = getchrq(inp_stream); if (ch >= '0' && ch < '8') {/* octal */ so_far = so_far*8+(ch-'0'); ch = getchrq(inp_stream); if (ch >= '0' && ch < '8') { /* octal */ return so_far*8+(ch-'0'); } else { *scan_nextp = FALSE; return so_far; } } else { *scan_nextp = FALSE; return so_far; } } case 'x': /* hexadecimal character (YAP allows empty hexadecimal */ if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { unsigned char so_far = 0; ch = getchrq(inp_stream); if (my_isxdigit(ch,'f','F')) {/* hexa */ so_far = so_far * 16 + (chtype(ch) == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); ch = getchrq(inp_stream); if (my_isxdigit(ch,'f','F')) { /* hexa */ so_far = so_far * 16 + (chtype(ch) == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); ch = getchrq(inp_stream); if (ch == '\\') { return so_far; } else { return send_error_message("invalid hexadecimal escape sequence"); } } else if (ch == '\\') { return so_far; } else { return send_error_message("invalid hexadecimal escape sequence"); } } else if (ch == '\\') { return so_far; } else { return send_error_message("invalid hexadecimal escape sequence"); } } else { /* sicstus mode */ unsigned char so_far = 0; ch = getchrq(inp_stream); so_far = (chtype(ch) == NU ? ch - '0' : my_isupper(ch) ? ch - 'A' + 10 : my_islower(ch) ? ch - 'a' +10 : 0); ch = getchrq(inp_stream); return so_far*16 + (chtype(ch) == NU ? ch - '0' : my_isupper(ch) ? ch - 'A' +10 : my_islower(ch) ? ch - 'a' + 10 : 0); } default: /* accept sequence. Note that the ISO standard does not consider this sequence legal, whereas SICStus would eat up the escape sequence. */ if (yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { return send_error_message("invalid escape sequence"); } else { /* sicstus */ if (chtype(ch) == SL) { goto restart; } else { return ch; } } } } static int num_send_error_message(char s[]) { CACHE_REGS LOCAL_ErrorMessage = s; return TermNil; } /* reads a number, either integer or float */ static Term get_num(int *chp, int *chbuffp, IOSTREAM *inp_stream, char *s, UInt max_size, int sign) { CACHE_REGS char *sp = s; int ch = *chp; Int val = 0L, base = ch - '0'; int might_be_float = TRUE, has_overflow = FALSE; *sp++ = ch; ch = getchr(inp_stream); /* * because of things like 00'2, 03'2 and even better 12'2, I need to * do this (have mercy) */ if (chtype(ch) == NU) { *sp++ = ch; if (--max_size == 0) { return num_send_error_message("Number Too Long"); } base = 10 * base + ch - '0'; ch = getchr(inp_stream); } if (ch == '\'') { if (base > 36) { return num_send_error_message("Admissible bases are 0..36"); } might_be_float = FALSE; if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = ch; ch = getchr(inp_stream); if (base == 0) { wchar_t ascii = ch; int scan_extra = TRUE; if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) { ascii = read_quoted_char(&scan_extra, inp_stream); } /* a quick way to represent ASCII */ if (scan_extra) *chp = getchr(inp_stream); if (sign == -1) { return MkIntegerTerm(-ascii); } return MkIntegerTerm(ascii); } else if (base >= 10 && base <= 36) { int upper_case = 'A' - 11 + base; int lower_case = 'a' - 11 + base; while (my_isxdigit(ch, upper_case, lower_case)) { Int oval = val; int chval = (chtype(ch) == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = ch; val = oval * base + chval; if (oval != (val-chval)/base) /* overflow */ has_overflow = (has_overflow || TRUE); ch = getchr(inp_stream); } } } else if (ch == 'x' && base == 0) { might_be_float = FALSE; if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = ch; ch = getchr(inp_stream); while (my_isxdigit(ch, 'F', 'f')) { Int oval = val; int chval = (chtype(ch) == NU ? ch - '0' : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10); if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = ch; val = val * 16 + chval; if (oval != (val-chval)/16) /* overflow */ has_overflow = TRUE; ch = getchr(inp_stream); } *chp = ch; } else if (ch == 'o' && base == 0) { might_be_float = FALSE; base = 8; ch = getchr(inp_stream); } else if (ch == 'b' && base == 0) { might_be_float = FALSE; base = 2; ch = getchr(inp_stream); } else { val = base; base = 10; } while (chtype(ch) == NU) { Int oval = val; if (!(val == 0 && ch == '0') || has_overflow) { if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = ch; } if (ch - '0' >= base) { if (sign == -1) return MkIntegerTerm(-val); return MkIntegerTerm(val); } val = val * base + ch - '0'; if (val/base != oval || val -oval*base != ch-'0') /* overflow */ has_overflow = TRUE; ch = getchr(inp_stream); } if (might_be_float && ( ch == '.' || ch == 'e' || ch == 'E')) { if (yap_flags[STRICT_ISO_FLAG] && (ch == 'e' || ch == 'E')) { return num_send_error_message("Float format not allowed in ISO mode"); } if (ch == '.') { if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = '.'; if (chtype(ch = getchr(inp_stream)) != NU) { *chbuffp = '.'; *chp = ch; *--sp = '\0'; if (has_overflow) return read_int_overflow(s,base,val,sign); if (sign == -1) return MkIntegerTerm(-val); return MkIntegerTerm(val); } do { if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = ch; } while (chtype(ch = getchr(inp_stream)) == NU); } if (ch == 'e' || ch == 'E') { char *sp0 = sp; char cbuff = ch; if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = ch; ch = getchr(inp_stream); if (ch == '-') { cbuff = '-'; if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = '-'; ch = getchr(inp_stream); } else if (ch == '+') { cbuff = '+'; ch = getchr(inp_stream); } if (chtype(ch) != NU) { /* error */ char *sp; *chp = ch; *chbuffp = cbuff; *sp0 = '\0'; for (sp = s; sp < sp0; sp++) { if (*sp == '.') return float_send(s,sign); } return MkIntegerTerm(sign*val); } do { if (--max_size == 0) { return num_send_error_message("Number Too Long"); } *sp++ = ch; } while (chtype(ch = getchr(inp_stream)) == NU); } *sp = '\0'; *chp = ch; return float_send(s,sign); } else if (has_overflow) { *sp = '\0'; /* skip base */ *chp = ch; if (s[0] == '0' && s[1] == 'x') return read_int_overflow(s+2,16,val,sign); else if (s[0] == '0' && s[1] == 'o') return read_int_overflow(s+2,8,val,sign); else if (s[0] == '0' && s[1] == 'b') return read_int_overflow(s+2,2,val,sign); if (s[1] == '\'') return read_int_overflow(s+2,base,val,sign); if (s[2] == '\'') return read_int_overflow(s+3,base,val,sign); return read_int_overflow(s,base,val,sign); } else { *chp = ch; return MkIntegerTerm(val*sign); } } /* given a function getchr scan until we either find the number or end of file */ Term Yap_scan_num(IOSTREAM *inp) { CACHE_REGS Term out; int sign = 1; int ch, cherr; char *ptr; LOCAL_ErrorMessage = NULL; LOCAL_ScannerStack = (char *)TR; LOCAL_ScannerExtraBlocks = NULL; if (!(ptr = AllocScannerMemory(4096))) { LOCAL_ErrorMessage = "Trail Overflow"; LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR; return TermNil; } ch = getchr(inp); while (chtype(ch) == BS) { ch = getchr(inp); } if (ch == '-') { sign = -1; ch = getchr(inp); } else if (ch == '+') { ch = getchr(inp); } if (chtype(ch) != NU) { Yap_clean_tokenizer(NULL, NULL, NULL, 0L); return TermNil; } cherr = '\0'; if (ASP-H < 1024) return TermNil; out = get_num(&ch, &cherr, inp, ptr, 4096, sign); /* */ PopScannerMemory(ptr, 4096); Yap_clean_tokenizer(NULL, NULL, NULL, 0L); if (LOCAL_ErrorMessage != NULL || ch != -1 || cherr) return TermNil; return out; } #define CHECK_SPACE() \ if (ASP-H < 1024) { \ LOCAL_ErrorMessage = "Stack Overflow"; \ LOCAL_Error_TYPE = OUT_OF_STACK_ERROR; \ LOCAL_Error_Size = 0L; \ if (p) \ p->Tok = Ord(kind = eot_tok); \ /* serious error now */ \ return l; \ } static void open_comment(int ch, IOSTREAM *inp_stream USES_REGS) { CELL *h0 = H; H += 5; h0[0] = AbsAppl(h0+2); h0[1] = TermNil; if (!LOCAL_CommentsTail) { /* first comment */ LOCAL_Comments = AbsPair(h0); } else { /* extra comment */ *LOCAL_CommentsTail = AbsPair(h0); } LOCAL_CommentsTail = h0+1; h0 += 2; h0[0] = (CELL)FunctorMinus; h0[1] = Yap_StreamPosition(inp_stream); h0[2] = TermNil; LOCAL_CommentsNextChar = h0+2; LOCAL_CommentsBuff = (wchar_t *)malloc(1024*sizeof(wchar_t)); LOCAL_CommentsBuffLim = 1024; LOCAL_CommentsBuff[0] = ch; LOCAL_CommentsBuffPos = 1; } static void extend_comment(int ch USES_REGS) { LOCAL_CommentsBuff[LOCAL_CommentsBuffPos] = ch; LOCAL_CommentsBuffPos++; if (LOCAL_CommentsBuffPos == LOCAL_CommentsBuffLim-1) { LOCAL_CommentsBuff = (wchar_t *)realloc(LOCAL_CommentsBuff,sizeof(wchar_t)*(LOCAL_CommentsBuffLim+4096)); LOCAL_CommentsBuffLim += 4096; } } static void close_comment( USES_REGS1 ) { LOCAL_CommentsBuff[LOCAL_CommentsBuffPos] = '\0'; *LOCAL_CommentsNextChar = Yap_MkBlobWideStringTerm(LOCAL_CommentsBuff, LOCAL_CommentsBuffPos); free(LOCAL_CommentsBuff); LOCAL_CommentsBuff = NULL; LOCAL_CommentsBuffLim = 0; } static wchar_t * ch_to_wide(char *base, char *charp) { CACHE_REGS int n = charp-base, i; wchar_t *nb = (wchar_t *)base; if ((nb+n) + 1024 > (wchar_t *)AuxSp) { LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR; LOCAL_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)"; return NULL; } for (i=n; i > 0; i--) { nb[i-1] = (unsigned char)base[i-1]; } return nb+n; } #define add_ch_to_buff(ch) \ if (wcharp) { *wcharp++ = (ch); if (wcharp >= (wchar_t *)AuxSp-1024) goto huge_var_error; charp = (char *)wcharp; } \ else { \ if (ch > MAX_ISO_LATIN1 && !wcharp) { \ /* does not fit in ISO-LATIN */ \ wcharp = ch_to_wide(TokImage, charp); \ if (!wcharp) goto huge_var_error; \ *wcharp++ = (ch); charp = (char *)wcharp; \ } else { if (charp >= (char *)AuxSp-1024) goto huge_var_error; *charp++ = ch; } \ } TokEntry * Yap_tokenizer(IOSTREAM *inp_stream, int store_comments, Term *tposp) { CACHE_REGS TokEntry *t, *l, *p; enum TokenKinds kind; int solo_flag = TRUE; int ch; wchar_t *wcharp; LOCAL_ErrorMessage = NULL; LOCAL_Error_Size = 0; LOCAL_VarTable = NULL; LOCAL_AnonVarTable = NULL; LOCAL_ScannerStack = (char *)TR; LOCAL_ScannerExtraBlocks = NULL; l = NULL; p = NULL; /* Just to make lint happy */ ch = getchr(inp_stream); while (chtype(ch) == BS) { ch = getchr(inp_stream); } *tposp = Yap_StreamPosition(inp_stream); LOCAL_StartLine = inp_stream->posbuf.lineno; do { wchar_t och; int quote, isvar; char *charp, *mp; unsigned int len; char *TokImage = NULL; t = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); t->TokNext = NULL; if (t == NULL) { LOCAL_ErrorMessage = "Trail Overflow"; LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR; if (p) p->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } if (!l) l = t; else p->TokNext = t; p = t; restart: while (chtype(ch) == BS) { ch = getchr(inp_stream); } t->TokPos = GetCurInpPos(inp_stream); switch (chtype(ch)) { case CC: if (store_comments) { CHECK_SPACE(); open_comment(ch, inp_stream PASS_REGS); continue_comment: while ((ch = getchr(inp_stream)) != 10 && chtype(ch) != EF) { CHECK_SPACE(); extend_comment(ch PASS_REGS); } CHECK_SPACE(); extend_comment(ch PASS_REGS); if (chtype(ch) != EF) { ch = getchr(inp_stream); if (chtype(ch) == CC) { extend_comment(ch PASS_REGS); goto continue_comment; } } close_comment( PASS_REGS1 ); } else { while ((ch = getchr(inp_stream)) != 10 && chtype(ch) != EF); } if (chtype(ch) != EF) { /* blank space */ if (t == l) { /* we found a comment before reading characters */ while (chtype(ch) == BS) { ch = getchr(inp_stream); } CHECK_SPACE(); *tposp = Yap_StreamPosition(inp_stream); } goto restart; } else { t->Tok = Ord(kind = eot_tok); } break; case UC: case UL: case LC: och = ch; ch = getchr(inp_stream); scan_name: TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE; charp = TokImage; wcharp = NULL; isvar = (chtype(och) != LC); add_ch_to_buff(och); for (; chtype(ch) <= NU; ch = getchr(inp_stream)) { if (charp == (char *)AuxSp-1024) { huge_var_error: /* huge atom or variable, we are in trouble */ LOCAL_ErrorMessage = "Code Space Overflow due to huge atom"; LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR; Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); if (p) p->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } add_ch_to_buff(ch); } while (ch == '\'' && isvar && yap_flags[VARS_CAN_HAVE_QUOTE_FLAG]) { if (charp == (char *)AuxSp-1024) { goto huge_var_error; } add_ch_to_buff(ch); ch = getchr(inp_stream); } add_ch_to_buff('\0'); if (!isvar) { Atom ae; /* don't do this in iso */ if (wcharp) { ae = Yap_LookupWideAtom((wchar_t *)TokImage); } else { ae = Yap_LookupAtom(TokImage); } if (ae == NIL) { LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR; LOCAL_ErrorMessage = "Code Space Overflow"; if (p) t->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } t->TokInfo = Unsigned(ae); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); if (ch == '(') solo_flag = FALSE; t->Tok = Ord(kind = Name_tok); } else { t->TokInfo = Unsigned(Yap_LookupVar(TokImage)); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); t->Tok = Ord(kind = Var_tok); } break; case NU: { int cherr; int cha = ch; char *ptr; cherr = 0; if (!(ptr = AllocScannerMemory(4096))) { LOCAL_ErrorMessage = "Trail Overflow"; LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR; if (p) t->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } CHECK_SPACE(); if ((t->TokInfo = get_num(&cha,&cherr,inp_stream,ptr,4096,1)) == 0L) { if (p) p->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } PopScannerMemory(ptr, 4096); ch = cha; if (cherr) { TokEntry *e; t->Tok = Number_tok; t->TokPos = GetCurInpPos(inp_stream); e = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (e == NULL) { LOCAL_ErrorMessage = "Trail Overflow"; LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR; if (p) p->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } else { e->TokNext = NULL; } t->TokNext = e; t = e; p = e; switch (cherr) { case 'e': case 'E': och = cherr; goto scan_name; break; case '=': case '_': /* handle error while parsing a float */ { TokEntry *e2; t->Tok = Ord(Var_tok); t->TokInfo = Unsigned(Yap_LookupVar("E")); t->TokPos = GetCurInpPos(inp_stream); e2 = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (e2 == NULL) { LOCAL_ErrorMessage = "Trail Overflow"; LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR; if (p) p->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } else { e2->TokNext = NULL; } t->TokNext = e2; t = e2; p = e2; if (cherr == '=') och = '+'; else och = '-'; } goto enter_symbol; case '+': case '-': /* handle error while parsing a float */ { TokEntry *e2; t->Tok = Name_tok; if (ch == '(') solo_flag = FALSE; t->TokInfo = Unsigned(AtomE); t->TokPos = GetCurInpPos(inp_stream); e2 = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (e2 == NULL) { LOCAL_ErrorMessage = "Trail Overflow"; LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR; t->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } else { e2->TokNext = NULL; } t->TokNext = e2; t = e2; p = e2; } default: och = cherr; goto enter_symbol; } } else { t->Tok = Ord(kind = Number_tok); } } break; case QT: case DC: TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE; charp = TokImage; quote = ch; len = 0; ch = getchrq(inp_stream); wcharp = NULL; while (TRUE) { if (charp + 1024 > (char *)AuxSp) { LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR; LOCAL_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)"; break; } if (ch == 10 && yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { /* in ISO a new line terminates a string */ LOCAL_ErrorMessage = "layout character \n inside quotes"; break; } if (ch == quote) { ch = getchrq(inp_stream); if (ch != quote) break; add_ch_to_buff(ch); ch = getchrq(inp_stream); } else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) { int scan_next = TRUE; ch = read_quoted_char(&scan_next, inp_stream); add_ch_to_buff(ch); if (scan_next) { ch = getchrq(inp_stream); } } else if (chtype(ch) == EF && ch <= MAX_ISO_LATIN1) { Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); t->Tok = Ord(kind = eot_tok); break; } else { add_ch_to_buff(ch); ch = getchrq(inp_stream); } ++len; if (charp > (char *)AuxSp - 1024) { /* Not enough space to read in the string. */ LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR; LOCAL_ErrorMessage = "not enough space to read in string or quoted atom"; /* serious error now */ Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); t->Tok = Ord(kind = eot_tok); return l; } } if (wcharp) { *wcharp = '\0'; } else { *charp = '\0'; } if (quote == '"') { if (wcharp) { mp = AllocScannerMemory(sizeof(wchar_t)*(len+1)); } else { mp = AllocScannerMemory(len + 1); } if (mp == NULL) { LOCAL_ErrorMessage = "not enough heap space to read in string or quoted atom"; Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); t->Tok = Ord(kind = eot_tok); return l; } if (wcharp) wcscpy((wchar_t *)mp,(wchar_t *)TokImage); else strcpy(mp, TokImage); t->TokInfo = Unsigned(mp); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); if (wcharp) { t->Tok = Ord(kind = WString_tok); } else { t->Tok = Ord(kind = String_tok); } } else { if (wcharp) { t->TokInfo = Unsigned(Yap_LookupWideAtom((wchar_t *)TokImage)); } else { t->TokInfo = Unsigned(Yap_LookupAtom(TokImage)); } if (!(t->TokInfo)) { LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR; LOCAL_ErrorMessage = "Code Space Overflow"; if (p) t->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); t->Tok = Ord(kind = Name_tok); if (ch == '(') solo_flag = FALSE; } break; case SY: och = ch; ch = getchr(inp_stream); if (och == '/' && ch == '*') { if (store_comments) { CHECK_SPACE(); open_comment('/', inp_stream PASS_REGS); while ((och != '*' || ch != '/') && chtype(ch) != EF) { och = ch; CHECK_SPACE(); extend_comment(ch PASS_REGS); ch = getchr(inp_stream); } if (chtype(ch) != EF) { CHECK_SPACE(); extend_comment(ch PASS_REGS); } close_comment( PASS_REGS1 ); } else { while ((och != '*' || ch != '/') && chtype(ch) != EF) { och = ch; ch = getchr(inp_stream); } } if (chtype(ch) == EF) { t->Tok = Ord(kind = eot_tok); } else { /* leave comments */ ch = getchr(inp_stream); if (t == l) { /* we found a comment before reading characters */ while (chtype(ch) == BS) { ch = getchr(inp_stream); } CHECK_SPACE(); *tposp = Yap_StreamPosition(inp_stream); } } goto restart; } enter_symbol: if (och == '.' && (chtype(ch) == BS || chtype(ch) == EF || chtype(ch) == CC)) { if (chtype(ch) == CC) while ((ch = getchr(inp_stream)) != 10 && chtype(ch) != EF); t->Tok = Ord(kind = eot_tok); } else { Atom ae; TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE; charp = TokImage; wcharp = NULL; add_ch_to_buff(och); for (; chtype(ch) == SY; ch = getchr(inp_stream)) { if (charp == (char *)AuxSp-1024) { goto huge_var_error; } add_ch_to_buff(ch); } add_ch_to_buff('\0'); if (wcharp) { ae = Yap_LookupWideAtom((wchar_t *)TokImage); } else { ae = Yap_LookupAtom(TokImage); } if (ae == NIL) { LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR; LOCAL_ErrorMessage = "Code Space Overflow"; if (p) t->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } t->TokInfo = Unsigned(ae); if (t->TokInfo == (CELL)NIL) { LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR; LOCAL_ErrorMessage = "Code Space Overflow"; if (p) t->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); t->Tok = Ord(kind = Name_tok); if (ch == '(') solo_flag = FALSE; else solo_flag = TRUE; } break; case SL: { char chs[2]; chs[0] = ch; chs[1] = '\0'; ch = getchr(inp_stream); t->TokInfo = Unsigned(Yap_LookupAtom(chs)); t->Tok = Ord(kind = Name_tok); if (ch == '(') solo_flag = FALSE; } break; case BK: och = ch; ch = getchr(inp_stream); t->TokInfo = och; if (t->TokInfo == '(' && !solo_flag) { t->TokInfo = 'l'; solo_flag = TRUE; } else if (och == '[') { while (chtype(ch) == BS) { ch = getchr(inp_stream); }; if (ch == ']') { t->TokInfo = Unsigned(AtomNil); t->Tok = Ord(kind = Name_tok); ch = getchr(inp_stream); solo_flag = FALSE; break; } } else if (och == '{') { while (chtype(ch) == BS) { ch = getchr(inp_stream); }; if (ch == '}') { t->TokInfo = Unsigned(AtomBraces); t->Tok = Ord(kind = Name_tok); ch = getchr(inp_stream); solo_flag = FALSE; break; } } t->Tok = Ord(kind = Ponctuation_tok); break; case EF: t->Tok = Ord(kind = eot_tok); break; default: #ifdef DEBUG fprintf(GLOBAL_stderr, "\n++++ token: wrong char type %c %d\n", ch, chtype(ch)); #endif t->Tok = Ord(kind = eot_tok); } #ifdef DEBUG if(GLOBAL_Option[2]) fprintf(GLOBAL_stderr,"[Token %d %ld]",Ord(kind),(unsigned long int)t->TokInfo); #endif if (LOCAL_ErrorMessage) { /* insert an error token to inform the system of what happened */ TokEntry *e = (TokEntry *) AllocScannerMemory(sizeof(TokEntry)); if (e == NULL) { LOCAL_ErrorMessage = "Trail Overflow"; LOCAL_Error_TYPE = OUT_OF_TRAIL_ERROR; p->Tok = Ord(kind = eot_tok); /* serious error now */ return l; } p->TokNext = e; e->Tok = Error_tok; e->TokInfo = MkAtomTerm(Yap_LookupAtom(LOCAL_ErrorMessage)); e->TokPos = GetCurInpPos(inp_stream); e->TokNext = NULL; LOCAL_ErrorMessage = NULL; p = e; } } while (kind != eot_tok); return (l); } void Yap_clean_tokenizer(TokEntry *tokstart, VarEntry *vartable, VarEntry *anonvartable, Term commentable) { CACHE_REGS struct scanner_extra_alloc *ptr = LOCAL_ScannerExtraBlocks; while (ptr) { struct scanner_extra_alloc *next = ptr->next; free(ptr); ptr = next; } LOCAL_Comments = TermNil; LOCAL_CommentsNextChar = LOCAL_CommentsTail = NULL; if (LOCAL_CommentsBuff) { free(LOCAL_CommentsBuff); LOCAL_CommentsBuff = NULL; } LOCAL_CommentsBuffLim = 0; }