improvements to language support.

This commit is contained in:
Vitor Santos Costa 2010-05-05 12:45:11 +01:00
parent 950144a63f
commit f3b6a044f6
3 changed files with 61 additions and 49 deletions

View File

@ -186,11 +186,13 @@ DefaultEncoding(void)
char *s = getenv("LANG"); char *s = getenv("LANG");
size_t sz; size_t sz;
/* if we don't have a LNAG then just use ISO_LATIN1 */ /* if we don't have a LANG then just use ISO_LATIN1 */
if (s == NULL)
s = getenv("LC_CTYPE");
if (s == NULL) if (s == NULL)
return ENC_ISO_LATIN1; return ENC_ISO_LATIN1;
sz = strlen(s); sz = strlen(s);
if (sz > 5) { if (sz >= 5) {
if (s[sz-5] == 'U' && if (s[sz-5] == 'U' &&
s[sz-4] == 'T' && s[sz-4] == 'T' &&
s[sz-3] == 'F' && s[sz-3] == 'F' &&

View File

@ -119,8 +119,22 @@ EF,
#endif #endif
}; };
char *Yap_chtype = chtype0+1; char *Yap_chtype = chtype0+1;
/*
 * Classify a character code with the C library's wide-character
 * predicates; chtype() falls back to this routine for codes that are not
 * covered by the Yap_chtype table.
 *
 * Returns LC for lower-case alphanumerics, NU for digits, UC for every
 * other alphanumeric, SY for punctuation and BS for anything else.
 */
int
Yap_wide_chtype(Int ch) {
  if (!iswalnum(ch)) {
    /* neither letter nor digit: punctuation is SY, the rest is BS */
    return iswpunct(ch) ? SY : BS;
  }
  if (iswlower(ch)) {
    return LC;
  }
  /* remaining alphanumerics: digits are NU, all others count as UC */
  return iswdigit(ch) ? NU : UC;
}
/* in case there is an overflow */ /* in case there is an overflow */
typedef struct scanner_extra_alloc { typedef struct scanner_extra_alloc {
struct scanner_extra_alloc *next; struct scanner_extra_alloc *next;
@ -737,6 +751,17 @@ ch_to_wide(char *base, char *charp)
return nb+n; return nb+n;
} }
/*
 * Append character `ch` to the token image currently being scanned.
 *
 * The expansion uses the caller's locals `charp`, `wcharp` and `TokImage`
 * and jumps to the caller's `huge_var_error` label, so it can only be used
 * where all of those are in scope.
 *
 * While `wcharp` is NULL the character is stored through `charp`.  The
 * first character above MAX_ISO_LATIN1 switches to wide storage through
 * ch_to_wide(); if that returns NULL control goes to `huge_var_error`.
 * Once `wcharp` is set every character is stored through it, and `charp`
 * is updated to the same address (presumably so the callers' overflow
 * checks on `charp` keep tracking the write position).
 *
 * The body is wrapped in do { } while (0) so that `add_ch_to_buff(x);`
 * is exactly one statement and is safe inside an unbraced if/else.
 * `ch` may be evaluated more than once: pass only side-effect-free
 * expressions.
 */
#define add_ch_to_buff(ch) \
  do { \
    if (wcharp) { \
      *wcharp++ = (ch); \
      charp = (char *)wcharp; \
    } else if ((ch) > MAX_ISO_LATIN1) { \
      /* does not fit in ISO-LATIN */ \
      wcharp = ch_to_wide(TokImage, charp); \
      if (!wcharp) goto huge_var_error; \
      *wcharp++ = (ch); \
      charp = (char *)wcharp; \
    } else { \
      *charp++ = (ch); \
    } \
  } while (0)
TokEntry * TokEntry *
Yap_tokenizer(int inp_stream, Term *tposp) Yap_tokenizer(int inp_stream, Term *tposp)
{ {
@ -820,8 +845,9 @@ Yap_tokenizer(int inp_stream, Term *tposp)
scan_name: scan_name:
TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE; TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE;
charp = TokImage; charp = TokImage;
wcharp = NULL;
isvar = (chtype(och) != LC); isvar = (chtype(och) != LC);
*charp++ = och; add_ch_to_buff(och);
for (; chtype(ch) <= NU; ch = Nxtch(inp_stream)) { for (; chtype(ch) <= NU; ch = Nxtch(inp_stream)) {
if (charp == (char *)AuxSp-1024) { if (charp == (char *)AuxSp-1024) {
huge_var_error: huge_var_error:
@ -835,19 +861,24 @@ Yap_tokenizer(int inp_stream, Term *tposp)
UNLOCK(Stream[inp_stream].streamlock); UNLOCK(Stream[inp_stream].streamlock);
return l; return l;
} }
*charp++ = ch; add_ch_to_buff(ch);
} }
while (ch == '\'' && isvar && yap_flags[VARS_CAN_HAVE_QUOTE_FLAG]) { while (ch == '\'' && isvar && yap_flags[VARS_CAN_HAVE_QUOTE_FLAG]) {
if (charp == (char *)AuxSp-1024) { if (charp == (char *)AuxSp-1024) {
goto huge_var_error; goto huge_var_error;
} }
*charp++ = ch; add_ch_to_buff(ch);
ch = Nxtch(inp_stream); ch = Nxtch(inp_stream);
} }
*charp++ = '\0'; add_ch_to_buff('\0');
if (!isvar) { if (!isvar) {
Atom ae;
/* don't do this in iso */ /* don't do this in iso */
Atom ae = Yap_LookupAtom(TokImage); if (wcharp) {
ae = Yap_LookupWideAtom((wchar_t *)TokImage);
} else {
ae = Yap_LookupAtom(TokImage);
}
if (ae == NIL) { if (ae == NIL) {
Yap_Error_TYPE = OUT_OF_HEAP_ERROR; Yap_Error_TYPE = OUT_OF_HEAP_ERROR;
Yap_ErrorMessage = "Code Space Overflow"; Yap_ErrorMessage = "Code Space Overflow";
@ -1005,18 +1036,10 @@ Yap_tokenizer(int inp_stream, Term *tposp)
wcharp = NULL; wcharp = NULL;
while (TRUE) { while (TRUE) {
if (wcharp && wcharp + 1024 > (wchar_t *)AuxSp) { if (charp + 1024 > (char *)AuxSp) {
Yap_Error_TYPE = OUT_OF_AUXSPACE_ERROR; Yap_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
Yap_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)"; Yap_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
break; break;
} else if (charp + 1024 > (char *)AuxSp) {
Yap_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
Yap_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
break;
}
if (!wcharp && ch > MAX_ISO_LATIN1){
/* does not fit in ISO-LATIN */
wcharp = ch_to_wide(TokImage, charp);
} }
if (ch == 10 && yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) { if (ch == 10 && yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
/* in ISO a new line terminates a string */ /* in ISO a new line terminates a string */
@ -1027,25 +1050,12 @@ Yap_tokenizer(int inp_stream, Term *tposp)
ch = QuotedNxtch(inp_stream); ch = QuotedNxtch(inp_stream);
if (ch != quote) if (ch != quote)
break; break;
if (wcharp) add_ch_to_buff(ch);
*wcharp++ = ch;
else
*charp++ = ch;
ch = QuotedNxtch(inp_stream); ch = QuotedNxtch(inp_stream);
} else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) { } else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
int scan_next = TRUE; int scan_next = TRUE;
if (wcharp) ch = read_quoted_char(&scan_next, inp_stream, QuotedNxtch);
*wcharp++ = read_quoted_char(&scan_next, inp_stream, QuotedNxtch); add_ch_to_buff(ch);
else {
wchar_t next = read_quoted_char(&scan_next, inp_stream, QuotedNxtch);
if (next > MAX_ISO_LATIN1){
/* does not fit in ISO-LATIN */
wcharp = ch_to_wide(TokImage, charp);
*wcharp++ = next;
} else {
*charp++ = next;
}
}
if (scan_next) { if (scan_next) {
ch = QuotedNxtch(inp_stream); ch = QuotedNxtch(inp_stream);
} }
@ -1054,10 +1064,7 @@ Yap_tokenizer(int inp_stream, Term *tposp)
t->Tok = Ord(kind = eot_tok); t->Tok = Ord(kind = eot_tok);
break; break;
} else { } else {
if (wcharp) add_ch_to_buff(ch);
*wcharp++ = ch;
else
*charp++ = ch;
ch = QuotedNxtch(inp_stream); ch = QuotedNxtch(inp_stream);
} }
++len; ++len;
@ -1106,15 +1113,15 @@ Yap_tokenizer(int inp_stream, Term *tposp)
t->TokInfo = Unsigned(Yap_LookupWideAtom((wchar_t *)TokImage)); t->TokInfo = Unsigned(Yap_LookupWideAtom((wchar_t *)TokImage));
} else { } else {
t->TokInfo = Unsigned(Yap_LookupAtom(TokImage)); t->TokInfo = Unsigned(Yap_LookupAtom(TokImage));
if (t->TokInfo == (CELL)NIL) { }
Yap_Error_TYPE = OUT_OF_HEAP_ERROR; if (!(t->TokInfo)) {
Yap_ErrorMessage = "Code Space Overflow"; Yap_Error_TYPE = OUT_OF_HEAP_ERROR;
if (p) Yap_ErrorMessage = "Code Space Overflow";
t->Tok = Ord(kind = eot_tok); if (p)
/* serious error now */ t->Tok = Ord(kind = eot_tok);
UNLOCK(Stream[inp_stream].streamlock); /* serious error now */
return l; UNLOCK(Stream[inp_stream].streamlock);
} return l;
} }
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
t->Tok = Ord(kind = Name_tok); t->Tok = Ord(kind = Name_tok);

View File

@ -263,17 +263,20 @@ typedef enum {
#define MAX_ISO_LATIN1 255 #define MAX_ISO_LATIN1 255
/****************** character definition table **************************/ /****************** character definition table **************************/
#define NUMBER_OF_CHARS 256 #define NUMBER_OF_CHARS 256
extern char *Yap_chtype; extern char *Yap_chtype;
EXTERN inline int STD_PROTO(chtype,(int)); EXTERN inline int STD_PROTO(chtype,(Int));
int STD_PROTO(Yap_wide_chtype,(Int));
EXTERN inline int EXTERN inline int
chtype(int ch) chtype(Int ch)
{ {
if (ch < 256) if (ch < NUMBER_OF_CHARS)
return Yap_chtype[ch]; return Yap_chtype[ch];
return SL; printf("type=%d\n",Yap_wide_chtype(ch));
return Yap_wide_chtype(ch);
} }