improvements to language support.
This commit is contained in:
parent
950144a63f
commit
f3b6a044f6
|
@ -186,11 +186,13 @@ DefaultEncoding(void)
|
||||||
char *s = getenv("LANG");
|
char *s = getenv("LANG");
|
||||||
size_t sz;
|
size_t sz;
|
||||||
|
|
||||||
/* if we don't have a LNAG then just use ISO_LATIN1 */
|
/* if we don't have a LANG then just use ISO_LATIN1 */
|
||||||
|
if (s == NULL)
|
||||||
|
s = getenv("LC_CTYPE");
|
||||||
if (s == NULL)
|
if (s == NULL)
|
||||||
return ENC_ISO_LATIN1;
|
return ENC_ISO_LATIN1;
|
||||||
sz = strlen(s);
|
sz = strlen(s);
|
||||||
if (sz > 5) {
|
if (sz >= 5) {
|
||||||
if (s[sz-5] == 'U' &&
|
if (s[sz-5] == 'U' &&
|
||||||
s[sz-4] == 'T' &&
|
s[sz-4] == 'T' &&
|
||||||
s[sz-3] == 'F' &&
|
s[sz-3] == 'F' &&
|
||||||
|
|
93
C/scanner.c
93
C/scanner.c
|
@ -119,8 +119,22 @@ EF,
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
char *Yap_chtype = chtype0+1;
|
char *Yap_chtype = chtype0+1;
|
||||||
|
|
||||||
|
int
|
||||||
|
Yap_wide_chtype(Int ch) {
|
||||||
|
if (iswalnum(ch)) {
|
||||||
|
if (iswlower(ch)) return LC;
|
||||||
|
if (iswdigit(ch)) return NU;
|
||||||
|
return UC;
|
||||||
|
}
|
||||||
|
if (iswpunct(ch)) return SY;
|
||||||
|
return BS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* in case there is an overflow */
|
/* in case there is an overflow */
|
||||||
typedef struct scanner_extra_alloc {
|
typedef struct scanner_extra_alloc {
|
||||||
struct scanner_extra_alloc *next;
|
struct scanner_extra_alloc *next;
|
||||||
|
@ -737,6 +751,17 @@ ch_to_wide(char *base, char *charp)
|
||||||
return nb+n;
|
return nb+n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define add_ch_to_buff(ch) \
|
||||||
|
if (wcharp) { *wcharp++ = (ch); charp = (char *)wcharp; } \
|
||||||
|
else { \
|
||||||
|
if (ch > MAX_ISO_LATIN1 && !wcharp) { \
|
||||||
|
/* does not fit in ISO-LATIN */ \
|
||||||
|
wcharp = ch_to_wide(TokImage, charp); \
|
||||||
|
if (!wcharp) goto huge_var_error; \
|
||||||
|
*wcharp++ = (ch); charp = (char *)wcharp; \
|
||||||
|
} else *charp++ = ch; \
|
||||||
|
}
|
||||||
|
|
||||||
TokEntry *
|
TokEntry *
|
||||||
Yap_tokenizer(int inp_stream, Term *tposp)
|
Yap_tokenizer(int inp_stream, Term *tposp)
|
||||||
{
|
{
|
||||||
|
@ -820,8 +845,9 @@ Yap_tokenizer(int inp_stream, Term *tposp)
|
||||||
scan_name:
|
scan_name:
|
||||||
TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE;
|
TokImage = ((AtomEntry *) ( Yap_PreAllocCodeSpace()))->StrOfAE;
|
||||||
charp = TokImage;
|
charp = TokImage;
|
||||||
|
wcharp = NULL;
|
||||||
isvar = (chtype(och) != LC);
|
isvar = (chtype(och) != LC);
|
||||||
*charp++ = och;
|
add_ch_to_buff(och);
|
||||||
for (; chtype(ch) <= NU; ch = Nxtch(inp_stream)) {
|
for (; chtype(ch) <= NU; ch = Nxtch(inp_stream)) {
|
||||||
if (charp == (char *)AuxSp-1024) {
|
if (charp == (char *)AuxSp-1024) {
|
||||||
huge_var_error:
|
huge_var_error:
|
||||||
|
@ -835,19 +861,24 @@ Yap_tokenizer(int inp_stream, Term *tposp)
|
||||||
UNLOCK(Stream[inp_stream].streamlock);
|
UNLOCK(Stream[inp_stream].streamlock);
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
*charp++ = ch;
|
add_ch_to_buff(ch);
|
||||||
}
|
}
|
||||||
while (ch == '\'' && isvar && yap_flags[VARS_CAN_HAVE_QUOTE_FLAG]) {
|
while (ch == '\'' && isvar && yap_flags[VARS_CAN_HAVE_QUOTE_FLAG]) {
|
||||||
if (charp == (char *)AuxSp-1024) {
|
if (charp == (char *)AuxSp-1024) {
|
||||||
goto huge_var_error;
|
goto huge_var_error;
|
||||||
}
|
}
|
||||||
*charp++ = ch;
|
add_ch_to_buff(ch);
|
||||||
ch = Nxtch(inp_stream);
|
ch = Nxtch(inp_stream);
|
||||||
}
|
}
|
||||||
*charp++ = '\0';
|
add_ch_to_buff('\0');
|
||||||
if (!isvar) {
|
if (!isvar) {
|
||||||
|
Atom ae;
|
||||||
/* don't do this in iso */
|
/* don't do this in iso */
|
||||||
Atom ae = Yap_LookupAtom(TokImage);
|
if (wcharp) {
|
||||||
|
ae = Yap_LookupWideAtom((wchar_t *)TokImage);
|
||||||
|
} else {
|
||||||
|
ae = Yap_LookupAtom(TokImage);
|
||||||
|
}
|
||||||
if (ae == NIL) {
|
if (ae == NIL) {
|
||||||
Yap_Error_TYPE = OUT_OF_HEAP_ERROR;
|
Yap_Error_TYPE = OUT_OF_HEAP_ERROR;
|
||||||
Yap_ErrorMessage = "Code Space Overflow";
|
Yap_ErrorMessage = "Code Space Overflow";
|
||||||
|
@ -1005,18 +1036,10 @@ Yap_tokenizer(int inp_stream, Term *tposp)
|
||||||
wcharp = NULL;
|
wcharp = NULL;
|
||||||
|
|
||||||
while (TRUE) {
|
while (TRUE) {
|
||||||
if (wcharp && wcharp + 1024 > (wchar_t *)AuxSp) {
|
if (charp + 1024 > (char *)AuxSp) {
|
||||||
Yap_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
|
Yap_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
|
||||||
Yap_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
|
Yap_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
|
||||||
break;
|
break;
|
||||||
} else if (charp + 1024 > (char *)AuxSp) {
|
|
||||||
Yap_Error_TYPE = OUT_OF_AUXSPACE_ERROR;
|
|
||||||
Yap_ErrorMessage = "Heap Overflow While Scanning: please increase code space (-h)";
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (!wcharp && ch > MAX_ISO_LATIN1){
|
|
||||||
/* does not fit in ISO-LATIN */
|
|
||||||
wcharp = ch_to_wide(TokImage, charp);
|
|
||||||
}
|
}
|
||||||
if (ch == 10 && yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
|
if (ch == 10 && yap_flags[CHARACTER_ESCAPE_FLAG] == ISO_CHARACTER_ESCAPES) {
|
||||||
/* in ISO a new line terminates a string */
|
/* in ISO a new line terminates a string */
|
||||||
|
@ -1027,25 +1050,12 @@ Yap_tokenizer(int inp_stream, Term *tposp)
|
||||||
ch = QuotedNxtch(inp_stream);
|
ch = QuotedNxtch(inp_stream);
|
||||||
if (ch != quote)
|
if (ch != quote)
|
||||||
break;
|
break;
|
||||||
if (wcharp)
|
add_ch_to_buff(ch);
|
||||||
*wcharp++ = ch;
|
|
||||||
else
|
|
||||||
*charp++ = ch;
|
|
||||||
ch = QuotedNxtch(inp_stream);
|
ch = QuotedNxtch(inp_stream);
|
||||||
} else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
|
} else if (ch == '\\' && yap_flags[CHARACTER_ESCAPE_FLAG] != CPROLOG_CHARACTER_ESCAPES) {
|
||||||
int scan_next = TRUE;
|
int scan_next = TRUE;
|
||||||
if (wcharp)
|
ch = read_quoted_char(&scan_next, inp_stream, QuotedNxtch);
|
||||||
*wcharp++ = read_quoted_char(&scan_next, inp_stream, QuotedNxtch);
|
add_ch_to_buff(ch);
|
||||||
else {
|
|
||||||
wchar_t next = read_quoted_char(&scan_next, inp_stream, QuotedNxtch);
|
|
||||||
if (next > MAX_ISO_LATIN1){
|
|
||||||
/* does not fit in ISO-LATIN */
|
|
||||||
wcharp = ch_to_wide(TokImage, charp);
|
|
||||||
*wcharp++ = next;
|
|
||||||
} else {
|
|
||||||
*charp++ = next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (scan_next) {
|
if (scan_next) {
|
||||||
ch = QuotedNxtch(inp_stream);
|
ch = QuotedNxtch(inp_stream);
|
||||||
}
|
}
|
||||||
|
@ -1054,10 +1064,7 @@ Yap_tokenizer(int inp_stream, Term *tposp)
|
||||||
t->Tok = Ord(kind = eot_tok);
|
t->Tok = Ord(kind = eot_tok);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
if (wcharp)
|
add_ch_to_buff(ch);
|
||||||
*wcharp++ = ch;
|
|
||||||
else
|
|
||||||
*charp++ = ch;
|
|
||||||
ch = QuotedNxtch(inp_stream);
|
ch = QuotedNxtch(inp_stream);
|
||||||
}
|
}
|
||||||
++len;
|
++len;
|
||||||
|
@ -1106,15 +1113,15 @@ Yap_tokenizer(int inp_stream, Term *tposp)
|
||||||
t->TokInfo = Unsigned(Yap_LookupWideAtom((wchar_t *)TokImage));
|
t->TokInfo = Unsigned(Yap_LookupWideAtom((wchar_t *)TokImage));
|
||||||
} else {
|
} else {
|
||||||
t->TokInfo = Unsigned(Yap_LookupAtom(TokImage));
|
t->TokInfo = Unsigned(Yap_LookupAtom(TokImage));
|
||||||
if (t->TokInfo == (CELL)NIL) {
|
}
|
||||||
Yap_Error_TYPE = OUT_OF_HEAP_ERROR;
|
if (!(t->TokInfo)) {
|
||||||
Yap_ErrorMessage = "Code Space Overflow";
|
Yap_Error_TYPE = OUT_OF_HEAP_ERROR;
|
||||||
if (p)
|
Yap_ErrorMessage = "Code Space Overflow";
|
||||||
t->Tok = Ord(kind = eot_tok);
|
if (p)
|
||||||
/* serious error now */
|
t->Tok = Ord(kind = eot_tok);
|
||||||
UNLOCK(Stream[inp_stream].streamlock);
|
/* serious error now */
|
||||||
return l;
|
UNLOCK(Stream[inp_stream].streamlock);
|
||||||
}
|
return l;
|
||||||
}
|
}
|
||||||
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
|
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
|
||||||
t->Tok = Ord(kind = Name_tok);
|
t->Tok = Ord(kind = Name_tok);
|
||||||
|
|
11
H/yapio.h
11
H/yapio.h
|
@ -263,17 +263,20 @@ typedef enum {
|
||||||
#define MAX_ISO_LATIN1 255
|
#define MAX_ISO_LATIN1 255
|
||||||
|
|
||||||
/****************** character definition table **************************/
|
/****************** character definition table **************************/
|
||||||
|
|
||||||
#define NUMBER_OF_CHARS 256
|
#define NUMBER_OF_CHARS 256
|
||||||
extern char *Yap_chtype;
|
extern char *Yap_chtype;
|
||||||
|
|
||||||
EXTERN inline int STD_PROTO(chtype,(int));
|
EXTERN inline int STD_PROTO(chtype,(Int));
|
||||||
|
int STD_PROTO(Yap_wide_chtype,(Int));
|
||||||
|
|
||||||
EXTERN inline int
|
EXTERN inline int
|
||||||
chtype(int ch)
|
chtype(Int ch)
|
||||||
{
|
{
|
||||||
if (ch < 256)
|
if (ch < NUMBER_OF_CHARS)
|
||||||
return Yap_chtype[ch];
|
return Yap_chtype[ch];
|
||||||
return SL;
|
printf("type=%d\n",Yap_wide_chtype(ch));
|
||||||
|
return Yap_wide_chtype(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Reference in New Issue