Fix encoding
This commit is contained in:
C/atomic.c (10 lines changed)
@@ -921,7 +921,7 @@ cont_string_code3( USES_REGS1 )
|
||||
s0 = UStringOfTerm( t2 );
|
||||
i = IntOfTerm(EXTRA_CBACK_ARG(3,1)); // offset in coded string, increases by 1..6
|
||||
j = IntOfTerm(EXTRA_CBACK_ARG(3,2)); // offset in UNICODE string, always increases by 1
|
||||
- s = (s0+i) + get_utf8( (unsigned char *)s0+i, &chr );
|
||||
+ s = (s0+i) + get_utf8( (unsigned char *)s0+i, -1, &chr );
|
||||
if (s[0]) {
|
||||
EXTRA_CBACK_ARG(3,1) = MkIntTerm(s-s0);
|
||||
EXTRA_CBACK_ARG(3,2) = MkIntTerm(j+1);
|
||||
@@ -983,7 +983,7 @@ string_code3( USES_REGS1 )
|
||||
if (ns == NULL) {
|
||||
cut_fail(); // silently fail?
|
||||
}
|
||||
- get_utf8( (unsigned char *)ns, &chr);
|
||||
+ get_utf8( (unsigned char *)ns, -1, &chr);
|
||||
if ( chr == '\0') cut_fail();
|
||||
if (Yap_unify(ARG3, MkIntegerTerm(chr))) cut_succeed();
|
||||
cut_fail();
|
||||
@@ -1042,7 +1042,7 @@ get_string_code3( USES_REGS1 )
|
||||
if (ns == NULL) {
|
||||
return FALSE;
|
||||
} else {
|
||||
- get_utf8( ns, &chr);
|
||||
+ get_utf8( ns, -1, &chr);
|
||||
if ( chr != '\0') return Yap_unify(ARG3, MkIntegerTerm(chr));
|
||||
}
|
||||
}
|
||||
@@ -1611,7 +1611,7 @@ build_new_atomic(int mask, wchar_t *wp, const unsigned char *p, size_t min, size
|
||||
buf = buf_from_tstring(HR);
|
||||
while (len) {
|
||||
utf8proc_int32_t chr;
|
||||
- cp += get_utf8((unsigned char *)cp, &chr);
|
||||
+ cp += get_utf8((unsigned char *)cp, -1, &chr);
|
||||
buf += put_utf8((unsigned char *)buf, chr);
|
||||
len--;
|
||||
}
|
||||
@@ -1834,7 +1834,7 @@ cont_sub_atomic( USES_REGS1 )
|
||||
/* found one, check if there is any left */
|
||||
while (min <= sz-len) {
|
||||
int chr;
|
||||
- p += get_utf8((unsigned char *)p, &chr);
|
||||
+ p += get_utf8((unsigned char *)p, -1, &chr);
|
||||
after--;
|
||||
min++;
|
||||
if (cmpn_utf8(p, UStringOfTerm(nat), len) == 0)
|
||||
|
@@ -365,7 +365,7 @@ Yap_OpaqueTermToString(Term t, char *str, size_t max)
|
||||
str_index += sprintf(& str[str_index], "\"");
|
||||
do {
|
||||
utf8proc_int32_t chr;
|
||||
- ptr += get_utf8(ptr, &chr);
|
||||
+ ptr += get_utf8(ptr, -1, &chr);
|
||||
if (chr == '\0') break;
|
||||
str_index += sprintf(str+str_index, "%C", chr);
|
||||
} while (TRUE);
|
||||
|
C/flags.c (12 lines changed)
@@ -195,15 +195,11 @@ static bool mkprompt(Term inp) {
|
||||
|
||||
static bool getenc(Term inp) {
|
||||
CACHE_REGS
|
||||
- if (IsVarTerm(inp)) {
|
||||
- return Yap_unify(inp, MkAtomTerm(Yap_LookupAtom(enc_name(LOCAL_encoding))));
|
||||
- }
|
||||
- if (!IsAtomTerm(inp)) {
|
||||
- Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag");
|
||||
+ if (!IsVarTerm(inp) && !IsAtomTerm(inp)) {
|
||||
+ Yap_Error(TYPE_ERROR_ATOM, inp, "get_encoding");
|
||||
return false;
|
||||
}
|
||||
- enc_id((char *)RepAtom(AtomOfTerm(inp))->StrOfAE);
|
||||
- return true;
|
||||
+ return Yap_unify(inp, MkAtomTerm(Yap_LookupAtom(enc_name(LOCAL_encoding))));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -217,7 +213,7 @@ if (!IsAtomTerm(inp) ) {
|
||||
Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag");
|
||||
return false;
|
||||
}
|
||||
- enc_id( RepAtom( AtomOfTerm( inp ) )->StrOfAE );
|
||||
+ enc_id( RepAtom( AtomOfTerm( inp ) )->StrOfAE, ENC_OCTET );
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
C/scanner.c (10 lines changed)
@@ -1232,7 +1232,12 @@ const char *Yap_tokRep(TokEntry *tokptr) {
|
||||
|
||||
switch (tokptr->Tok) {
|
||||
case Name_tok:
|
||||
- return (char *)RepAtom((Atom)info)->StrOfAE;
|
||||
+ if (IsWideAtom((Atom)info)) {
|
||||
+ wchar_t *wc = RepAtom((Atom)info)->WStrOfAE;
|
||||
+ Term s = Yap_WCharsToString(wc PASS_REGS);
|
||||
+ return StringOfTerm(s);
|
||||
+ }
|
||||
+ return RepAtom((Atom)info)->StrOfAE;
|
||||
case Number_tok:
|
||||
if ((b = Yap_TermToString(info, buf, sze, &length, &LOCAL_encoding,
|
||||
flags)) != buf) {
|
||||
@@ -2038,9 +2043,6 @@ TokEntry *Yap_tokenizer(struct stream_desc *inp_stream, bool store_comments,
|
||||
char err[1024];
|
||||
snprintf(err, 1023, "\n++++ token: unrecognised char %c (%d), type %c\n",
|
||||
ch, ch, chtype(ch));
|
||||
- #if DEBUG
|
||||
- fprintf(stderr, "%s", err);
|
||||
- #endif
|
||||
}
|
||||
t->Tok = Ord(kind = eot_tok);
|
||||
t->TokInfo = TermEof;
|
||||
|
C/text.c (32 lines changed)
@@ -408,7 +408,7 @@ write_strings( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
|
||||
buf = buf_from_tstring(HR);
|
||||
while (*cp && cp < lim) {
|
||||
utf8proc_int32_t chr;
|
||||
- cp += get_utf8(cp, &chr);
|
||||
+ cp += get_utf8(cp, -1, &chr);
|
||||
buf += put_utf8(buf, chr);
|
||||
}
|
||||
if (max >= min) *buf++ = '\0';
|
||||
@@ -496,7 +496,7 @@ write_atoms( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng U
|
||||
while (cp < lim && *cp) {
|
||||
utf8proc_int32_t chr;
|
||||
CELL *cl;
|
||||
- cp += get_utf8(cp, &chr);
|
||||
+ cp += get_utf8(cp, -1, &chr);
|
||||
if (chr == '\0') break;
|
||||
w[0] = chr;
|
||||
cl = HR;
|
||||
@@ -581,7 +581,7 @@ write_codes( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng U
|
||||
LOCAL_TERM_ERROR( t, 2*(lim-s) );
|
||||
while (*cp && cp < lim) {
|
||||
utf8proc_int32_t chr;
|
||||
- cp += get_utf8(cp, &chr);
|
||||
+ cp += get_utf8(cp, -1, &chr);
|
||||
HR[0] = MkIntTerm(chr);
|
||||
HR[1] = AbsPair(HR+2);
|
||||
HR += 2;
|
||||
@@ -659,7 +659,7 @@ write_atom( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng US
|
||||
|
||||
while (*s && s < lim) {
|
||||
utf8proc_int32_t chr;
|
||||
- s += get_utf8(s, &chr);
|
||||
+ s += get_utf8(s,-1, &chr);
|
||||
*ptr++ = chr;
|
||||
}
|
||||
*ptr++ = '\0';
|
||||
@@ -743,14 +743,14 @@ write_wbuffer( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
|
||||
return -1;
|
||||
while (*cp && cp < lim) {
|
||||
utf8proc_int32_t chr;
|
||||
- cp += get_utf8(cp, &chr);
|
||||
+ cp += get_utf8(cp, -1, &chr);
|
||||
*buf++ = chr;
|
||||
}
|
||||
if (max >= min) *buf++ = '\0';
|
||||
else while (max < min) {
|
||||
utf8proc_int32_t chr;
|
||||
max++;
|
||||
- cp += get_utf8(cp, &chr);
|
||||
+ cp += get_utf8(cp, -1, &chr);
|
||||
*buf++ = chr;
|
||||
}
|
||||
*buf = '\0';
|
||||
@@ -890,14 +890,14 @@ write_buffer( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
|
||||
return -1;
|
||||
while (*cp && cp < lim) {
|
||||
utf8proc_int32_t chr;
|
||||
- cp += get_utf8(cp, &chr);
|
||||
+ cp += get_utf8(cp, -1, &chr);
|
||||
*buf++ = chr;
|
||||
}
|
||||
if (max >= min) *buf++ = '\0';
|
||||
else while (max < min) {
|
||||
utf8proc_int32_t chr;
|
||||
max++;
|
||||
- cp += get_utf8(cp, &chr);
|
||||
+ cp += get_utf8(cp, -1, &chr);
|
||||
*buf++ = chr;
|
||||
}
|
||||
sz_end = buf-out->val.uc;
|
||||
@@ -1104,7 +1104,7 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
|
||||
{
|
||||
utf8proc_int32_t chr1, chr2;
|
||||
unsigned char *w2 = s2;
|
||||
- for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, &chr2); if (chr1-chr2) return chr1-chr2; }
|
||||
+ for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, -1, &chr2); if (chr1-chr2) return chr1-chr2; }
|
||||
}
|
||||
return 0;
|
||||
case ENC_WCHAR:
|
||||
@@ -1126,21 +1126,21 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
|
||||
{
|
||||
utf8proc_int32_t chr1, chr2;
|
||||
unsigned char *w2 = s2;
|
||||
- for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
|
||||
+ for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, -1, &chr1); if (chr1-chr2) return chr1-chr2; }
|
||||
}
|
||||
return 0;
|
||||
case ENC_ISO_UTF8:
|
||||
{
|
||||
utf8proc_int32_t chr1, chr2;
|
||||
unsigned char *w2 = s2;
|
||||
- for (i = 0; i < l; i++) { w2 += get_utf8(w2, &chr2); w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
|
||||
+ for (i = 0; i < l; i++) { w2 += get_utf8(w2, -1, &chr2); w1 += get_utf8(w1,-1, &chr1); if (chr1-chr2) return chr1-chr2; }
|
||||
}
|
||||
return 0;
|
||||
case ENC_WCHAR:
|
||||
{
|
||||
utf8proc_int32_t chr1, chr2;
|
||||
wchar_t *w2 = s2;
|
||||
- for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
|
||||
+ for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, -1, &chr1); if (chr1-chr2) return chr1-chr2; }
|
||||
}
|
||||
return 0;
|
||||
default:
|
||||
@@ -1162,7 +1162,7 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
|
||||
{
|
||||
utf8proc_int32_t chr1, chr2;
|
||||
unsigned char *w2 = s2;
|
||||
- for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, &chr2); if (chr1-chr2) return chr1-chr2; }
|
||||
+ for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, -1, &chr2); if (chr1-chr2) return chr1-chr2; }
|
||||
}
|
||||
return 0;
|
||||
case ENC_WCHAR:
|
||||
@@ -1232,7 +1232,7 @@ concat( int n, seq_tv_t *out, void *sv[], encoding_t encv[], size_t lengv[] USES
|
||||
} else {
|
||||
unsigned char *ptr = sv[i];
|
||||
utf8proc_int32_t chr;
|
||||
- while ( (ptr += get_utf8( ptr, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; }
|
||||
+ while ( (ptr += get_utf8( ptr, -1, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; }
|
||||
}
|
||||
}
|
||||
*buf++ = '\0';
|
||||
@@ -1276,7 +1276,7 @@ slice( size_t min, size_t max, void *buf, seq_tv_t *out, encoding_t enc USES_REG
|
||||
unsigned char *ptr = skip_utf8 (buf, min );
|
||||
utf8proc_int32_t chr;
|
||||
if (!ptr) return NULL;
|
||||
- while ( min++ < max ) { ptr += get_utf8(ptr, & chr); nbuf += put_utf8(nbuf, chr); }
|
||||
+ while ( min++ < max ) { ptr += get_utf8(ptr, -1, & chr); nbuf += put_utf8(nbuf, chr); }
|
||||
}
|
||||
*nbuf ++ = '\0';
|
||||
close_tstring( nbuf PASS_REGS );
|
||||
@@ -1313,7 +1313,7 @@ slice( size_t min, size_t max, void *buf, seq_tv_t *out, encoding_t enc USES_REG
|
||||
utf8proc_int32_t chr;
|
||||
|
||||
LOCAL_ERROR( MkAtomTerm(Yap_LookupAtom(buf)), max-min );
|
||||
- while ( min++ < max ) { ptr += get_utf8(ptr, & chr); *nbuf++ = chr; }
|
||||
+ while ( min++ < max ) { ptr += get_utf8(ptr, -1, & chr); *nbuf++ = chr; }
|
||||
nbuf[0] = '\0';
|
||||
at = Yap_LookupMaybeWideAtom( (wchar_t*)HR );
|
||||
}
|
||||
|
Reference in New Issue
Block a user