Fix encoding

This commit is contained in:
Vítor Santos Costa
2016-02-18 12:10:58 +00:00
parent 05a978ce82
commit 484168b1ce
18 changed files with 1557 additions and 1522 deletions

View File

@@ -921,7 +921,7 @@ cont_string_code3( USES_REGS1 )
s0 = UStringOfTerm( t2 );
i = IntOfTerm(EXTRA_CBACK_ARG(3,1)); // offset in coded string, increases by 1..6
j = IntOfTerm(EXTRA_CBACK_ARG(3,2)); // offset in UNICODE string, always increases by 1
s = (s0+i) + get_utf8( (unsigned char *)s0+i, &chr );
s = (s0+i) + get_utf8( (unsigned char *)s0+i, -1, &chr );
if (s[0]) {
EXTRA_CBACK_ARG(3,1) = MkIntTerm(s-s0);
EXTRA_CBACK_ARG(3,2) = MkIntTerm(j+1);
@@ -983,7 +983,7 @@ string_code3( USES_REGS1 )
if (ns == NULL) {
cut_fail(); // silently fail?
}
get_utf8( (unsigned char *)ns, &chr);
get_utf8( (unsigned char *)ns, -1, &chr);
if ( chr == '\0') cut_fail();
if (Yap_unify(ARG3, MkIntegerTerm(chr))) cut_succeed();
cut_fail();
@@ -1042,7 +1042,7 @@ get_string_code3( USES_REGS1 )
if (ns == NULL) {
return FALSE;
} else {
get_utf8( ns, &chr);
get_utf8( ns, -1, &chr);
if ( chr != '\0') return Yap_unify(ARG3, MkIntegerTerm(chr));
}
}
@@ -1611,7 +1611,7 @@ build_new_atomic(int mask, wchar_t *wp, const unsigned char *p, size_t min, size
buf = buf_from_tstring(HR);
while (len) {
utf8proc_int32_t chr;
cp += get_utf8((unsigned char *)cp, &chr);
cp += get_utf8((unsigned char *)cp, -1, &chr);
buf += put_utf8((unsigned char *)buf, chr);
len--;
}
@@ -1834,7 +1834,7 @@ cont_sub_atomic( USES_REGS1 )
/* found one, check if there is any left */
while (min <= sz-len) {
int chr;
p += get_utf8((unsigned char *)p, &chr);
p += get_utf8((unsigned char *)p, -1, &chr);
after--;
min++;
if (cmpn_utf8(p, UStringOfTerm(nat), len) == 0)

View File

@@ -365,7 +365,7 @@ Yap_OpaqueTermToString(Term t, char *str, size_t max)
str_index += sprintf(& str[str_index], "\"");
do {
utf8proc_int32_t chr;
ptr += get_utf8(ptr, &chr);
ptr += get_utf8(ptr, -1, &chr);
if (chr == '\0') break;
str_index += sprintf(str+str_index, "%C", chr);
} while (TRUE);

View File

@@ -195,15 +195,11 @@ static bool mkprompt(Term inp) {
static bool getenc(Term inp) {
CACHE_REGS
if (IsVarTerm(inp)) {
return Yap_unify(inp, MkAtomTerm(Yap_LookupAtom(enc_name(LOCAL_encoding))));
}
if (!IsAtomTerm(inp)) {
Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag");
if (!IsVarTerm(inp) && !IsAtomTerm(inp)) {
Yap_Error(TYPE_ERROR_ATOM, inp, "get_encoding");
return false;
}
enc_id((char *)RepAtom(AtomOfTerm(inp))->StrOfAE);
return true;
return Yap_unify(inp, MkAtomTerm(Yap_LookupAtom(enc_name(LOCAL_encoding))));
}
/*
@@ -217,7 +213,7 @@ if (!IsAtomTerm(inp) ) {
Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag");
return false;
}
enc_id( RepAtom( AtomOfTerm( inp ) )->StrOfAE );
enc_id( RepAtom( AtomOfTerm( inp ) )->StrOfAE, ENC_OCTET );
return true;
}
*/

View File

@@ -1232,7 +1232,12 @@ const char *Yap_tokRep(TokEntry *tokptr) {
switch (tokptr->Tok) {
case Name_tok:
return (char *)RepAtom((Atom)info)->StrOfAE;
if (IsWideAtom((Atom)info)) {
wchar_t *wc = RepAtom((Atom)info)->WStrOfAE;
Term s = Yap_WCharsToString(wc PASS_REGS);
return StringOfTerm(s);
}
return RepAtom((Atom)info)->StrOfAE;
case Number_tok:
if ((b = Yap_TermToString(info, buf, sze, &length, &LOCAL_encoding,
flags)) != buf) {
@@ -2038,9 +2043,6 @@ TokEntry *Yap_tokenizer(struct stream_desc *inp_stream, bool store_comments,
char err[1024];
snprintf(err, 1023, "\n++++ token: unrecognised char %c (%d), type %c\n",
ch, ch, chtype(ch));
#if DEBUG
fprintf(stderr, "%s", err);
#endif
}
t->Tok = Ord(kind = eot_tok);
t->TokInfo = TermEof;

View File

@@ -408,7 +408,7 @@ write_strings( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
buf = buf_from_tstring(HR);
while (*cp && cp < lim) {
utf8proc_int32_t chr;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
buf += put_utf8(buf, chr);
}
if (max >= min) *buf++ = '\0';
@@ -496,7 +496,7 @@ write_atoms( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng U
while (cp < lim && *cp) {
utf8proc_int32_t chr;
CELL *cl;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
if (chr == '\0') break;
w[0] = chr;
cl = HR;
@@ -581,7 +581,7 @@ write_codes( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng U
LOCAL_TERM_ERROR( t, 2*(lim-s) );
while (*cp && cp < lim) {
utf8proc_int32_t chr;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
HR[0] = MkIntTerm(chr);
HR[1] = AbsPair(HR+2);
HR += 2;
@@ -659,7 +659,7 @@ write_atom( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng US
while (*s && s < lim) {
utf8proc_int32_t chr;
s += get_utf8(s, &chr);
s += get_utf8(s,-1, &chr);
*ptr++ = chr;
}
*ptr++ = '\0';
@@ -743,14 +743,14 @@ write_wbuffer( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
return -1;
while (*cp && cp < lim) {
utf8proc_int32_t chr;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
*buf++ = chr;
}
if (max >= min) *buf++ = '\0';
else while (max < min) {
utf8proc_int32_t chr;
max++;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
*buf++ = chr;
}
*buf = '\0';
@@ -890,14 +890,14 @@ write_buffer( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
return -1;
while (*cp && cp < lim) {
utf8proc_int32_t chr;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
*buf++ = chr;
}
if (max >= min) *buf++ = '\0';
else while (max < min) {
utf8proc_int32_t chr;
max++;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
*buf++ = chr;
}
sz_end = buf-out->val.uc;
@@ -1104,7 +1104,7 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{
utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, &chr2); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, -1, &chr2); if (chr1-chr2) return chr1-chr2; }
}
return 0;
case ENC_WCHAR:
@@ -1126,21 +1126,21 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{
utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, -1, &chr1); if (chr1-chr2) return chr1-chr2; }
}
return 0;
case ENC_ISO_UTF8:
{
utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2;
for (i = 0; i < l; i++) { w2 += get_utf8(w2, &chr2); w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { w2 += get_utf8(w2, -1, &chr2); w1 += get_utf8(w1,-1, &chr1); if (chr1-chr2) return chr1-chr2; }
}
return 0;
case ENC_WCHAR:
{
utf8proc_int32_t chr1, chr2;
wchar_t *w2 = s2;
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, -1, &chr1); if (chr1-chr2) return chr1-chr2; }
}
return 0;
default:
@@ -1162,7 +1162,7 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{
utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, &chr2); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, -1, &chr2); if (chr1-chr2) return chr1-chr2; }
}
return 0;
case ENC_WCHAR:
@@ -1232,7 +1232,7 @@ concat( int n, seq_tv_t *out, void *sv[], encoding_t encv[], size_t lengv[] USES
} else {
unsigned char *ptr = sv[i];
utf8proc_int32_t chr;
while ( (ptr += get_utf8( ptr, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; }
while ( (ptr += get_utf8( ptr, -1, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; }
}
}
*buf++ = '\0';
@@ -1276,7 +1276,7 @@ slice( size_t min, size_t max, void *buf, seq_tv_t *out, encoding_t enc USES_REG
unsigned char *ptr = skip_utf8 (buf, min );
utf8proc_int32_t chr;
if (!ptr) return NULL;
while ( min++ < max ) { ptr += get_utf8(ptr, & chr); nbuf += put_utf8(nbuf, chr); }
while ( min++ < max ) { ptr += get_utf8(ptr, -1, & chr); nbuf += put_utf8(nbuf, chr); }
}
*nbuf ++ = '\0';
close_tstring( nbuf PASS_REGS );
@@ -1313,7 +1313,7 @@ slice( size_t min, size_t max, void *buf, seq_tv_t *out, encoding_t enc USES_REG
utf8proc_int32_t chr;
LOCAL_ERROR( MkAtomTerm(Yap_LookupAtom(buf)), max-min );
while ( min++ < max ) { ptr += get_utf8(ptr, & chr); *nbuf++ = chr; }
while ( min++ < max ) { ptr += get_utf8(ptr, -1, & chr); *nbuf++ = chr; }
nbuf[0] = '\0';
at = Yap_LookupMaybeWideAtom( (wchar_t*)HR );
}

View File

@@ -572,7 +572,7 @@ static void write_string(const unsigned char *s,
qt = '"';
wrputc(qt, stream);
do {
ptr += get_utf8(ptr, &chr);
ptr += get_utf8(ptr, -1, &chr);
if (chr == '\0')
break;
write_quoted(chr, qt, stream);