fixes for UTF-8
This commit is contained in:
parent
b24dc4893d
commit
53822922c8
57
C/text.c
57
C/text.c
@ -239,12 +239,7 @@ static Int SkipListCodes(unsigned char **bufp, Term *l, Term **tailp,
|
|||||||
return -REPRESENTATION_ERROR_CHARACTER_CODE;
|
return -REPRESENTATION_ERROR_CHARACTER_CODE;
|
||||||
} else {
|
} else {
|
||||||
AtomEntry *ae = RepAtom(AtomOfTerm(hd));
|
AtomEntry *ae = RepAtom(AtomOfTerm(hd));
|
||||||
if ((ae->StrOfAE)[1] != '\0') {
|
st = stpcpy(st, ae->StrOfAE);
|
||||||
length = -REPRESENTATION_ERROR_CHARACTER;
|
|
||||||
} else {
|
|
||||||
ch = RepAtom(AtomOfTerm(hd))->StrOfAE[0];
|
|
||||||
*wide |= ch > 0x80;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else if (IsIntegerTerm(hd)) {
|
} else if (IsIntegerTerm(hd)) {
|
||||||
ch = IntegerOfTerm(hd);
|
ch = IntegerOfTerm(hd);
|
||||||
@ -263,12 +258,13 @@ static Int SkipListCodes(unsigned char **bufp, Term *l, Term **tailp,
|
|||||||
*tailp = l;
|
*tailp = l;
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
// now copy char to buffer
|
// now copy char to buffer
|
||||||
int chsz = put_utf8(st, ch);
|
int chsz = put_utf8(st, ch);
|
||||||
if (chsz > 0) {
|
if (chsz > 0) {
|
||||||
st += chsz;
|
st += chsz;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
l = RepPair(*l) + 1;
|
l = RepPair(*l) + 1;
|
||||||
do_derefa(v, l, derefa2_unk, derefa2_nonvar);
|
do_derefa(v, l, derefa2_unk, derefa2_nonvar);
|
||||||
} while (*l != *s && IsPairTerm(*l));
|
} while (*l != *s && IsPairTerm(*l));
|
||||||
@ -440,7 +436,7 @@ unsigned char *Yap_readText(seq_tv_t *inp, size_t *lengp) {
|
|||||||
if (lengp)
|
if (lengp)
|
||||||
*lengp = sz;
|
*lengp = sz;
|
||||||
if (inp->type & YAP_STRING_WITH_BUFFER)
|
if (inp->type & YAP_STRING_WITH_BUFFER)
|
||||||
return UStringOfTerm(inp->val.t);
|
return (unsigned char*)UStringOfTerm(inp->val.t);
|
||||||
inp->type |= YAP_STRING_IN_TMP;
|
inp->type |= YAP_STRING_IN_TMP;
|
||||||
char *o = Malloc(sz+1);
|
char *o = Malloc(sz+1);
|
||||||
strcpy(o, s);
|
strcpy(o, s);
|
||||||
@ -571,17 +567,19 @@ static Term write_atoms(void *s0, seq_tv_t *out, size_t leng USES_REGS) {
|
|||||||
|
|
||||||
unsigned char *s = s0, *lim = s + strnlen((char *)s, max);
|
unsigned char *s = s0, *lim = s + strnlen((char *)s, max);
|
||||||
unsigned char *cp = s;
|
unsigned char *cp = s;
|
||||||
unsigned char w[10], *wp = w;
|
unsigned char w[10];
|
||||||
|
int wp = 0;
|
||||||
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
||||||
while (cp < lim && *cp) {
|
while (cp < lim && *cp) {
|
||||||
utf8proc_int32_t chr;
|
utf8proc_int32_t chr;
|
||||||
CELL *cl;
|
CELL *cl;
|
||||||
s += get_utf8(s, 1, &chr);
|
s += get_utf8(s, -1, &chr);
|
||||||
if (chr == '\0') {
|
if (chr == '\0') {
|
||||||
wp[0] = '\0';
|
w[0] = '\0';
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
wp += put_utf8(w, chr);
|
wp = put_utf8(w, chr);
|
||||||
|
w[wp] = '\0';
|
||||||
cl = HR;
|
cl = HR;
|
||||||
HR += 2;
|
HR += 2;
|
||||||
cl[0] = MkAtomTerm(Yap_ULookupAtom(w));
|
cl[0] = MkAtomTerm(Yap_ULookupAtom(w));
|
||||||
@ -994,47 +992,52 @@ bool Yap_Concat_Text(int tot, seq_tv_t inp[], seq_tv_t *out USES_REGS) {
|
|||||||
//
|
//
|
||||||
bool Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp,
|
bool Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp,
|
||||||
seq_tv_t outv[] USES_REGS) {
|
seq_tv_t outv[] USES_REGS) {
|
||||||
unsigned char *buf;
|
const unsigned char *buf;
|
||||||
size_t l;
|
size_t b_l, u_l;
|
||||||
|
|
||||||
inp->type |= YAP_STRING_IN_TMP;
|
inp->type |= YAP_STRING_IN_TMP;
|
||||||
buf = Yap_readText(inp, &l PASS_REGS);
|
buf = Yap_readText(inp, &b_l PASS_REGS);
|
||||||
if (!buf) {
|
if (!buf) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
u_l = strlen_utf8(buf);
|
||||||
if (!cuts) {
|
if (!cuts) {
|
||||||
if (n == 2) {
|
if (n == 2) {
|
||||||
size_t l0, l1;
|
size_t b_l0, b_l1, u_l0, u_l1;
|
||||||
unsigned char *buf0, *buf1;
|
unsigned char *buf0, *buf1;
|
||||||
|
|
||||||
if (outv[0].val.t) {
|
if (outv[0].val.t) {
|
||||||
buf0 = Yap_readText(outv, &l0 PASS_REGS);
|
buf0 = Yap_readText(outv, &b_l0 PASS_REGS);
|
||||||
if (!buf0) {
|
if (!buf0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (cmp_Text(buf, buf0, l0) != 0) {
|
if (bcmp(buf, buf0, b_l0) != 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
l1 = l - l0;
|
u_l0 = strlen_utf8(buf0);
|
||||||
|
u_l1 = u_l - u_l0;
|
||||||
|
|
||||||
buf1 = slice(l0, l, buf PASS_REGS);
|
buf1 = slice(u_l0, u_l, buf PASS_REGS);
|
||||||
bool rc = write_Text(buf1, outv + 1, l1 PASS_REGS);
|
b_l1 = strlen(buf1);
|
||||||
|
bool rc = write_Text(buf1, outv + 1, b_l1 PASS_REGS);
|
||||||
if (!rc) {
|
if (!rc) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return rc;
|
return rc;
|
||||||
} else /* if (outv[1].val.t) */ {
|
} else /* if (outv[1].val.t) */ {
|
||||||
buf1 = Yap_readText(outv + 1, &l1 PASS_REGS);
|
buf1 = Yap_readText(outv + 1, &b_l1 PASS_REGS);
|
||||||
if (!buf1) {
|
if (!buf1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
l0 = l - l1;
|
u_l1 = strlen_utf8(buf1);
|
||||||
if (cmp_Text(skip_utf8((const unsigned char *)buf, l0), buf1, l1) !=
|
b_l0 = b_l - b_l1;
|
||||||
|
u_l0 = u_l - u_l1;
|
||||||
|
if (bcmp(skip_utf8((const char *)buf, b_l0), buf1, b_l1) !=
|
||||||
0) {
|
0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
buf0 = slice(0, l0, buf PASS_REGS);
|
buf0 = slice(0, u_l0, buf PASS_REGS);
|
||||||
bool rc = write_Text(buf0, outv, l0 PASS_REGS);
|
bool rc = write_Text(buf0, outv, b_l0 PASS_REGS);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1048,7 +1051,7 @@ bool Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp,
|
|||||||
if (i > 0 && cuts[i] == 0)
|
if (i > 0 && cuts[i] == 0)
|
||||||
break;
|
break;
|
||||||
void *bufi = slice(next, cuts[i], buf PASS_REGS);
|
void *bufi = slice(next, cuts[i], buf PASS_REGS);
|
||||||
if (!write_Text(bufi, outv + i, cuts[i] - next PASS_REGS)) {
|
if (!write_Text(bufi, outv + i, strlen(bufi) PASS_REGS)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user