1679 lines
44 KiB
C
1679 lines
44 KiB
C
/*************************************************************************
*                                                                        *
*   YAP Prolog                                                           *
*                                                                        *
*   Yap Prolog was developed at NCCUP - Universidade do Porto            *
*                                                                        *
* Copyright L.Damas, V. Santos Costa and Universidade do Porto 1985--    *
*                                                                        *
**************************************************************************
*                                                                        *
* File:     strings.c                                                    *
* comments: General-conversion of character sequences.                   *
*                                                                        *
* Last rev: $Date: 2008-07-24 16:02:00 $,$Author: vsc $                  *
*                                                                        *
*************************************************************************/
|
|
|
|
#include "Yap.h"
|
|
#include "YapHeap.h"
|
|
#include "YapText.h"
|
|
#include "Yatom.h"
|
|
#include "eval.h"
|
|
#include "yapio.h"
|
|
|
|
#include <string.h>
|
|
#include <wchar.h>
|
|
|
|
#ifndef HAVE_WCSNLEN
|
|
/* Smaller of two sizes; used by the wcsnlen() fallback macro defined next. */
static inline size_t min_size(size_t i, size_t j) {
  if (j < i) {
    return j;
  }
  return i;
}
|
|
#define wcsnlen(S, N) min_size(N, wcslen(S))
|
|
#endif
|
|
|
|
/* Store the byte at p in *c and return the address of the following byte. */
static inline unsigned char *getChar(unsigned char *p, int *c) {
  unsigned char *next = p + 1;

  c[0] = p[0];
  return next;
}
|
|
|
|
/* Store the wide character at p in *c and return the address of the next one. */
static inline wchar_t *getWchar(wchar_t *p, int *c) {
  wchar_t *next = p + 1;

  c[0] = p[0];
  return next;
}
|
|
|
|
#ifndef NAN
|
|
#define NAN (0.0 / 0.0)
|
|
#endif
|
|
|
|
/*
 * Dereference v.  If the result is still an unbound variable whose cell lies
 * strictly between HR and LCL0, bind that cell to a fresh variable obtained
 * from MkVarTerm() and dereference again.  Returns the resulting term.
 */
static Term Globalize(Term v USES_REGS) {
  v = Deref(v);
  if (IsVarTerm(v) && VarOfTerm(v) > HR && VarOfTerm(v) < LCL0) {
    Bind_Local(VarOfTerm(v), MkVarTerm());
    v = Deref(v);
  }
  return v;
}
|
|
|
|
static Int SkipListCodes(unsigned char **bufp, Term *l, Term **tailp,
|
|
Int *atoms, bool *wide, seq_tv_t *inp USES_REGS) {
|
|
Int length = 0;
|
|
Term *s; /* slow */
|
|
Term v; /* temporary */
|
|
*wide = false;
|
|
size_t max = 1;
|
|
unsigned char *st0 = *bufp, *st;
|
|
unsigned char *smax = NULL;
|
|
|
|
do_derefa(v, l, derefa_unk, derefa_nonvar);
|
|
*tailp = l;
|
|
s = l;
|
|
|
|
if (inp->type & YAP_STRING_TRUNC) {
|
|
max = inp->max;
|
|
} else {
|
|
max = 0; // basically, this will never be reached;
|
|
}
|
|
|
|
if (!st0) {
|
|
if (inp->type & YAP_STRING_MALLOC) {
|
|
*bufp = st0 = (unsigned char *)malloc(MAXPATHLEN + 1);
|
|
smax = st0 + (MAXPATHLEN - 8); // give 8 bytes for max UTF-8 size + '\0';
|
|
} else {
|
|
*bufp = st0 = (unsigned char *)Yap_PreAllocCodeSpace();
|
|
smax = (unsigned char *)AuxTop -
|
|
8; // give 8 bytes for max UTF-8 size + '\0';
|
|
}
|
|
} else if (inp->sz > 0) {
|
|
smax = st0 + (inp->sz - 8); // give 8 bytes for max UTF-8 size + '\0';
|
|
} else {
|
|
// AUX_ERROR( *l, 2*(length+1), st0, unsigned char);
|
|
return 0;
|
|
}
|
|
*bufp = st = st0;
|
|
|
|
if (*l == TermNil) {
|
|
return 0;
|
|
}
|
|
if (IsPairTerm(*l)) {
|
|
Term hd0 = HeadOfTerm(*l);
|
|
if (IsVarTerm(hd0)) {
|
|
return -INSTANTIATION_ERROR;
|
|
}
|
|
// are we looking for atoms/codes?
|
|
// whatever the case, we should be consistent throughout,
|
|
// so we should be consistent with the first arg.
|
|
if (*atoms == 1) {
|
|
if (!IsIntegerTerm(hd0)) {
|
|
return -INSTANTIATION_ERROR;
|
|
}
|
|
} else if (*atoms == 2) {
|
|
if (!IsAtomTerm(hd0)) {
|
|
return -TYPE_ERROR_ATOM;
|
|
}
|
|
}
|
|
|
|
do {
|
|
int ch;
|
|
length++;
|
|
if (length == max) {
|
|
*st++ = '\0';
|
|
}
|
|
{
|
|
Term hd = Deref(RepPair(*l)[0]);
|
|
if (IsVarTerm(hd)) {
|
|
return -INSTANTIATION_ERROR;
|
|
} else if (IsAtomTerm(hd)) {
|
|
(*atoms)++;
|
|
if (*atoms < length) {
|
|
*tailp = l;
|
|
return -TYPE_ERROR_NUMBER;
|
|
}
|
|
if (IsWideAtom(AtomOfTerm(hd))) {
|
|
int ch;
|
|
if ((RepAtom(AtomOfTerm(hd))->WStrOfAE)[1] != '\0') {
|
|
length = -REPRESENTATION_ERROR_CHARACTER;
|
|
}
|
|
ch = RepAtom(AtomOfTerm(hd))->WStrOfAE[0];
|
|
*wide = true;
|
|
} else {
|
|
AtomEntry *ae = RepAtom(AtomOfTerm(hd));
|
|
if ((ae->StrOfAE)[1] != '\0') {
|
|
length = -REPRESENTATION_ERROR_CHARACTER;
|
|
} else {
|
|
ch = RepAtom(AtomOfTerm(hd))->StrOfAE[0];
|
|
*wide |= ch > 0x80;
|
|
}
|
|
}
|
|
} else if (IsIntegerTerm(hd)) {
|
|
ch = IntegerOfTerm(hd);
|
|
if (*atoms)
|
|
length = -TYPE_ERROR_ATOM;
|
|
else if (ch < 0) {
|
|
*tailp = l;
|
|
length = -DOMAIN_ERROR_NOT_LESS_THAN_ZERO;
|
|
} else {
|
|
*wide |= ch > 0x80;
|
|
}
|
|
} else {
|
|
length = -TYPE_ERROR_INTEGER;
|
|
}
|
|
if (length < 0) {
|
|
*tailp = l;
|
|
return length;
|
|
}
|
|
}
|
|
// now copy char to buffer
|
|
size_t chsz = put_utf8(st, ch);
|
|
if (smax <= st + chsz) {
|
|
*st++ = '\0';
|
|
*tailp = l;
|
|
return length;
|
|
} else {
|
|
st += chsz;
|
|
}
|
|
l = RepPair(*l) + 1;
|
|
do_derefa(v, l, derefa2_unk, derefa2_nonvar);
|
|
} while (*l != *s && IsPairTerm(*l));
|
|
}
|
|
if (IsVarTerm(*l)) {
|
|
return -INSTANTIATION_ERROR;
|
|
}
|
|
if (*l != TermNil) {
|
|
return -TYPE_ERROR_LIST;
|
|
}
|
|
st[0] = '\0';
|
|
*tailp = l;
|
|
|
|
return length;
|
|
}
|
|
|
|
static void *to_buffer(void *buf, Term t, seq_tv_t *inp, bool *widep,
|
|
Int *atoms, size_t *lenp USES_REGS) {
|
|
CELL *r = NULL;
|
|
Int n;
|
|
|
|
if (!buf) {
|
|
inp->sz = *lenp;
|
|
}
|
|
unsigned char *bufc = buf;
|
|
n = SkipListCodes(&bufc, &t, &r, atoms, widep, inp PASS_REGS);
|
|
if (n < 0) {
|
|
LOCAL_Error_TYPE = -n;
|
|
LOCAL_Error_Term = *r;
|
|
return NULL;
|
|
}
|
|
*lenp = n;
|
|
return bufc;
|
|
}
|
|
|
|
/*
 * List-to-buffer conversion in mode 1: SkipListCodes() insists that the
 * first element of the list is an integer.
 */
static void *Yap_ListOfCodesToBuffer(void *buf, Term t, seq_tv_t *inp,
                                     bool *widep, size_t *lenp USES_REGS) {
  Int mode = 1;

  return to_buffer(buf, t, inp, widep, &mode, lenp PASS_REGS);
}
|
|
|
|
/*
 * List-to-buffer conversion in mode 2: SkipListCodes() insists that the
 * first element of the list is an atom.
 */
static void *Yap_ListOfAtomsToBuffer(void *buf, Term t, seq_tv_t *inp,
                                     bool *widep, size_t *lenp USES_REGS) {
  Int mode = 2;

  return to_buffer(buf, t, inp, widep, &mode, lenp PASS_REGS);
}
|
|
|
|
static void *Yap_ListToBuffer(void *buf, Term t, seq_tv_t *inp, bool *widep,
|
|
size_t *lenp USES_REGS) {
|
|
Int atoms = 0; // we accept both types of lists.
|
|
return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS);
|
|
}
|
|
|
|
#if USE_GEN_TYPE_ERROR
|
|
static yap_error_number gen_type_error(int flags) {
|
|
if ((flags & (YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT |
|
|
YAP_STRING_FLOAT | YAP_STRING_ATOMS_CODES | YAP_STRING_BIG)) ==
|
|
(YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT |
|
|
YAP_STRING_ATOMS_CODES | YAP_STRING_BIG))
|
|
return TYPE_ERROR_TEXT;
|
|
if ((flags & (YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT |
|
|
YAP_STRING_FLOAT | YAP_STRING_BIG)) ==
|
|
(YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT |
|
|
YAP_STRING_BIG))
|
|
return TYPE_ERROR_ATOMIC;
|
|
if ((flags & (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG)) ==
|
|
(YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG))
|
|
return TYPE_ERROR_NUMBER;
|
|
if (flags & YAP_STRING_ATOM)
|
|
return TYPE_ERROR_ATOM;
|
|
if (flags & YAP_STRING_STRING)
|
|
return TYPE_ERROR_STRING;
|
|
if (flags & (YAP_STRING_CODES | YAP_STRING_ATOMS))
|
|
return TYPE_ERROR_LIST;
|
|
return TYPE_ERROR_NUMBER;
|
|
}
|
|
#endif
|
|
|
|
void *Yap_readText(void *buf, seq_tv_t *inp, encoding_t *enc, int *minimal,
|
|
size_t *lengp USES_REGS) {
|
|
char *s, *s0 = buf;
|
|
wchar_t *ws;
|
|
bool wide;
|
|
|
|
/* we know what the term is */
|
|
if (!(inp->type & (YAP_STRING_CHARS | YAP_STRING_WCHARS))) {
|
|
if (!(inp->type & YAP_STRING_TERM)) {
|
|
if (IsVarTerm(inp->val.t)) {
|
|
LOCAL_Error_TYPE = INSTANTIATION_ERROR;
|
|
} else if (!IsAtomTerm(inp->val.t) && inp->type == YAP_STRING_ATOM) {
|
|
LOCAL_Error_TYPE = TYPE_ERROR_ATOM;
|
|
} else if (!IsStringTerm(inp->val.t) && inp->type == YAP_STRING_STRING) {
|
|
LOCAL_Error_TYPE = TYPE_ERROR_STRING;
|
|
} else if (!IsPairTerm(inp->val.t) && !IsStringTerm(inp->val.t) &&
|
|
inp->type == (YAP_STRING_ATOMS_CODES | YAP_STRING_STRING)) {
|
|
LOCAL_Error_TYPE = TYPE_ERROR_LIST;
|
|
} else if (!IsNumTerm(inp->val.t) &&
|
|
(inp->type & (YAP_STRING_INT | YAP_STRING_FLOAT |
|
|
YAP_STRING_BIG)) == inp->type) {
|
|
LOCAL_Error_TYPE = TYPE_ERROR_NUMBER;
|
|
}
|
|
LOCAL_Error_Term = inp->val.t;
|
|
}
|
|
}
|
|
if (LOCAL_Error_TYPE != YAP_NO_ERROR)
|
|
return NULL;
|
|
|
|
// this is a term, extract the UTF8 representation
|
|
if (IsStringTerm(inp->val.t) && inp->type & YAP_STRING_STRING) {
|
|
const char *s = StringOfTerm(inp->val.t);
|
|
*enc = ENC_ISO_UTF8;
|
|
*minimal = FALSE;
|
|
if (lengp)
|
|
*lengp = strlen(s);
|
|
return (void *)s;
|
|
}
|
|
if (IsAtomTerm(inp->val.t) && inp->type & YAP_STRING_ATOM) {
|
|
// this is a term, extract to a buffer, and representation is wide
|
|
*minimal = TRUE;
|
|
Atom at = AtomOfTerm(inp->val.t);
|
|
if (IsWideAtom(at)) {
|
|
ws = at->WStrOfAE;
|
|
*lengp = wcslen(ws);
|
|
*enc = ENC_WCHAR;
|
|
return ws;
|
|
} else {
|
|
s = (char *)at->StrOfAE;
|
|
*lengp = strlen(s);
|
|
*enc = ENC_ISO_LATIN1;
|
|
return s;
|
|
}
|
|
}
|
|
if (((inp->type & (YAP_STRING_CODES | YAP_STRING_ATOMS)) ==
|
|
(YAP_STRING_CODES | YAP_STRING_ATOMS))) {
|
|
s = Yap_ListToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS);
|
|
// this is a term, extract to a sfer, and representation is wide
|
|
*minimal = true;
|
|
*enc = ENC_ISO_UTF8;
|
|
return s;
|
|
}
|
|
if (inp->type == YAP_STRING_CODES) {
|
|
s = Yap_ListOfCodesToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS);
|
|
// this is a term, extract to a sfer, and representation is wide
|
|
*minimal = true;
|
|
*enc = ENC_ISO_UTF8;
|
|
return s;
|
|
}
|
|
if (inp->type == YAP_STRING_ATOMS) {
|
|
s = Yap_ListOfAtomsToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS);
|
|
// this is a term, extract to a buffer, and representation is wide
|
|
*minimal = true;
|
|
*enc = ENC_ISO_UTF8;
|
|
return s;
|
|
}
|
|
if (inp->type & YAP_STRING_INT && IsIntegerTerm(inp->val.t)) {
|
|
if (s0)
|
|
s = s0;
|
|
else
|
|
s = Yap_PreAllocCodeSpace();
|
|
AUX_ERROR(inp->val.t, LOCAL_MAX_SIZE, s, char);
|
|
if (snprintf(s, LOCAL_MAX_SIZE - 1, Int_FORMAT, IntegerOfTerm(inp->val.t)) <
|
|
0) {
|
|
AUX_ERROR(inp->val.t, 2 * LOCAL_MAX_SIZE, s, char);
|
|
}
|
|
*enc = ENC_ISO_LATIN1;
|
|
*lengp = strlen(s);
|
|
return s;
|
|
}
|
|
if (inp->type & YAP_STRING_FLOAT && IsFloatTerm(inp->val.t)) {
|
|
if (s0)
|
|
s = s0;
|
|
else
|
|
s = Yap_PreAllocCodeSpace();
|
|
AUX_ERROR(inp->val.t, LOCAL_MAX_SIZE, s, char);
|
|
if (!Yap_FormatFloat(FloatOfTerm(inp->val.t), &s, LOCAL_MAX_SIZE - 1)) {
|
|
AUX_ERROR(inp->val.t, 2 * LOCAL_MAX_SIZE, s, char);
|
|
}
|
|
*lengp = strlen(s);
|
|
*enc = ENC_ISO_LATIN1;
|
|
return s;
|
|
}
|
|
#if USE_GMP
|
|
if (inp->type & YAP_STRING_BIG && IsBigIntTerm(inp->val.t)) {
|
|
if (s0)
|
|
s = s0;
|
|
else
|
|
s = Yap_PreAllocCodeSpace();
|
|
if (!Yap_mpz_to_string(Yap_BigIntOfTerm(inp->val.t), s, LOCAL_MAX_SIZE - 1,
|
|
10)) {
|
|
AUX_ERROR(inp->val.t, LOCAL_MAX_SIZE, s, char);
|
|
}
|
|
*enc = ENC_ISO_LATIN1;
|
|
*lengp = strlen(s);
|
|
return s;
|
|
}
|
|
#endif
|
|
if (inp->type & YAP_STRING_TERM) {
|
|
encoding_t enc = ENC_ISO_UTF8;
|
|
char *o = Yap_TermToString(inp->val.t, lengp, enc, 0);
|
|
return o;
|
|
}
|
|
if (inp->type & YAP_STRING_CHARS) {
|
|
*enc = inp->enc;
|
|
if (inp->type & YAP_STRING_NCHARS)
|
|
*lengp = inp->sz;
|
|
else
|
|
*lengp = strlen(inp->val.c);
|
|
return (void *)inp->val.c;
|
|
}
|
|
if (inp->type & YAP_STRING_WCHARS) {
|
|
*enc = ENC_WCHAR;
|
|
if (inp->type & YAP_STRING_NCHARS)
|
|
*lengp = inp->sz;
|
|
else
|
|
*lengp = wcslen(inp->val.w);
|
|
return (void *)inp->val.w;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static Term write_strings(void *s0, seq_tv_t *out, encoding_t enc, int minimal,
|
|
size_t leng USES_REGS) {
|
|
size_t min = 0, max = leng;
|
|
if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
|
|
if (out->type & YAP_STRING_NCHARS)
|
|
min = out->sz;
|
|
if (out->type & YAP_STRING_TRUNC && out->max < max)
|
|
max = out->max;
|
|
}
|
|
|
|
switch (enc) {
|
|
case ENC_ISO_UTF8: {
|
|
unsigned char *s = s0, *lim = s + (max = strlen_utf8(s));
|
|
Term t = init_tstring(PASS_REGS1);
|
|
unsigned char *cp = s, *buf;
|
|
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
buf = buf_from_tstring(HR);
|
|
while (*cp && cp < lim) {
|
|
utf8proc_int32_t chr;
|
|
cp += get_utf8(cp, -1, &chr);
|
|
buf += put_utf8(buf, chr);
|
|
}
|
|
if (max >= min)
|
|
*buf++ = '\0';
|
|
else
|
|
while (max < min) {
|
|
max++;
|
|
buf += put_utf8(buf, '\0');
|
|
}
|
|
|
|
close_tstring(buf PASS_REGS);
|
|
out->val.t = t;
|
|
} break;
|
|
case ENC_ISO_LATIN1: {
|
|
unsigned char *s = s0, *lim = s + (max = strlen_latin_utf8(s0));
|
|
Term t = init_tstring(PASS_REGS1);
|
|
unsigned char *cp = s;
|
|
unsigned char *buf;
|
|
utf8proc_int32_t chr;
|
|
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
buf = buf_from_tstring(HR);
|
|
while (cp < lim) {
|
|
cp = getChar(cp, &chr);
|
|
buf += put_utf8(buf, chr);
|
|
}
|
|
if (max >= min)
|
|
*buf++ = '\0';
|
|
else
|
|
while (max < min) {
|
|
max++;
|
|
buf += put_utf8(buf, chr);
|
|
}
|
|
close_tstring(buf PASS_REGS);
|
|
out->val.t = t;
|
|
} break;
|
|
case ENC_WCHAR: {
|
|
wchar_t *s = s0, *lim = s + (max = strlen_ucs2_utf8(s0));
|
|
Term t = init_tstring(PASS_REGS1);
|
|
wchar_t *wp = s;
|
|
unsigned char *buf;
|
|
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
buf = buf_from_tstring(HR);
|
|
while (wp < lim) {
|
|
utf8proc_int32_t chr;
|
|
wp = getWchar(wp, &chr);
|
|
buf += put_utf8(buf, chr);
|
|
}
|
|
if (max >= min)
|
|
*buf++ = '\0';
|
|
else
|
|
while (max < min) {
|
|
max++;
|
|
buf += put_utf8(buf, '\0');
|
|
}
|
|
close_tstring(buf PASS_REGS);
|
|
out->val.t = t;
|
|
} break;
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc), __FUNCTION__);
|
|
}
|
|
|
|
return out->val.t;
|
|
}
|
|
|
|
static Term write_atoms(void *s0, seq_tv_t *out, encoding_t enc, int minimal,
|
|
size_t leng USES_REGS) {
|
|
Term t = AbsPair(HR);
|
|
size_t sz = 0;
|
|
size_t max = leng;
|
|
if (leng == 0) {
|
|
out->val.t = t;
|
|
return TermNil;
|
|
}
|
|
if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
|
|
if (out->type & YAP_STRING_TRUNC && out->max < max)
|
|
max = out->max;
|
|
}
|
|
|
|
switch (enc) {
|
|
case ENC_ISO_UTF8: {
|
|
unsigned char *s = s0, *lim = s + strnlen((char *)s, max);
|
|
unsigned char *cp = s;
|
|
wchar_t w[2];
|
|
w[1] = '\0';
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
while (cp < lim && *cp) {
|
|
utf8proc_int32_t chr;
|
|
CELL *cl;
|
|
cp += get_utf8(cp, -1, &chr);
|
|
if (chr == '\0')
|
|
break;
|
|
w[0] = chr;
|
|
cl = HR;
|
|
HR += 2;
|
|
cl[0] = MkAtomTerm(Yap_LookupMaybeWideAtom(w));
|
|
cl[1] = AbsPair(HR);
|
|
sz++;
|
|
if (sz == max)
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
case ENC_ISO_LATIN1: {
|
|
unsigned char *s = s0, *lim = s + strnlen(s0, max);
|
|
unsigned char *cp = s;
|
|
char w[2];
|
|
w[1] = '\0';
|
|
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
while (cp < lim) {
|
|
utf8proc_int32_t chr;
|
|
cp = getChar(cp, &chr);
|
|
if (chr == '\0')
|
|
break;
|
|
w[0] = chr;
|
|
HR[0] = MkAtomTerm(Yap_LookupAtom(w));
|
|
HR[1] = AbsPair(HR + 2);
|
|
HR += 2;
|
|
sz++;
|
|
if (sz == max)
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
case ENC_WCHAR: {
|
|
wchar_t *s = s0, *lim = s + wcsnlen(s, max);
|
|
wchar_t *cp = s;
|
|
wchar_t w[2];
|
|
w[1] = '\0';
|
|
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
while (*cp && cp < lim) {
|
|
utf8proc_int32_t chr;
|
|
cp = getWchar(cp, &chr);
|
|
if (chr == '\0')
|
|
break;
|
|
w[0] = chr;
|
|
HR[0] = MkAtomTerm(Yap_LookupMaybeWideAtom(w));
|
|
HR[1] = AbsPair(HR + 2);
|
|
HR += 2;
|
|
sz++;
|
|
if (sz == max)
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc), __FUNCTION__);
|
|
}
|
|
if (out->type & YAP_STRING_DIFF) {
|
|
if (sz == 0)
|
|
t = out->dif;
|
|
else
|
|
HR[-1] = Globalize(out->dif PASS_REGS);
|
|
} else {
|
|
if (sz == 0)
|
|
t = TermNil;
|
|
else
|
|
HR[-1] = TermNil;
|
|
}
|
|
out->val.t = t;
|
|
return (t);
|
|
}
|
|
|
|
static Term write_codes(void *s0, seq_tv_t *out, encoding_t enc, int minimal,
|
|
size_t leng USES_REGS) {
|
|
Term t = AbsPair(HR);
|
|
size_t min = 0, max = leng;
|
|
size_t sz = 0;
|
|
|
|
if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
|
|
if (out->type & YAP_STRING_NCHARS)
|
|
min = out->sz;
|
|
if (out->type & YAP_STRING_TRUNC && out->max < max)
|
|
max = out->max;
|
|
}
|
|
|
|
switch (enc) {
|
|
case ENC_ISO_UTF8: {
|
|
unsigned char *s = s0, *lim = s + strnlen(s0, max);
|
|
unsigned char *cp = s;
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
while (*cp && cp < lim) {
|
|
utf8proc_int32_t chr;
|
|
cp += get_utf8(cp, -1, &chr);
|
|
HR[0] = MkIntTerm(chr);
|
|
HR[1] = AbsPair(HR + 2);
|
|
HR += 2;
|
|
sz++;
|
|
if (sz == max)
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
case ENC_ISO_LATIN1: {
|
|
unsigned char *s = s0, *lim = s + strnlen(s0, max);
|
|
unsigned char *cp = s;
|
|
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
while (cp < lim) {
|
|
utf8proc_int32_t chr;
|
|
cp = getChar(cp, &chr);
|
|
HR[0] = MkIntTerm(chr);
|
|
HR[1] = AbsPair(HR + 2);
|
|
HR += 2;
|
|
sz++;
|
|
if (sz == max)
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
case ENC_WCHAR: {
|
|
wchar_t *s = s0, *lim = s + wcsnlen(s, max);
|
|
wchar_t *cp = s;
|
|
|
|
LOCAL_TERM_ERROR(t, 2 * (lim - s));
|
|
while (cp < lim) {
|
|
utf8proc_int32_t chr;
|
|
cp = getWchar(cp, &chr);
|
|
HR[0] = MkIntTerm(chr);
|
|
HR[1] = AbsPair(HR + 2);
|
|
HR += 2;
|
|
sz++;
|
|
if (sz == max)
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc), __FUNCTION__);
|
|
}
|
|
while (sz < min) {
|
|
HR[0] = MkIntTerm(MkIntTerm(0));
|
|
HR[1] = AbsPair(HR + 2);
|
|
HR += 2;
|
|
sz++;
|
|
}
|
|
if (out->type & YAP_STRING_DIFF) {
|
|
if (sz == 0)
|
|
t = out->dif;
|
|
else
|
|
HR[-1] = Globalize(out->dif PASS_REGS);
|
|
} else {
|
|
if (sz == 0)
|
|
t = TermNil;
|
|
else
|
|
HR[-1] = TermNil;
|
|
}
|
|
out->val.t = t;
|
|
return (t);
|
|
}
|
|
|
|
static Atom write_atom(void *s0, seq_tv_t *out, encoding_t enc, int minimal,
|
|
size_t leng USES_REGS) {
|
|
size_t max = leng;
|
|
if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
|
|
if (out->type & YAP_STRING_TRUNC && out->max < max)
|
|
max = out->max;
|
|
}
|
|
|
|
switch (enc) {
|
|
case ENC_ISO_UTF8: {
|
|
unsigned char *s = s0, *lim = s + strnlen(s0, max);
|
|
wchar_t *buf = malloc(sizeof(wchar_t) * ((lim + 2) - s)), *ptr = buf;
|
|
Atom at;
|
|
|
|
while (*s && s < lim) {
|
|
utf8proc_int32_t chr;
|
|
s += get_utf8(s, -1, &chr);
|
|
*ptr++ = chr;
|
|
}
|
|
*ptr++ = '\0';
|
|
at = Yap_LookupMaybeWideAtomWithLength(buf, max);
|
|
free(buf);
|
|
out->val.a = at;
|
|
return at;
|
|
}
|
|
case ENC_ISO_LATIN1: {
|
|
char *s = s0;
|
|
Atom at;
|
|
|
|
max = strnlen(s, max);
|
|
at = Yap_LookupAtomWithLength(s, max);
|
|
out->val.a = at;
|
|
return at;
|
|
}
|
|
case ENC_WCHAR: {
|
|
wchar_t *s = s0;
|
|
Atom at;
|
|
|
|
max = wcsnlen(s, max);
|
|
out->val.a = at = Yap_LookupMaybeWideAtomWithLength(s, max);
|
|
return at;
|
|
}
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc));
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Deliver the text at s0 as a NUL-terminated wchar_t string in out->val.w.
 *
 * out->enc is forced to ENC_WCHAR.  A separate destination is used when the
 * source is not already wide or when out->type asks for one
 * (YAP_STRING_MALLOC: malloc'ed here; YAP_STRING_WITH_BUFFER: the caller's
 * out->val.w; neither: space just below ASP).  Otherwise out->val.w simply
 * aliases s0.  With YAP_STRING_NCHARS, out->sz is a minimum length and
 * shorter text is padded with NULs; YAP_STRING_TRUNC caps UTF-8 input at
 * out->max bytes.
 *
 * Returns the size in bytes of the result including one terminator (also
 * stored in out->sz), or (size_t)-1 on failure.
 *
 * Changes from the original: `case ENC_WCHAR` no longer falls through into
 * the UTF-8 decoder; sizes are kept in wchar_t units until the single
 * scaling at the end; padding no longer reads past the source; room for the
 * terminator is always reserved; Latin-1 bytes are widened as unsigned.
 */
static size_t write_wbuffer(void *s0, seq_tv_t *out, encoding_t enc,
                            int minimal, size_t leng USES_REGS) {
  size_t min = 0, max = leng, sz_end = 0;

  out->enc = ENC_WCHAR;
  if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
    if (out->type & YAP_STRING_NCHARS)
      min = out->sz;
    if (out->type & YAP_STRING_TRUNC && out->max < max)
      max = out->max;
  }
  if (out->enc != enc ||
      out->type & (YAP_STRING_WITH_BUFFER | YAP_STRING_MALLOC)) {
    size_t sz;

    if (enc != ENC_WCHAR) {
      sz = strlen((char *)s0) + 1;
    } else {
      sz = wcslen((wchar_t *)s0) + 1;
    }
    /* padded output needs `min` characters plus the terminator */
    if (sz < min + 1)
      sz = min + 1;
    sz *= sizeof(wchar_t);
    if (out->type & (YAP_STRING_MALLOC)) {
      out->val.w = malloc(sz);
    } else if (!(out->type & (YAP_STRING_WITH_BUFFER))) {
      /* reserve exactly the number of cells that is checked for */
      size_t cells = sz / sizeof(CELL) + 1;
      if (ASP - cells > HR + 1024) {
        out->val.w = (wchar_t *)(ASP - cells);
      } else
        return (size_t)-1;
    }
    if (!out->val.w)
      return (size_t)-1;
  } else {
    out->val.w = s0;
    sz_end = wcslen(s0) + 1; /* wchar_t units; scaled to bytes below */
  }

  switch (enc) {
  case ENC_WCHAR:
    if (out->type & (YAP_STRING_WITH_BUFFER | YAP_STRING_MALLOC)) {
      wchar_t *s = s0;
      size_t len = wcslen(s), n = len;

      if (n < min)
        n = min;
      /* memmove: a caller-supplied buffer may be the source itself */
      memmove(out->val.w, s, len * sizeof(wchar_t));
      while (len < n)
        out->val.w[len++] = L'\0';
      out->val.w[n] = L'\0';
      sz_end = n + 1;
    }
    break;
  case ENC_ISO_UTF8: {
    unsigned char *s = s0, *lim = s + (max = strnlen(s0, max));
    unsigned char *cp = s;
    wchar_t *buf0, *buf;

    buf = buf0 = out->val.w;
    /* bound first, so *lim is never read */
    while (cp < lim && *cp) {
      utf8proc_int32_t chr;
      cp += get_utf8(cp, -1, &chr);
      *buf++ = chr;
    }
    /* pad up to the requested minimum without touching the input again */
    while ((size_t)(buf - buf0) < min)
      *buf++ = L'\0';
    *buf = L'\0';
    sz_end = (buf - buf0) + 1;
  } break;
  case ENC_ISO_LATIN1: {
    unsigned char *s = s0;
    size_t len = strlen((char *)s), n = len, i;

    if (n < min)
      n = min;
    for (i = 0; i < len; i++)
      out->val.w[i] = s[i];
    for (; i < n; i++)
      out->val.w[i] = L'\0';
    out->val.w[n] = L'\0';
    sz_end = n + 1;
  } break;
  default:
    Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding %s in %s",
              enc_name(enc), __FUNCTION__);
    if (out->type & (YAP_STRING_MALLOC)) {
      free(out->val.w);
      out->val.w = NULL;
    }
    out->sz = (size_t)-1;
    return (size_t)-1;
  }

  sz_end *= sizeof(wchar_t);
  if (out->type & (YAP_STRING_MALLOC)) {
    /* shrink to fit; keep the original block if realloc fails */
    wchar_t *fit = realloc(out->val.w, sz_end);
    if (fit)
      out->val.w = fit;
  }
  out->sz = sz_end;
  return sz_end;
}
|
|
|
|
size_t write_buffer(void *s0, seq_tv_t *out, encoding_t enc, int minimal,
|
|
size_t leng USES_REGS) {
|
|
size_t min = 0, max = leng, sz_end;
|
|
if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
|
|
if (out->type & YAP_STRING_NCHARS)
|
|
min = out->sz;
|
|
if (out->type & YAP_STRING_TRUNC && out->max < max)
|
|
max = out->max;
|
|
}
|
|
if (out->enc != enc) {
|
|
size_t sz;
|
|
if (enc != ENC_WCHAR)
|
|
sz = strlen((char *)s0) + 1;
|
|
else
|
|
sz = wcslen((wchar_t *)s0) + 1;
|
|
if (sz < min)
|
|
sz = min;
|
|
if (!minimal)
|
|
sz *= 4;
|
|
if (out->type & (YAP_STRING_MALLOC)) {
|
|
out->val.c = malloc(sz);
|
|
} else if (!(out->type & (YAP_STRING_WITH_BUFFER))) {
|
|
if (ASP - (sz / sizeof(CELL) + 1) > HR + 1024) {
|
|
out->val.c = Yap_PreAllocCodeSpace();
|
|
}
|
|
}
|
|
} else {
|
|
out->val.c = s0;
|
|
}
|
|
if (out->enc == ENC_ISO_UTF8) {
|
|
switch (enc) {
|
|
case ENC_ISO_UTF8:
|
|
if (out->type & (YAP_STRING_WITH_BUFFER | YAP_STRING_MALLOC)) {
|
|
char *s = s0;
|
|
size_t n = strlen(s) + 1;
|
|
out->val.c[n] = '\0';
|
|
sz_end = n + 1;
|
|
} else {
|
|
sz_end = strlen(out->val.c) + 1;
|
|
}
|
|
break;
|
|
case ENC_ISO_LATIN1: {
|
|
unsigned char *s = s0, *lim = s + (max = strnlen(s0, max));
|
|
unsigned char *cp = s, *buf0, *buf;
|
|
|
|
buf = buf0 = s0;
|
|
if (!buf)
|
|
return -1;
|
|
while (*cp && cp < lim) {
|
|
utf8proc_int32_t chr;
|
|
chr = *cp++;
|
|
buf += put_utf8(buf, chr);
|
|
}
|
|
if (max >= min)
|
|
*buf++ = '\0';
|
|
else
|
|
while (max < min) {
|
|
max++;
|
|
utf8proc_int32_t chr;
|
|
chr = *cp++;
|
|
buf += put_utf8(buf, chr);
|
|
}
|
|
buf[0] = '\0';
|
|
sz_end = (buf + 1) - buf0;
|
|
} break;
|
|
case ENC_WCHAR: {
|
|
wchar_t *s = s0;
|
|
unsigned char *buf = out->val.uc;
|
|
size_t n = wcslen(s), i;
|
|
if (n < min)
|
|
n = min;
|
|
for (i = 0; i < n; i++) {
|
|
utf8proc_int32_t chr = s[i];
|
|
buf += put_utf8(buf, chr);
|
|
}
|
|
*buf++ = '\0';
|
|
sz_end = (buf + 1) - out->val.uc;
|
|
} break;
|
|
default:
|
|
sz_end = -1;
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc), __FUNCTION__);
|
|
}
|
|
} else if (out->enc == ENC_ISO_LATIN1) {
|
|
switch (enc) {
|
|
case ENC_ISO_LATIN1:
|
|
if (out->type & YAP_STRING_WITH_BUFFER) {
|
|
char *s = s0;
|
|
size_t n = strlen(s), i;
|
|
if (n < min)
|
|
n = min;
|
|
memcpy(out->val.c, s0, n);
|
|
for (i = 0; i < n; i++)
|
|
out->val.w[i] = s[i];
|
|
out->val.w[n] = '\0';
|
|
sz_end = (n + 1) * sizeof(wchar_t);
|
|
} else {
|
|
sz_end = strlen(out->val.c) + 1;
|
|
}
|
|
break;
|
|
case ENC_ISO_UTF8: {
|
|
unsigned char *s = s0, *lim = s + (max = strnlen(s0, max));
|
|
unsigned char *cp = s;
|
|
unsigned char *buf0, *buf;
|
|
|
|
buf = buf0 = out->val.uc;
|
|
if (!buf)
|
|
return -1;
|
|
while (*cp && cp < lim) {
|
|
utf8proc_int32_t chr;
|
|
cp += get_utf8(cp, -1, &chr);
|
|
*buf++ = chr;
|
|
}
|
|
if (max >= min)
|
|
*buf++ = '\0';
|
|
else
|
|
while (max < min) {
|
|
utf8proc_int32_t chr;
|
|
max++;
|
|
cp += get_utf8(cp, -1, &chr);
|
|
*buf++ = chr;
|
|
}
|
|
sz_end = buf - out->val.uc;
|
|
} break;
|
|
case ENC_WCHAR: {
|
|
wchar_t *s = s0;
|
|
size_t n = wcslen(s), i;
|
|
if (n < min)
|
|
n = min;
|
|
for (i = 0; i < n; i++)
|
|
out->val.c[i] = s[i];
|
|
out->val.c[n] = '\0';
|
|
sz_end = n + 1;
|
|
} break;
|
|
default:
|
|
sz_end = -1;
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc), __FUNCTION__);
|
|
}
|
|
} else {
|
|
// no other encodings are supported.
|
|
sz_end = -1;
|
|
}
|
|
if (out->type & (YAP_STRING_MALLOC)) {
|
|
out->val.c = realloc(out->val.c, sz_end);
|
|
}
|
|
out->sz = sz_end;
|
|
return sz_end;
|
|
}
|
|
|
|
static size_t write_length(void *s0, seq_tv_t *out, encoding_t enc, int minimal,
|
|
size_t leng USES_REGS) {
|
|
size_t max = -1;
|
|
|
|
if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
|
|
if (out->type & YAP_STRING_NCHARS && out->sz != (size_t)-1)
|
|
return out->sz;
|
|
if (out->type & YAP_STRING_TRUNC)
|
|
max = out->max;
|
|
}
|
|
|
|
switch (enc) {
|
|
case ENC_ISO_UTF8: {
|
|
const unsigned char *s = s0;
|
|
return strlen_utf8(s);
|
|
}
|
|
case ENC_ISO_LATIN1: {
|
|
const char *s = s0;
|
|
return strnlen(s, max);
|
|
}
|
|
case ENC_WCHAR: {
|
|
const wchar_t *s = s0;
|
|
return wcsnlen(s, max);
|
|
}
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc), __FUNCTION__);
|
|
}
|
|
return (size_t)-1;
|
|
}
|
|
|
|
static Term write_number(void *s0, seq_tv_t *out, encoding_t enc, int minimal,
|
|
int size USES_REGS) {
|
|
return Yap_StringToNumberTerm(s0, &enc);
|
|
}
|
|
|
|
static Term string_to_term(void *s0, seq_tv_t *out, encoding_t enc, int minimal,
|
|
size_t leng USES_REGS) {
|
|
Term o = out->val.t =
|
|
Yap_StringToTerm(s0, strlen(s0) + 1, &enc, GLOBAL_MaxPriority, NULL);
|
|
return o;
|
|
}
|
|
|
|
bool write_Text(void *inp, seq_tv_t *out, encoding_t enc, int minimal,
|
|
size_t leng USES_REGS) {
|
|
/* we know what the term is */
|
|
switch (out->type & YAP_TYPE_MASK) {
|
|
case YAP_STRING_STRING:
|
|
out->val.t = write_strings(inp, out, enc, minimal, leng PASS_REGS);
|
|
return out->val.t != 0;
|
|
case YAP_STRING_ATOMS:
|
|
out->val.t = write_atoms(inp, out, enc, minimal, leng PASS_REGS);
|
|
return out->val.t != 0;
|
|
case YAP_STRING_CODES:
|
|
out->val.t = write_codes(inp, out, enc, minimal, leng PASS_REGS);
|
|
return out->val.t != 0;
|
|
case YAP_STRING_LENGTH:
|
|
out->val.l = write_length(inp, out, enc, minimal, leng PASS_REGS);
|
|
return out->val.l != (size_t)(-1);
|
|
case YAP_STRING_ATOM:
|
|
out->val.a = write_atom(inp, out, enc, minimal, leng PASS_REGS);
|
|
return out->val.a != NULL;
|
|
case YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG:
|
|
out->val.t = write_number(inp, out, enc, minimal, leng PASS_REGS);
|
|
return out->val.t != 0;
|
|
case YAP_STRING_CHARS: {
|
|
size_t sz = write_buffer(inp, out, enc, minimal, leng PASS_REGS);
|
|
return ((Int)sz > 0);
|
|
}
|
|
case YAP_STRING_WCHARS: {
|
|
size_t sz = write_wbuffer(inp, out, enc, minimal, leng PASS_REGS);
|
|
return ((Int)sz > 0);
|
|
}
|
|
default:
|
|
if (!(out->type & YAP_STRING_TERM))
|
|
return 0;
|
|
if (out->type & (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG))
|
|
if ((out->val.t = write_number(inp, out, enc, minimal, leng PASS_REGS)) !=
|
|
0L)
|
|
return out->val.t != 0;
|
|
if (out->type & (YAP_STRING_ATOM))
|
|
if (write_atom(inp, out, enc, minimal, leng PASS_REGS) != NIL) {
|
|
Atom at = out->val.a;
|
|
if (at != NIL)
|
|
out->val.t = MkAtomTerm(at);
|
|
return at != NIL;
|
|
}
|
|
if ((out->val.t = string_to_term(inp, out, enc, minimal, leng PASS_REGS)) !=
|
|
0L)
|
|
return out->val.t != 0;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
 * Convert the text at s0 to upper case in place, using utf8proc_toupper().
 * `out` is not used.  Returns true (1) on success and false (0) after
 * raising an error for an unsupported encoding.
 *
 * Characters are only replaced when the result fits in the same storage:
 * a UTF-8 sequence of the same byte length, or a Latin-1 value that is
 * still at most 0xFF.  Anything else is left unchanged.
 */
static size_t upcase(void *s0, seq_tv_t *out, encoding_t enc USES_REGS) {
  switch (enc) {
  case ENC_ISO_UTF8: {
    unsigned char *s = s0;
    while (*s) {
      utf8proc_int32_t chr, mapped;
      unsigned char tmp[8]; /* same per-character budget used elsewhere here */
      Int in_sz = get_utf8(s, -1, &chr);

      if (in_sz <= 0) {
        /* presumably malformed input; step over one byte so the loop
           always advances */
        s++;
        continue;
      }
      mapped = utf8proc_toupper(chr);
      /* encode aside first: writing a different-sized sequence in place
         would corrupt the bytes that follow */
      if (mapped != chr && (Int)put_utf8(tmp, mapped) == in_sz)
        memcpy(s, tmp, in_sz);
      s += in_sz;
    }
    return true;
  }

  case ENC_ISO_LATIN1: {
    unsigned char *s = s0;

    for (; *s; s++) {
      utf8proc_int32_t mapped = utf8proc_toupper(*s);
      /* some Latin-1 letters map outside Latin-1; storing them in a byte
         would truncate the value */
      if (mapped > 0 && mapped <= 0xFF)
        *s = (unsigned char)mapped;
    }
    return true;
  }

  case ENC_WCHAR: {
    wchar_t *s = s0;

    for (; *s; s++)
      *s = utf8proc_toupper(*s);
    return true;
  }
  default:
    Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding %s in %s",
              enc_name(enc), __FUNCTION__);
  }
  return false;
}
|
|
|
|
/*
 * Convert the text at s0 to lower case in place, using utf8proc_tolower().
 * `out` is not used.  Returns true (1) on success and false (0) after
 * raising an error for an unsupported encoding.
 *
 * Characters are only replaced when the result fits in the same storage:
 * a UTF-8 sequence of the same byte length, or a Latin-1 value that is
 * still at most 0xFF.  Anything else is left unchanged.
 */
static size_t downcase(void *s0, seq_tv_t *out, encoding_t enc USES_REGS) {
  switch (enc) {
  case ENC_ISO_UTF8: {
    unsigned char *s = s0;
    while (*s) {
      utf8proc_int32_t chr, mapped;
      unsigned char tmp[8]; /* same per-character budget used elsewhere here */
      Int in_sz = get_utf8(s, -1, &chr);

      if (in_sz <= 0) {
        /* presumably malformed input; step over one byte so the loop
           always advances */
        s++;
        continue;
      }
      mapped = utf8proc_tolower(chr);
      /* encode aside first: writing a different-sized sequence in place
         would corrupt the bytes that follow */
      if (mapped != chr && (Int)put_utf8(tmp, mapped) == in_sz)
        memcpy(s, tmp, in_sz);
      s += in_sz;
    }
    return true;
  }

  case ENC_ISO_LATIN1: {
    unsigned char *s = s0;

    for (; *s; s++) {
      utf8proc_int32_t mapped = utf8proc_tolower(*s);
      /* never store a value that does not fit in one Latin-1 byte */
      if (mapped > 0 && mapped <= 0xFF)
        *s = (unsigned char)mapped;
    }
    return true;
  }
  case ENC_WCHAR: {
    wchar_t *s = s0;

    for (; *s; s++)
      *s = utf8proc_tolower(*s);
    return true;
  }
  default:
    Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding %s in %s",
              enc_name(enc), __FUNCTION__);
  }
  return false;
}
|
|
|
|
/*
 * Convert the text described by inp into the form requested by out.
 *
 * The input is read with Yap_readText(), optionally case-converted when
 * out->type has YAP_STRING_UPCASE / YAP_STRING_DOWNCASE, and then written
 * with write_Text().  Returns non-zero on success and 0 on failure.
 *
 * NOTE(review): upcase()/downcase() rewrite the buffer in place, and
 * Yap_readText() can return storage it does not own (a string term's text,
 * an atom's name, the caller's C string) -- confirm that case conversion is
 * never requested for such inputs, or copy first.
 */
int Yap_CVT_Text(seq_tv_t *inp, seq_tv_t *out USES_REGS) {
  /* Yap_readText()'s term branch produces UTF-8 text without writing its
     encoding out-parameter, so start from that value */
  encoding_t enc = ENC_ISO_UTF8;
  int minimal = FALSE;
  char *buf;
  /* read through the pointer by the list conversions before being set */
  size_t leng = 0;

  buf = Yap_readText(NULL, inp, &enc, &minimal, &leng PASS_REGS);
  if (!buf)
    return 0L;
  if (out->type & (YAP_STRING_UPCASE | YAP_STRING_DOWNCASE)) {
    if (out->type & YAP_STRING_UPCASE) {
      /* both helpers are declared with USES_REGS, so pass the registers */
      if (!upcase(buf, out, enc PASS_REGS))
        return false;
    }
    if (out->type & YAP_STRING_DOWNCASE) {
      if (!downcase(buf, out, enc PASS_REGS))
        return false;
    }
  }

  return write_Text(buf, out, enc, minimal, leng PASS_REGS);
}
|
|
|
|
/*
 * Return the address just past the terminator of the string at s0:
 * byte-wise for Latin-1 and UTF-8, wchar_t-wise for wide text.  Raises an
 * error and returns NULL for any other encoding.
 */
static void *compute_end(void *s0, encoding_t enc) {
  switch (enc) {
  case ENC_ISO_LATIN1:
  case ENC_ISO_UTF8: {
    char *s = (char *)s0;
    return s + (1 + strlen(s));
  }
  case ENC_WCHAR: {
    wchar_t *s = (wchar_t *)s0;
    return s + (1 + wcslen(s));
  }
  default:
    Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding %s in %s",
              enc_name(enc), __FUNCTION__);
  }
  return NULL;
}
|
|
|
|
/*
 * Return the position l characters after s: l bytes for Latin-1, l wide
 * characters for wide text, and whatever skip_utf8(s, l) yields for UTF-8.
 * Raises an error and returns s unchanged for any other encoding.
 */
static void *advance_Text(void *s, int l, encoding_t enc) {
  switch (enc) {
  case ENC_ISO_LATIN1:
    return ((char *)s) + l;
  case ENC_ISO_UTF8:
    return (char *)skip_utf8(s, l);
  case ENC_WCHAR:
    return ((wchar_t *)s) + l;
  default:
    Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding %s in %s",
              enc_name(enc), __FUNCTION__);
  }
  return s;
}
|
|
|
|
static int cmp_Text(void *s1, void *s2, int l, encoding_t enc1,
|
|
encoding_t enc2) {
|
|
Int i;
|
|
switch (enc1) {
|
|
case ENC_ISO_LATIN1: {
|
|
char *w1 = (char *)s1;
|
|
switch (enc2) {
|
|
case ENC_ISO_LATIN1:
|
|
return strncmp(s1, s2, l);
|
|
case ENC_ISO_UTF8: {
|
|
utf8proc_int32_t chr1, chr2;
|
|
unsigned char *w2 = s2;
|
|
for (i = 0; i < l; i++) {
|
|
chr1 = *w1++;
|
|
w2 += get_utf8(w2, -1, &chr2);
|
|
if (chr1 - chr2)
|
|
return chr1 - chr2;
|
|
}
|
|
}
|
|
return 0;
|
|
case ENC_WCHAR: {
|
|
utf8proc_int32_t chr1, chr2;
|
|
wchar_t *w2 = s2;
|
|
for (i = 0; i < l; i++) {
|
|
chr1 = *w1++;
|
|
chr2 = *w2++;
|
|
if (chr1 - chr2)
|
|
return chr1 - chr2;
|
|
}
|
|
}
|
|
return 0;
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc2), __FUNCTION__);
|
|
}
|
|
}
|
|
case ENC_ISO_UTF8: {
|
|
unsigned char *w1 = s1;
|
|
switch (enc2) {
|
|
case ENC_ISO_LATIN1: {
|
|
utf8proc_int32_t chr1, chr2;
|
|
unsigned char *w2 = s2;
|
|
for (i = 0; i < l; i++) {
|
|
chr2 = *w2++;
|
|
w1 += get_utf8(w1, -1, &chr1);
|
|
if (chr1 - chr2)
|
|
return chr1 - chr2;
|
|
}
|
|
}
|
|
return 0;
|
|
case ENC_ISO_UTF8: {
|
|
utf8proc_int32_t chr1, chr2;
|
|
unsigned char *w2 = s2;
|
|
for (i = 0; i < l; i++) {
|
|
w2 += get_utf8(w2, -1, &chr2);
|
|
w1 += get_utf8(w1, -1, &chr1);
|
|
if (chr1 - chr2)
|
|
return chr1 - chr2;
|
|
}
|
|
}
|
|
return 0;
|
|
case ENC_WCHAR: {
|
|
utf8proc_int32_t chr1, chr2;
|
|
wchar_t *w2 = s2;
|
|
for (i = 0; i < l; i++) {
|
|
chr2 = *w2++;
|
|
w1 += get_utf8(w1, -1, &chr1);
|
|
if (chr1 - chr2)
|
|
return chr1 - chr2;
|
|
}
|
|
}
|
|
return 0;
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc2), __FUNCTION__);
|
|
}
|
|
}
|
|
case ENC_WCHAR: {
|
|
wchar_t *w1 = (wchar_t *)s1;
|
|
switch (enc2) {
|
|
case ENC_ISO_LATIN1: {
|
|
utf8proc_int32_t chr1, chr2;
|
|
char *w2 = s2;
|
|
for (i = 0; i < l; i++) {
|
|
chr1 = *w1++;
|
|
chr2 = *w2++;
|
|
if (chr1 - chr2)
|
|
return chr1 - chr2;
|
|
}
|
|
}
|
|
return 0;
|
|
case ENC_ISO_UTF8: {
|
|
utf8proc_int32_t chr1, chr2;
|
|
unsigned char *w2 = s2;
|
|
for (i = 0; i < l; i++) {
|
|
chr1 = *w1++;
|
|
w2 += get_utf8(w2, -1, &chr2);
|
|
if (chr1 - chr2)
|
|
return chr1 - chr2;
|
|
}
|
|
}
|
|
return 0;
|
|
case ENC_WCHAR:
|
|
return wcsncmp(s1, s2, l);
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc2), __FUNCTION__);
|
|
}
|
|
}
|
|
default:
|
|
Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s",
|
|
enc_name(enc1), __FUNCTION__);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void *concat(int n, seq_tv_t *out, void *sv[], encoding_t encv[],
|
|
size_t lengv[] USES_REGS) {
|
|
if (out->type == YAP_STRING_STRING) {
|
|
/* we assume we concatenate strings only, or ASCII stuff like numbers */
|
|
Term t = init_tstring(PASS_REGS1);
|
|
unsigned char *buf = buf_from_tstring(HR);
|
|
int i;
|
|
for (i = 0; i < n; i++) {
|
|
if (encv[i] == ENC_WCHAR) {
|
|
wchar_t *ptr = sv[i];
|
|
utf8proc_int32_t chr;
|
|
while ((chr = *ptr++))
|
|
buf += put_utf8(buf, chr);
|
|
} else if (encv[i] == ENC_ISO_LATIN1) {
|
|
char *ptr = sv[i];
|
|
utf8proc_int32_t chr;
|
|
while ((chr = *ptr++))
|
|
buf += put_utf8(buf, chr);
|
|
} else {
|
|
char *ptr = sv[i];
|
|
utf8proc_int32_t chr;
|
|
while ((chr = *ptr++))
|
|
*buf++ = chr;
|
|
}
|
|
}
|
|
*buf++ = '\0';
|
|
close_tstring(buf PASS_REGS);
|
|
out->val.t = t;
|
|
return HR;
|
|
} else {
|
|
encoding_t enc = ENC_ISO_LATIN1;
|
|
size_t sz = 0;
|
|
|
|
int i;
|
|
for (i = 0; i < n; i++) {
|
|
if (encv[i] != ENC_ISO_LATIN1) {
|
|
enc = ENC_WCHAR;
|
|
}
|
|
sz += write_length(sv[i], out, encv[i], FALSE, lengv[i] PASS_REGS);
|
|
}
|
|
if (enc == ENC_WCHAR) {
|
|
/* wide atom */
|
|
wchar_t *buf = (wchar_t *)HR;
|
|
Atom at;
|
|
LOCAL_ERROR(MkAtomTerm(Yap_LookupWideAtom(buf)), sz + 3);
|
|
for (i = 0; i < n; i++) {
|
|
if (encv[i] == ENC_WCHAR) {
|
|
wchar_t *ptr = sv[i];
|
|
utf8proc_int32_t chr;
|
|
while ((chr = *ptr++) != '\0')
|
|
*buf++ = chr;
|
|
} else if (encv[i] == ENC_ISO_LATIN1) {
|
|
char *ptr = sv[i];
|
|
utf8proc_int32_t chr;
|
|
while ((chr = *ptr++) != '\0')
|
|
*buf++ = (unsigned char)chr;
|
|
} else {
|
|
unsigned char *ptr = sv[i];
|
|
utf8proc_int32_t chr;
|
|
while ((ptr += get_utf8(ptr, -1, &chr)) != NULL) {
|
|
if (chr == '\0')
|
|
break;
|
|
else
|
|
*buf++ = chr;
|
|
}
|
|
}
|
|
}
|
|
*buf++ = '\0';
|
|
at = out->val.a = Yap_LookupWideAtom((wchar_t *)HR);
|
|
return at;
|
|
} else {
|
|
/* atom */
|
|
char *buf = (char *)HR;
|
|
Atom at;
|
|
|
|
LOCAL_ERROR(MkAtomTerm(Yap_LookupAtom(buf)), sz / sizeof(CELL) + 3);
|
|
for (i = 0; i < n; i++) {
|
|
char *ptr = sv[i];
|
|
utf8proc_int32_t chr;
|
|
while ((chr = *ptr++) != '\0')
|
|
*buf++ = chr;
|
|
}
|
|
*buf++ = '\0';
|
|
at = out->val.a = Yap_LookupAtom((const char *)HR);
|
|
return at;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void *slice(size_t min, size_t max, void *buf, seq_tv_t *out,
|
|
encoding_t enc USES_REGS) {
|
|
if (out->type == YAP_STRING_STRING) {
|
|
/* we assume we concatenate strings only, or ASCII stuff like numbers */
|
|
Term t = init_tstring(PASS_REGS1);
|
|
unsigned char *nbuf = buf_from_tstring(HR);
|
|
if (enc == ENC_WCHAR) {
|
|
wchar_t *ptr = (wchar_t *)buf + min;
|
|
utf8proc_int32_t chr;
|
|
while (min++ < max) {
|
|
chr = *ptr++;
|
|
nbuf += put_utf8(nbuf, chr);
|
|
}
|
|
} else if (enc == ENC_ISO_LATIN1) {
|
|
unsigned char *ptr = (unsigned char *)buf + min;
|
|
utf8proc_int32_t chr;
|
|
while (min++ < max) {
|
|
chr = *ptr++;
|
|
nbuf += put_utf8(nbuf, chr);
|
|
}
|
|
} else {
|
|
unsigned char *ptr = skip_utf8(buf, min);
|
|
utf8proc_int32_t chr;
|
|
if (!ptr)
|
|
return NULL;
|
|
while (min++ < max) {
|
|
ptr += get_utf8(ptr, -1, &chr);
|
|
nbuf += put_utf8(nbuf, chr);
|
|
}
|
|
}
|
|
*nbuf++ = '\0';
|
|
close_tstring(nbuf PASS_REGS);
|
|
out->val.t = t;
|
|
return (void *)StringOfTerm(t);
|
|
} else {
|
|
Atom at;
|
|
/* atom */
|
|
if (enc == ENC_WCHAR) {
|
|
/* wide atom */
|
|
wchar_t *nbuf = (wchar_t *)HR;
|
|
wchar_t *ptr = (wchar_t *)buf + min;
|
|
if (max > min) {
|
|
LOCAL_ERROR(MkAtomTerm(Yap_LookupWideAtom(buf)),
|
|
(max - min) * sizeof(wchar_t));
|
|
memcpy(nbuf, ptr, (max - min) * sizeof(wchar_t));
|
|
}
|
|
nbuf[max - min] = '\0';
|
|
at = Yap_LookupMaybeWideAtom(nbuf);
|
|
} else if (enc == ENC_ISO_LATIN1) {
|
|
/* atom */
|
|
char *nbuf = (char *)HR;
|
|
|
|
if (max > min) {
|
|
char *ptr = (char *)buf + min;
|
|
LOCAL_ERROR(MkAtomTerm(Yap_LookupAtom(buf)), max - min);
|
|
memcpy(nbuf, ptr, (max - min));
|
|
}
|
|
nbuf[max - min] = '\0';
|
|
at = Yap_LookupAtom(nbuf);
|
|
} else {
|
|
/* atom */
|
|
wchar_t *nbuf = (wchar_t *)HR;
|
|
unsigned char *ptr = skip_utf8((unsigned char *)buf, min);
|
|
utf8proc_int32_t chr;
|
|
|
|
LOCAL_ERROR(MkAtomTerm(Yap_LookupAtom(buf)), max - min);
|
|
while (min++ < max) {
|
|
ptr += get_utf8(ptr, -1, &chr);
|
|
*nbuf++ = chr;
|
|
}
|
|
nbuf[0] = '\0';
|
|
at = Yap_LookupMaybeWideAtom((wchar_t *)HR);
|
|
}
|
|
out->val.a = at;
|
|
return at->StrOfAE;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
//
|
|
// Out must be an atom or a string
|
|
void *Yap_Concat_Text(int n, seq_tv_t inp[], seq_tv_t *out USES_REGS) {
|
|
encoding_t *encv;
|
|
void **bufv;
|
|
int minimal = FALSE;
|
|
void *buf;
|
|
size_t leng, *lengv;
|
|
int i;
|
|
Term t = ARG1;
|
|
bufv = (void **)malloc(n * sizeof(void *));
|
|
HEAP_TERM_ERROR(bufv, void *);
|
|
encv = (encoding_t *)malloc(n * sizeof(encoding_t));
|
|
HEAP_ERROR(encv, encoding_t);
|
|
buf = NULL;
|
|
for (i = 0; i < n; i++) {
|
|
void *nbuf =
|
|
Yap_readText(buf, inp + i, encv + i, &minimal, &leng PASS_REGS);
|
|
|
|
if (!nbuf)
|
|
return 0L;
|
|
bufv[i] = nbuf;
|
|
if ((char *)nbuf >= AuxBase && (char *)nbuf < AuxTop) {
|
|
buf = compute_end(nbuf, encv[i]);
|
|
}
|
|
}
|
|
lengv = (size_t *)malloc(n * sizeof(size_t));
|
|
HEAP_ERROR(lengv, size_t);
|
|
buf = concat(n, out, bufv, encv, lengv PASS_REGS);
|
|
free(bufv);
|
|
free(lengv);
|
|
free(encv);
|
|
return buf;
|
|
}
|
|
|
|
//
|
|
// out must be an atom or a string
|
|
void *Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp, encoding_t encv[],
|
|
seq_tv_t outv[] USES_REGS) {
|
|
encoding_t enc;
|
|
int minimal = FALSE;
|
|
void *buf, *store;
|
|
size_t l, leng;
|
|
int i, min;
|
|
|
|
buf = Yap_readText(NULL, inp, &enc, &minimal, &leng PASS_REGS);
|
|
if (!buf)
|
|
return NULL;
|
|
l = write_length(buf, inp, enc, minimal, leng PASS_REGS);
|
|
/* where to allocate next is the most complicated part */
|
|
if ((char *)buf >= AuxBase && (char *)buf < AuxTop) {
|
|
store = compute_end(buf, enc);
|
|
} else {
|
|
store = NULL;
|
|
}
|
|
|
|
if (!cuts) {
|
|
if (n == 2) {
|
|
size_t l0, l1;
|
|
size_t leng0, leng1;
|
|
encoding_t enc0, enc1;
|
|
int minimal0, minimal1;
|
|
void *buf0, *buf1;
|
|
|
|
if (outv[0].val.t) {
|
|
buf0 = Yap_readText(store, outv, &enc0, &minimal0, &leng0 PASS_REGS);
|
|
if (!buf0)
|
|
return NULL;
|
|
l0 = write_length(buf0, outv, enc, minimal0, leng0 PASS_REGS);
|
|
if (cmp_Text(buf, buf0, l0, enc, enc0) != 0)
|
|
return NULL;
|
|
|
|
l1 = l - l0;
|
|
|
|
buf1 = slice(l0, l, buf, outv + 1, enc PASS_REGS);
|
|
if (encv)
|
|
encv[1] = enc;
|
|
return buf1;
|
|
} else /* if (outv[1].val.t) */ {
|
|
buf1 =
|
|
Yap_readText(store, outv + 1, &enc1, &minimal1, &leng1 PASS_REGS);
|
|
if (!buf1)
|
|
return NULL;
|
|
l1 = write_length(buf1, outv + 1, enc1, minimal1, leng1 PASS_REGS);
|
|
if (l < l1)
|
|
return NULL;
|
|
l0 = l - l1;
|
|
if (cmp_Text(advance_Text(buf, l0, enc), buf1, l1, enc, enc1) != 0)
|
|
return NULL;
|
|
buf0 = slice(0, l0, buf, outv, enc PASS_REGS);
|
|
if (encv)
|
|
encv[0] = enc;
|
|
return buf0;
|
|
}
|
|
}
|
|
}
|
|
for (i = 0; i < n; i++) {
|
|
if (i == 0)
|
|
min = 0;
|
|
else
|
|
min = cuts[i - 1];
|
|
slice(min, cuts[i], buf, outv + i, enc PASS_REGS);
|
|
if (!(outv[i].val.a))
|
|
return NULL;
|
|
if (encv)
|
|
encv[i] = enc;
|
|
}
|
|
return (void *)outv;
|
|
;
|
|
}
|
|
|
|
/**
|
|
* Function to convert a generic text term (string, atom, list of codes, list of
|
|
atoms) into a buff
|
|
er.
|
|
*
|
|
* @param t the term
|
|
* @param buf the buffer, if NULL a buffer is malloced, and the user should
|
|
reclai it
|
|
* @param len buffer size
|
|
* @param enc encoding (UTF-8 is strongly recommended)
|
|
*
|
|
* @return the buffer, or NULL in case of failure. If so, Yap_Error may be
|
|
called.
|
|
*/
|
|
const char *Yap_TextTermToText(Term t, char *buf, size_t len, encoding_t enc) {
|
|
CACHE_REGS
|
|
seq_tv_t inp, out;
|
|
|
|
inp.val.t = t;
|
|
if (IsAtomTerm(t)) {
|
|
inp.type = YAP_STRING_ATOM;
|
|
if (IsWideAtom(AtomOfTerm(t)))
|
|
inp.enc = ENC_WCHAR;
|
|
else
|
|
inp.enc = ENC_ISO_LATIN1;
|
|
} else if (IsStringTerm(t)) {
|
|
inp.type = YAP_STRING_STRING;
|
|
inp.enc = ENC_ISO_UTF8;
|
|
} else if (IsPairTerm(t)) {
|
|
inp.type = (YAP_STRING_CODES | YAP_STRING_ATOMS);
|
|
} else {
|
|
Yap_Error(TYPE_ERROR_TEXT, t, NULL);
|
|
return false;
|
|
}
|
|
out.enc = enc;
|
|
out.type = YAP_STRING_CHARS;
|
|
if (!buf) {
|
|
inp.type |= YAP_STRING_MALLOC;
|
|
out.type |= YAP_STRING_MALLOC;
|
|
}
|
|
out.val.c = buf;
|
|
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
|
|
return NULL;
|
|
return out.val.c;
|
|
}
|
|
|
|
/**
|
|
* Convert from a text buffer (8-bit) to a term that has the same type as
|
|
* _Tguide_
|
|
*
|
|
* @param s the buffer
|
|
* @param tguide the guide
|
|
*
|
|
* @return the term
|
|
*/
|
|
Term Yap_MkTextTerm(const char *s, encoding_t enc, Term tguide) {
|
|
CACHE_REGS
|
|
if (IsAtomTerm(tguide))
|
|
return MkAtomTerm(Yap_LookupAtom(s));
|
|
if (IsStringTerm(tguide))
|
|
return MkStringTerm(s);
|
|
if (IsPairTerm(tguide) && IsAtomTerm(HeadOfTerm(tguide))) {
|
|
return Yap_CharsToListOfAtoms(s, enc PASS_REGS);
|
|
}
|
|
return Yap_CharsToListOfCodes(s, enc PASS_REGS);
|
|
}
|