This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/C/text.c

1185 lines
31 KiB
C
Raw Normal View History

/*************************************************************************
* *
* YAP Prolog *
* *
* Yap Prolog was developed at NCCUP - Universidade do Porto *
* *
* Copyright L.Damas, V. Santos Costa and Universidade do Porto 1985-- *
* *
**************************************************************************
* *
* File: text.c *
* comments: General-conversion of character sequences. *
* *
* Last rev: $Date: 2008-07-24 16:02:00 $,$Author: vsc $ *
* *
*************************************************************************/
#include "Yap.h"
2017-02-20 14:21:46 +00:00
#include "YapEval.h"
#include "YapHeap.h"
#include "YapText.h"
#include "Yatom.h"
#include "yapio.h"
#include <YapText.h>
#include <string.h>
2014-06-19 15:05:46 +01:00
#include <wchar.h>
2014-03-15 23:47:29 +00:00
#ifndef HAVE_WCSNLEN
/* Smaller of two sizes. */
inline static size_t min_size(size_t i, size_t j) { return (i < j ? i : j); }

/*
 * Fallback for platforms without wcsnlen(): number of wide characters in s
 * before the terminator, never examining more than n elements.
 *
 * The previous fallback computed wcslen(s) first, which reads past n when
 * the first n elements contain no terminator.
 */
inline static size_t bounded_wcslen(const wchar_t *s, size_t n) {
  size_t len = 0;
  while (len < n && s[len] != L'\0') {
    len++;
  }
  return len;
}
#define wcsnlen(S, N) bounded_wcslen((S), (N))
#endif
/* Fallback: only defined when no NAN macro is already visible at this point. */
#ifndef NAN
#define NAN (0.0 / 0.0)
#endif
/* Size constant; nothing in the visible part of this file references it. */
#define MAX_PATHNAME 2048
/*
 * Header placed directly in front of every buffer handed out by Malloc():
 * Malloc() returns the address just past this header, and Realloc()/Free()
 * step back by one header to recover it.
 */
struct mblock {
/* neighbours in the doubly linked list of blocks allocated at the same level */
struct mblock *prev, *next;
/* text-stack level that was current when the block was allocated */
int lvl;
/* size passed to malloc(), i.e. payload plus this header, after alignment */
size_t sz;
};
2016-10-20 04:38:17 +01:00
/*
 * Bookkeeping for the temporary text buffers used by the conversion routines.
 *
 * Blocks are grouped by "level": push_text_stack() opens a level and the pop
 * routines release every block that belongs to the levels being closed.
 */
typedef struct TextBuffer_manager {
  /* MaxTmp() reports (buf + sz) - ptr; nothing visible in this file assigns
     these three fields after the zero-initialising calloc(). */
  void *buf, *ptr;
  size_t sz;
  /* head and tail of the block list of each level */
  struct mblock *first[16];
  struct mblock *last[16];
  /* level that new allocations are attached to */
  int lvl;
} text_buffer_t;
2016-08-16 05:42:49 +01:00
/*
 * Open a new text-stack level.
 *
 * @return the level that was current before the call; new allocations are
 *         attached to the level above it.
 */
int push_text_stack(USES_REGS1) {
  int previous = LOCAL_TextBuffer->lvl;

  LOCAL_TextBuffer->lvl = previous + 1;
  return previous;
}
/*
 * Close every text-stack level above i and release the blocks attached to
 * them, then make i the current level.
 *
 * i is the value returned by the matching push_text_stack(), i.e. the level
 * that was current before the push. Blocks attached to level i itself belong
 * to the caller and are kept: the previous `lvl >= i` test freed them too,
 * unlike protected_pop_text_stack(), which left e.g. the input buffer of
 * write_number() dangling while write_Text() still read from it.
 *
 * @return the lower of i and the level that was current on entry.
 */
int pop_text_stack(int i) {
  int lvl = LOCAL_TextBuffer->lvl;

  while (lvl > i) {
    struct mblock *p = LOCAL_TextBuffer->first[lvl];
    while (p) {
      struct mblock *np = p->next;
      free(p);
      p = np;
    }
    LOCAL_TextBuffer->first[lvl] = NULL;
    LOCAL_TextBuffer->last[lvl] = NULL;
    lvl--;
  }
  LOCAL_TextBuffer->lvl = i;
  return lvl;
}
2016-12-10 07:04:37 +00:00
/*
 * Close every text-stack level above i, like pop_text_stack(), but keep the
 * contents of one buffer alive.
 *
 * @param i          level to return to
 * @param protected  payload pointer (as returned by Malloc()) to preserve
 * @param tmp        if true the sz bytes are copied into LOCAL_FileNameBuf and
 *                   the block is released; otherwise the payload is moved to
 *                   the start of its own allocation, which is detached from
 *                   the level lists and returned
 * @param sz         number of bytes to preserve
 *
 * @return where the preserved bytes now live, or `protected` unchanged if it
 *         was not found among the released levels.
 *
 * NOTE(review): sz is not checked against the capacity of LOCAL_FileNameBuf,
 * which is not visible from this file.
 */
void *protected_pop_text_stack(int i, void *protected, bool tmp,
                               size_t sz USES_REGS) {
  void *out = protected;
  int lvl = LOCAL_TextBuffer->lvl;

  while (lvl > i) {
    struct mblock *p = LOCAL_TextBuffer->first[lvl];
    while (p) {
      struct mblock *np = p->next;
      if (p + 1 == protected) {
        if (tmp) {
          out = LOCAL_FileNameBuf;
          memcpy(out, protected, sz);
          /* the data now lives elsewhere, so the block would otherwise leak */
          free(p);
        } else {
          out = p;
          /* source and destination overlap: the payload slides down over
             its own header */
          memmove(out, protected, sz);
        }
      } else {
        free(p);
      }
      p = np;
    }
    LOCAL_TextBuffer->first[lvl] = NULL;
    LOCAL_TextBuffer->last[lvl] = NULL;
    lvl--;
  }
  LOCAL_TextBuffer->lvl = lvl;
  return out;
}
// void pop_text_stack(int i) { LOCAL_TextBuffer->lvl = i; }
/*
 * Allocate a temporary buffer attached to the current text-stack level.
 *
 * The block is appended to the level's list so that the pop routines can
 * release it; the caller receives the address just past the block header.
 *
 * @param sz  payload size in bytes; 0 requests the default of 1024 bytes
 * @return the payload, or NULL if memory is exhausted or the current level
 *         does not fit in the per-level tables.
 */
void *Malloc(size_t sz USES_REGS) {
  int lvl = LOCAL_TextBuffer->lvl;
  const int nlevels = (int)(sizeof(LOCAL_TextBuffer->first) /
                            sizeof(LOCAL_TextBuffer->first[0]));
  struct mblock *o;

  /* first[]/last[] are fixed-size tables indexed by level */
  if (lvl < 0 || lvl >= nlevels)
    return NULL;
  if (sz == 0)
    sz = 1024;
  sz = ALIGN_BY_TYPE(sz + sizeof(struct mblock), CELL);
  o = malloc(sz);
  if (!o)
    return NULL;
  o->prev = LOCAL_TextBuffer->last[lvl];
  o->next = NULL;
  o->sz = sz;
  o->lvl = lvl;
  if (o->prev) {
    o->prev->next = o;
  }
  if (LOCAL_TextBuffer->first[lvl]) {
    LOCAL_TextBuffer->last[lvl] = o;
  } else {
    LOCAL_TextBuffer->first[lvl] = LOCAL_TextBuffer->last[lvl] = o;
  }
  return o + 1;
}
/*
 * Resize a buffer obtained from Malloc(), keeping it on its level list.
 *
 * @param pt  payload pointer previously returned by Malloc()/Realloc()
 * @param sz  new payload size in bytes
 * @return the (possibly moved) payload, or NULL if the block could not be
 *         resized, in which case the old buffer is left untouched and linked.
 */
void *Realloc(void *pt, size_t sz USES_REGS) {
  struct mblock *old = (struct mblock *)pt - 1;
  int lvl = old->lvl;
  /* decide this before realloc(): afterwards `old` may be a stale address */
  bool was_first = (LOCAL_TextBuffer->first[lvl] == old);
  bool was_last = (LOCAL_TextBuffer->last[lvl] == old);
  struct mblock *o;

  sz = ALIGN_BY_TYPE(sz + sizeof(struct mblock), CELL);
  o = realloc(old, sz);
  if (!o)
    return NULL;
  o->sz = sz;
  /* the block may have moved: repair the links that point at it */
  if (o->prev)
    o->prev->next = o;
  if (o->next)
    o->next->prev = o;
  if (was_first) {
    LOCAL_TextBuffer->first[lvl] = o;
  }
  if (was_last) {
    LOCAL_TextBuffer->last[lvl] = o;
  }
  return o + 1;
}
/*
 * Release a single buffer obtained from Malloc()/Realloc() before its level
 * is popped, unlinking it from the level list.
 *
 * @param pt  payload pointer; NULL is accepted and ignored, like free()
 */
void Free(void *pt USES_REGS) {
  struct mblock *o;
  int lvl;

  if (!pt)
    return;
  o = (struct mblock *)pt - 1;
  lvl = o->lvl;
  if (o->prev)
    o->prev->next = o->next;
  if (o->next)
    o->next->prev = o->prev;
  if (LOCAL_TextBuffer->first[lvl] == o) {
    LOCAL_TextBuffer->first[lvl] = o->next;
  }
  if (LOCAL_TextBuffer->last[lvl] == o) {
    LOCAL_TextBuffer->last[lvl] = o->prev;
  }
  free(o);
}
2016-10-20 04:38:17 +01:00
/*
 * Create the text-buffer manager with every field zeroed.
 *
 * @return the new manager, or NULL if calloc() fails.
 */
void *Yap_InitTextAllocator(void) {
  struct TextBuffer_manager *manager = calloc(1, sizeof(*manager));

  return manager;
}
2016-10-20 04:38:17 +01:00
static size_t MaxTmp(USES_REGS1) {
2016-10-20 04:38:17 +01:00
return ((char *)LOCAL_TextBuffer->buf + LOCAL_TextBuffer->sz) -
(char *)LOCAL_TextBuffer->ptr;
}
/*
 * Dereference v; if the result is an unbound variable whose cell address
 * lies strictly between HR and LCL0 (presumably the local stack), bind it to
 * a fresh variable created with MkVarTerm() and return the dereferenced
 * result.
 *
 * Used before storing a difference-list tail into a heap cell.
 */
static Term Globalize(Term v USES_REGS) {
  v = Deref(v);
  if (!IsVarTerm(v)) {
    return v;
  }
  if (VarOfTerm(v) > HR && VarOfTerm(v) < LCL0) {
    Bind_Local(VarOfTerm(v), MkVarTerm());
    v = Deref(v);
  }
  return v;
}
/*
 * Walk a Prolog list of character codes and/or single-character atoms and
 * write its text, UTF-8 encoded and NUL terminated, into a buffer.
 *
 * @param bufp   in: destination buffer or NULL to have one allocated with
 *               Malloc(); out: the buffer actually used
 * @param l      address of the list term
 * @param tailp  out: the cell where the walk stopped (offending cell on error)
 * @param atoms  in: 0 accept codes or atoms (the first element decides),
 *               1 codes only, 2 atoms only; out (on success, non-empty list):
 *               1 or 2 according to what the list contained
 * @param wide   out: set when some character is above 0x80
 * @param inp    unused
 *
 * @return the number of elements, or minus a yap_error_number on failure.
 *
 * The buffer grows on demand when it was allocated here.
 * NOTE(review): a caller-supplied buffer carries no capacity, so writes into
 * it remain unchecked; every caller visible in this file passes NULL.
 */
static Int SkipListCodes(unsigned char **bufp, Term *l, Term **tailp,
                         Int *atoms, bool *wide, seq_tv_t *inp USES_REGS) {
  enum { ELEMS_ANY = 0, ELEMS_CODES = 1, ELEMS_ATOMS = 2 };
  Int length = 0;
  Int kind = *atoms;
  Term *s; /* first cell, to notice a list that loops back to its start */
  Term v;  /* scratch for the dereference macros */
  unsigned char *st0 = *bufp, *st;
  size_t room = 0; /* payload capacity when the buffer is ours, else 0 */

  *wide = false;
  if (!st0) {
    room = 1024;
    st0 = Malloc(room);
    if (!st0) {
      return -RESOURCE_ERROR_HEAP;
    }
  }

  do_derefa(v, l, derefa_unk, derefa_nonvar);
  *tailp = l;
  s = l;

  *bufp = st = st0;

  if (*l == TermNil) {
    st[0] = '\0';
    return 0;
  }
  if (IsPairTerm(*l)) {
    Term hd0 = HeadOfTerm(*l);
    if (IsVarTerm(hd0)) {
      return -INSTANTIATION_ERROR;
    }
    /* when the caller asked for one kind, the first element must match */
    if (kind == ELEMS_CODES) {
      if (!IsIntegerTerm(hd0)) {
        return -TYPE_ERROR_INTEGER;
      }
    } else if (kind == ELEMS_ATOMS) {
      if (!IsAtomTerm(hd0)) {
        return -TYPE_ERROR_ATOM;
      }
    }

    do {
      int32_t ch = 0;
      Term hd = Deref(RepPair(*l)[0]);

      length++;
      if (IsVarTerm(hd)) {
        return -INSTANTIATION_ERROR;
      }
      if (IsAtomTerm(hd)) {
        const unsigned char *name = RepAtom(AtomOfTerm(hd))->UStrOfAE;
        int used;

        if (kind == ELEMS_CODES) {
          *tailp = l;
          return -REPRESENTATION_ERROR_CHARACTER_CODE;
        }
        kind = ELEMS_ATOMS;
        /* the atom must spell exactly one character, which may take several
           bytes in UTF-8 */
        used = name[0] ? (int)get_utf8(name, -1, &ch) : 0;
        if (used <= 0 || name[used] != '\0') {
          *tailp = l;
          return -REPRESENTATION_ERROR_CHARACTER;
        }
      } else if (IsIntegerTerm(hd)) {
        Int code = IntegerOfTerm(hd);

        if (kind == ELEMS_ATOMS) {
          *tailp = l;
          return -REPRESENTATION_ERROR_CHARACTER;
        }
        if (code < 0 || code > 0x10FFFF) {
          *tailp = l;
          return -REPRESENTATION_ERROR_CHARACTER_CODE;
        }
        kind = ELEMS_CODES;
        ch = (int32_t)code;
      } else {
        *tailp = l;
        return -TYPE_ERROR_INTEGER;
      }
      *wide |= ch > 0x80;

      /* keep room for the longest encoding (4 bytes) plus the terminator */
      if (room && (size_t)(st - st0) + 5 > room) {
        size_t filled = st - st0;
        unsigned char *grown;

        room *= 2;
        grown = Realloc(st0, room);
        if (!grown) {
          return -RESOURCE_ERROR_HEAP;
        }
        st0 = grown;
        st = st0 + filled;
        *bufp = st0;
      }
      {
        int chsz = put_utf8(st, ch);
        if (chsz > 0) {
          st += chsz;
        }
      }
      l = RepPair(*l) + 1;
      do_derefa(v, l, derefa2_unk, derefa2_nonvar);
    } while (*l != *s && IsPairTerm(*l));
  }
  if (IsVarTerm(*l)) {
    return -INSTANTIATION_ERROR;
  }
  if (*l != TermNil) {
    return -TYPE_ERROR_LIST;
  }
  st[0] = '\0';
  *tailp = l;
  *atoms = kind;
  return length;
}
2016-08-30 14:27:32 +01:00
/*
 * Convert the NUL-terminated ISO Latin-1 string in inp->val to UTF-8.
 *
 * @param inp    source; inp->val.c / inp->val.uc is the Latin-1 text
 * @param lengp  if not NULL, receives the number of input characters
 * @return a buffer from Malloc() holding the UTF-8 text, or NULL if the
 *         allocation fails.
 */
static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
  const unsigned char *src = inp->val.uc;
  size_t len = strlen(inp->val.c);
  unsigned char *buf, *pt;
  int ch;

  if (lengp)
    *lengp = len;
  /* a Latin-1 character needs at most two bytes in UTF-8 */
  buf = Malloc(2 * len + 1);
  if (!buf)
    return NULL;
  pt = buf;
  while ((ch = *src++)) {
    int off = put_utf8(pt, ch);
    if (off < 0)
      continue;
    pt += off;
  }
  *pt = '\0';
  return buf;
}
/*
 * Convert the NUL-terminated wide string in inp->val.w to UTF-8.
 *
 * @param inp    source; inp->val.w is the wide text
 * @param lengp  if not NULL, receives the number of wide characters
 * @return a buffer from Malloc() holding the UTF-8 text, or NULL if the
 *         allocation fails. Characters that cannot be encoded are skipped.
 */
static unsigned char *wchar2utf8(seq_tv_t *inp, size_t *lengp) {
  const wchar_t *src = inp->val.w;
  size_t len = wcslen(src);
  unsigned char *buf, *pt;
  int ch;

  if (lengp)
    *lengp = len;
  /* four bytes is the longest UTF-8 encoding */
  buf = Malloc(len * 4 + 1);
  if (!buf)
    return NULL;
  pt = buf;
  while ((ch = *src++)) {
    int off = put_utf8(pt, ch);
    if (off < 0)
      continue; /* never step the output pointer backwards */
    pt += off;
  }
  *pt = '\0';
  return buf;
}
static void *slice(size_t min, size_t max, unsigned char *buf USES_REGS);

/*
 * Shared driver for the list-to-buffer converters: runs SkipListCodes() and
 * turns its result into the buffer / error-variable convention used by
 * Yap_readText().
 *
 * @param buf    destination buffer, or NULL to have one allocated
 * @param t      the list term
 * @param inp    descriptor of the input; its `max` is overwritten with *lenp
 *               when no buffer was supplied
 * @param widep  out: see SkipListCodes()
 * @param atoms  in/out: element kind selector, see SkipListCodes()
 * @param lenp   if not NULL, receives the number of list elements
 *
 * @return the filled buffer, or NULL with LOCAL_Error_TYPE set.
 *
 * NOTE(review): *lenp is read before anything in this file has written it;
 * the copy into inp->max is kept only for compatibility.
 */
static unsigned char *to_buffer(unsigned char *buf, Term t, seq_tv_t *inp,
                                bool *widep, Int *atoms,
                                size_t *lenp USES_REGS) {
  CELL *r = NULL;
  unsigned char *bufc = buf;
  Int n;

  if (!buf && lenp) {
    inp->max = *lenp;
  }
  n = SkipListCodes(&bufc, &t, &r, atoms, widep, inp PASS_REGS);
  if (n < 0) {
    LOCAL_Error_TYPE = -n;
    return NULL;
  }
  if (lenp)
    *lenp = n;
  return bufc;
}
/*
 * Convert a list that must consist of integer character codes into a UTF-8
 * buffer. Parameters and result are those of to_buffer().
 */
static unsigned char *Yap_ListOfCodesToBuffer(unsigned char *buf, Term t,
                                              seq_tv_t *inp, bool *widep,
                                              size_t *lenp USES_REGS) {
  Int atoms = 1; // selector value that makes SkipListCodes insist on integers

  return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS);
}
/*
 * Convert a list that must consist of single-character atoms into a UTF-8
 * buffer. Parameters and result are those of to_buffer().
 */
static unsigned char *Yap_ListOfAtomsToBuffer(unsigned char *buf, Term t,
                                              seq_tv_t *inp, bool *widep,
                                              size_t *lenp USES_REGS) {
  Int atoms = 2; // selector value that makes SkipListCodes insist on atoms

  return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS);
}
/*
 * Convert a list of character codes or of single-character atoms into a
 * UTF-8 buffer. Parameters and result are those of to_buffer().
 */
static unsigned char *Yap_ListToBuffer(unsigned char *buf, Term t,
                                       seq_tv_t *inp, bool *widep,
                                       size_t *lenp USES_REGS) {
  Int atoms = 0; // no constraint: both kinds of list are accepted

  return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS);
}
2015-09-21 23:05:36 +01:00
#if USE_GEN_TYPE_ERROR
/*
 * Pick the type error that best describes a set of accepted YAP_STRING_*
 * input kinds, going from the widest family (any text) to the narrowest.
 *
 * @param flags  bitwise OR of YAP_STRING_* kinds the caller would accept
 * @return the matching TYPE_ERROR_* code; TYPE_ERROR_NUMBER when nothing
 *         more specific applies.
 */
static yap_error_number gen_type_error(int flags) {
  const int number_kinds = YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG;
  const int atomic_kinds = YAP_STRING_STRING | YAP_STRING_ATOM | number_kinds;
  const int text_kinds = atomic_kinds | YAP_STRING_ATOMS_CODES;

  if ((flags & text_kinds) == text_kinds)
    return TYPE_ERROR_TEXT;
  if ((flags & atomic_kinds) == atomic_kinds)
    return TYPE_ERROR_ATOMIC;
  if ((flags & number_kinds) == number_kinds)
    return TYPE_ERROR_NUMBER;
  if (flags & YAP_STRING_ATOM)
    return TYPE_ERROR_ATOM;
  if (flags & YAP_STRING_STRING)
    return TYPE_ERROR_STRING;
  if (flags & (YAP_STRING_CODES | YAP_STRING_ATOMS))
    return TYPE_ERROR_LIST;
  return TYPE_ERROR_NUMBER;
}
#endif
2013-12-06 15:08:35 +00:00
2017-02-20 14:40:24 +00:00
// static int cnt;
/*
 * Produce the UTF-8 text of an input descriptor.
 *
 * inp->type says which representations are acceptable (atom, string, list
 * of codes/atoms, number, arbitrary term, C string, wide C string) and
 * inp->val holds the value. The branches below are tried in order and the
 * first one that matches both the flags and the actual value wins.
 *
 * @param inp    input descriptor; its type may gain YAP_STRING_IN_TMP, and
 *               for big integers and generic terms its val is overwritten
 *               with the text buffer
 * @param lengp  if not NULL, receives the length reported by the branch
 *               taken (bytes for atoms, strings and C strings; elements for
 *               lists and wide strings)
 *
 * @return the text, or NULL on failure; type problems are reported through
 *         LOCAL_Error_TYPE. Unless YAP_STRING_WITH_BUFFER is set or the
 *         input already is an ASCII/UTF-8 C string, the text lives in a
 *         buffer from Malloc().
 */
unsigned char *Yap_readText(seq_tv_t *inp, size_t *lengp) {
  unsigned char *s0 = NULL;
  bool wide;
  size_t unused_length;

  /* the helpers below store through lengp, so always give them a target */
  if (!lengp)
    lengp = &unused_length;

  if (LOCAL_Error_TYPE != YAP_NO_ERROR) {
    fprintf(stderr, "Sourious error %u\n", LOCAL_Error_TYPE);
    LOCAL_Error_TYPE = YAP_NO_ERROR;
  }
  /* when exactly one term kind is requested, check the term up front */
  if (!(inp->type & (YAP_STRING_CHARS | YAP_STRING_WCHARS))) {
    if (!(inp->type & YAP_STRING_TERM)) {
      if (IsVarTerm(inp->val.t)) {
        LOCAL_Error_TYPE = INSTANTIATION_ERROR;
      } else if (!IsAtomTerm(inp->val.t) && inp->type == YAP_STRING_ATOM) {
        LOCAL_Error_TYPE = TYPE_ERROR_ATOM;
      } else if (!IsStringTerm(inp->val.t) && inp->type == YAP_STRING_STRING) {
        LOCAL_Error_TYPE = TYPE_ERROR_STRING;
      } else if (!IsPairOrNilTerm(inp->val.t) && !IsStringTerm(inp->val.t) &&
                 inp->type == (YAP_STRING_ATOMS_CODES | YAP_STRING_STRING)) {
        LOCAL_Error_TYPE = TYPE_ERROR_LIST;
      } else if (!IsNumTerm(inp->val.t) &&
                 (inp->type & (YAP_STRING_INT | YAP_STRING_FLOAT |
                               YAP_STRING_BIG)) == inp->type) {
        LOCAL_Error_TYPE = TYPE_ERROR_NUMBER;
      }
    }
  }
  if (LOCAL_Error_TYPE != YAP_NO_ERROR)
    return NULL;

  if (IsAtomTerm(inp->val.t) && inp->type & YAP_STRING_ATOM) {
    Atom at = AtomOfTerm(inp->val.t);
    size_t sz = strlen(at->StrOfAE);
    char *o;

    *lengp = sz;
    /* the caller promised not to modify the text: hand out the atom's own */
    if (inp->type & YAP_STRING_WITH_BUFFER)
      return at->UStrOfAE;
    inp->type |= YAP_STRING_IN_TMP;
    o = Malloc(sz + 1);
    if (!o)
      return NULL;
    memcpy(o, at->StrOfAE, sz + 1);
    return (unsigned char *)o;
  }
  if (IsStringTerm(inp->val.t) && inp->type & YAP_STRING_STRING) {
    const char *s = StringOfTerm(inp->val.t);
    size_t sz = strlen(s);
    char *o;

    *lengp = sz;
    if (inp->type & YAP_STRING_WITH_BUFFER)
      return UStringOfTerm(inp->val.t);
    inp->type |= YAP_STRING_IN_TMP;
    o = Malloc(sz + 1);
    if (!o)
      return NULL;
    memcpy(o, s, sz + 1);
    return (unsigned char *)o;
  }
  if (((inp->type & (YAP_STRING_CODES | YAP_STRING_ATOMS)) ==
       (YAP_STRING_CODES | YAP_STRING_ATOMS)) &&
      IsPairOrNilTerm(inp->val.t)) {
    /* either kind of list is acceptable */
    return Yap_ListToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS);
  }
  if (inp->type & YAP_STRING_CODES && IsPairOrNilTerm(inp->val.t)) {
    return Yap_ListOfCodesToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS);
  }
  if (inp->type & YAP_STRING_ATOMS && IsPairOrNilTerm(inp->val.t)) {
    return Yap_ListOfAtomsToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS);
  }
  if (inp->type & YAP_STRING_INT && IsIntegerTerm(inp->val.t)) {
    /* digits and sign only, so the result is ASCII */
    enum { INT_TEXT_SIZE = 256 };
    char *s = Malloc(INT_TEXT_SIZE);
    int written;

    if (!s)
      return NULL;
    written = snprintf(s, INT_TEXT_SIZE, Int_FORMAT, IntegerOfTerm(inp->val.t));
    if (written < 0 || written >= INT_TEXT_SIZE) {
      AUX_ERROR(inp->val.t, 2 * MaxTmp(PASS_REGS1), s, char);
      return NULL;
    }
    *lengp = strlen(s);
    return (unsigned char *)s;
  }
  if (inp->type & YAP_STRING_FLOAT && IsFloatTerm(inp->val.t)) {
    char *s;

    if (!Yap_FormatFloat(FloatOfTerm(inp->val.t), &s, 1024)) {
      return NULL;
    }
    *lengp = strlen(s);
    return (unsigned char *)s;
  }
#if USE_GMP
  if (inp->type & YAP_STRING_BIG && IsBigIntTerm(inp->val.t)) {
    size_t room = 1024;
    char *s = Malloc(room);

    /* NOTE(review): assumes Yap_mpz_to_string() fails only because the
       buffer is too small - confirm against its definition */
    while (s && !Yap_mpz_to_string(Yap_BigIntOfTerm(inp->val.t), s, room, 10)) {
      if (room > (size_t)-1 / 2)
        return NULL;
      room *= 2;
      s = Realloc(s, room);
    }
    if (!s)
      return NULL;
    *lengp = strlen(s);
    return inp->val.uc = (unsigned char *)s;
  }
#endif
  if (inp->type & YAP_STRING_TERM) {
    char *s = (char *)Yap_TermToString(inp->val.t, lengp, ENC_ISO_UTF8, 0);
    return inp->val.uc = (unsigned char *)s;
  }
  if (inp->type & YAP_STRING_CHARS) {
    if (inp->enc == ENC_ISO_LATIN1) {
      return latin2utf8(inp, lengp);
    }
    /* ASCII and every other encoding are returned as they are, the latter
       on the assumption that the text already is UTF-8 */
    *lengp = strlen(inp->val.c);
    return inp->val.uc;
  }
  if (inp->type & YAP_STRING_WCHARS) {
    return wchar2utf8(inp, lengp);
  }
  return NULL;
}
/*
 * Build a Prolog string term from UTF-8 text.
 *
 * @param s0    NUL-terminated UTF-8 text; it is shortened in place when
 *              YAP_STRING_TRUNC asks for fewer characters than it holds
 * @param out   output descriptor: with YAP_STRING_TRUNC out->max is the
 *              largest number of characters to keep, with YAP_STRING_NCHARS
 *              it is the smallest size in bytes of the string area (the
 *              remainder is zero filled)
 * @param leng  length of the text as computed by the caller
 *
 * @return the new term, also stored in out->val.t.
 */
static Term write_strings(unsigned char *s0, seq_tv_t *out,
                          size_t leng USES_REGS) {
  size_t min = 0, bytes;
  unsigned char *buf;
  Term t;

  if (out->type & YAP_STRING_NCHARS)
    min = out->max;
  if ((out->type & YAP_STRING_TRUNC) && out->max < leng &&
      out->max < (size_t)strlen_utf8(s0)) {
    /* cut on a character boundary, not in the middle of a sequence */
    const unsigned char *cut = skip_utf8(s0, out->max);
    s0[cut - s0] = '\0';
  }
  bytes = strlen((char *)s0);

  t = init_tstring(PASS_REGS1);
  LOCAL_TERM_ERROR(t, 2 * bytes);
  buf = buf_from_tstring(HR);
  memcpy(buf, s0, bytes + 1);
  if (bytes + 1 < min) {
    LOCAL_TERM_ERROR(t, 2 * min);
    /* pad with NULs from the end of the text up to the requested size */
    memset(buf + bytes, '\0', min - bytes);
    buf += min;
  } else {
    buf += bytes + 1;
  }
  close_tstring(buf PASS_REGS);
  out->val.t = t;
  return out->val.t;
}
2016-08-30 14:27:32 +01:00
/*
 * Build a list of single-character atoms from UTF-8 text.
 *
 * @param s0    NUL-terminated UTF-8 text
 * @param out   output descriptor; YAP_STRING_TRUNC limits the list to
 *              out->max elements, YAP_STRING_DIFF makes out->dif the tail of
 *              the list instead of []
 * @param leng  largest number of characters to convert
 *
 * @return the list, also stored in out->val.t. An empty result is [] or,
 *         for a difference list, out->dif itself. Conversion stops early at
 *         a byte sequence that cannot be decoded.
 */
static Term write_atoms(void *s0, seq_tv_t *out, size_t leng USES_REGS) {
  Term t = AbsPair(HR);
  size_t sz = 0;
  size_t max = leng;
  unsigned char *cp = s0;
  unsigned char w[8]; /* one encoded character plus terminator */

  if ((out->type & YAP_STRING_TRUNC) && out->max < max)
    max = out->max;

  /* two cells per element; the byte count bounds the element count */
  LOCAL_TERM_ERROR(t, 2 * strnlen((char *)cp, max));
  while (sz < max && *cp) {
    utf8proc_int32_t chr;
    CELL *cl;
    int used = get_utf8(cp, -1, &chr);
    int wlen;

    if (used <= 0 || chr == '\0')
      break;
    cp += used;
    wlen = put_utf8(w, chr);
    if (wlen <= 0)
      break;
    w[wlen] = '\0';
    cl = HR;
    HR += 2;
    cl[0] = MkAtomTerm(Yap_ULookupAtom(w));
    cl[1] = AbsPair(HR);
    sz++;
  }
  if (out->type & YAP_STRING_DIFF) {
    if (sz == 0)
      t = out->dif;
    else
      HR[-1] = Globalize(out->dif PASS_REGS);
  } else {
    if (sz == 0)
      t = TermNil;
    else
      HR[-1] = TermNil;
  }
  out->val.t = t;
  return (t);
}
2016-08-30 14:27:32 +01:00
/*
 * Build a list of character codes from UTF-8 text.
 *
 * @param s0    NUL-terminated UTF-8 text
 * @param out   output descriptor; YAP_STRING_TRUNC limits the list to
 *              out->max elements, YAP_STRING_DIFF makes out->dif the tail of
 *              the list instead of []
 * @param leng  largest number of characters to convert
 *
 * @return the list, also stored in out->val.t. An empty result is [] or,
 *         for a difference list, out->dif itself. Conversion stops early at
 *         a byte sequence that cannot be decoded.
 */
static Term write_codes(void *s0, seq_tv_t *out, size_t leng USES_REGS) {
  Term t = AbsPair(HR);
  size_t sz = 0;
  size_t max = leng;
  unsigned char *cp = s0;

  if ((out->type & YAP_STRING_TRUNC) && out->max < max)
    max = out->max;

  /* two cells per element; the byte count bounds the element count */
  LOCAL_TERM_ERROR(t, 2 * strlen((char *)cp));
  while (sz < max && *cp) {
    utf8proc_int32_t chr;
    CELL *cl;
    int used = get_utf8(cp, -1, &chr);

    if (used <= 0 || chr == '\0')
      break;
    cp += used;
    cl = HR;
    HR += 2;
    cl[0] = MkIntegerTerm(chr);
    cl[1] = AbsPair(HR);
    sz++;
  }
  if (out->type & YAP_STRING_DIFF) {
    if (sz == 0)
      t = out->dif;
    else
      HR[-1] = Globalize(out->dif PASS_REGS);
  } else {
    if (sz == 0)
      t = TermNil;
    else
      HR[-1] = TermNil;
  }
  out->val.t = t;
  return (t);
}
2016-08-30 14:27:32 +01:00
/*
 * Look up (creating it if needed) the atom named by UTF-8 text.
 *
 * @param s0    NUL-terminated UTF-8 text
 * @param out   unused
 * @param leng  largest number of characters of the name; 0 yields the empty
 *              atom, and longer text is cut after leng characters
 *
 * @return the atom, or NULL if the temporary copy cannot be allocated.
 */
static Atom write_atom(void *s0, seq_tv_t *out, size_t leng USES_REGS) {
  unsigned char *s = s0;
  unsigned char *buf;
  size_t n;

  if (leng == 0) {
    return Yap_LookupAtom("");
  }
  if (strlen_utf8(s0) <= leng) {
    return Yap_LookupAtom(s0);
  }
  /* copy the first leng characters and terminate the copy */
  n = skip_utf8(s, leng) - s;
  buf = Malloc(n + 1);
  if (!buf)
    return NULL;
  memcpy(buf, s, n);
  buf[n] = '\0';
  return Yap_ULookupAtom(buf);
}
2016-08-30 14:27:32 +01:00
/*
 * Copy UTF-8 text into a C buffer in the encoding requested by out->enc.
 *
 * - ENC_ISO_UTF8: the whole text is copied; when out->val.uc is NULL a
 *   buffer of at least 16 bytes is obtained with malloc() and stored there.
 * - ENC_ISO_LATIN1 / ENC_WCHAR: out->val must already point to a buffer; at
 *   most leng characters are converted.
 *
 * @param s0    NUL-terminated UTF-8 text
 * @param out   output descriptor (encoding and destination)
 * @param leng  largest number of characters to convert (ignored for UTF-8)
 *
 * @return the number of elements written including the terminator, or
 *         (size_t)-1 if the destination is missing, the encoding is not
 *         supported, or a character cannot be represented.
 *
 * NOTE(review): the capacity of a caller-supplied destination is not known
 * here, so none of the copies is bounded by it.
 */
size_t write_buffer(unsigned char *s0, seq_tv_t *out, size_t leng USES_REGS) {
  size_t room_end;

  if (out->enc == ENC_ISO_UTF8) {
    room_end = strlen((char *)s0) + 1;
    if (out->val.uc == NULL) { // this should always be the case
      out->val.uc = malloc(room_end < 16 ? 16 : room_end);
      if (out->val.uc == NULL)
        return -1;
    }
    if (out->val.uc != s0) {
      strcpy(out->val.c, (char *)s0);
    }
  } else if (out->enc == ENC_ISO_LATIN1) {
    unsigned char *cp = s0;
    unsigned char *buf = out->val.uc;
    size_t copied = 0;

    if (!buf)
      return -1;
    while (*cp && copied < leng) {
      utf8proc_int32_t chr;
      int off = get_utf8(cp, -1, &chr);
      if (off <= 0 || chr > 255)
        return -1;
      cp += off;
      *buf++ = chr;
      copied++;
    }
    *buf++ = '\0';
    room_end = buf - out->val.uc;
  } else if (out->enc == ENC_WCHAR) {
    unsigned char *cp = s0;
    wchar_t *buf0 = out->val.w, *buf = buf0;
    size_t copied = 0;

    if (!buf)
      return -1;
    while (*cp && copied < leng) {
      utf8proc_int32_t chr;
      int off = get_utf8(cp, -1, &chr);
      if (off <= 0)
        return -1;
      cp += off;
      *buf++ = chr;
      copied++;
    }
    *buf++ = '\0';
    room_end = buf - buf0;
  } else {
    // no other encodings are supported.
    room_end = -1;
  }
  return room_end;
}
/*
 * "Conversion" used when only the length of the text is wanted: the length
 * computed by the reader is handed back unchanged.
 *
 * @param s0    unused
 * @param out   unused
 * @param leng  length of the text
 * @return leng
 */
static size_t write_length(const unsigned char *s0, seq_tv_t *out,
                           size_t leng USES_REGS) {
  return leng;
}
2016-12-04 18:52:42 +00:00
/*
 * Parse text as a Prolog number.
 *
 * Temporary buffers created while parsing are released before returning by
 * bracketing the call with push_text_stack()/pop_text_stack().
 *
 * @param s         NUL-terminated text
 * @param out       output descriptor; its enc field is handed to the parser
 * @param size      unused
 * @param error_on  forwarded to Yap_StringToNumberTerm()
 *
 * @return whatever Yap_StringToNumberTerm() returns; callers in this file
 *         treat 0 as failure.
 */
static Term write_number(unsigned char *s, seq_tv_t *out, int size,
                         bool error_on USES_REGS) {
  int saved_level = push_text_stack();
  Term t = Yap_StringToNumberTerm((char *)s, &out->enc, error_on);

  pop_text_stack(saved_level);
  return t;
}
/*
 * Parse text as a Prolog term with Yap_BufferToTerm().
 *
 * @param s     NUL-terminated text
 * @param out   output descriptor; out->val.t receives the result
 * @param leng  unused; the length passed on is strlen(s) + 1
 *
 * @return the result of Yap_BufferToTerm(); callers in this file treat 0 as
 *         failure.
 */
static Term string_to_term(void *s, seq_tv_t *out, size_t leng USES_REGS) {
  out->val.t = Yap_BufferToTerm(s, strlen(s) + 1, TermNil);
  return out->val.t;
}
/*
 * Store UTF-8 text into an output descriptor in the representation selected
 * by out->type.
 *
 * Order of attempts: a generic term (YAP_STRING_TERM), a number, an atom,
 * then the single representation selected by out->type & YAP_TYPE_MASK.
 * A failed term parse falls through to the later attempts; a failed number
 * parse does so only when an atom is acceptable as well.
 *
 * @param inp   NUL-terminated UTF-8 text
 * @param out   output descriptor; the result is stored in out->val
 * @param leng  length of the text
 *
 * @return true on success (also when out->type is 0 or names no known
 *         representation), false otherwise.
 */
bool write_Text(unsigned char *inp, seq_tv_t *out, size_t leng USES_REGS) {
  if (out->type == 0) {
    return true;
  }

  if (out->type & YAP_STRING_TERM) {
    if ((out->val.t = string_to_term(inp, out, leng PASS_REGS)) != 0L)
      return true;
  }
  if (out->type & (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG)) {
    /* stay quiet about syntax errors when an atom is acceptable too */
    bool report_errors = !(out->type & YAP_STRING_ATOM);

    if ((out->val.t = write_number(inp, out, leng, report_errors PASS_REGS)) !=
        0L) {
      return true;
    }
    if (report_errors)
      return false;
  }
  if (out->type & (YAP_STRING_ATOM)) {
    if ((out->val.a = write_atom(inp, out, leng PASS_REGS)) != NIL) {
      Atom at = out->val.a;
      if (out->type & YAP_STRING_OUTPUT_TERM)
        out->val.t = MkAtomTerm(at);
      return true;
    }
  }

  switch (out->type & YAP_TYPE_MASK) {
  case YAP_STRING_CHARS:
  case YAP_STRING_WCHARS: {
    size_t room = write_buffer(inp, out, leng PASS_REGS);
    return ((Int)room > 0);
  }
  case YAP_STRING_STRING:
    out->val.t = write_strings(inp, out, leng PASS_REGS);
    return out->val.t != 0;
  case YAP_STRING_ATOMS:
    out->val.t = write_atoms(inp, out, leng PASS_REGS);
    return out->val.t != 0;
  case YAP_STRING_CODES:
    out->val.t = write_codes(inp, out, leng PASS_REGS);
    return out->val.t != 0;
  case YAP_STRING_LENGTH:
    out->val.l = write_length(inp, out, leng PASS_REGS);
    return out->val.l != (size_t)(-1);
  case YAP_STRING_ATOM:
    out->val.a = write_atom(inp, out, leng PASS_REGS);
    return out->val.a != NULL;
  case YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG:
    out->val.t = write_number(inp, out, leng, true PASS_REGS);
    return out->val.t != 0;
  default:
    return true;
  }
}
/*
 * Convert UTF-8 text to upper case in place.
 *
 * The text cannot grow or shrink, so a character is replaced only when its
 * upper-case form takes the same number of bytes; otherwise it is left as
 * it is. Bytes that do not decode are skipped.
 *
 * @param s0   NUL-terminated UTF-8 text, modified in place
 * @param out  unused
 * @return always true
 */
static size_t upcase(void *s0, seq_tv_t *out USES_REGS) {
  unsigned char *s = s0;

  while (*s) {
    utf8proc_int32_t chr;
    unsigned char mapped[8];
    int inlen = get_utf8(s, -1, &chr);
    int outlen;

    if (inlen <= 0) {
      s++;
      continue;
    }
    outlen = put_utf8(mapped, utf8proc_toupper(chr));
    if (outlen == inlen) {
      memcpy(s, mapped, outlen);
    }
    s += inlen;
  }
  return true;
}
2016-05-30 11:22:47 +01:00
/*
 * Convert UTF-8 text to lower case in place.
 *
 * The text cannot grow or shrink, so a character is replaced only when its
 * lower-case form takes the same number of bytes; otherwise it is left as
 * it is. Bytes that do not decode are skipped.
 *
 * @param s0   NUL-terminated UTF-8 text, modified in place
 * @param out  unused
 * @return always true
 */
static size_t downcase(void *s0, seq_tv_t *out USES_REGS) {
  unsigned char *s = s0;

  while (*s) {
    utf8proc_int32_t chr;
    unsigned char mapped[8];
    int inlen = get_utf8(s, -1, &chr);
    int outlen;

    if (inlen <= 0) {
      s++;
      continue;
    }
    outlen = put_utf8(mapped, utf8proc_tolower(chr));
    if (outlen == inlen) {
      memcpy(s, mapped, outlen);
    }
    s += inlen;
  }
  return true;
}
2016-08-07 15:46:43 +01:00
/*
 * Convert text from one representation to another.
 *
 * The input is first turned into UTF-8 by Yap_readText(), optionally cut to
 * out->max characters (YAP_STRING_NCHARS / YAP_STRING_TRUNC), optionally
 * case-converted (YAP_STRING_UPCASE / YAP_STRING_DOWNCASE), and finally
 * stored by write_Text().
 *
 * @param inp  input descriptor
 * @param out  output descriptor; receives the result in out->val
 * @return true on success, false if reading, allocating or writing fails.
 */
bool Yap_CVT_Text(seq_tv_t *inp, seq_tv_t *out USES_REGS) {
  unsigned char *buf;
  size_t leng = 0;

  buf = Yap_readText(inp, &leng PASS_REGS);
  if (!buf) {
    return false;
  }
  if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
    /* leng may count bytes, so also compare against the character count
       before skipping out->max characters */
    if (out->max < leng && out->max < (size_t)strlen_utf8(buf)) {
      const unsigned char *ptr = skip_utf8(buf, out->max);
      size_t diff = (ptr - buf);
      unsigned char *nbuf = Malloc(diff + 1);

      if (!nbuf) {
        return false;
      }
      memcpy(nbuf, buf, diff);
      nbuf[diff] = '\0';
      /* carry on with the shortened copy */
      buf = nbuf;
      leng = out->max;
    }
  }

  if (out->type & YAP_STRING_UPCASE) {
    if (!upcase(buf, out)) {
      return false;
    }
  }
  if (out->type & YAP_STRING_DOWNCASE) {
    if (!downcase(buf, out)) {
      return false;
    }
  }

  return write_Text(buf, out, leng PASS_REGS);
}
/*
 * Compare the first l characters of two UTF-8 strings.
 *
 * @param s1  first NUL-terminated UTF-8 text
 * @param s2  second NUL-terminated UTF-8 text
 * @param l   number of characters to compare
 *
 * @return 0 if the strings agree on their first l characters (or both end
 *         before that), otherwise the difference between the first pair of
 *         differing characters. A byte that does not decode is compared by
 *         its raw value.
 */
static int cmp_Text(const unsigned char *s1, const unsigned char *s2, int l) {
  const unsigned char *w1 = s1;
  const unsigned char *w2 = s2;
  int i;

  for (i = 0; i < l; i++) {
    utf8proc_int32_t chr1, chr2;
    int n2 = get_utf8(w2, -1, &chr2);
    int n1 = get_utf8(w1, -1, &chr1);

    if (n1 <= 0) {
      chr1 = *w1;
      n1 = 1;
    }
    if (n2 <= 0) {
      chr2 = *w2;
      n2 = 1;
    }
    if (chr1 != chr2)
      return chr1 - chr2;
    /* both strings ended together: do not walk past the terminators */
    if (chr1 == 0)
      return 0;
    w1 += n1;
    w2 += n2;
  }
  return 0;
}
2016-11-08 07:37:36 +00:00
/*
 * Concatenate n NUL-terminated strings into a fresh buffer from Malloc().
 *
 * @param n   number of strings
 * @param sv  the strings, in order
 * @return the NUL-terminated concatenation (an empty string when n is 0),
 *         or NULL if the allocation fails.
 */
static unsigned char *concat(int n, void *sv[] USES_REGS) {
  unsigned char *buf0, *pt;
  size_t room = 0;
  int i;

  for (i = 0; i < n; i++) {
    room += strlen((char *)sv[i]);
  }
  buf0 = Malloc(room + 1);
  if (!buf0)
    return NULL;
  pt = buf0;
  for (i = 0; i < n; i++) {
    size_t len = strlen((char *)sv[i]);
    memcpy(pt, sv[i], len);
    pt += len;
  }
  *pt = '\0';
  return buf0;
}
/*
 * Copy the characters with indices min .. max-1 of a UTF-8 string into a
 * fresh buffer from Malloc().
 *
 * @param min  index of the first character to copy
 * @param max  index one past the last character to copy
 * @param buf  NUL-terminated UTF-8 text
 * @return the NUL-terminated copy (empty when max <= min, shorter when the
 *         text ends first), or NULL if the allocation fails.
 *
 * NOTE(review): min is assumed not to exceed the number of characters in
 * buf; skip_utf8() is given no way to notice the terminator here.
 */
static void *slice(size_t min, size_t max, unsigned char *buf USES_REGS) {
  size_t nchars = max > min ? max - min : 0;
  /* four bytes is the longest UTF-8 encoding */
  unsigned char *nbuf = Malloc(nchars * 4 + 1);
  const unsigned char *ptr;
  unsigned char *nptr;

  if (!nbuf)
    return NULL;
  ptr = skip_utf8(buf, min);
  nptr = nbuf;
  while (nchars-- > 0 && *ptr) {
    utf8proc_int32_t chr;
    int used = get_utf8(ptr, -1, &chr);
    int put;

    if (used <= 0)
      break;
    ptr += used;
    put = put_utf8(nptr, chr);
    if (put > 0)
      nptr += put;
  }
  nptr[0] = '\0';
  return nbuf;
}
/*
 * Concatenate the text of tot inputs and store the result in out.
 * Out must be an atom or a string.
 *
 * Every input gets YAP_STRING_WITH_BUFFER added to its type, so atoms and
 * strings are read in place rather than copied.
 *
 * @param tot  number of inputs
 * @param inp  the inputs, in order
 * @param out  output descriptor
 * @return true on success, false if an input cannot be read, memory runs
 *         out, or the result cannot be written.
 */
bool Yap_Concat_Text(int tot, seq_tv_t inp[], seq_tv_t *out USES_REGS) {
  void **bufv;
  unsigned char *buf;
  int i;
  size_t leng;

  bufv = Malloc(tot * sizeof(unsigned char *));
  if (!bufv) {
    return false;
  }
  for (i = 0; i < tot; i++) {
    unsigned char *nbuf;

    inp[i].type |= YAP_STRING_WITH_BUFFER;
    nbuf = Yap_readText(inp + i, &leng PASS_REGS);
    if (!nbuf) {
      return false;
    }
    bufv[i] = nbuf;
  }
  buf = concat(tot, bufv PASS_REGS);
  if (!buf) {
    return false;
  }
  return write_Text(buf, out, strlen_utf8(buf) PASS_REGS);
}
2015-04-13 13:28:17 +01:00
/*
 * Split a text into pieces.
 *
 * With cuts == NULL exactly two outputs are expected and one of them is
 * already known: if outv[0] holds a value it must be a prefix of the input
 * and the remainder is written to outv[1]; otherwise outv[1] must be a
 * suffix and the part before it is written to outv[0].
 *
 * With cuts != NULL, piece i runs from character cuts[i-1] (0 for the first
 * piece) up to character cuts[i]; a zero entry after the first ends the
 * list early.
 *
 * All lengths are counted in characters.
 *
 * @param n     number of pieces
 * @param cuts  end positions of the pieces, or NULL
 * @param inp   the text to split
 * @param outv  output descriptors, one per piece
 * @return true on success; false if the input cannot be read, the known
 *         piece does not match, cuts is NULL with n != 2, or writing fails.
 */
bool Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp,
                     seq_tv_t outv[] USES_REGS) {
  unsigned char *buf;
  size_t l;
  int i;

  inp->type |= YAP_STRING_IN_TMP;
  buf = Yap_readText(inp, &l PASS_REGS);
  if (!buf) {
    return false;
  }
  /* the readers report bytes for some inputs and elements for others */
  l = strlen_utf8(buf);

  if (!cuts) {
    size_t l0, l1;
    unsigned char *buf0, *buf1;

    if (n != 2) {
      return false;
    }
    if (outv[0].val.t) {
      buf0 = Yap_readText(outv, &l0 PASS_REGS);
      if (!buf0) {
        return false;
      }
      l0 = strlen_utf8(buf0);
      if (l0 > l || cmp_Text(buf, buf0, l0) != 0) {
        return false;
      }
      l1 = l - l0;
      buf1 = slice(l0, l, buf PASS_REGS);
      if (!buf1) {
        return false;
      }
      return write_Text(buf1, outv + 1, l1 PASS_REGS);
    }
    buf1 = Yap_readText(outv + 1, &l1 PASS_REGS);
    if (!buf1) {
      return false;
    }
    l1 = strlen_utf8(buf1);
    /* a suffix longer than the text cannot match */
    if (l1 > l) {
      return false;
    }
    l0 = l - l1;
    if (cmp_Text(skip_utf8((const unsigned char *)buf, l0), buf1, l1) != 0) {
      return false;
    }
    buf0 = slice(0, l0, buf PASS_REGS);
    if (!buf0) {
      return false;
    }
    return write_Text(buf0, outv, l0 PASS_REGS);
  }

  for (i = 0; i < n; i++) {
    size_t start = (i == 0) ? 0 : cuts[i - 1];
    void *bufi;

    if (i > 0 && cuts[i] == 0)
      break;
    bufi = slice(start, cuts[i], buf PASS_REGS);
    if (!bufi) {
      return false;
    }
    if (!write_Text(bufi, outv + i, cuts[i] - start PASS_REGS)) {
      return false;
    }
  }

  return true;
}
2016-02-13 03:11:25 +00:00
/**
 * Function to convert a generic text term (string, atom, list of codes, list
 * of atoms) into a buffer.
 *
 * @param t   the term
 * @param buf the buffer; if NULL and the encoding is UTF-8 a buffer is
 *            malloced, and the user should reclaim it
 * @param len buffer size
 * @param enc encoding (UTF-8 is strongly recommended)
 *
 * @return the buffer, or NULL in case of failure. If so, Yap_Error may be
 *         called.
 *
 * NOTE(review): len is recorded in the output descriptor, but the writers
 * in this file do not use it to bound the copy into buf.
 */
const char *Yap_TextTermToText(Term t, char *buf, size_t len, encoding_t enc) {
  CACHE_REGS
  seq_tv_t inp, out;

  inp.val.t = t;
  inp.enc = ENC_ISO_UTF8;
  if (IsAtomTerm(t) && t != TermNil) {
    inp.type = YAP_STRING_ATOM;
  } else if (IsStringTerm(t)) {
    inp.type = YAP_STRING_STRING;
  } else if (IsPairOrNilTerm(t)) {
    inp.type = (YAP_STRING_CODES | YAP_STRING_ATOMS);
  } else {
    Yap_Error(TYPE_ERROR_TEXT, t, NULL);
    return NULL;
  }
  out.enc = enc;
  out.type = YAP_STRING_CHARS;
  out.val.c = buf;
  out.max = len;
  if (!Yap_CVT_Text(&inp, &out PASS_REGS))
    return NULL;
  return out.val.c;
}
2016-07-31 10:41:25 +01:00
/**
 * Convert from a predicate structure to an UTF-8 string of the form
 *
 * module:name/arity.
 *
 * Predicates without a module are shown under "prolog:". For predicates of
 * the internal database module keyed by a number, the text is module:key.
 *
 * The result is a 1024-byte buffer obtained with malloc(); the caller owns
 * it.
 *
 * @param ap the predicate
 *
 * @return the string, or NULL if memory runs out, the module name leaves no
 *         room for the separator, or a name cannot be converted.
 *
 * NOTE(review): Yap_AtomToUTF8Text() is not told how much room is left in
 * the buffer.
 */
const char *Yap_PredIndicatorToUTF8String(PredEntry *ap) {
  CACHE_REGS
  Atom at;
  arity_t arity = 0;
  Functor f;
  char *s, *smax, *s0;
  Term tmod;

  s = s0 = malloc(1024);
  if (!s0) {
    return NULL;
  }
  smax = s + 1024;
  tmod = ap->ModuleOfPred;
  if (tmod) {
    if (!Yap_AtomToUTF8Text(AtomOfTerm(tmod), s)) {
      goto fail;
    }
    s += strlen(s);
    if (smax - s <= 1) {
      goto fail;
    }
    *s++ = ':';
    *s = '\0';
  } else {
    if ((size_t)(smax - s) <= strlen("prolog:")) {
      goto fail;
    }
    strcpy(s, "prolog:");
    /* continue after the prefix instead of overwriting it */
    s += strlen(s);
  }
  // follows the actual functor
  if (ap->ModuleOfPred == IDB_MODULE) {
    if (ap->PredFlags & NumberDBPredFlag) {
      Int key = ap->src.IndxId;
      snprintf(s, smax - s, "%" PRIdPTR, (intptr_t)key);
      return s0;
    } else if (ap->PredFlags & AtomDBPredFlag) {
      at = (Atom)(ap->FunctorOfPred);
    } else {
      f = ap->FunctorOfPred;
      at = NameOfFunctor(f);
      arity = ArityOfFunctor(f);
    }
  } else {
    arity = ap->ArityOfPE;
    if (arity) {
      at = NameOfFunctor(ap->FunctorOfPred);
    } else {
      at = (Atom)(ap->FunctorOfPred);
    }
  }
  if (!Yap_AtomToUTF8Text(at, s)) {
    goto fail;
  }
  s += strlen(s);
  snprintf(s, smax - s, "/%" PRIdPTR, (intptr_t)arity);
  return s0;

fail:
  free(s0);
  return NULL;
}
/**
 * Convert from a text buffer (8-bit) to a term that has the same type as
 * _Tguide_
 *
 * An atom guide yields an atom, a string guide a string, a guide that is a
 * list starting with an atom yields a list of atoms, and anything else a
 * list of character codes. The encoding is only passed on when a list is
 * built.
 *
 * @param s the buffer
 * @param enc encoding of the buffer
 * @param tguide the guide
 *
 * @return the term
 */
Term Yap_MkTextTerm(const char *s, encoding_t enc, Term tguide) {
  CACHE_REGS
  if (IsAtomTerm(tguide))
    return MkAtomTerm(Yap_LookupAtom(s));
  if (IsStringTerm(tguide))
    return MkStringTerm(s);
  if (IsPairTerm(tguide) && IsAtomTerm(HeadOfTerm(tguide))) {
    return Yap_CharsToListOfAtoms(s, enc PASS_REGS);
  }
  return Yap_CharsToListOfCodes(s, enc PASS_REGS);
}