text handling/UNICODE

This commit is contained in:
Vitor Santos Costa 2017-09-21 09:47:21 +01:00
parent 4d1c031051
commit 70f4f7adcf
9 changed files with 1148 additions and 779 deletions

View File

@ -309,42 +309,40 @@ static Int char_code(USES_REGS1) {
*/
static Int name(USES_REGS1) { /* name(?Atomic,?String) */
Term t = Deref(ARG2), NewT, AtomNameT = Deref(ARG1);
Term t2 = Deref(ARG2), NewT, t1 = Deref(ARG1);
LOCAL_MAX_SIZE = 1024;
int l = push_text_stack();
restart_aux:
if (Yap_IsGroundTerm(AtomNameT)) {
if (!IsVarTerm(t) && !IsPairTerm(t) && t != TermNil) {
if (Yap_IsGroundTerm(t1)) {
if (!IsVarTerm(t2) && !IsPairTerm(t2) && t2 != TermNil) {
Yap_Error(TYPE_ERROR_LIST, ARG2, "name/2");
pop_text_stack(l);
ReleaseAndReturn(FALSE);
}
// verify if an atom, int, float or bignum
NewT = Yap_AtomicToListOfCodes(AtomNameT PASS_REGS);
NewT = Yap_AtomicToListOfCodes(t1 PASS_REGS);
if (NewT) {
pop_text_stack(l);
ReleaseAndReturn(Yap_unify(NewT, ARG2));
}
// else
} else if (IsVarTerm(t)) {
Yap_Error(INSTANTIATION_ERROR, t, "name/2");
} else if (IsVarTerm(t2)) {
Yap_Error(INSTANTIATION_ERROR, t2, "name/2");
pop_text_stack(l);
return FALSE;
return false;
} else {
Term at = Yap_ListToAtomic(t PASS_REGS);
Term at = Yap_ListToAtomic(t2 PASS_REGS);
if (at) {
pop_text_stack(l);
ReleaseAndReturn(Yap_unify(at, ARG1));
}
}
if (LOCAL_Error_TYPE && Yap_HandleError("atom/2")) {
AtomNameT = Deref(ARG1);
t = Deref(ARG2);
t1 = Deref(ARG1);
t2 = Deref(ARG2);
goto restart_aux;
}
pop_text_stack(l);
ReleaseAndReturn(FALSE);
ReleaseAndReturn(false);
}
static Int string_to_atomic(

View File

@ -2221,7 +2221,7 @@ X_API char *YAP_WriteBuffer(Term t, char *buf, size_t sze, int flags) {
BACKUP_MACHINE_REGS();
inp.val.t = t;
inp.type = YAP_STRING_TERM;
inp.type = YAP_STRING_TERM|YAP_STRING_DATUM;
out.type = YAP_STRING_CHARS;
out.val.c = buf;
out.max = sze - 1;

View File

@ -53,7 +53,8 @@ typedef struct TextBuffer_manager {
int push_text_stack(USES_REGS1) {
return LOCAL_TextBuffer->lvl++; }
return LOCAL_TextBuffer->lvl++;
}
int pop_text_stack(int i) {
int lvl = LOCAL_TextBuffer->lvl;
@ -385,7 +386,7 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
bool wide;
if (LOCAL_Error_TYPE != YAP_NO_ERROR) {
fprintf(stderr, "Sourious error %u\n", LOCAL_Error_TYPE);
fprintf(stderr, "Spurious error %u\n", LOCAL_Error_TYPE);
LOCAL_Error_TYPE = YAP_NO_ERROR;
}
/* we know what the term is */
@ -400,10 +401,9 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
} else if (!IsPairOrNilTerm(inp->val.t) && !IsStringTerm(inp->val.t) &&
inp->type == (YAP_STRING_ATOMS_CODES | YAP_STRING_STRING)) {
LOCAL_Error_TYPE = TYPE_ERROR_LIST;
} else if (!IsNumTerm(inp->val.t) &&
(inp->type & (YAP_STRING_INT | YAP_STRING_FLOAT |
YAP_STRING_BIG)) == inp->type) {
LOCAL_Error_TYPE = TYPE_ERROR_NUMBER;
} else if (!IsPairOrNilTerm(inp->val.t) && !IsStringTerm(inp->val.t)
&& !IsAtomTerm(inp->val.t) && !(inp->type & YAP_STRING_DATUM)) {
LOCAL_Error_TYPE = TYPE_ERROR_TEXT;
}
}
}
@ -653,7 +653,7 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
if (strlen_utf8(s0) <= leng) {
return Yap_LookupAtom(s0);
} else {
size_t n = get_utf8(s, 1, &ch);
size_t n = get_utf8(s, -1, &ch);
unsigned char *buf = Malloc(n + 1);
memcpy(buf, s0, n + 1);
return Yap_ULookupAtom(buf);
@ -736,15 +736,20 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
/**
 * Convert the text in `s` to a number term.
 *
 * The conversion is delegated to Yap_StringToNumberTerm(), which receives
 * the buffer, a pointer to the output encoding (`out->enc`) and `error_on`.
 *
 * @param s        text buffer to be parsed; passed on as a `char *`.
 * @param out      output descriptor; only `out->enc` is used here.
 * @param size     not used by this function.
 * @param error_on forwarded unchanged to Yap_StringToNumberTerm().
 *
 * @return the term produced by Yap_StringToNumberTerm(). The visible caller
 *         tests the result against 0 to decide success.
 */
static Term write_number(unsigned char *s, seq_tv_t *out, int size,
bool error_on USES_REGS) {
Term t;
// remember the error state seen on entry so it can be put back below
yap_error_number erro = LOCAL_Error_TYPE;
// open a new text-stack level for the duration of the conversion
int i = push_text_stack();
t = Yap_StringToNumberTerm((char *) s, &out->enc, error_on);
// NOTE(review): presumably releases temporary text buffers allocated since
// the matching push -- confirm against pop_text_stack()
pop_text_stack(i);
// overwrite whatever the conversion left in LOCAL_Error_TYPE with the
// value saved on entry
LOCAL_Error_TYPE = erro;
return t;
}
/**
 * Parse the NUL-terminated text in `s` into a term.
 *
 * The buffer is handed to Yap_BufferToTerm() together with its length
 * including the terminating NUL (`strlen(s) + 1`) and `TermNil` as options.
 *
 * @param s    NUL-terminated text buffer (its length is taken with strlen()).
 * @param out  output descriptor; the resulting term is stored in
 *             `out->val.t`.
 * @param leng not used by this function.
 *
 * @return the term returned by Yap_BufferToTerm(), i.e. the same value that
 *         was stored in `out->val.t`. The visible caller treats 0 as
 *         failure.
 */
static Term string_to_term(void *s, seq_tv_t *out, size_t leng USES_REGS) {
Term o;
// remember the error state seen on entry so it can be put back below
yap_error_number erro = LOCAL_Error_TYPE;
o = out->val.t = Yap_BufferToTerm(s, strlen(s) + 1, TermNil);
// overwrite whatever the parse left in LOCAL_Error_TYPE with the value
// saved on entry
LOCAL_Error_TYPE = erro;
return o;
}
@ -754,7 +759,7 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
return true;
}
if (out->type & YAP_STRING_TERM) {
if (out->type & YAP_STRING_DATUM) {
if ((out->val.t = string_to_term(inp, out, leng PASS_REGS)) != 0L)
return out->val.t != 0;
}
@ -814,7 +819,9 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
out->val.t = write_number(inp, out, leng, true PASS_REGS);
// Yap_DebugPlWriteln(out->val.t);
return out->val.t != 0;
default: { return true; }
default: {
return true;
}
}
return false;
}
@ -861,6 +868,9 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
*/
// cnt++;
buf = Yap_readText(inp, &leng PASS_REGS);
if (!buf) {
return 0L;
}
if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
if (out->max < leng) {
const unsigned char *ptr = skip_utf8(buf, out->max);
@ -868,14 +878,11 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
char *nbuf = Malloc(diff + 1);
memcpy(nbuf, buf, diff);
nbuf[diff] = '\0';
leng = out->max;
leng = diff;
}
// else if (out->type & YAP_STRING_NCHARS &&
// const unsigned char *ptr = skip_utf8(buf, leng)
}
if (!buf) {
return 0L;
}
if (out->type & (YAP_STRING_UPCASE | YAP_STRING_DOWNCASE)) {
if (out->type & YAP_STRING_UPCASE) {
@ -1120,7 +1127,8 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
if (ap->ModuleOfPred == IDB_MODULE) {
if (ap->PredFlags & NumberDBPredFlag) {
Int key = ap->src.IndxId;
snprintf(s, smax - s, "%" PRIdPTR, key);
snprintf(s, smax - s, "%"
PRIdPTR, key);
return LOCAL_FileNameBuf;
} else if (ap->PredFlags & AtomDBPredFlag) {
at = (Atom) (ap->FunctorOfPred);
@ -1143,7 +1151,8 @@ static unsigned char *latin2utf8(seq_tv_t *inp, size_t *lengp) {
return NULL;
}
s += strlen(s);
snprintf(s, smax - s, "/%" PRIdPTR, arity);
snprintf(s, smax - s, "/%"
PRIdPTR, arity);
return s0;
}

View File

@ -660,6 +660,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.val.t = t0;
inp.type = YAP_STRING_ATOM;
out.val.uc = NULL;
out.enc = ENC_ISO_UTF8;
out.type = YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
@ -687,6 +688,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
YAP_STRING_FLOAT | YAP_STRING_BIG | YAP_STRING_ATOMS_CODES;
out.val.uc = NULL;
out.type = YAP_STRING_STRING;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
@ -715,6 +717,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
YAP_STRING_FLOAT | YAP_STRING_BIG | YAP_STRING_TERM;
out.val.uc = NULL;
out.type = mod_to_type(mod PASS_REGS);
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
@ -754,6 +757,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_CHARS;
out.val.uc = NULL;
out.type = YAP_STRING_ATOM;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
return out.val.a;
@ -925,6 +929,8 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_ATOMS;
out.type = YAP_STRING_ATOM;
out.val.uc = NULL;
out.enc = ENC_ISO_UTF8;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return (Atom)NULL;
return out.val.a;
@ -937,6 +943,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
out.type =
YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG | YAP_STRING_TERM;
out.val.uc = NULL;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
return out.val.t;
@ -948,6 +955,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_ATOMS;
out.type = YAP_STRING_STRING;
out.val.uc = NULL;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
return out.val.t;
@ -959,6 +967,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_CODES;
out.type = YAP_STRING_ATOM;
out.val.uc = NULL;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return (Atom)NULL;
return out.val.a;
@ -969,6 +978,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.val.t = t0;
inp.type = YAP_STRING_CODES;
out.type = YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG;
out.enc = ENC_ISO_UTF8;
out.val.uc = NULL;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
@ -981,6 +991,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_CODES;
out.val.uc = NULL;
out.type = YAP_STRING_STRING;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
return out.val.t;
@ -992,6 +1003,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_ATOMS_CODES;
out.val.uc = NULL;
out.type = YAP_STRING_ATOM;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
return out.val.a;
@ -1002,6 +1014,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.val.t = t0;
inp.type = YAP_STRING_STRING | YAP_STRING_ATOMS_CODES | YAP_STRING_TERM;
out.val.uc = NULL;
out.enc = ENC_ISO_UTF8;
out.type = YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT |
YAP_STRING_BIG | YAP_STRING_OUTPUT_TERM;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
@ -1015,6 +1028,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.val.t = t0;
inp.type = YAP_STRING_STRING | YAP_STRING_ATOMS_CODES;
out.val.uc = NULL;
out.enc = ENC_ISO_UTF8;
out.type = YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
@ -1029,6 +1043,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_STRING | YAP_STRING_ATOMS_CODES | YAP_STRING_TERM;
out.val.uc = NULL;
out.type = YAP_STRING_STRING;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
@ -1044,6 +1059,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
YAP_STRING_OUTPUT_TERM;
out.val.uc = NULL;
out.type = YAP_STRING_STRING;
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
@ -1057,6 +1073,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_STRING | YAP_STRING_ATOMS_CODES | YAP_STRING_TERM;
out.val.uc = NULL;
out.type = mod_to_type(mod PASS_REGS);
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
@ -1070,6 +1087,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.type = YAP_STRING_STRING | YAP_STRING_ATOMS_CODES | YAP_STRING_TERM;
out.val.uc = NULL;
out.type = mod_to_bqtype(mod PASS_REGS);
out.enc = ENC_ISO_UTF8;
if (!Yap_CVT_Text(&inp, &out PASS_REGS))
return 0L;
@ -1083,6 +1101,7 @@ static inline size_t Yap_AtomToLength(Term t0 USES_REGS) {
inp.val.c0 = s;
inp.type = YAP_STRING_CHARS | YAP_STRING_NCHARS;
inp.enc = enc;
out.enc = ENC_ISO_UTF8;
out.type = YAP_STRING_ATOM;
out.val.uc = NULL;
out.max = len;

View File

@ -379,8 +379,8 @@ typedef void (*YAP_halt_hook)(int exit_code, void *closure);
/* each type has a tag */
typedef YAP_Int YAP_opaque_tag_t;
typedef YAP_Bool (*YAP_Opaque_CallOnFail)(void *);
typedef YAP_Bool (*YAP_Opaque_CallOnCut)(void *);
typedef YAP_Bool (*YAP_Opaque_CallOnFail)(YAP_Term);
typedef YAP_Bool (*YAP_Opaque_CallOnCut)(YAP_Term);
typedef YAP_Bool (*YAP_Opaque_CallOnWrite)(FILE *, YAP_opaque_tag_t, void *,
int);
typedef YAP_Int (*YAP_Opaque_CallOnGCMark)(YAP_opaque_tag_t, void *, YAP_Term *,

View File

@ -140,16 +140,16 @@ static UInt cvtFlags(unsigned flags) {
inptype |= YAP_STRING_FLOAT;
}
if (flags & CVT_VARIABLE) {
inptype |= YAP_STRING_TERM;
inptype |= YAP_STRING_DATUM;
}
if (flags & CVT_WRITE) {
inptype |= YAP_STRING_TERM;
inptype |= YAP_STRING_DATUM;
}
if (flags & CVT_WRITEQ) {
inptype |= YAP_STRING_TERM | YAP_STRING_WQ;
inptype |= YAP_STRING_DATUM | YAP_STRING_WQ;
}
if (flags & CVT_WRITE_CANONICAL) {
inptype |= YAP_STRING_TERM | YAP_STRING_WC;
inptype |= YAP_STRING_DATUM | YAP_STRING_WC;
}
return inptype;
}

View File

@ -116,13 +116,14 @@ static char SccsId[] = "%W% %G%";
}
bool Yap_set_stream_to_buf(StreamDesc *st, const char *buf, size_t nchars) {
bool Yap_set_stream_to_buf(StreamDesc *st, const char *buf, encoding_t enc, size_t nchars) {
FILE *f;
// like any file stream.
st->file = f = fmemopen((void *)buf, nchars, "r");
st->status = Input_Stream_f | InMemory_Stream_f | Seekable_Stream_f;
st->vfs = NULL;
st->encoding = enc;
Yap_DefaultStreamOps(st);
return true;
}

View File

@ -110,7 +110,7 @@ extern Term Yap_StringToNumberTerm(const char *s, encoding_t *encp,
extern int Yap_FormatFloat(Float f, char **s, size_t sz);
extern int Yap_open_buf_read_stream(const char *buf, size_t nchars,
encoding_t *encp, memBufSource src);
extern bool Yap_set_stream_to_buf(struct stream_desc *st, const char *buf,
extern bool Yap_set_stream_to_buf(struct stream_desc *st, const char *buf, encoding_t enc,
size_t nchars);
extern int Yap_open_buf_write_stream(encoding_t enc, memBufSource src);
extern Term Yap_BufferToTerm(const unsigned char *s, size_t sz, Term opts);

View File

@ -279,10 +279,352 @@ class YAPEngine;
/* Put header files here or function declarations like below */
#include "yapi.hh"
extern "C" {
#if THREADS
#define Yap_regp regcache
#endif
// we cannot consult YapInterface.h, that conflicts with what we