diff --git a/C/c_interface.c b/C/c_interface.c index 746598e50..8510797b6 100755 --- a/C/c_interface.c +++ b/C/c_interface.c @@ -1306,7 +1306,6 @@ X_API Term YAP_NBufferToString(const char *s, size_t len) { inp.val.c0 = s; inp.type = YAP_STRING_CHARS; out.type = YAP_STRING_CODES | YAP_STRING_NCHARS | YAP_STRING_TRUNC; - out.sz = len; out.max = len; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) return 0L; @@ -1344,7 +1343,6 @@ X_API Term YAP_NWideBufferToString(const wchar_t *s, size_t len) { inp.val.w0 = s; inp.type = YAP_STRING_WCHARS; out.type = YAP_STRING_CODES | YAP_STRING_NCHARS | YAP_STRING_TRUNC; - out.sz = len; out.max = len; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) return 0L; @@ -1427,7 +1425,6 @@ X_API Term YAP_NBufferToAtomList(const char *s, size_t len) { inp.val.c0 = s; inp.type = YAP_STRING_CHARS; out.type = YAP_STRING_ATOMS | YAP_STRING_NCHARS | YAP_STRING_TRUNC; - out.sz = len; out.max = len; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) return 0L; @@ -1465,7 +1462,6 @@ X_API Term YAP_NWideBufferToAtomList(const wchar_t *s, size_t len) { inp.val.w0 = s; inp.type = YAP_STRING_WCHARS; out.type = YAP_STRING_ATOMS | YAP_STRING_NCHARS | YAP_STRING_TRUNC; - out.sz = len; out.max = len; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) return 0L; @@ -1487,7 +1483,6 @@ X_API Term YAP_NWideBufferToAtomDiffList(const wchar_t *s, Term t0, inp.type = YAP_STRING_WCHARS; out.type = YAP_STRING_ATOMS | YAP_STRING_NCHARS | YAP_STRING_TRUNC | YAP_STRING_DIFF; - out.sz = len; out.max = len; out.dif = t0; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) @@ -1528,7 +1523,6 @@ X_API Term YAP_NBufferToDiffList(const char *s, Term t0, size_t len) { inp.type = YAP_STRING_CHARS; out.type = YAP_STRING_CODES | YAP_STRING_NCHARS | YAP_STRING_TRUNC | YAP_STRING_DIFF; - out.sz = len; out.max = len; out.dif = t0; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) @@ -1569,7 +1563,6 @@ X_API Term YAP_NWideBufferToDiffList(const wchar_t *s, Term t0, size_t len) { inp.type = YAP_STRING_WCHARS; out.type = 
YAP_STRING_CODES | YAP_STRING_NCHARS | YAP_STRING_TRUNC | YAP_STRING_DIFF; - out.sz = len; out.max = len; out.dif = t0; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) diff --git a/C/parser.c b/C/parser.c index 28ce98dcd..b2ccbdc8b 100755 --- a/C/parser.c +++ b/C/parser.c @@ -744,65 +744,9 @@ static Term ParseTerm(int prio, JMPBUFF *FailBuff, encoding_t enc, Term cmod USE break; case String_tok: /* build list on the heap */ - { - Volatile char *p = (char *)LOCAL_tokptr->TokInfo; - // we may be operating under a syntax error - yap_error_number oerr = LOCAL_Error_TYPE; - LOCAL_Error_TYPE = YAP_NO_ERROR; - t = Yap_CharsToTDQ(p, cmod, enc PASS_REGS); - if (!t) { - syntax_msg("line %d: could not convert \"%s\"",LOCAL_tokptr->TokPos, (char *)LOCAL_tokptr->TokInfo); - FAIL; - } - LOCAL_Error_TYPE = oerr; - NextToken; - } break; - - case WString_tok: /* build list on the heap */ - { - Volatile wchar_t *p = (wchar_t *)LOCAL_tokptr->TokInfo; - // we may be operating under a syntax error - yap_error_number oerr = LOCAL_Error_TYPE; - LOCAL_Error_TYPE = YAP_NO_ERROR; - t = Yap_WCharsToTDQ(p, cmod PASS_REGS); - if (!t) { - syntax_msg("line %d: could not convert \'%S\'",LOCAL_tokptr->TokPos, (wchar_t *)LOCAL_tokptr->TokInfo); - FAIL; - } - LOCAL_Error_TYPE = oerr; - NextToken; - } break; - - case BQString_tok: /* build list on the heap */ - { - Volatile char *p = (char *)LOCAL_tokptr->TokInfo; - // we may be operating under a syntax error - yap_error_number oerr = LOCAL_Error_TYPE; - LOCAL_Error_TYPE = YAP_NO_ERROR; - - t = Yap_CharsToTBQ(p, cmod, LOCAL_encoding PASS_REGS); - if (!t) { - syntax_msg("line %d: could not convert \'%s\"",LOCAL_tokptr->TokPos, (char *)LOCAL_tokptr->TokInfo); - FAIL; - } - LOCAL_Error_TYPE = oerr; - NextToken; - } break; - - case WBQString_tok: /* build list on the heap */ - { - Volatile wchar_t *p = (wchar_t *)LOCAL_tokptr->TokInfo; - t = Yap_WCharsToTBQ(p, cmod PASS_REGS); - // we may be operating under a syntax error - yap_error_number oerr = 
LOCAL_Error_TYPE; - LOCAL_Error_TYPE = YAP_NO_ERROR; - if (!t) { - syntax_msg("line %d: could not convert \"%S\"",LOCAL_tokptr->TokPos, (wchar_t *)LOCAL_tokptr->TokInfo); - FAIL; - } - LOCAL_Error_TYPE = oerr; - NextToken; - } break; + t = LOCAL_tokptr->TokInfo; + NextToken; + break; case Var_tok: varinfo = (VarEntry *)(LOCAL_tokptr->TokInfo); diff --git a/C/scanner.c b/C/scanner.c index d0a48cf9b..3187c7c1d 100755 --- a/C/scanner.c +++ b/C/scanner.c @@ -575,7 +575,7 @@ static TokEntry *AuxSpaceError__(TokEntry *p, TokEntry *l, /* huge atom or variable, we are in trouble */ LOCAL_ErrorMessage = (char *)msg; LOCAL_Error_TYPE = RESOURCE_ERROR_AUXILIARY_STACK; - Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); + Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); if (p) { p->Tok = eot_tok; p->TokInfo = TermOutOfAuxspaceError; @@ -584,12 +584,13 @@ static TokEntry *AuxSpaceError__(TokEntry *p, TokEntry *l, return l; } -static void InitScannerMemory(void) { +static void * InitScannerMemory(void) { CACHE_REGS LOCAL_ErrorMessage = NULL; LOCAL_Error_Size = 0; - LOCAL_ScannerStack = (char *)TR; LOCAL_ScannerExtraBlocks = NULL; + LOCAL_ScannerStack = (char *)TR; + return (char *)TR; } static char *AllocScannerMemory(unsigned int size) { @@ -1624,6 +1625,7 @@ TokEntry *Yap_tokenizer(struct stream_desc *inp_stream, bool store_comments, LOCAL_ErrorMessage = "layout character \n inside quotes"; break; } + if (ch == quote) { ch = getchrq(inp_stream); if (ch != quote) @@ -1661,55 +1663,49 @@ TokEntry *Yap_tokenizer(struct stream_desc *inp_stream, bool store_comments, } else { *charp = '\0'; } - if (quote == '"' || quote == '`') { - if (wcharp) { - mp = AllocScannerMemory(sizeof(wchar_t) * (len + 1)); - } else { - mp = AllocScannerMemory(len + 1); - } - if (mp == NULL) { - LOCAL_ErrorMessage = - "not enough heap space to read in string or quoted atom"; + if (quote == '"' ) { + if (wcharp) { + t->TokInfo = Yap_WCharsToTDQ((wchar_t *)TokImage, + CurrentModule + PASS_REGS); + } else { 
+ t->TokInfo = Yap_CharsToTDQ(TokImage, CurrentModule, LOCAL_encoding + PASS_REGS); + } + if (!(t->TokInfo)) { + return CodeSpaceError(t, p, l); + } + Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); + t->Tok = Ord(kind = String_tok); + } else if (quote == '`') { + if (wcharp) { + t->TokInfo = Yap_WCharsToTBQ((wchar_t *)TokImage, + CurrentModule PASS_REGS); + } else { + t->TokInfo = Yap_CharsToTBQ(TokImage, CurrentModule, + LOCAL_encoding + PASS_REGS); + } + if (!(t->TokInfo)) { + return CodeSpaceError(t, p, l); + } + Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); + t->Tok = Ord(kind = String_tok); + } else { + if (wcharp) { + t->TokInfo = Unsigned(Yap_LookupWideAtom((wchar_t *)TokImage)); + } else { + t->TokInfo = Unsigned(Yap_LookupAtom(TokImage)); + } + if (!(t->TokInfo)) { + return CodeSpaceError(t, p, l); + } Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); - t->Tok = Ord(kind = eot_tok); - t->TokInfo = TermOutOfHeapError; - return l; - } - if (wcharp) { - wcscpy((wchar_t *)mp, (wchar_t *)TokImage); - } else { - strcpy(mp, TokImage); - } - t->TokInfo = Unsigned(mp); - Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); - if (quote == '"') { - if (wcharp) { - t->Tok = Ord(kind = WString_tok); - } else { - t->Tok = Ord(kind = String_tok); - } - } else { - if (wcharp) { - t->Tok = Ord(kind = WBQString_tok); - } else { - t->Tok = Ord(kind = BQString_tok); - } - } - } else { - if (wcharp) { - t->TokInfo = Unsigned(Yap_LookupWideAtom((wchar_t *)TokImage)); - } else { - t->TokInfo = Unsigned(Yap_LookupAtom(TokImage)); - } - if (!(t->TokInfo)) { - return CodeSpaceError(t, p, l); - } - Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage); - t->Tok = Ord(kind = Name_tok); - if (ch == '(') - solo_flag = false; + t->Tok = Ord(kind = Name_tok); + if (ch == '(') + solo_flag = false; } - break; + break; case BS: if (ch == '\0') { @@ -2046,9 +2042,12 @@ TokEntry *Yap_tokenizer(struct stream_desc *inp_stream, bool store_comments, t->TokInfo = TermEof; } #if DEBUG - if 
(GLOBAL_Option[2]) - fprintf(stderr, "[Token %d %s]", Ord(kind), - Yap_tokRep(t, inp_stream->encoding)); + if (GLOBAL_Option[2]) { + static int n; + if (n == 975) jmp_deb(2); + fprintf(stderr, "[Token %d %s %d]", Ord(kind), + Yap_tokRep(t, inp_stream->encoding),n++); + } #endif if (LOCAL_ErrorMessage) { /* insert an error token to inform the system of what happened */ @@ -2069,10 +2068,12 @@ TokEntry *Yap_tokenizer(struct stream_desc *inp_stream, bool store_comments, return (l); } +int vsc_count; + void Yap_clean_tokenizer(TokEntry *tokstart, VarEntry *vartable, VarEntry *anonvartable) { CACHE_REGS - struct scanner_extra_alloc *ptr = LOCAL_ScannerExtraBlocks; + struct scanner_extra_alloc *ptr = LOCAL_ScannerExtraBlocks; while (ptr) { struct scanner_extra_alloc *next = ptr->next; free(ptr); @@ -2085,6 +2086,7 @@ void Yap_clean_tokenizer(TokEntry *tokstart, VarEntry *vartable, free(LOCAL_CommentsBuff); LOCAL_CommentsBuff = NULL; } + LOCAL_ScannerStack = NULL; LOCAL_CommentsBuffLim = 0; } diff --git a/C/text.c b/C/text.c index 766864616..168686a6d 100644 --- a/C/text.c +++ b/C/text.c @@ -22,6 +22,7 @@ #include "eval.h" #include "yapio.h" +#include #include #include @@ -30,199 +31,240 @@ inline static size_t min_size(size_t i, size_t j) { return (i < j ? 
i : j); } #define wcsnlen(S, N) min_size(N, wcslen(S)) #endif -static inline unsigned char *getChar(unsigned char *p, int *c) { - *c = *p; - return p + 1; -} - -static inline wchar_t *getWchar(wchar_t *p, int *c) { - *c = *p; - return p + 1; -} - #ifndef NAN #define NAN (0.0 / 0.0) #endif +void *buf__, *cur__; + +#define init_alloc(I) \ + void *ov__ = TR, *ocur__ = LOCAL_ScannerStack; \ + if (!LOCAL_ScannerStack) LOCAL_ScannerStack = (char *)TR + +#define protect_stack(s) + +#define export_buf(s) {} + +#define unprotect_stack(s) \ + TR = ov__, LOCAL_ScannerStack = ocur__ + //LOCAL_ScannerStack = ov__, TR = ot__ + + +static bool alloc_ovfl(size_t sz) { + return (char *) LOCAL_ScannerStack + (sz + 4096) > (char *) LOCAL_TrailTop; /* true when sz more bytes plus a 4096-byte margin, counted from the current scanner-stack top, would pass the trail top */ +} + +static void *Malloc(size_t sz USES_REGS) { + sz = ALIGN_BY_TYPE(sz, CELL); + if (alloc_ovfl(sz)) + return NULL; + void *o = LOCAL_ScannerStack; + LOCAL_ScannerStack = (void *) ((char *) LOCAL_ScannerStack + sz); + return o; +} + +static size_t MaxTmp(USES_REGS1) { + if (LOCAL_ScannerStack) { + return (char *) LOCAL_TrailTop - (char *) LOCAL_ScannerStack; + } + return 0; +} + static Term Globalize(Term v USES_REGS) { - if (!IsVarTerm(v = Deref(v))) { + if (!IsVarTerm(v = Deref(v))) { + return v; + } + if (VarOfTerm(v) > HR && VarOfTerm(v) < LCL0) { + Bind_Local(VarOfTerm(v), MkVarTerm()); + v = Deref(v); + } return v; - } - if (VarOfTerm(v) > HR && VarOfTerm(v) < LCL0) { - Bind_Local(VarOfTerm(v), MkVarTerm()); - v = Deref(v); - } - return v; } static Int SkipListCodes(unsigned char **bufp, Term *l, Term **tailp, Int *atoms, bool *wide, seq_tv_t *inp USES_REGS) { - Int length = 0; - Term *s; /* slow */ - Term v; /* temporary */ - *wide = false; - size_t max = 1; - unsigned char *st0 = *bufp, *st; - unsigned char *smax = NULL; + Int length = 0; + Term *s; /* slow */ + Term v; /* temporary */ + *wide = false; + unsigned char *st0 = *bufp, *st; - do_derefa(v, l, derefa_unk, derefa_nonvar); - *tailp = l; - s = l; - - if (inp->type & YAP_STRING_TRUNC) { - max = 
inp->max; - } else { - max = 0; // basically, this will never be reached; - } - - if (!st0) { - if (inp->type & YAP_STRING_MALLOC) { - *bufp = st0 = (unsigned char *)malloc(MAXPATHLEN + 1); - smax = st0 + (MAXPATHLEN - 8); // give 8 bytes for max UTF-8 size + '\0'; - } else { - *bufp = st0 = (unsigned char *)Yap_PreAllocCodeSpace(); - smax = (unsigned char *)AuxTop - - 8; // give 8 bytes for max UTF-8 size + '\0'; - } - } else if (inp->sz > 0) { - smax = st0 + (inp->sz - 8); // give 8 bytes for max UTF-8 size + '\0'; - } else { - // AUX_ERROR( *l, 2*(length+1), st0, unsigned char); - return 0; - } - *bufp = st = st0; - - if (*l == TermNil) { - return 0; - } - if (IsPairTerm(*l)) { - Term hd0 = HeadOfTerm(*l); - if (IsVarTerm(hd0)) { - return -INSTANTIATION_ERROR; - } - // are we looking for atoms/codes? - // whatever the case, we should be consistent throughout, - // so we should be consistent with the first arg. - if (*atoms == 1) { - if (!IsIntegerTerm(hd0)) { - return -INSTANTIATION_ERROR; - } - } else if (*atoms == 2) { - if (!IsAtomTerm(hd0)) { - return -TYPE_ERROR_ATOM; - } + if (!st0) { + st0 = Malloc(0); } - do { - int ch; - length++; - if (length == max) { - *st++ = '\0'; - } - { - Term hd = Deref(RepPair(*l)[0]); - if (IsVarTerm(hd)) { - return -INSTANTIATION_ERROR; - } else if (IsAtomTerm(hd)) { - (*atoms)++; - if (*atoms < length) { - *tailp = l; - return -TYPE_ERROR_NUMBER; - } - if (IsWideAtom(AtomOfTerm(hd))) { + do_derefa(v, l, derefa_unk, derefa_nonvar); + *tailp = l; + s = l; + + *bufp = st = st0; + + if (*l == TermNil) { + return 0; + } + if (IsPairTerm(*l)) { + Term hd0 = HeadOfTerm(*l); + if (IsVarTerm(hd0)) { + return -INSTANTIATION_ERROR; + } + // are we looking for atoms/codes? + // whatever the case, we should be consistent throughout, + // so we should be consistent with the first arg. 
+ if (*atoms == 1) { + if (!IsIntegerTerm(hd0)) { + return -INSTANTIATION_ERROR; + } + } else if (*atoms == 2) { + if (!IsAtomTerm(hd0)) { + return -TYPE_ERROR_ATOM; + } + } + + do { int ch; - if ((RepAtom(AtomOfTerm(hd))->WStrOfAE)[1] != '\0') { - length = -REPRESENTATION_ERROR_CHARACTER; + length++; + { + Term hd = Deref(RepPair(*l)[0]); + if (IsVarTerm(hd)) { + return -INSTANTIATION_ERROR; + } else if (IsAtomTerm(hd)) { + (*atoms)++; + if (*atoms < length) { + *tailp = l; + return -TYPE_ERROR_NUMBER; + } + if (IsWideAtom(AtomOfTerm(hd))) { + int ch; + if ((RepAtom(AtomOfTerm(hd))->WStrOfAE)[1] != '\0') { + length = -REPRESENTATION_ERROR_CHARACTER; + } + ch = RepAtom(AtomOfTerm(hd))->WStrOfAE[0]; + *wide = true; + } else { + AtomEntry *ae = RepAtom(AtomOfTerm(hd)); + if ((ae->StrOfAE)[1] != '\0') { + length = -REPRESENTATION_ERROR_CHARACTER; + } else { + ch = RepAtom(AtomOfTerm(hd))->StrOfAE[0]; + *wide |= ch > 0x80; + } + } + } else if (IsIntegerTerm(hd)) { + ch = IntegerOfTerm(hd); + if (*atoms) + length = -TYPE_ERROR_ATOM; + else if (ch < 0) { + *tailp = l; + length = -DOMAIN_ERROR_NOT_LESS_THAN_ZERO; + } else { + *wide |= ch > 0x80; + } + } else { + length = -TYPE_ERROR_INTEGER; + } + if (length < 0) { + *tailp = l; + return length; + } } - ch = RepAtom(AtomOfTerm(hd))->WStrOfAE[0]; - *wide = true; - } else { - AtomEntry *ae = RepAtom(AtomOfTerm(hd)); - if ((ae->StrOfAE)[1] != '\0') { - length = -REPRESENTATION_ERROR_CHARACTER; - } else { - ch = RepAtom(AtomOfTerm(hd))->StrOfAE[0]; - *wide |= ch > 0x80; + // now copy char to buffer + int chsz = put_utf8(st, ch); + if (chsz > 0) { + st += chsz; } - } - } else if (IsIntegerTerm(hd)) { - ch = IntegerOfTerm(hd); - if (*atoms) - length = -TYPE_ERROR_ATOM; - else if (ch < 0) { - *tailp = l; - length = -DOMAIN_ERROR_NOT_LESS_THAN_ZERO; - } else { - *wide |= ch > 0x80; - } - } else { - length = -TYPE_ERROR_INTEGER; - } - if (length < 0) { - *tailp = l; - return length; - } - } - // now copy char to buffer - size_t 
chsz = put_utf8(st, ch); - if (smax <= st + chsz) { - *st++ = '\0'; - *tailp = l; - return length; - } else { - st += chsz; - } - l = RepPair(*l) + 1; - do_derefa(v, l, derefa2_unk, derefa2_nonvar); - } while (*l != *s && IsPairTerm(*l)); - } - if (IsVarTerm(*l)) { - return -INSTANTIATION_ERROR; - } - if (*l != TermNil) { - return -TYPE_ERROR_LIST; - } - st[0] = '\0'; - *tailp = l; + l = RepPair(*l) + 1; + do_derefa(v, l, derefa2_unk, derefa2_nonvar); + } while (*l != *s && IsPairTerm(*l)); + } + if (IsVarTerm(*l)) { + return -INSTANTIATION_ERROR; + } + if (*l != TermNil) { + return -TYPE_ERROR_LIST; + } + st[0] = '\0'; + Malloc((st - st0) + 1); + *tailp = l; - return length; + return length; } -static void *to_buffer(void *buf, Term t, seq_tv_t *inp, bool *widep, - Int *atoms, size_t *lenp USES_REGS) { - CELL *r = NULL; - Int n; - - if (!buf) { - inp->sz = *lenp; - } - unsigned char *bufc = buf; - n = SkipListCodes(&bufc, &t, &r, atoms, widep, inp PASS_REGS); - if (n < 0) { - LOCAL_Error_TYPE = -n; - LOCAL_Error_Term = *r; - return NULL; - } - *lenp = n; - return bufc; +static unsigned char *latin2utf8(seq_tv_t *inp, size_t +*lengp) { + unsigned char *b0 = inp->val.uc; + size_t sz = *lengp = strlen(inp->val.c); + sz *= 2; + int ch; + unsigned char *buf = Malloc(sz + 1), *pt = buf; + *lengp = strlen(inp->val.c); + if (!buf) + return NULL; + while ((ch = *b0++)) { + int off = put_utf8(pt, ch); + if (off < 0) + continue; + pt += off; + } + *pt++ = '\0'; + return buf; } -static void *Yap_ListOfCodesToBuffer(void *buf, Term t, seq_tv_t *inp, - bool *widep, size_t *lenp USES_REGS) { - Int atoms = 1; // we only want lists of atoms - return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS); + +static unsigned char *wchar2utf8(seq_tv_t *inp, size_t *lengp) { + *lengp = wcslen(inp->val.w); + size_t sz = *lengp * 4; + wchar_t *b0 = inp->val.w; + unsigned char *buf = Malloc(sz + 1), *pt = buf; + int ch; + if (!buf) + return NULL; + while ((ch = *b0++)) + pt += 
put_utf8(pt, ch); + *pt++ = '\0'; + return buf; } -static void *Yap_ListOfAtomsToBuffer(void *buf, Term t, seq_tv_t *inp, - bool *widep, size_t *lenp USES_REGS) { - Int atoms = 2; // we only want lists of integer codes - return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS); +static void *slice(size_t min, size_t max, unsigned char *buf USES_REGS); + +static unsigned char *to_buffer(unsigned char *buf, Term t, seq_tv_t *inp, + bool *widep, Int *atoms, + size_t *lenp USES_REGS) { + CELL *r = NULL; + Int n; + + if (!buf) { + inp->max = *lenp; + } + unsigned char *bufc = buf; + n = SkipListCodes(&bufc, &t, &r, atoms, widep, inp PASS_REGS); + if (n < 0) { + LOCAL_Error_TYPE = -n; + LOCAL_Error_Term = *r; + return NULL; + } + *lenp = n; + return bufc; } -static void *Yap_ListToBuffer(void *buf, Term t, seq_tv_t *inp, bool *widep, - size_t *lenp USES_REGS) { - Int atoms = 0; // we accept both types of lists. - return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS); +static unsigned char *Yap_ListOfCodesToBuffer(unsigned char *buf, Term t, + seq_tv_t *inp, bool *widep, + size_t *lenp USES_REGS) { + Int atoms = 1; // we only want lists of atoms + return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS); +} + +static unsigned char *Yap_ListOfAtomsToBuffer(unsigned char *buf, Term t, + seq_tv_t *inp, bool *widep, + size_t *lenp USES_REGS) { + Int atoms = 2; // we only want lists of integer codes + return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS); +} + +static unsigned char *Yap_ListToBuffer(unsigned char *buf, Term t, + seq_tv_t *inp, bool *widep, + size_t *lenp USES_REGS) { + Int atoms = 0; // we accept both types of lists. 
+ return to_buffer(buf, t, inp, widep, &atoms, lenp PASS_REGS); } #if USE_GEN_TYPE_ERROR @@ -250,160 +292,154 @@ static yap_error_number gen_type_error(int flags) { } #endif -void *Yap_readText(void *buf, seq_tv_t *inp, encoding_t *enc, int *minimal, - size_t *lengp USES_REGS) { - char *s, *s0 = buf; - wchar_t *ws; - bool wide; +unsigned char *Yap_readText(seq_tv_t *inp, size_t *lengp) { + unsigned char *s0 = NULL; + bool wide; - /* we know what the term is */ - if (!(inp->type & (YAP_STRING_CHARS | YAP_STRING_WCHARS))) { - if (!(inp->type & YAP_STRING_TERM)) { - if (IsVarTerm(inp->val.t)) { - LOCAL_Error_TYPE = INSTANTIATION_ERROR; - } else if (!IsAtomTerm(inp->val.t) && inp->type == YAP_STRING_ATOM) { - LOCAL_Error_TYPE = TYPE_ERROR_ATOM; - } else if (!IsStringTerm(inp->val.t) && inp->type == YAP_STRING_STRING) { - LOCAL_Error_TYPE = TYPE_ERROR_STRING; - } else if (!IsPairTerm(inp->val.t) && !IsStringTerm(inp->val.t) && - inp->type == (YAP_STRING_ATOMS_CODES | YAP_STRING_STRING)) { - LOCAL_Error_TYPE = TYPE_ERROR_LIST; - } else if (!IsNumTerm(inp->val.t) && - (inp->type & (YAP_STRING_INT | YAP_STRING_FLOAT | - YAP_STRING_BIG)) == inp->type) { - LOCAL_Error_TYPE = TYPE_ERROR_NUMBER; - } - LOCAL_Error_Term = inp->val.t; + /* we know what the term is */ + if (!(inp->type & (YAP_STRING_CHARS | YAP_STRING_WCHARS))) { + if (!(inp->type & YAP_STRING_TERM)) { + if (IsVarTerm(inp->val.t)) { + LOCAL_Error_TYPE = INSTANTIATION_ERROR; + } else if (!IsAtomTerm(inp->val.t) && + inp->type == YAP_STRING_ATOM) { + LOCAL_Error_TYPE = TYPE_ERROR_ATOM; + } else if (!IsStringTerm(inp->val.t) && + inp->type == YAP_STRING_STRING) { + LOCAL_Error_TYPE = TYPE_ERROR_STRING; + } else if (!IsPairTerm(inp->val.t) && !IsStringTerm(inp->val.t) && + inp->type == + (YAP_STRING_ATOMS_CODES | YAP_STRING_STRING)) { + LOCAL_Error_TYPE = TYPE_ERROR_LIST; + } else if (!IsNumTerm(inp->val.t) && + (inp->type & (YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG)) == inp->type) { + LOCAL_Error_TYPE = 
TYPE_ERROR_NUMBER; + } + LOCAL_Error_Term = inp->val.t; + } } - } - if (LOCAL_Error_TYPE != YAP_NO_ERROR) - return NULL; + if (LOCAL_Error_TYPE != YAP_NO_ERROR) + return NULL; - // this is a term, extract the UTF8 representation - if (IsStringTerm(inp->val.t) && inp->type & YAP_STRING_STRING) { - const char *s = StringOfTerm(inp->val.t); - *enc = ENC_ISO_UTF8; - *minimal = FALSE; - if (lengp) - *lengp = strlen(s); - return (void *)s; - } - if (IsAtomTerm(inp->val.t) && inp->type & YAP_STRING_ATOM) { - // this is a term, extract to a buffer, and representation is wide - *minimal = TRUE; - Atom at = AtomOfTerm(inp->val.t); - if (IsWideAtom(at)) { - ws = at->WStrOfAE; - *lengp = wcslen(ws); - *enc = ENC_WCHAR; - return ws; - } else { - s = (char *)at->StrOfAE; - *lengp = strlen(s); - *enc = ENC_ISO_LATIN1; - return s; + if (IsAtomTerm(inp->val.t) && inp->type & YAP_STRING_ATOM) { + // this is a term, extract to a buffer, and representation is wide + //Yap_DebugPlWriteln(inp->val.t); + Atom at = AtomOfTerm(inp->val.t); + if (IsWideAtom(at)) { + inp->val.w = at->WStrOfAE; + return wchar2utf8(inp, lengp); + } else { + inp->val.c = at->StrOfAE; + return latin2utf8(inp, lengp); + } } - } - if (((inp->type & (YAP_STRING_CODES | YAP_STRING_ATOMS)) == - (YAP_STRING_CODES | YAP_STRING_ATOMS))) { - s = Yap_ListToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS); - // this is a term, extract to a sfer, and representation is wide - *minimal = true; - *enc = ENC_ISO_UTF8; - return s; - } - if (inp->type == YAP_STRING_CODES) { - s = Yap_ListOfCodesToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS); - // this is a term, extract to a sfer, and representation is wide - *minimal = true; - *enc = ENC_ISO_UTF8; - return s; - } - if (inp->type == YAP_STRING_ATOMS) { - s = Yap_ListOfAtomsToBuffer(s0, inp->val.t, inp, &wide, lengp PASS_REGS); - // this is a term, extract to a buffer, and representation is wide - *minimal = true; - *enc = ENC_ISO_UTF8; - return s; - } - if (inp->type & 
YAP_STRING_INT && IsIntegerTerm(inp->val.t)) { - if (s0) - s = s0; - else - s = Yap_PreAllocCodeSpace(); - AUX_ERROR(inp->val.t, LOCAL_MAX_SIZE, s, char); - if (snprintf(s, LOCAL_MAX_SIZE - 1, Int_FORMAT, IntegerOfTerm(inp->val.t)) < - 0) { - AUX_ERROR(inp->val.t, 2 * LOCAL_MAX_SIZE, s, char); + if (((inp->type & (YAP_STRING_CODES | YAP_STRING_ATOMS)) == + (YAP_STRING_CODES | YAP_STRING_ATOMS)) && IsPairTerm(inp->val.t)) { + //Yap_DebugPlWriteln(inp->val.t); + return inp->val.uc = + Yap_ListToBuffer(s0, inp->val.t, inp, &wide, lengp + PASS_REGS); + // this is a term, extract to a sfer, and representation is wide } - *enc = ENC_ISO_LATIN1; - *lengp = strlen(s); - return s; - } - if (inp->type & YAP_STRING_FLOAT && IsFloatTerm(inp->val.t)) { - if (s0) - s = s0; - else - s = Yap_PreAllocCodeSpace(); - AUX_ERROR(inp->val.t, LOCAL_MAX_SIZE, s, char); - if (!Yap_FormatFloat(FloatOfTerm(inp->val.t), &s, LOCAL_MAX_SIZE - 1)) { - AUX_ERROR(inp->val.t, 2 * LOCAL_MAX_SIZE, s, char); + if (inp->type & YAP_STRING_CODES && IsPairTerm(inp->val.t)) { + //Yap_DebugPlWriteln(inp->val.t); + return inp->val.uc = Yap_ListOfCodesToBuffer(s0, inp->val.t, inp, &wide, + lengp PASS_REGS); + // this is a term, extract to a sfer, and representation is wide + } + if (inp->type & YAP_STRING_ATOMS && IsPairTerm(inp->val.t)) { + //Yap_DebugPlWriteln(inp->val.t); + return inp->val.uc = Yap_ListOfAtomsToBuffer(s0, inp->val.t, inp, &wide, + lengp PASS_REGS); + // this is a term, extract to a buffer, and representation is wide + } + if (inp->type & YAP_STRING_INT && IsIntegerTerm(inp->val.t)) { + // ASCII, so both LATIN1 and UTF-8 + //Yap_DebugPlWriteln(inp->val.t); + char *s; + if (s0) + s = (char *) s0; + else + s = Malloc(0); + AUX_ERROR(inp->val.t, MaxTmp(PASS_REGS1), s, char); + if (snprintf(s, MaxTmp(PASS_REGS1) - 1, Int_FORMAT, + IntegerOfTerm(inp->val.t)) < 0) { + AUX_ERROR(inp->val.t, 2 * MaxTmp(PASS_REGS1), s, char); + } + *lengp = strlen(s); + Malloc(*lengp + 1); /* reserve the digits and their NUL terminator */ + return inp->val.uc = 
(unsigned char *) s; + } + if (inp->type & YAP_STRING_FLOAT && IsFloatTerm(inp->val.t)) { + char *s; + //Yap_DebugPlWriteln(inp->val.t); + if (s0) + s = (char *) s0; + else + s = Malloc(0); + AUX_ERROR(inp->val.t, MaxTmp(PASS_REGS1), s, char); + if (!Yap_FormatFloat(FloatOfTerm(inp->val.t), &s, MaxTmp(PASS_REGS1) - 1)) { + AUX_ERROR(inp->val.t, 2 * MaxTmp(PASS_REGS1), s, char); + } + *lengp = strlen(s); + Malloc(*lengp + 1); /* reserve the text and its NUL terminator */ + return inp->val.uc = (unsigned char *) s; } - *lengp = strlen(s); - *enc = ENC_ISO_LATIN1; - return s; - } #if USE_GMP - if (inp->type & YAP_STRING_BIG && IsBigIntTerm(inp->val.t)) { - if (s0) - s = s0; - else - s = Yap_PreAllocCodeSpace(); - if (!Yap_mpz_to_string(Yap_BigIntOfTerm(inp->val.t), s, LOCAL_MAX_SIZE - 1, - 10)) { - AUX_ERROR(inp->val.t, LOCAL_MAX_SIZE, s, char); + if (inp->type & YAP_STRING_BIG && IsBigIntTerm(inp->val.t)) { + //Yap_DebugPlWriteln(inp->val.t); + char *s; + if (s0) + s = (char *) s0; /* write into s0 when it is set, as the int and float branches do */ + else + s = Malloc(0); + if (!Yap_mpz_to_string(Yap_BigIntOfTerm(inp->val.t), s, MaxTmp(PASS_REGS1) - 1, + 10)) { + AUX_ERROR(inp->val.t, MaxTmp(PASS_REGS1), s, char); + } + *lengp = strlen(s); + Malloc(*lengp + 1); /* reserve the digits and their NUL terminator */ + return inp->val.uc = (unsigned char *) s; } - *enc = ENC_ISO_LATIN1; - *lengp = strlen(s); - return s; - } #endif - if (inp->type & YAP_STRING_TERM) { - encoding_t enc = ENC_ISO_UTF8; - char *o = Yap_TermToString(inp->val.t, lengp, enc, 0); - return o; - } - if (inp->type & YAP_STRING_CHARS) { - *enc = inp->enc; - if (inp->type & YAP_STRING_NCHARS) - *lengp = inp->sz; - else - *lengp = strlen(inp->val.c); - return (void *)inp->val.c; - } - if (inp->type & YAP_STRING_WCHARS) { - *enc = ENC_WCHAR; - if (inp->type & YAP_STRING_NCHARS) - *lengp = inp->sz; - else - *lengp = wcslen(inp->val.w); - return (void *)inp->val.w; - } - return NULL; + if (inp->type & YAP_STRING_TERM) { + //Yap_DebugPlWriteln(inp->val.t); + char *s = (char *) Yap_TermToString(inp->val.t, lengp, ENC_ISO_UTF8, 0); + return inp->val.uc = (unsigned char *) s; + } + if (inp->type & 
YAP_STRING_CHARS) { + //printf("%s\n",inp->val.c); + if (inp->enc == ENC_ISO_UTF8) { + if (lengp) + *lengp = strlen_utf8(inp->val.uc); + return inp->val.uc; + } else if (inp->enc == ENC_ISO_LATIN1) { + return latin2utf8(inp, lengp); + } else if (inp->enc == ENC_ISO_ASCII) { + if (lengp) + *lengp = strlen(inp->val.c); + return inp->val.uc; + } + } + if (inp->type & YAP_STRING_WCHARS) { +//printf("%S\n",inp->val.w); +return wchar2utf8(inp, lengp); + } + return NULL; } -static Term write_strings(void *s0, seq_tv_t *out, encoding_t enc, int minimal, +static Term write_strings(unsigned char *s0, seq_tv_t *out, size_t leng USES_REGS) { - size_t min = 0, max = leng; - if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { - if (out->type & YAP_STRING_NCHARS) - min = out->sz; - if (out->type & YAP_STRING_TRUNC && out->max < max) - max = out->max; - } + size_t min = 0, max = leng; + + if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { + if (out->type & YAP_STRING_NCHARS) + min = out->max; + if (out->type & YAP_STRING_TRUNC && out->max < max) + max = out->max; + } - switch (enc) { - case ENC_ISO_UTF8: { unsigned char *s = s0, *lim = s + (max = strlen_utf8(s)); Term t = init_tstring(PASS_REGS1); unsigned char *cp = s, *buf; @@ -411,1233 +447,557 @@ static Term write_strings(void *s0, seq_tv_t *out, encoding_t enc, int minimal, LOCAL_TERM_ERROR(t, 2 * (lim - s)); buf = buf_from_tstring(HR); while (*cp && cp < lim) { - utf8proc_int32_t chr; - cp += get_utf8(cp, -1, &chr); - buf += put_utf8(buf, chr); + utf8proc_int32_t chr; + int off; + off = get_utf8(cp, -1, &chr); + if (off > 0) cp += off; + else { + //Yap_Error(TYPE_ERROR_TEXT, t, NULL); + cp++; + } + off = put_utf8(buf, chr); + if (off > 0) buf += off; + } if (max >= min) - *buf++ = '\0'; + *buf++ = '\0'; else - while (max < min) { - max++; - buf += put_utf8(buf, '\0'); - } + while (max < min) { + max++; + buf += put_utf8(buf, '\0'); + } close_tstring(buf PASS_REGS); out->val.t = t; - } break; - case 
ENC_ISO_LATIN1: { - unsigned char *s = s0, *lim = s + (max = strlen_latin_utf8(s0)); - Term t = init_tstring(PASS_REGS1); - unsigned char *cp = s; - unsigned char *buf; - utf8proc_int32_t chr; - LOCAL_TERM_ERROR(t, 2 * (lim - s)); - buf = buf_from_tstring(HR); - while (cp < lim) { - cp = getChar(cp, &chr); - buf += put_utf8(buf, chr); - } - if (max >= min) - *buf++ = '\0'; - else - while (max < min) { - max++; - buf += put_utf8(buf, chr); - } - close_tstring(buf PASS_REGS); - out->val.t = t; - } break; - case ENC_WCHAR: { - wchar_t *s = s0, *lim = s + (max = strlen_ucs2_utf8(s0)); - Term t = init_tstring(PASS_REGS1); - wchar_t *wp = s; - unsigned char *buf; - - LOCAL_TERM_ERROR(t, 2 * (lim - s)); - buf = buf_from_tstring(HR); - while (wp < lim) { - utf8proc_int32_t chr; - wp = getWchar(wp, &chr); - buf += put_utf8(buf, chr); - } - if (max >= min) - *buf++ = '\0'; - else - while (max < min) { - max++; - buf += put_utf8(buf, '\0'); - } - close_tstring(buf PASS_REGS); - out->val.t = t; - } break; - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - - return out->val.t; + return out->val.t; } -static Term write_atoms(void *s0, seq_tv_t *out, encoding_t enc, int minimal, - size_t leng USES_REGS) { - Term t = AbsPair(HR); - size_t sz = 0; - size_t max = leng; - if (leng == 0) { - out->val.t = t; - return TermNil; - } - if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { - if (out->type & YAP_STRING_TRUNC && out->max < max) - max = out->max; - } +static Term write_atoms(void *s0, seq_tv_t *out, size_t leng USES_REGS) { + Term t = AbsPair(HR); + size_t sz = 0; + size_t max = leng; + if (leng == 0) { + out->val.t = t; + return TermNil; + } + if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { + if (out->type & YAP_STRING_TRUNC && out->max < max) + max = out->max; + } - switch (enc) { - case ENC_ISO_UTF8: { - unsigned char *s = s0, *lim = s + strnlen((char *)s, max); + unsigned char *s = s0, 
*lim = s + strnlen((char *) s, max); unsigned char *cp = s; wchar_t w[2]; w[1] = '\0'; LOCAL_TERM_ERROR(t, 2 * (lim - s)); while (cp < lim && *cp) { - utf8proc_int32_t chr; - CELL *cl; - cp += get_utf8(cp, -1, &chr); - if (chr == '\0') - break; - w[0] = chr; - cl = HR; - HR += 2; - cl[0] = MkAtomTerm(Yap_LookupMaybeWideAtom(w)); - cl[1] = AbsPair(HR); - sz++; - if (sz == max) - break; + utf8proc_int32_t chr; + CELL *cl; + cp += get_utf8(cp, -1, &chr); + if (chr == '\0') + break; + w[0] = chr; + cl = HR; + HR += 2; + cl[0] = MkAtomTerm(Yap_LookupMaybeWideAtom(w)); + cl[1] = AbsPair(HR); + sz++; + if (sz == max) + break; } - break; - } - case ENC_ISO_LATIN1: { - unsigned char *s = s0, *lim = s + strnlen(s0, max); - unsigned char *cp = s; - char w[2]; - w[1] = '\0'; + if (out->type & YAP_STRING_DIFF) { + if (sz == 0) + t = out->dif; + else + HR[-1] = Globalize(out->dif PASS_REGS); + } else { + if (sz == 0) + t = TermNil; + else + HR[-1] = TermNil; + } + out->val.t = t; + return (t); +} - LOCAL_TERM_ERROR(t, 2 * (lim - s)); - while (cp < lim) { - utf8proc_int32_t chr; - cp = getChar(cp, &chr); - if (chr == '\0') - break; - w[0] = chr; - HR[0] = MkAtomTerm(Yap_LookupAtom(w)); - HR[1] = AbsPair(HR + 2); - HR += 2; - sz++; - if (sz == max) - break; +static Term write_codes(void *s0, seq_tv_t *out, size_t leng USES_REGS) { + Term t = AbsPair(HR); + size_t sz = 0; + size_t max = leng; + if (leng == 0) { + out->val.t = t; + return TermNil; } - break; - } - case ENC_WCHAR: { - wchar_t *s = s0, *lim = s + wcsnlen(s, max); - wchar_t *cp = s; + if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { + if (out->type & YAP_STRING_TRUNC && out->max < max) + max = out->max; + } + + unsigned char *s = s0, *lim = s + strlen((char *) s); + unsigned char *cp = s; wchar_t w[2]; w[1] = '\0'; - LOCAL_TERM_ERROR(t, 2 * (lim - s)); - while (*cp && cp < lim) { - utf8proc_int32_t chr; - cp = getWchar(cp, &chr); - if (chr == '\0') - break; - w[0] = chr; - HR[0] = 
MkAtomTerm(Yap_LookupMaybeWideAtom(w)); - HR[1] = AbsPair(HR + 2); - HR += 2; - sz++; - if (sz == max) - break; - } - break; - } - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - if (out->type & YAP_STRING_DIFF) { - if (sz == 0) - t = out->dif; - else - HR[-1] = Globalize(out->dif PASS_REGS); - } else { - if (sz == 0) - t = TermNil; - else - HR[-1] = TermNil; - } - out->val.t = t; - return (t); -} - -static Term write_codes(void *s0, seq_tv_t *out, encoding_t enc, int minimal, - size_t leng USES_REGS) { - Term t = AbsPair(HR); - size_t min = 0, max = leng; - size_t sz = 0; - - if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { - if (out->type & YAP_STRING_NCHARS) - min = out->sz; - if (out->type & YAP_STRING_TRUNC && out->max < max) - max = out->max; - } - - switch (enc) { - case ENC_ISO_UTF8: { - unsigned char *s = s0, *lim = s + strnlen(s0, max); - unsigned char *cp = s; - LOCAL_TERM_ERROR(t, 2 * (lim - s)); - while (*cp && cp < lim) { - utf8proc_int32_t chr; - cp += get_utf8(cp, -1, &chr); - HR[0] = MkIntTerm(chr); - HR[1] = AbsPair(HR + 2); - HR += 2; - sz++; - if (sz == max) - break; - } - break; - } - case ENC_ISO_LATIN1: { - unsigned char *s = s0, *lim = s + strnlen(s0, max); - unsigned char *cp = s; - - LOCAL_TERM_ERROR(t, 2 * (lim - s)); - while (cp < lim) { - utf8proc_int32_t chr; - cp = getChar(cp, &chr); - HR[0] = MkIntTerm(chr); - HR[1] = AbsPair(HR + 2); - HR += 2; - sz++; - if (sz == max) - break; - } - break; - } - case ENC_WCHAR: { - wchar_t *s = s0, *lim = s + wcsnlen(s, max); - wchar_t *cp = s; - - LOCAL_TERM_ERROR(t, 2 * (lim - s)); - while (cp < lim) { - utf8proc_int32_t chr; - cp = getWchar(cp, &chr); - HR[0] = MkIntTerm(chr); - HR[1] = AbsPair(HR + 2); - HR += 2; - sz++; - if (sz == max) - break; - } - break; - } - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - while (sz < min) { - 
HR[0] = MkIntTerm(MkIntTerm(0)); - HR[1] = AbsPair(HR + 2); - HR += 2; - sz++; - } - if (out->type & YAP_STRING_DIFF) { - if (sz == 0) - t = out->dif; - else - HR[-1] = Globalize(out->dif PASS_REGS); - } else { - if (sz == 0) - t = TermNil; - else - HR[-1] = TermNil; - } - out->val.t = t; - return (t); -} - -static Atom write_atom(void *s0, seq_tv_t *out, encoding_t enc, int minimal, - size_t leng USES_REGS) { - size_t max = leng; - if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { - if (out->type & YAP_STRING_TRUNC && out->max < max) - max = out->max; - } - - switch (enc) { - case ENC_ISO_UTF8: { - unsigned char *s = s0, *lim = s + strnlen(s0, max); - wchar_t *buf = malloc(sizeof(wchar_t) * ((lim + 2) - s)), *ptr = buf; - Atom at; - - while (*s && s < lim) { - utf8proc_int32_t chr; - s += get_utf8(s, -1, &chr); - *ptr++ = chr; - } - *ptr++ = '\0'; - at = Yap_LookupMaybeWideAtomWithLength(buf, max); - free(buf); - out->val.a = at; - return at; - } - case ENC_ISO_LATIN1: { - char *s = s0; - Atom at; - - max = strnlen(s, max); - at = Yap_LookupAtomWithLength(s, max); - out->val.a = at; - return at; - } - case ENC_WCHAR: { - wchar_t *s = s0; - Atom at; - - max = wcsnlen(s, max); - out->val.a = at = Yap_LookupMaybeWideAtomWithLength(s, max); - return at; - } - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc)); - } - return NULL; -} - -static size_t write_wbuffer(void *s0, seq_tv_t *out, encoding_t enc, - int minimal, size_t leng USES_REGS) { - size_t min = 0, max = leng, sz_end, sz; - out->enc = ENC_WCHAR; - if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { - if (out->type & YAP_STRING_NCHARS) - min = out->sz; - if (out->type & YAP_STRING_TRUNC && out->max < max) - max = out->max; - } - if (out->enc != enc || - out->type & (YAP_STRING_WITH_BUFFER | YAP_STRING_MALLOC)) { - if (enc != ENC_WCHAR) { - sz = strlen((char *)s0) + 1; - } else { - sz = wcslen((wchar_t *)s0) + 1; - } - if (sz < min) - sz = min; 
- sz *= sizeof(wchar_t); - if (out->type & (YAP_STRING_MALLOC)) { - out->val.w = malloc(sz); - } else if (!(out->type & (YAP_STRING_WITH_BUFFER))) { - if (ASP - (sz / sizeof(CELL) + 1) > HR + 1024) { - out->val.w = - (wchar_t *)(ASP - ((sz * sizeof(wchar_t *) / sizeof(CELL) + 1))); - } else - return -1; - } - } else { - out->val.w = s0; - sz_end = (wcslen(s0) + 1) * sizeof(wchar_t); - } - if (out->enc == ENC_WCHAR) { - switch (enc) { - case ENC_WCHAR: - if (out->type & (YAP_STRING_WITH_BUFFER | YAP_STRING_MALLOC)) { - wchar_t *s = s0; - size_t n = wcslen(s) + 1; - if (n < min) - n = min; - memcpy(out->val.c, s0, n * sizeof(wchar_t)); - out->val.w[n] = '\0'; - sz_end = n + 1; - } - case ENC_ISO_UTF8: { - unsigned char *s = s0, *lim = s + (max = strnlen(s0, max)); - unsigned char *cp = s; - wchar_t *buf0, *buf; - - buf = buf0 = out->val.w; - if (!buf) - return -1; - while (*cp && cp < lim) { + while (*cp) { utf8proc_int32_t chr; + CELL *cl; cp += get_utf8(cp, -1, &chr); - *buf++ = chr; - } - if (max >= min) - *buf++ = '\0'; - else - while (max < min) { - utf8proc_int32_t chr; - max++; - cp += get_utf8(cp, -1, &chr); - *buf++ = chr; - } - *buf = '\0'; - sz_end = (buf - buf0) + 1; - } break; - case ENC_ISO_LATIN1: { - char *s = s0; - size_t n = strlen(s), i; - if (n < min) - n = min; - for (i = 0; i < n; i++) - out->val.w[i] = s[i]; - out->val.w[n] = '\0'; - sz_end = n + 1; - } break; - default: - sz_end = -1; - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); + if (chr == '\0') + break; + cl = HR; + HR += 2; + cl[0] = MkIntegerTerm(chr); + cl[1] = AbsPair(HR); + sz++; + if (sz == max) + break; } - } - sz_end *= sizeof(wchar_t); - if (out->type & (YAP_STRING_MALLOC)) { - out->val.c = realloc(out->val.c, sz_end); - } - out->sz = sz_end; - return sz_end; -} - -size_t write_buffer(void *s0, seq_tv_t *out, encoding_t enc, int minimal, - size_t leng USES_REGS) { - size_t min = 0, max = leng, sz_end; - if (out->type & 
(YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { - if (out->type & YAP_STRING_NCHARS) - min = out->sz; - if (out->type & YAP_STRING_TRUNC && out->max < max) - max = out->max; - } - if (out->enc != enc) { - size_t sz; - if (enc != ENC_WCHAR) - sz = strlen((char *)s0) + 1; + if (out->type & YAP_STRING_DIFF) { + if (sz == 0) + t = out->dif; else - sz = wcslen((wchar_t *)s0) + 1; - if (sz < min) - sz = min; - if (!minimal) - sz *= 4; - if (out->type & (YAP_STRING_MALLOC)) { - out->val.uc = malloc(sz); - } else if (!(out->type & (YAP_STRING_WITH_BUFFER))) { - if (ASP - (sz / sizeof(CELL) + 1) > HR + 1024) { - out->val.c = Yap_PreAllocCodeSpace(); - } + HR[-1] = Globalize(out->dif PASS_REGS); + } else { + if (sz == 0) + t = TermNil; + else + HR[-1] = TermNil; } - } - if (out->enc == ENC_ISO_UTF8) { - switch (enc) { - case ENC_ISO_UTF8: - if (out->type & (YAP_STRING_WITH_BUFFER | YAP_STRING_MALLOC)) { - char *s = s0; - size_t n = strlen(s) + 1; - strcpy(out->val.c, s); - out->val.uc[n] = '\0'; - sz_end = n + 1; - } else { - out->val.c = s0; - sz_end = strlen(out->val.c) + 1; - } + out->val.t = t; + return (t); +} - break; - case ENC_ISO_LATIN1: { - unsigned char *s = s0, *lim = s + (max = strnlen(s0, max)); - unsigned char *cp = s, *buf0, *buf; - - buf = buf0 = out->val.uc; - if (!buf) - return -1; - while (*cp && cp < lim) { +static Atom write_atom(void *s0, seq_tv_t *out, size_t leng USES_REGS) { + { + unsigned char *s = s0; utf8proc_int32_t chr; - chr = *cp++; - buf += put_utf8(buf, chr); - } - if (max >= min) - *buf++ = '\0'; - else - while (max < min) { - max++; - utf8proc_int32_t chr; - chr = *cp++; - buf += put_utf8(buf, chr); + while (*s && get_utf8(s, -1, &chr) == 1) s++; + if (*s == '\0') + return out->val.a = Yap_LookupAtom((char*)s0); + s = s0; + size_t l = strlen(s0); + wchar_t *wbuf = Malloc(sizeof(wchar_t) * ((l + 1))), *wptr = wbuf; + Atom at; + if (!wbuf) + return NULL; + while (*s) { + utf8proc_int32_t chr; + int off = get_utf8(s, -1, &chr); + if (off < 0) { + 
s++; + continue; + } + s++; + *wptr++ = chr; } - buf[0] = '\0'; - sz_end = (buf + 1) - buf0; - } break; - case ENC_WCHAR: { - wchar_t *s = s0; - unsigned char *buf = out->val.uc; - size_t n = wcslen(s), i; - if (n < min) - n = min; - for (i = 0; i < n; i++) { - utf8proc_int32_t chr = s[i]; - buf += put_utf8(buf, chr); - } - *buf++ = '\0'; - sz_end = (buf + 1) - out->val.uc; - } break; - default: - sz_end = -1; - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - } else if (out->enc == ENC_ISO_LATIN1) { - switch (enc) { - case ENC_ISO_LATIN1: - if (out->type & YAP_STRING_WITH_BUFFER) { - char *s = s0; - size_t n = strlen(s), i; - if (n < min) - n = min; - memcpy(out->val.c, s0, n); - for (i = 0; i < n; i++) - out->val.w[i] = s[i]; - out->val.w[n] = '\0'; - sz_end = (n + 1) * sizeof(wchar_t); - } else { - sz_end = strlen(out->val.c) + 1; - out->val.c = s0; - } - break; - case ENC_ISO_UTF8: { - unsigned char *s = s0, *lim = s + (max = strnlen(s0, max)); - unsigned char *cp = s; - unsigned char *buf0, *buf; + *wptr++ = '\0'; - buf = buf0 = out->val.uc; - if (!buf) - return -1; - while (*cp && cp < lim) { - utf8proc_int32_t chr; - cp += get_utf8(cp, -1, &chr); - *buf++ = chr; - } - if (max >= min) - *buf++ = '\0'; - else - while (max < min) { - utf8proc_int32_t chr; - max++; - cp += get_utf8(cp, -1, &chr); - *buf++ = chr; + at = Yap_LookupMaybeWideAtom(wbuf); + out->val.a = at; + return at; + } +} + +size_t write_buffer(unsigned char *s0, seq_tv_t *out, size_t leng USES_REGS) { + size_t min = 0, max = leng, room_end; + if (out->enc == ENC_ISO_UTF8) { + room_end = strlen((char *) s0) + 1; + if (out->val.uc == NULL) { + out->val.uc = malloc(room_end); } - sz_end = buf - out->val.uc; - } break; - case ENC_WCHAR: { - wchar_t *s = s0; - size_t n = wcslen(s), i; - if (n < min) - n = min; - for (i = 0; i < n; i++) - out->val.c[i] = s[i]; - out->val.c[n] = '\0'; - sz_end = n + 1; - } break; - default: - sz_end = -1; 
- Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); + if (out->val.uc != s0) { + strcpy(out->val.c, (char *) s0); + } + } else if (out->enc == ENC_ISO_LATIN1) { + room_end = strlen((char *) s0) + 1; + unsigned char *s = s0; + unsigned char *cp = s; + unsigned char *buf = out->val.uc; + if (!buf) + return -1; + while (*cp ) { + utf8proc_int32_t chr; + int off = get_utf8(cp, -1, &chr); + if (off <= 0) + off = + *buf++ = chr; + } + if (max >= min) + *buf++ = '\0'; + else + while (max < min) { + utf8proc_int32_t chr; + max++; + cp += get_utf8(cp, -1, &chr); + *buf++ = chr; + } + room_end = buf - out->val.uc; + } else if (out->enc == ENC_WCHAR) { + unsigned char *s = s0, *lim = s + (max = strnlen((char *) s0, max)); + unsigned char *cp = s; + wchar_t *buf0, *buf; + + buf = buf0 = out->val.w; + if (!buf) + return -1; + while (*cp && cp < lim) { + utf8proc_int32_t chr; + cp += get_utf8(cp, -1, &chr); + *buf++ = chr; + } + if (max >= min) + *buf++ = '\0'; + else + while (max < min) { + utf8proc_int32_t chr; + max++; + cp += get_utf8(cp, -1, &chr); + *buf++ = chr; + } + *buf = '\0'; + room_end = (buf - buf0) + 1; + } else { + // no other encodings are supported. + room_end = -1; } - } else { - // no other encodings are supported. 
- sz_end = -1; - } - if (out->type & (YAP_STRING_MALLOC)) { - out->val.c = realloc(out->val.c, sz_end); - } - out->sz = sz_end; - return sz_end; + return room_end; } -static size_t write_length(void *s0, seq_tv_t *out, encoding_t enc, int minimal, +static size_t write_length(const unsigned char *s0, seq_tv_t *out, size_t leng USES_REGS) { - size_t max = -1; - - if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { - if (out->type & YAP_STRING_NCHARS && out->sz != (size_t)-1) - return out->sz; - if (out->type & YAP_STRING_TRUNC) - max = out->max; - } - - switch (enc) { - case ENC_ISO_UTF8: { - const unsigned char *s = s0; - return strlen_utf8(s); - } - case ENC_ISO_LATIN1: { - const char *s = s0; - return strnlen(s, max); - } - case ENC_WCHAR: { - const wchar_t *s = s0; - return wcsnlen(s, max); - } - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - return (size_t)-1; + return leng; } -static Term write_number(void *s0, seq_tv_t *out, encoding_t enc, int minimal, - int size USES_REGS) { - return Yap_StringToNumberTerm(s0, &enc); +static Term write_number( unsigned char *s, seq_tv_t *out, int size + USES_REGS) { + Term t; + t = Yap_StringToNumberTerm((char *)s, &out->enc); + return t; } -static Term string_to_term(void *s0, seq_tv_t *out, encoding_t enc, int minimal, - size_t leng USES_REGS) { - Term o = out->val.t = - Yap_StringToTerm(s0, strlen(s0) + 1, &enc, GLOBAL_MaxPriority, NULL); - return o; +static Term string_to_term(void *s, seq_tv_t *out, size_t leng USES_REGS) { + Term o; + o = out->val.t = + Yap_StringToTerm(s, strlen(s) + 1, &out->enc, + GLOBAL_MaxPriority, NULL); + return o; } -bool write_Text(void *inp, seq_tv_t *out, encoding_t enc, int minimal, - size_t leng USES_REGS) { - /* we know what the term is */ - switch (out->type & YAP_TYPE_MASK) { - case YAP_STRING_STRING: - out->val.t = write_strings(inp, out, enc, minimal, leng PASS_REGS); - return out->val.t != 0; - case 
YAP_STRING_ATOMS: - out->val.t = write_atoms(inp, out, enc, minimal, leng PASS_REGS); - return out->val.t != 0; - case YAP_STRING_CODES: - out->val.t = write_codes(inp, out, enc, minimal, leng PASS_REGS); - return out->val.t != 0; - case YAP_STRING_LENGTH: - out->val.l = write_length(inp, out, enc, minimal, leng PASS_REGS); - return out->val.l != (size_t)(-1); - case YAP_STRING_ATOM: - out->val.a = write_atom(inp, out, enc, minimal, leng PASS_REGS); - return out->val.a != NULL; - case YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG: - out->val.t = write_number(inp, out, enc, minimal, leng PASS_REGS); - return out->val.t != 0; - case YAP_STRING_CHARS: { - size_t sz = write_buffer(inp, out, enc, minimal, leng PASS_REGS); - return ((Int)sz > 0); - } - case YAP_STRING_WCHARS: { - size_t sz = write_wbuffer(inp, out, enc, minimal, leng PASS_REGS); - return ((Int)sz > 0); - } - default: - if (!(out->type & YAP_STRING_TERM)) - return 0; - if (out->type & (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG)) - if ((out->val.t = write_number(inp, out, enc, minimal, leng PASS_REGS)) != - 0L) - return out->val.t != 0; - if (out->type & (YAP_STRING_ATOM)) - if (write_atom(inp, out, enc, minimal, leng PASS_REGS) != NIL) { - Atom at = out->val.a; - if (at != NIL) - out->val.t = MkAtomTerm(at); - return at != NIL; - } - if ((out->val.t = string_to_term(inp, out, enc, minimal, leng PASS_REGS)) != - 0L) - return out->val.t != 0; - } - return false; +bool write_Text(unsigned char *inp, seq_tv_t *out, size_t leng USES_REGS) { + /* we know what the term is */ + if (out->type & YAP_STRING_TERM) { + if ((out->val.t = string_to_term(inp, out, leng PASS_REGS)) != 0L) + return out->val.t != 0; + } + if (out->type & + (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG)) { + if ( + (out->val.t = write_number(inp, out, leng PASS_REGS)) != + 0L) { + //Yap_DebugPlWriteln(out->val.t); + + return true; + } + + if (!(out->type & YAP_STRING_ATOM)) + return false; + } + if (out->type & 
(YAP_STRING_ATOM)) { + if ( + write_atom(inp, out, leng PASS_REGS) != NIL) { + Atom at = out->val.a; + if (at != NIL) + out->val.t = MkAtomTerm(at); + //Yap_DebugPlWriteln(out->val.t); + return at != NIL; + } + } + + switch (out->type & YAP_TYPE_MASK) { + case YAP_STRING_CHARS: { + size_t room = write_buffer(inp, out, leng PASS_REGS); + //printf("%s\n", out->val.c); + return ((Int) room > 0); + } + case YAP_STRING_WCHARS: { + size_t room = write_buffer(inp, out, leng PASS_REGS); + //printf("%S\n", out->val.w); + return ((Int) room > 0); + } + case YAP_STRING_STRING: + out->val.t = write_strings(inp, out, leng PASS_REGS); + //Yap_DebugPlWriteln(out->val.t); + return out->val.t != 0; + case YAP_STRING_ATOMS: + out->val.t = write_atoms(inp, out, leng PASS_REGS); + //Yap_DebugPlWriteln(out->val.t); + return out->val.t != 0; + case YAP_STRING_CODES: + out->val.t = write_codes(inp, out, leng PASS_REGS); + //Yap_DebugPlWriteln(out->val.t); + return out->val.t != 0; + case YAP_STRING_LENGTH: + out->val.l = write_length(inp, out, leng PASS_REGS); + //printf("s\n",out->val.l); + return out->val.l != (size_t) (-1); + case YAP_STRING_ATOM: + out->val.a = write_atom(inp, out, leng PASS_REGS); + //Yap_DebugPlWriteln(out->val.t); + return out->val.a != NULL; + case YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG: + out->val.t = write_number(inp, out, leng PASS_REGS); + //Yap_DebugPlWriteln(out->val.t); + return out->val.t != 0; + default: { + } + } + return false; } -static size_t upcase(void *s0, seq_tv_t *out, encoding_t enc USES_REGS) { +static size_t upcase(void *s0, seq_tv_t *out USES_REGS) { - switch (enc) { - case ENC_ISO_UTF8: { unsigned char *s = s0; while (*s) { - // assumes the two code have always the same size; - utf8proc_int32_t chr; - get_utf8(s, -1, &chr); - chr = utf8proc_toupper(chr); - s += put_utf8(s, chr); + // assumes the two code have always the same size; + utf8proc_int32_t chr; + get_utf8(s, -1, &chr); + chr = utf8proc_toupper(chr); + s += put_utf8(s, 
chr); } return true; - } - - case ENC_ISO_LATIN1: { - unsigned char *s = s0; - utf8proc_int32_t chr; - - while ((chr = *s)) { - // assumes the two code have always the same size; - chr = *s; - chr = utf8proc_toupper(chr); - *s++ = chr; - } - return true; - } - - case ENC_WCHAR: { - wchar_t *s = s0; - utf8proc_int32_t chr; - - while ((chr = *s)) { - // assumes the two code have always the same size; - chr = *s; - chr = utf8proc_toupper(chr); - *s++ = chr; - } - return true; - } - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - return false; } -static size_t downcase(void *s0, seq_tv_t *out, encoding_t enc USES_REGS) { +static size_t downcase(void *s0, seq_tv_t *out USES_REGS) { - switch (enc) { - case ENC_ISO_UTF8: { unsigned char *s = s0; while (*s) { - // assumes the two code have always the same size; - utf8proc_int32_t chr; - get_utf8(s, -1, &chr); - chr = utf8proc_tolower(chr); - s += put_utf8(s, chr); + // assumes the two code have always the same size; + utf8proc_int32_t chr; + get_utf8(s, -1, &chr); + chr = utf8proc_tolower(chr); + s += put_utf8(s, chr); } return true; - } - - case ENC_ISO_LATIN1: { - unsigned char *s = s0; - utf8proc_int32_t chr; - - while ((chr = *s)) { - // assumes the two code have always the same size; - chr = *s; - chr = utf8proc_tolower(chr); - *s++ = chr; - } - return true; - } - case ENC_WCHAR: { - wchar_t *s = s0; - utf8proc_int32_t chr; - - while ((chr = *s)) { - // assumes the two code have always the same size; - chr = *s; - chr = utf8proc_tolower(chr); - *s++ = chr; - } - return true; - } - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - return false; } bool Yap_CVT_Text(seq_tv_t *inp, seq_tv_t *out USES_REGS) { - encoding_t enc; - int minimal = FALSE; - char *buf; - size_t leng; - bool new_malloc = false; + unsigned char *buf; + bool rc; - buf = Yap_readText(NULL, inp, &enc, 
&minimal, &leng PASS_REGS); - if (!buf) { - return 0L; - } else { - if (out->type & (YAP_STRING_MALLOC) && !(inp->type & (YAP_STRING_MALLOC))) { - size_t sz, len; - char *nbuf; - - if (enc == ENC_WCHAR) { - sz = sizeof(wchar_t)*((len = wcslen((wchar_t*)buf))+1); - } else if ( out->enc == ENC_WCHAR) { - sz = sizeof(wchar_t)*((len = strlen(buf))+1); - } else if (inp->enc == ENC_ISO_LATIN1) { - sz = 2 * (len = strlen(buf))+1; - } else { - sz = (len = strlen(buf))+1; + size_t leng; + init_alloc(__LINE__); + /* + f//printf(stderr, "[ %d ", n++) ; + if (inp->type & (YAP_STRING_TERM|YAP_STRING_ATOM|YAP_STRING_ATOMS_CODES + |YAP_STRING_STRING)) + //Yap_DebugPlWriteln(inp->val.t); + else if (inp->type & YAP_STRING_WCHARS) fprintf(stderr,"S %S\n", inp->val + .w); + else fprintf(stderr,"s %s\n", inp->val.c); + */ + buf = Yap_readText(inp, &leng PASS_REGS); + if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) { + if (out->max < leng) { + const unsigned char *ptr = skip_utf8(buf, leng); + size_t diff = (ptr - buf); + char *nbuf = Malloc(diff + 1); + memcpy(nbuf, buf, diff); + nbuf[diff] = '\0'; + leng = out->max; } - nbuf = malloc(sz); - if (!buf) { - return 0L; - } - new_malloc = true; - if (enc == ENC_WCHAR) { - wcscpy((wchar_t*)nbuf, (wchar_t*)buf); - } else { - strcpy(nbuf, buf); + // else if (out->type & YAP_STRING_NCHARS && + // const unsigned char *ptr = skip_utf8(buf, leng) + } + + if (!buf) { + unprotect_stack(NULL); + return 0L; + } + if (out->type & (YAP_STRING_UPCASE | YAP_STRING_DOWNCASE)) { + if (out->type & YAP_STRING_UPCASE) { + if (!upcase(buf, out)) { + unprotect_stack(NULL); + return false; + } + } + if (out->type & YAP_STRING_DOWNCASE) { + if (!downcase(buf, out)) { + unprotect_stack(NULL); + return false; + } } - buf = nbuf; } - } - if (out->type & (YAP_STRING_UPCASE | YAP_STRING_DOWNCASE)) { - if (out->type & YAP_STRING_UPCASE) { - if (!upcase(buf, out, enc)) - return false; - } - if (out->type & YAP_STRING_DOWNCASE) { - if (!downcase(buf, out, enc)) 
- return false; - } - } - bool rc = write_Text(buf, out, enc, minimal, leng PASS_REGS); - if (new_malloc && out->val.c != buf) { - free( buf ); - } - return rc; +rc = write_Text(buf, out, leng PASS_REGS); + unprotect_stack(out); + /* fprintf(stderr, " -> "); + if (!rc) fprintf(stderr, "NULL"); + else if (out->type & + (YAP_STRING_TERM|YAP_STRING_ATOMS_CODES + |YAP_STRING_STRING)) //Yap_DebugPlWrite(out->val.t); + else if (out->type & + YAP_STRING_ATOM) //Yap_DebugPlWriteln(MkAtomTerm(out->val.a)); + else if (out->type & YAP_STRING_WCHARS) fprintf(stderr, "%S", + out->val.w); + else + fprintf(stderr, "%s", out->val.c); + fprintf(stderr, "\n]\n"); */ + return rc; } -static void *compute_end(void *s0, encoding_t enc) { - switch (enc) { - case ENC_ISO_LATIN1: - case ENC_ISO_UTF8: { - char *s = (char *)s0; - return s + (1 + strlen(s)); - } - case ENC_WCHAR: { - wchar_t *s = (wchar_t *)s0; - return s + (1 + wcslen(s)); - } - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - return NULL; -} - -static void *advance_Text(void *s, int l, encoding_t enc) { - switch (enc) { - case ENC_ISO_LATIN1: - return ((char *)s) + l; - case ENC_ISO_UTF8: - return (char *)skip_utf8(s, l); - case ENC_WCHAR: - return ((wchar_t *)s) + l; - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc), __FUNCTION__); - } - return s; -} - -static int cmp_Text(void *s1, void *s2, int l, encoding_t enc1, - encoding_t enc2) { - Int i; - switch (enc1) { - case ENC_ISO_LATIN1: { - char *w1 = (char *)s1; - switch (enc2) { - case ENC_ISO_LATIN1: - return strncmp(s1, s2, l); - case ENC_ISO_UTF8: { - utf8proc_int32_t chr1, chr2; - unsigned char *w2 = s2; - for (i = 0; i < l; i++) { - chr1 = *w1++; - w2 += get_utf8(w2, -1, &chr2); - if (chr1 - chr2) - return chr1 - chr2; - } - } - return 0; - case ENC_WCHAR: { - utf8proc_int32_t chr1, chr2; - wchar_t *w2 = s2; - for (i = 0; i < l; i++) { - 
chr1 = *w1++; - chr2 = *w2++; - if (chr1 - chr2) - return chr1 - chr2; - } - } - return 0; - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc2), __FUNCTION__); - } - } - case ENC_ISO_UTF8: { - unsigned char *w1 = s1; - switch (enc2) { - case ENC_ISO_LATIN1: { - utf8proc_int32_t chr1, chr2; - unsigned char *w2 = s2; - for (i = 0; i < l; i++) { - chr2 = *w2++; - w1 += get_utf8(w1, -1, &chr1); - if (chr1 - chr2) - return chr1 - chr2; - } - } - return 0; - case ENC_ISO_UTF8: { - utf8proc_int32_t chr1, chr2; - unsigned char *w2 = s2; - for (i = 0; i < l; i++) { - w2 += get_utf8(w2, -1, &chr2); - w1 += get_utf8(w1, -1, &chr1); - if (chr1 - chr2) - return chr1 - chr2; - } - } - return 0; - case ENC_WCHAR: { - utf8proc_int32_t chr1, chr2; - wchar_t *w2 = s2; - for (i = 0; i < l; i++) { - chr2 = *w2++; - w1 += get_utf8(w1, -1, &chr1); - if (chr1 - chr2) - return chr1 - chr2; - } - } - return 0; - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc2), __FUNCTION__); - } - } - case ENC_WCHAR: { - wchar_t *w1 = (wchar_t *)s1; - switch (enc2) { - case ENC_ISO_LATIN1: { - utf8proc_int32_t chr1, chr2; - char *w2 = s2; - for (i = 0; i < l; i++) { - chr1 = *w1++; - chr2 = *w2++; - if (chr1 - chr2) - return chr1 - chr2; - } - } - return 0; - case ENC_ISO_UTF8: { - utf8proc_int32_t chr1, chr2; - unsigned char *w2 = s2; - for (i = 0; i < l; i++) { - chr1 = *w1++; - w2 += get_utf8(w2, -1, &chr2); - if (chr1 - chr2) - return chr1 - chr2; - } - } - return 0; - case ENC_WCHAR: - return wcsncmp(s1, s2, l); - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc2), __FUNCTION__); - } - } - default: - Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", - enc_name(enc1), __FUNCTION__); - } - return 0; -} - -static void *concat(int n, seq_tv_t *out, void *sv[], encoding_t encv[], - size_t lengv[] USES_REGS) { - if (out->type 
== YAP_STRING_STRING) { - /* we assume we concatenate strings only, or ASCII stuff like numbers */ - Term t = init_tstring(PASS_REGS1); - unsigned char *buf = buf_from_tstring(HR); +static int cmp_Text(const unsigned char *s1, const unsigned char *s2, int l) { + const unsigned char *w1 = s1; + utf8proc_int32_t chr1, chr2; + const unsigned char *w2 = s2; int i; - for (i = 0; i < n; i++) { - if (encv[i] == ENC_WCHAR) { - wchar_t *ptr = sv[i]; - utf8proc_int32_t chr; - while ((chr = *ptr++)) - buf += put_utf8(buf, chr); - } else if (encv[i] == ENC_ISO_LATIN1) { - char *ptr = sv[i]; - utf8proc_int32_t chr; - while ((chr = *ptr++)) - buf += put_utf8(buf, chr); - } else { - char *ptr = sv[i]; - utf8proc_int32_t chr; - while ((chr = *ptr++)) - *buf++ = chr; - } + for (i = 0; i < l; i++) { + w2 += get_utf8(w2, -1, &chr2); + w1 += get_utf8(w1, -1, &chr1); + if (chr1 - chr2) + return chr1 - chr2; } - *buf++ = '\0'; - close_tstring(buf PASS_REGS); - out->val.t = t; - return HR; - } else { - encoding_t enc = ENC_ISO_LATIN1; - size_t sz = 0; - - int i; - for (i = 0; i < n; i++) { - if (encv[i] != ENC_ISO_LATIN1) { - enc = ENC_WCHAR; - } - sz += write_length(sv[i], out, encv[i], FALSE, lengv[i] PASS_REGS); - } - if (enc == ENC_WCHAR) { - /* wide atom */ - wchar_t *buf = (wchar_t *)HR; - Atom at; - LOCAL_ERROR(MkAtomTerm(Yap_LookupWideAtom(buf)), sz + 3); - for (i = 0; i < n; i++) { - if (encv[i] == ENC_WCHAR) { - wchar_t *ptr = sv[i]; - utf8proc_int32_t chr; - while ((chr = *ptr++) != '\0') - *buf++ = chr; - } else if (encv[i] == ENC_ISO_LATIN1) { - char *ptr = sv[i]; - utf8proc_int32_t chr; - while ((chr = *ptr++) != '\0') - *buf++ = (unsigned char)chr; - } else { - unsigned char *ptr = sv[i]; - utf8proc_int32_t chr; - while ((ptr += get_utf8(ptr, -1, &chr)) != NULL) { - if (chr == '\0') - break; - else - *buf++ = chr; - } - } - } - *buf++ = '\0'; - at = out->val.a = Yap_LookupWideAtom((wchar_t *)HR); - return at; - } else { - /* atom */ - char *buf = (char *)HR; - Atom at; - - 
LOCAL_ERROR(MkAtomTerm(Yap_LookupAtom(buf)), sz / sizeof(CELL) + 3); - for (i = 0; i < n; i++) { - char *ptr = sv[i]; - utf8proc_int32_t chr; - while ((chr = *ptr++) != '\0') - *buf++ = chr; - } - *buf++ = '\0'; - at = out->val.a = Yap_LookupAtom((const char *)HR); - return at; - } - } - return NULL; + return 0; } -static void *slice(size_t min, size_t max, void *buf, seq_tv_t *out, - encoding_t enc USES_REGS) { - if (out->type == YAP_STRING_STRING) { - /* we assume we concatenate strings only, or ASCII stuff like numbers */ - Term t = init_tstring(PASS_REGS1); - unsigned char *nbuf = buf_from_tstring(HR); - if (enc == ENC_WCHAR) { - wchar_t *ptr = (wchar_t *)buf + min; - utf8proc_int32_t chr; - while (min++ < max) { - chr = *ptr++; - nbuf += put_utf8(nbuf, chr); - } - } else if (enc == ENC_ISO_LATIN1) { - const unsigned char *ptr = (const unsigned char *)buf + min; - utf8proc_int32_t chr; - while (min++ < max) { - chr = *ptr++; - nbuf += put_utf8(nbuf, chr); - } - } else { - const unsigned char *ptr = skip_utf8(buf, min); - utf8proc_int32_t chr; - if (!ptr) - return NULL; - while (min++ < max) { - ptr += get_utf8(ptr, -1, &chr); - nbuf += put_utf8(nbuf, chr); - } - } - *nbuf++ = '\0'; - close_tstring(nbuf PASS_REGS); - out->val.t = t; - return (void *)StringOfTerm(t); - } else { - Atom at; - /* atom */ - if (enc == ENC_WCHAR) { - /* wide atom */ - wchar_t *nbuf = (wchar_t *)HR; - wchar_t *ptr = (wchar_t *)buf + min; - if (max > min) { - LOCAL_ERROR(MkAtomTerm(Yap_LookupWideAtom(buf)), - (max - min) * sizeof(wchar_t)); - memcpy(nbuf, ptr, (max - min) * sizeof(wchar_t)); - } - nbuf[max - min] = '\0'; - at = Yap_LookupMaybeWideAtom(nbuf); - } else if (enc == ENC_ISO_LATIN1) { - /* atom */ - char *nbuf = (char *)HR; +static unsigned char *concat(int n, unsigned char *sv[] USES_REGS) { + char *buf; + unsigned char *buf0; + size_t room = 0; + int i; - if (max > min) { - char *ptr = (char *)buf + min; - LOCAL_ERROR(MkAtomTerm(Yap_LookupAtom(buf)), max - min); - 
memcpy(nbuf, ptr, (max - min)); - } - nbuf[max - min] = '\0'; - at = Yap_LookupAtom(nbuf); - } else { - /* atom */ - wchar_t *nbuf = (wchar_t *)HR; - const unsigned char *ptr = skip_utf8(buf, min); - utf8proc_int32_t chr; - - LOCAL_ERROR(MkAtomTerm(Yap_LookupAtom(buf)), max - min); - while (min++ < max) { - ptr += get_utf8(ptr, -1, &chr); - *nbuf++ = chr; - } - nbuf[0] = '\0'; - at = Yap_LookupMaybeWideAtom((wchar_t *)HR); + for (i = 0; i < n; i++) { + room += strlen((char *) sv[i]); } - out->val.a = at; - return at->StrOfAE; - } - return NULL; + buf = Malloc(room + 1); + buf0 = (unsigned char *) buf; + for (i = 0; i < n; i++) { + buf = stpcpy(buf, (char *) sv[i]); + } + return buf0; +} + +static void *slice(size_t min, size_t max, unsigned char *buf USES_REGS) { + unsigned char *nbuf = Malloc((max - min) * 4 + 1); + const unsigned char *ptr = skip_utf8(buf, min); + unsigned char *nptr = nbuf; + utf8proc_int32_t chr; + + while (min++ < max) { + ptr += get_utf8(ptr, -1, &chr); + nptr += put_utf8(nptr, chr); + } + nptr[0] = '\0'; + return nbuf; } // // Out must be an atom or a string -void *Yap_Concat_Text(int n, seq_tv_t inp[], seq_tv_t *out USES_REGS) { - encoding_t *encv; - void **bufv; - int minimal = FALSE; - void *buf; - size_t leng, *lengv; - int i; - Term t = ARG1; - bufv = (void **)malloc(n * sizeof(void *)); - HEAP_TERM_ERROR(bufv, void *); - encv = (encoding_t *)malloc(n * sizeof(encoding_t)); - HEAP_ERROR(encv, encoding_t); - buf = NULL; - for (i = 0; i < n; i++) { - void *nbuf = - Yap_readText(buf, inp + i, encv + i, &minimal, &leng PASS_REGS); - - if (!nbuf) - return 0L; - bufv[i] = nbuf; - if ((char *)nbuf >= AuxBase && (char *)nbuf < AuxTop) { - buf = compute_end(nbuf, encv[i]); +bool Yap_Concat_Text(int tot, seq_tv_t inp[], seq_tv_t *out USES_REGS) { + unsigned char **bufv; + unsigned char *buf; + size_t leng; + int i; + init_alloc(__LINE__); + bufv = Malloc(tot * sizeof(unsigned char *)); + if (!bufv) { + unprotect_stack(NULL); + return NULL; } - } 
- lengv = (size_t *)malloc(n * sizeof(size_t)); - HEAP_ERROR(lengv, size_t); - buf = concat(n, out, bufv, encv, lengv PASS_REGS); - free(bufv); - free(lengv); - free(encv); - return buf; + for (i = 0; i < tot; i++) { + inp[i].type |= YAP_STRING_IN_TMP; + unsigned char *nbuf = Yap_readText(inp + i, &leng PASS_REGS); + + if (!nbuf) { + unprotect_stack(NULL); + return NULL; + } + bufv[i] = nbuf; + } + buf = concat(tot, bufv PASS_REGS); + bool rc = write_Text(buf, out, leng PASS_REGS); + unprotect_stack(out); + return rc; } // -// out must be an atom or a string -void *Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp, encoding_t encv[], - seq_tv_t outv[] USES_REGS) { - encoding_t enc; - int minimal = FALSE; - void *buf, *store; - size_t l, leng; - int i, min; +bool Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp, + seq_tv_t outv[] USES_REGS) { + unsigned char *buf; + size_t l; + init_alloc(__LINE__); + inp->type |= YAP_STRING_IN_TMP; + buf = Yap_readText(inp, &l PASS_REGS); + if (!buf) { + unprotect_stack(NULL); - buf = Yap_readText(NULL, inp, &enc, &minimal, &leng PASS_REGS); - if (!buf) - return NULL; - l = write_length(buf, inp, enc, minimal, leng PASS_REGS); - /* where to allocate next is the most complicated part */ - if ((char *)buf >= AuxBase && (char *)buf < AuxTop) { - store = compute_end(buf, enc); - } else { - store = NULL; - } - - if (!cuts) { - if (n == 2) { - size_t l0, l1; - size_t leng0, leng1; - encoding_t enc0, enc1; - int minimal0, minimal1; - void *buf0, *buf1; - - if (outv[0].val.t) { - buf0 = Yap_readText(store, outv, &enc0, &minimal0, &leng0 PASS_REGS); - if (!buf0) - return NULL; - l0 = write_length(buf0, outv, enc, minimal0, leng0 PASS_REGS); - if (cmp_Text(buf, buf0, l0, enc, enc0) != 0) - return NULL; - - l1 = l - l0; - - buf1 = slice(l0, l, buf, outv + 1, enc PASS_REGS); - if (encv) - encv[1] = enc; - return buf1; - } else /* if (outv[1].val.t) */ { - buf1 = - Yap_readText(store, outv + 1, &enc1, &minimal1, &leng1 PASS_REGS); - if 
(!buf1) - return NULL; - l1 = write_length(buf1, outv + 1, enc1, minimal1, leng1 PASS_REGS); - if (l < l1) - return NULL; - l0 = l - l1; - if (cmp_Text(advance_Text(buf, l0, enc), buf1, l1, enc, enc1) != 0) - return NULL; - buf0 = slice(0, l0, buf, outv, enc PASS_REGS); - if (encv) - encv[0] = enc; - return buf0; - } + return false; } - } - for (i = 0; i < n; i++) { - if (i == 0) - min = 0; - else - min = cuts[i - 1]; - slice(min, cuts[i], buf, outv + i, enc PASS_REGS); - if (!(outv[i].val.a)) - return NULL; - if (encv) - encv[i] = enc; - } - return (void *)outv; - ; + if (!cuts) { + if (n == 2) { + size_t l0, l1; + unsigned char *buf0, *buf1; + + if (outv[0].val.t) { + buf0 = Yap_readText(outv, &l0 PASS_REGS); + if (!buf0) { + unprotect_stack(NULL); + return false; + } + if (cmp_Text(buf, buf0, l0) != 0) { + unprotect_stack(NULL); + return false; + } + l1 = l - l0; + + buf1 = slice(l0, l, buf PASS_REGS); + bool rc = write_Text(buf1, outv + 1, l1 PASS_REGS); + if (!rc) { + unprotect_stack(NULL); + return false; + } + unprotect_stack((outv+1)); + return rc; + } else /* if (outv[1].val.t) */ { + buf1 = Yap_readText(outv + 1, &l1 PASS_REGS); + if (!buf1) { + unprotect_stack(NULL); + return false; + } + l0 = l - l1; + if (cmp_Text(skip_utf8((const unsigned char *) buf, l0), buf1, + l1) != + 0) { + unprotect_stack(NULL); + return false; + } + buf0 = slice(0, l0, buf PASS_REGS); + bool rc = write_Text(buf0, outv, l0 PASS_REGS); + unprotect_stack((rc ? 
NULL : outv+0)); + return rc; + } + } + } + int i, next; + for (i = 0; i < n; i++) { + if (i == 0) + next = 0; + else + next = cuts[i-1]; + void *bufi = slice(next, cuts[i], buf PASS_REGS); + if (!write_Text(bufi, outv + i, cuts[i]-next PASS_REGS)) { + unprotect_stack(NULL); + return false; + } + } + unprotect_stack(outv); + + return true; } /** - * Function to convert a generic text term (string, atom, list of codes, list of + * Function to convert a generic text term (string, atom, list of codes, list +of atoms) into a buff er. * @@ -1651,31 +1011,30 @@ reclai it called. */ const char *Yap_TextTermToText(Term t, char *buf, size_t len, encoding_t enc) { - CACHE_REGS - seq_tv_t inp, out; - - inp.val.t = t; - if (IsAtomTerm(t)) { - inp.type = YAP_STRING_ATOM; - if (IsWideAtom(AtomOfTerm(t))) - inp.enc = ENC_WCHAR; - else - inp.enc = ENC_ISO_LATIN1; - } else if (IsStringTerm(t)) { - inp.type = YAP_STRING_STRING; - inp.enc = ENC_ISO_UTF8; - } else if (IsPairTerm(t)) { - inp.type = (YAP_STRING_CODES | YAP_STRING_ATOMS); - } else { - Yap_Error(TYPE_ERROR_TEXT, t, NULL); - return false; - } - out.enc = enc; - out.type = YAP_STRING_CHARS; - out.val.c = buf; - if (!Yap_CVT_Text(&inp, &out PASS_REGS)) - return NULL; - return out.val.c; + CACHE_REGS + seq_tv_t inp, out; + inp.val.t = t; + if (IsAtomTerm(t)) { + inp.type = YAP_STRING_ATOM; + if (IsWideAtom(AtomOfTerm(t))) + inp.enc = ENC_WCHAR; + else + inp.enc = ENC_ISO_LATIN1; + } else if (IsStringTerm(t)) { + inp.type = YAP_STRING_STRING; + inp.enc = ENC_ISO_UTF8; + } else if (IsPairTerm(t)) { + inp.type = (YAP_STRING_CODES | YAP_STRING_ATOMS); + } else { + Yap_Error(TYPE_ERROR_TEXT, t, NULL); + return false; + } + out.enc = enc; + out.type = YAP_STRING_CHARS; + out.val.c = buf; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return NULL; + return out.val.c; } /** @@ -1690,79 +1049,80 @@ const char *Yap_TextTermToText(Term t, char *buf, size_t len, encoding_t enc) { * @return the temporary string */ const char 
*Yap_PredIndicatorToUTF8String(PredEntry *ap) { - CACHE_REGS - char *s = LOCAL_FileNameBuf, *smax = s + YAP_FILENAME_MAX; - Atom at; - arity_t arity; - Functor f; - - Term tmod = ap->ModuleOfPred; - if (tmod) { - Yap_AtomToUTF8Text(AtomOfTerm(tmod), s); + CACHE_REGS + Atom at; + arity_t arity; + Functor f; + char *s, *smax, *s0; + s = s0 = malloc(1024); + smax = s + 1024; + Term tmod = ap->ModuleOfPred; + if (tmod) { + Yap_AtomToUTF8Text(AtomOfTerm(tmod), s); + s += strlen(s); + if (smax - s > 1) { + strcat(s, ":"); + } else { + return NULL; + } + s++; + } else { + if (smax - s > strlen("prolog:")) { + s = strcpy(s, "prolog:"); + } else { + return NULL; + } + } + // follows the actual functor + if (ap->ModuleOfPred == IDB_MODULE) { + if (ap->PredFlags & NumberDBPredFlag) { + Int key = ap->src.IndxId; + snprintf(s, smax - s, "%" + PRIdPTR, key); + return LOCAL_FileNameBuf; + } else if (ap->PredFlags & AtomDBPredFlag) { + at = (Atom) (ap->FunctorOfPred); + if (!Yap_AtomToUTF8Text(at, s)) + return NULL; + } else { + f = ap->FunctorOfPred; + at = NameOfFunctor(f); + arity = ArityOfFunctor(f); + } + } else { + arity = ap->ArityOfPE; + if (arity) { + at = NameOfFunctor(ap->FunctorOfPred); + } else { + at = (Atom) (ap->FunctorOfPred); + } + } + if (!Yap_AtomToUTF8Text(at, s)) { + return NULL; + } s += strlen(s); - if (smax -s > 1) - strcpy(s, ":"); - else - { - return NULL; - } - s++; - } else { - if (smax -s > strlen("prolog:") ) { - s = strcpy(s, "prolog:"); - } - else - { - return NULL; - } - } - // follows the actual functor - if (ap->ModuleOfPred == IDB_MODULE) { - if (ap->PredFlags & NumberDBPredFlag) { - Int key = ap->src.IndxId; - snprintf(s, smax - s, "%" PRIdPTR, key); - return LOCAL_FileNameBuf; - } else if (ap->PredFlags & AtomDBPredFlag) { - at = (Atom)(ap->FunctorOfPred); - if (!Yap_AtomToUTF8Text(at, s)) - return NULL; - } else { - f = ap->FunctorOfPred; - at = NameOfFunctor(f); - arity = ArityOfFunctor(f); - } - } else { - arity = ap->ArityOfPE; - if 
(arity) { - at = NameOfFunctor(ap->FunctorOfPred); - } else { - at = (Atom)(ap->FunctorOfPred); - } - } - if (!Yap_AtomToUTF8Text(at, s)) - return NULL; - s += strlen(s); - snprintf(s, smax - s, "/%" PRIdPTR, arity); - return LOCAL_FileNameBuf; + snprintf(s, smax - s, "/%" + PRIdPTR, arity); + return s0; } /** * Convert from a text buffer (8-bit) to a term that has the same type as * _Tguide_ * - * @param s the buffer - * @param tguide the guide + ≈* @param s the buffer +≈ * @param tguide the guide * - * @return the term +≈ * @return the term */ Term Yap_MkTextTerm(const char *s, encoding_t enc, Term tguide) { - CACHE_REGS - if (IsAtomTerm(tguide)) - return MkAtomTerm(Yap_LookupAtom(s)); - if (IsStringTerm(tguide)) - return MkStringTerm(s); - if (IsPairTerm(tguide) && IsAtomTerm(HeadOfTerm(tguide))) { - return Yap_CharsToListOfAtoms(s, enc PASS_REGS); - } - return Yap_CharsToListOfCodes(s, enc PASS_REGS); + CACHE_REGS + if (IsAtomTerm(tguide)) + return MkAtomTerm(Yap_LookupAtom(s)); + if (IsStringTerm(tguide)) + return MkStringTerm(s); + if (IsPairTerm(tguide) && IsAtomTerm(HeadOfTerm(tguide))) { + return Yap_CharsToListOfAtoms(s, enc PASS_REGS); + } + return Yap_CharsToListOfCodes(s, enc PASS_REGS); } diff --git a/H/YapText.h b/H/YapText.h index 52d108d0e..61d4cc252 100644 --- a/H/YapText.h +++ b/H/YapText.h @@ -175,8 +175,13 @@ inline static utf8proc_ssize_t strlen_utf8(const utf8proc_uint8_t *pt) { utf8proc_ssize_t l = utf8proc_iterate(pt, -1, &b); if (b == 0) return rc; - pt += l; - rc += l; + else if (b > 0) { + pt += l; + rc += l; + } else { + pt++; + } + } return rc; } @@ -298,7 +303,8 @@ typedef enum { YAP_STRING_WITH_BUFFER = 0x40000, // output on existing buffer YAP_STRING_MALLOC = 0x80000, // output on malloced buffer YAP_STRING_UPCASE = 0x100000, // output on malloced buffer - YAP_STRING_DOWNCASE = 0x200000 // output on malloced buffer + YAP_STRING_DOWNCASE = 0x200000, // output on malloced buffer + YAP_STRING_IN_TMP = 0x200000 // temporary space has 
been allocated } enum_seq_type_t; typedef UInt seq_type_t; @@ -325,7 +331,6 @@ typedef struct text_cvt { seq_type_t type; seq_val_t val; Term mod; // optional - size_t sz; // fixed sz, or -1 Term dif; // diff-list, usually TermNil size_t max; // max_size encoding_t enc; @@ -385,14 +390,14 @@ static inline seq_type_t mod_to_bqtype(Term mod USES_REGS) { // the routines -extern void *Yap_readText(void *buf, seq_tv_t *inp, encoding_t *enc, - int *minimal, size_t *lengp USES_REGS); -extern bool write_Text(void *inp, seq_tv_t *out, encoding_t enc, int minimal, +extern unsigned char *Yap_readText(seq_tv_t *inp, size_t *lengp + USES_REGS); +extern bool write_Text(unsigned char *inp, seq_tv_t *out, size_t leng USES_REGS); extern bool Yap_CVT_Text(seq_tv_t *inp, seq_tv_t *out USES_REGS); -extern void *Yap_Concat_Text(int n, seq_tv_t inp[], seq_tv_t *out USES_REGS); -extern void *Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp, - encoding_t encv[], seq_tv_t outv[] USES_REGS); +extern bool Yap_Concat_Text(int n, seq_tv_t inp[], seq_tv_t *out USES_REGS); +extern bool Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp, + seq_tv_t outv[] USES_REGS); // user friendly interface @@ -675,7 +680,6 @@ static inline Atom Yap_CharsToAtom(const char *s, encoding_t enc USES_REGS) { seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.enc = enc; inp.type = YAP_STRING_CHARS; out.val.uc = NULL; @@ -690,7 +694,6 @@ static inline Term Yap_CharsToListOfAtoms(const char *s, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.enc = enc; inp.type = YAP_STRING_CHARS; out.val.uc = NULL; @@ -705,7 +708,6 @@ static inline Term Yap_CharsToListOfCodes(const char *s, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.enc = enc; inp.type = YAP_STRING_CHARS; out.val.uc = NULL; @@ -719,7 +721,6 @@ static inline Term Yap_UTF8ToListOfCodes(const char *s USES_REGS) { seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.type = YAP_STRING_CHARS; inp.enc = ENC_ISO_UTF8; out.type = YAP_STRING_CODES; @@ 
-732,7 +733,6 @@ static inline Atom Yap_UTF8ToAtom(const unsigned char *s USES_REGS) { seq_tv_t inp, out; inp.val.uc0 = s; - inp.sz = 0; inp.type = YAP_STRING_CHARS; inp.enc = ENC_ISO_UTF8; out.type = YAP_STRING_ATOM; @@ -747,7 +747,6 @@ static inline Term Yap_CharsToDiffListOfCodes(const char *s, Term tail, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.enc = enc; inp.type = YAP_STRING_CHARS; out.val.uc = NULL; @@ -763,7 +762,6 @@ static inline Term Yap_UTF8ToDiffListOfCodes(const char *s, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.type = YAP_STRING_CHARS; inp.enc = ENC_ISO_UTF8; out.type = YAP_STRING_DIFF | YAP_STRING_CODES; @@ -779,7 +777,6 @@ static inline Term Yap_WCharsToDiffListOfCodes(const wchar_t *s, seq_tv_t inp, out; inp.val.w0 = s; - inp.sz = 0; inp.type = YAP_STRING_WCHARS; out.type = YAP_STRING_DIFF | YAP_STRING_CODES; out.val.uc = NULL; @@ -794,7 +791,6 @@ static inline Term Yap_CharsToString(const char *s, encoding_t enc USES_REGS) { seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.enc = enc; inp.type = YAP_STRING_CHARS; out.type = YAP_STRING_STRING; @@ -808,7 +804,6 @@ static inline char *Yap_AtomToUTF8Text(Atom at, const char *s USES_REGS) { seq_tv_t inp, out; inp.val.a = at; - inp.sz = 0; inp.type = YAP_STRING_ATOM; out.type = YAP_STRING_CHARS; out.val.uc = NULL; @@ -829,7 +824,6 @@ static inline Term Yap_CharsToTDQ(const char *s, Term mod, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.type = YAP_STRING_CHARS; inp.mod = mod; inp.enc = enc; @@ -847,7 +841,6 @@ static inline Term Yap_CharsToTBQ(const char *s, Term mod, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = 0; inp.type = YAP_STRING_CHARS; inp.mod = mod; inp.enc = enc; @@ -944,7 +937,7 @@ static inline Term Yap_ListToAtomic(Term t0 USES_REGS) { inp.type = YAP_STRING_STRING | YAP_STRING_ATOMS_CODES | YAP_STRING_TERM; out.val.uc = NULL; out.type = YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | - YAP_STRING_BIG | YAP_STRING_TERM; + YAP_STRING_BIG ; if 
(!Yap_CVT_Text(&inp, &out PASS_REGS)) return 0L; return out.val.t; @@ -1028,7 +1021,6 @@ static inline Atom Yap_NCharsToAtom(const char *s, size_t len, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = len; inp.type = YAP_STRING_CHARS | YAP_STRING_NCHARS; inp.enc = enc; out.type = YAP_STRING_ATOM; @@ -1058,7 +1050,6 @@ static inline Term Yap_NCharsToListOfCodes(const char *s, size_t len, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = len; inp.type = YAP_STRING_CHARS | YAP_STRING_NCHARS; inp.enc = enc; out.type = YAP_STRING_CODES; @@ -1073,7 +1064,6 @@ static inline Term Yap_NCharsToString(const char *s, size_t len, seq_tv_t inp, out; inp.val.c0 = s; - inp.sz = len; inp.enc = enc; inp.type = YAP_STRING_CHARS | YAP_STRING_NCHARS; out.type = YAP_STRING_STRING; @@ -1089,7 +1079,6 @@ static inline Term Yap_NCharsToTDQ(const char *s, size_t len, encoding_t enc, inp.val.c0 = s; inp.type = YAP_STRING_CHARS | YAP_STRING_NCHARS; - inp.sz = len; inp.enc = enc; inp.mod = mod; out.type = mod_to_type(mod PASS_REGS); @@ -1107,9 +1096,9 @@ static inline Term Yap_NCharsToTBQ(const char *s, size_t len, encoding_t enc, inp.val.c0 = s; inp.type = YAP_STRING_CHARS | YAP_STRING_NCHARS; - inp.sz = len; inp.enc = enc; - inp.mod = mod; + + out.type = mod_to_bqtype(mod PASS_REGS); out.max = len; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) @@ -1167,7 +1156,6 @@ static inline Atom Yap_NWCharsToAtom(const wchar_t *s, size_t len USES_REGS) { seq_tv_t inp, out; inp.val.w0 = s; - inp.sz = len; inp.type = YAP_STRING_WCHARS | YAP_STRING_NCHARS; out.type = YAP_STRING_ATOM; out.max = len; @@ -1181,7 +1169,6 @@ static inline Term Yap_NWCharsToListOfAtoms(const wchar_t *s, seq_tv_t inp, out; inp.val.w0 = s; - inp.sz = len; inp.type = YAP_STRING_WCHARS | YAP_STRING_NCHARS; out.type = YAP_STRING_ATOMS; out.max = len; @@ -1195,7 +1182,6 @@ static inline Term Yap_NWCharsToListOfCodes(const wchar_t *s, seq_tv_t inp, out; inp.val.w0 = s; - inp.sz = len; inp.type = YAP_STRING_WCHARS | YAP_STRING_NCHARS; out.type = 
YAP_STRING_CODES; out.val.uc = NULL; @@ -1209,7 +1195,6 @@ static inline Term Yap_NWCharsToString(const wchar_t *s, size_t len USES_REGS) { seq_tv_t inp, out; inp.val.w0 = s; - inp.sz = len; inp.type = YAP_STRING_WCHARS | YAP_STRING_NCHARS; out.val.uc = NULL; out.type = YAP_STRING_STRING; @@ -1221,7 +1206,6 @@ static inline Term Yap_NWCharsToString(const wchar_t *s, size_t len USES_REGS) { static inline Atom Yap_StringToAtom(Term t0 USES_REGS) { seq_tv_t inp, out; - inp.sz = 0; inp.val.t = t0; inp.type = YAP_STRING_STRING; out.val.uc = NULL; @@ -1233,7 +1217,6 @@ static inline Atom Yap_StringToAtom(Term t0 USES_REGS) { static inline Atom Yap_StringSWIToAtom(Term t0 USES_REGS) { seq_tv_t inp, out; - inp.sz = 0; inp.val.t = t0; inp.type = YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG | YAP_STRING_ATOMS_CODES | @@ -1247,11 +1230,10 @@ static inline Atom Yap_StringSWIToAtom(Term t0 USES_REGS) { static inline size_t Yap_StringToAtomic(Term t0 USES_REGS) { seq_tv_t inp, out; - inp.sz = 0; inp.val.t = t0; inp.type = YAP_STRING_STRING; out.type = YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | - YAP_STRING_BIG | YAP_STRING_TERM; + YAP_STRING_BIG ; out.val.uc = NULL; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) return 0L; @@ -1260,7 +1242,6 @@ static inline size_t Yap_StringToAtomic(Term t0 USES_REGS) { static inline size_t Yap_StringToLength(Term t0 USES_REGS) { seq_tv_t inp, out; - inp.sz = 0; inp.val.t = t0; inp.type = YAP_STRING_STRING; out.type = YAP_STRING_LENGTH; @@ -1272,7 +1253,6 @@ static inline size_t Yap_StringToLength(Term t0 USES_REGS) { static inline size_t Yap_StringToListOfAtoms(Term t0 USES_REGS) { seq_tv_t inp, out; - inp.sz = 0; inp.val.t = t0; inp.type = YAP_STRING_STRING; out.type = YAP_STRING_ATOMS; @@ -1284,7 +1264,6 @@ static inline size_t Yap_StringToListOfAtoms(Term t0 USES_REGS) { static inline size_t Yap_StringSWIToListOfAtoms(Term t0 USES_REGS) { seq_tv_t inp, out; - inp.sz = 0; inp.val.t = t0; 
inp.type = YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG | YAP_STRING_ATOMS_CODES | @@ -1299,7 +1278,6 @@ static inline size_t Yap_StringSWIToListOfAtoms(Term t0 USES_REGS) { static inline size_t Yap_StringToListOfCodes(Term t0 USES_REGS) { seq_tv_t inp, out; inp.val.t = t0; - inp.sz = 0; inp.type = YAP_STRING_STRING; out.type = YAP_STRING_CODES; out.val.uc = NULL; @@ -1324,7 +1302,6 @@ static inline size_t Yap_StringSWIToListOfCodes(Term t0 USES_REGS) { static inline Term Yap_StringToNumber(Term t0 USES_REGS) { seq_tv_t inp, out; inp.val.t = t0; - inp.sz = 0; inp.type = YAP_STRING_STRING; out.type = YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG | YAP_STRING_TERM; @@ -1384,8 +1361,7 @@ static inline Term Yap_UTF8ToString(const char *s USES_REGS) { static inline Term Yap_WCharsToListOfCodes(const wchar_t *s USES_REGS) { seq_tv_t inp, out; inp.val.w0 = s; - inp.sz = 0; - inp.type = YAP_STRING_WCHARS; + inp.type = YAP_STRING_WCHARS; out.val.uc = NULL; out.type = YAP_STRING_CODES; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) @@ -1398,7 +1374,7 @@ static inline Term Yap_WCharsToTDQ(wchar_t *s, Term mod USES_REGS) { inp.val.w0 = s; inp.type = YAP_STRING_WCHARS; - inp.sz = 0; + inp.mod = mod; out.type = mod_to_type(mod PASS_REGS); out.val.uc = NULL; @@ -1414,7 +1390,6 @@ static inline Term Yap_WCharsToTBQ(wchar_t *s, Term mod USES_REGS) { inp.val.w = s; inp.type = YAP_STRING_WCHARS; - inp.sz = 0; inp.mod = mod; out.type = mod_to_bqtype(mod PASS_REGS); out.val.uc = NULL; @@ -1428,7 +1403,6 @@ static inline Term Yap_WCharsToTBQ(wchar_t *s, Term mod USES_REGS) { static inline Term Yap_WCharsToString(const wchar_t *s USES_REGS) { seq_tv_t inp, out; inp.val.w0 = s; - inp.sz = 0; inp.type = YAP_STRING_WCHARS; out.type = YAP_STRING_STRING; out.val.uc = NULL; @@ -1441,10 +1415,8 @@ static inline Atom Yap_ConcatAtoms(Term t1, Term t2 USES_REGS) { seq_tv_t inpv[2], out; inpv[0].val.t = t1; inpv[0].type = YAP_STRING_ATOM; - inpv[0].sz = 0; 
inpv[1].val.t = t2; inpv[1].type = YAP_STRING_ATOM; - inpv[1].sz = 0; out.type = YAP_STRING_ATOM; out.val.uc = NULL; if (!Yap_Concat_Text(2, inpv, &out PASS_REGS)) @@ -1457,11 +1429,9 @@ static inline Atom Yap_ConcatAtomics(Term t1, Term t2 USES_REGS) { inpv[0].val.t = t1; inpv[0].type = YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG | YAP_STRING_TERM; - inpv[0].sz = 0; inpv[1].val.t = t2; inpv[1].type = YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG | YAP_STRING_TERM; - inpv[1].sz = 0; out.type = YAP_STRING_ATOM; out.val.uc = NULL; if (!Yap_Concat_Text(2, inpv, &out PASS_REGS)) @@ -1473,10 +1443,8 @@ static inline Term Yap_ConcatStrings(Term t1, Term t2 USES_REGS) { seq_tv_t inpv[2], out; inpv[0].val.t = t1; inpv[0].type = YAP_STRING_STRING; - inpv[0].sz = 0; inpv[1].val.t = t2; inpv[1].type = YAP_STRING_STRING; - inpv[1].sz = 0; out.type = YAP_STRING_STRING; if (!Yap_Concat_Text(2, inpv, &out PASS_REGS)) @@ -1492,12 +1460,9 @@ static inline Atom Yap_SpliceAtom(Term t1, Atom ats[], size_t cut, cuts[1] = max; inp.type = YAP_STRING_ATOM; inp.val.t = t1; - inp.sz = 0; outv[0].type = YAP_STRING_ATOM; - outv[0].sz = 0; outv[1].type = YAP_STRING_ATOM; - outv[1].sz = 0; - if (!Yap_Splice_Text(2, cuts, &inp, (encoding_t *)NULL, outv PASS_REGS)) + if (!Yap_Splice_Text(2, cuts, &inp, outv PASS_REGS)) return (Atom)NULL; ats[0] = outv[0].val.a; ats[1] = outv[1].val.a; @@ -1508,14 +1473,11 @@ static inline Atom Yap_SubtractHeadAtom(Term t1, Term th USES_REGS) { seq_tv_t outv[2], inp; inp.type = YAP_STRING_ATOM; inp.val.t = t1; - inp.sz = 0; outv[0].type = YAP_STRING_ATOM; outv[0].val.t = th; - outv[0].sz = 0; outv[1].type = YAP_STRING_ATOM; outv[1].val.t = 0; - outv[1].sz = 0; - if (!Yap_Splice_Text(2, (size_t *)NULL, &inp, (encoding_t *)NULL, + if (!Yap_Splice_Text(2, (size_t *)NULL, &inp, outv PASS_REGS)) return (Atom)NULL; return outv[1].val.a; @@ -1525,13 +1487,11 @@ static inline Atom 
Yap_SubtractTailAtom(Term t1, Term th USES_REGS) { seq_tv_t outv[2], inp; inp.type = YAP_STRING_ATOM; inp.val.t = t1; - inp.sz = 0; outv[0].type = YAP_STRING_ATOM; outv[0].val.t = 0; - outv[0].sz = 0; outv[1].type = YAP_STRING_ATOM; outv[1].val.t = th; - if (!Yap_Splice_Text(2, (size_t *)NULL, &inp, (encoding_t *)NULL, + if (!Yap_Splice_Text(2, (size_t *)NULL, &inp, outv PASS_REGS)) return (Atom)NULL; return outv[0].val.a; @@ -1543,13 +1503,11 @@ static inline Term Yap_SpliceString(Term t1, Term ts[], size_t cut, size_t cuts[2]; inp.type = YAP_STRING_STRING; inp.val.t = t1; - inp.sz = 0; outv[0].type = YAP_STRING_STRING; outv[1].type = YAP_STRING_STRING; - outv[1].sz = 0; cuts[0] = cut; cuts[1] = max; - if (!Yap_Splice_Text(2, cuts, &inp, (encoding_t *)NULL, outv PASS_REGS)) + if (!Yap_Splice_Text(2, cuts, &inp, outv PASS_REGS)) return 0L; ts[0] = outv[0].val.t; ts[1] = outv[1].val.t; @@ -1560,14 +1518,11 @@ static inline Term Yap_SubtractHeadString(Term t1, Term th USES_REGS) { seq_tv_t outv[2], inp; inp.type = YAP_STRING_STRING; inp.val.t = t1; - inp.sz = 0; outv[0].type = YAP_STRING_STRING; outv[0].val.t = th; - outv[0].sz = 0; outv[1].type = YAP_STRING_STRING; outv[1].val.t = 0; - outv[1].sz = 0; - if (!Yap_Splice_Text(2, (size_t *)NULL, &inp, (encoding_t *)NULL, + if (!Yap_Splice_Text(2, (size_t *)NULL, &inp, outv PASS_REGS)) return 0L; return outv[1].val.t; @@ -1577,13 +1532,11 @@ static inline Term Yap_SubtractTailString(Term t1, Term th USES_REGS) { seq_tv_t outv[2], inp; inp.type = YAP_STRING_STRING; inp.val.t = t1; - inp.sz = 0; outv[0].type = YAP_STRING_STRING; outv[0].val.t = 0; - outv[0].sz = 0; outv[1].type = YAP_STRING_STRING; outv[1].val.t = th; - if (!Yap_Splice_Text(2, (size_t *)NULL, &inp, (encoding_t *)NULL, + if (!Yap_Splice_Text(2, (size_t *)NULL, &inp, outv PASS_REGS)) return 0L; return outv[0].val.t;