/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker and Anjo Anjewierden
    E-mail:        jan@swi.psy.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2002, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/

#include "pl-incl.h"
#include "pl-ctype.h"
#include "pl-utf8.h"
#include "pl-codelist.h"
/* NOTE(review): the three system header names below had been stripped
   from the file (only bare `#include` remained).  <errno.h> and
   <stdio.h> are required by the errno/EINVAL and sprintf() uses further
   down; <limits.h> is assumed from the HAVE_LIMITS_H guard -- confirm
   against the upstream file.
*/
#include <errno.h>
#include <stdio.h>
#if HAVE_LIMITS_H
#include <limits.h>			/* solaris compatibility */
#endif

#undef LD
#define LD LOCAL_LD


		 /*******************************
		 *	UNIFIED TEXT STUFF	*
		 *******************************/

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
bufsize_text() returns the number of bytes needed to store `len`
characters of `text`, based on the unit size of its encoding: one byte
for the char-based encodings, sizeof(pl_wchar_t) for ENC_WCHAR.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static inline size_t
bufsize_text(PL_chars_t *text, size_t len)
{ size_t unit;

  switch(text->encoding)
  { case ENC_ISO_LATIN_1:
    case ENC_ASCII:
    case ENC_UTF8:
    case ENC_ANSI:
      unit = sizeof(char);
      break;
    case ENC_WCHAR:
      unit = sizeof(pl_wchar_t);
      break;
    default:
      assert(0);
      unit = sizeof(char);		/*NOTREACHED*/
  }

  return len*unit;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_save_text() relocates the character data of `text` (including its
terminating 0) depending on `flags` and the current storage:

  - BUF_MALLOC requested and data not yet malloc'ed: copy it to memory
    obtained from PL_malloc().
  - otherwise, data in the local buffer: copy it to a ring buffer.
  - otherwise, malloc'ed data: copy it to a ring buffer and release the
    malloc'ed copy.

Any other storage type is left untouched.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

void
PL_save_text(PL_chars_t *text, int flags)
{ if ( (flags & BUF_MALLOC) && text->storage != PL_CHARS_MALLOC )
  { size_t bl = bufsize_text(text, text->length+1);
    void *new = PL_malloc(bl);

    memcpy(new, text->text.t, bl);
    text->text.t = new;
    text->storage = PL_CHARS_MALLOC;
  } else if ( text->storage == PL_CHARS_LOCAL )
  { Buffer b = findBuffer(BUF_RING);
    size_t bl = bufsize_text(text, text->length+1);

    addMultipleBuffer(b, text->text.t, bl, char);
    text->text.t = baseBuffer(b, char);
    text->storage = PL_CHARS_RING;
  } else if ( text->storage == PL_CHARS_MALLOC )
  { Buffer b = findBuffer(BUF_RING);
    size_t bl = bufsize_text(text, text->length+1);

    addMultipleBuffer(b, text->text.t, bl, char);
    PL_free_text(text);			/* storage is still MALLOC here */
    text->text.t = baseBuffer(b, char);
    text->storage = PL_CHARS_RING;
  }
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_from_stack_text() moves a string from the stack, so it won't get
corrupted if GC/shift comes along.  Short strings go to the local buffer
of `text`, longer ones to a ring buffer.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static void
PL_from_stack_text(PL_chars_t *text)
{ if ( text->storage == PL_CHARS_STACK )
  { size_t bl = bufsize_text(text, text->length+1);

    if ( bl < sizeof(text->buf) )
    { memcpy(text->buf, text->text.t, bl);
      text->text.t = text->buf;
      text->storage = PL_CHARS_LOCAL;
    } else
    { Buffer b = findBuffer(BUF_RING);

      addMultipleBuffer(b, text->text.t, bl, char);
      text->text.t = baseBuffer(b, char);
      text->storage = PL_CHARS_RING;
    }
  }
}


#define INT64_DIGITS 20

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ui64toa() writes the decimal representation of `val` to `out`, followed
by a 0-byte, and returns a pointer to that terminating 0-byte.  `out`
must have room for at least INT64_DIGITS+1 bytes.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static char *
ui64toa(uint64_t val, char *out)
{ char tmpBuf[INT64_DIGITS + 1];
  char *ptrOrg = tmpBuf + INT64_DIGITS;
  char *ptr = ptrOrg;
  size_t nbDigs;

  do					/* digits are produced right-to-left */
  { int rem = val % 10;

    *--ptr = rem + '0';
    val /= 10;
  } while ( val );

  nbDigs = ptrOrg - ptr;
  memcpy(out, ptr, nbDigs);
  out += nbDigs;
  *out = '\0';

  return out;				/* points to the END */
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
i64toa() is the signed variant of ui64toa().  The magnitude is computed
in the unsigned domain: `-val` on the signed value overflows (undefined
behaviour) for INT64_MIN, while unsigned negation is well defined and
yields the correct magnitude.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static char *
i64toa(int64_t val, char *out)
{ uint64_t mag = (uint64_t)val;

  if ( val < 0 )
  { *out++ = '-';
    mag = (uint64_t)0 - mag;
  }

  return ui64toa(mag, out);
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_get_text__LD() fills `text` from the term `l`.  Which term types are
accepted is controlled by the CVT_* bits in `flags`: atoms, strings,
integers, floats, code/character lists, variables and, as a last resort
when CVT_WRITE or CVT_WRITE_CANONICAL is given, the written
representation of an arbitrary term.

Returns TRUE on success, 2 if the term is (partially) unbound and
CVT_VARNOFAIL is set, and otherwise fails; with CVT_EXCEPTION the
failure is reported through PL_error().
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_get_text__LD(term_t l, PL_chars_t *text, int flags ARG_LD)
{ word w = valHandle(l);

  if ( (flags & CVT_ATOM) && isAtom(w) )
#if __YAP_PROLOG__
  { if ( !get_atom_ptr_text(AtomOfTerm(w), text) )
#else
  { if ( !get_atom_text(w, text) )
#endif
      goto maybe_write;
  } else if ( (flags & CVT_STRING) && isString(w) )
  { if ( !get_string_text(w, text PASS_LD) )
      goto maybe_write;
    PL_from_stack_text(text);
  } else if ( (flags & CVT_INTEGER) && isInteger(w) )
  { number n;

    PL_get_number(l, &n);
    switch(n.type)
    { case V_INTEGER:
      { char *ep = i64toa(n.value.i, text->buf);

	text->text.t  = text->buf;
	text->length  = ep-text->text.t;
	text->storage = PL_CHARS_LOCAL;
	break;
      }
#ifdef O_GMP
      case V_MPZ:
      { size_t sz = mpz_sizeinbase(n.value.mpz, 10) + 2;
	Buffer b  = findBuffer(BUF_RING);

	if ( !growBuffer(b, sz) )
	  outOfCore();
	mpz_get_str(b->base, 10, n.value.mpz);
	b->top = b->base + strlen(b->base);
	text->text.t  = baseBuffer(b, char);
	text->length  = entriesBuffer(b, char);
	text->storage = PL_CHARS_RING;
	break;
      }
#endif
      default:
	assert(0);
    }
    text->encoding  = ENC_ISO_LATIN_1;
    text->canonical = TRUE;
  } else if ( (flags & CVT_FLOAT) && isFloat(w) )
  { format_float(valFloat(w), text->buf);
    text->text.t    = text->buf;
    text->length    = strlen(text->text.t);
    text->encoding  = ENC_ISO_LATIN_1;
    text->storage   = PL_CHARS_LOCAL;
    text->canonical = TRUE;
  } else if ( (flags & CVT_LIST) )
  { Buffer b;
    CVT_result result;

    if ( (b = codes_or_chars_to_buffer(l, BUF_RING, FALSE, &result)) )
    { text->length = entriesBuffer(b, char);
      addBuffer(b, EOS, char);
      text->text.t = baseBuffer(b, char);
      text->encoding = ENC_ISO_LATIN_1;
    } else if ( result.status == CVT_wide &&
		(b = codes_or_chars_to_buffer(l, BUF_RING, TRUE, &result)) )
    { text->length = entriesBuffer(b, pl_wchar_t);
      addBuffer(b, EOS, pl_wchar_t);
      text->text.w = baseBuffer(b, pl_wchar_t);
      text->encoding = ENC_WCHAR;
    } else if ( (flags & (CVT_WRITE|CVT_WRITE_CANONICAL)) )
    { goto case_write;
    } else
    { if ( (flags & CVT_VARNOFAIL) && result.status == CVT_partial )
	return 2;

      if ( (flags & CVT_EXCEPTION) )
      { switch(result.status)
	{ case CVT_partial:
	    return PL_error(NULL, 0, NULL, ERR_INSTANTIATION);
	  case CVT_nolist:
	    goto error;
	  case CVT_nocode:
	  case CVT_nochar:
	  { term_t culprit = PL_new_term_ref();
	    atom_t type;

#if __YAP_PROLOG__
	    YAP_PutInSlot(culprit, result.culprit);
#else
	    *valTermRef(culprit) = result.culprit;
#endif
	    if ( result.status == CVT_nocode )
	      type = ATOM_character_code;
	    else
	      type = ATOM_character;

	    return PL_error(NULL, 0, NULL, ERR_TYPE, type, culprit);
	  }
	  default:
	    break;
	}
      }
      goto error;
    }

    text->storage   = PL_CHARS_RING;
    text->canonical = TRUE;
  } else if ( (flags & CVT_VARIABLE) && isVar(w) )
  { text->text.t    = varName(l, text->buf);
    text->length    = strlen(text->text.t);
    text->encoding  = ENC_ISO_LATIN_1;
    text->storage   = PL_CHARS_LOCAL;
    text->canonical = TRUE;
  } else if ( (flags & (CVT_WRITE|CVT_WRITE_CANONICAL)) )
  { IOENC encodings[3];
    IOENC *enc;
    char *r;
    int wflags;

  case_write:
    /* Try the narrow encoding first; only if writing fails retry wide */
    encodings[0] = ENC_ISO_LATIN_1;
    encodings[1] = ENC_WCHAR;
    encodings[2] = ENC_UNKNOWN;

    if ( (flags&CVT_WRITEQ) == CVT_WRITEQ )
      wflags = PL_WRT_QUOTED|PL_WRT_NUMBERVARS;
    else if ( (flags&CVT_WRITE_CANONICAL) )
      wflags = PL_WRT_QUOTED|PL_WRT_IGNOREOPS|PL_WRT_NUMBERVARS;
    else
      wflags = PL_WRT_NUMBERVARS;

    for(enc = encodings; *enc != ENC_UNKNOWN; enc++)
    { size_t size;
      IOSTREAM *fd;

      r = text->buf;
      size = sizeof(text->buf);
      fd = Sopenmem(&r, &size, "w");
      fd->encoding = *enc;
      if ( PL_write_term(fd, l, 1200, wflags) &&
	   Sputcode(EOS, fd) >= 0 &&
	   Sflush(fd) >= 0 )
      { text->encoding = *enc;
	/* r still equal to text->buf means the output fitted locally */
	text->storage = (r == text->buf ? PL_CHARS_LOCAL : PL_CHARS_MALLOC);
	text->canonical = TRUE;

	if ( *enc == ENC_ISO_LATIN_1 )
	{ text->length = size-1;
	  text->text.t = r;
	} else
	{ text->length = (size/sizeof(pl_wchar_t))-1;
	  text->text.w = (pl_wchar_t *)r;
	}

	Sclose(fd);

	return TRUE;
      } else
      { Sclose(fd);

	if ( r != text->buf )
	  Sfree(r);
      }
    }

    goto error;
  } else
  { goto error;
  }

  succeed;

maybe_write:
  if ( (flags & (CVT_WRITE|CVT_WRITE_CANONICAL)) )
    goto case_write;

error:
  if ( canBind(w) && (flags & CVT_VARNOFAIL) )
    return 2;

  if ( (flags & CVT_EXCEPTION) )
  { atom_t expected;

    if ( (flags & CVT_LIST) && !(flags&(CVT_ATOM|CVT_NUMBER)) )
      expected = ATOM_list;		/* List and/or string object */
    else if ( flags & CVT_LIST )
      expected = ATOM_text;
    else if ( flags & CVT_NUMBER )
      expected = ATOM_atomic;
    else
      expected = ATOM_atom;

    return PL_error(NULL, 0, NULL, ERR_TYPE, expected, l);
  }

  fail;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
textToAtom() canonises `text` and looks up the corresponding atom.
Returns 0 if the text cannot be canonised.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

atom_t
textToAtom(PL_chars_t *text)
{ if ( !PL_canonise_text(text) )
    return 0;

  if ( text->encoding == ENC_ISO_LATIN_1 )
  { return lookupAtom(text->text.t, text->length);
  } else
  { return lookupUCSAtom(text->text.w, text->length);
  }
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
textToString() canonises `text` and creates a string object from it
using globalString()/globalWString().  Returns 0 if the text cannot be
canonised.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

word
textToString(PL_chars_t *text)
{ if ( !PL_canonise_text(text) )
    return 0;

  if ( text->encoding == ENC_ISO_LATIN_1 )
  { return globalString(text->length, text->text.t);
  } else
  { return globalWString(text->length, text->text.w);
  }
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_unify_text() unifies `term` with `text`, represented as `type`:
PL_ATOM, PL_STRING, PL_CODE_LIST or PL_CHAR_LIST.  For the list types,
`tail` (if non-zero) is handed to CLOSE_SEQ_STRING() together with the
list built; for empty text `term` is put into `tail` directly.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_unify_text(term_t term, term_t tail, PL_chars_t *text, int type)
{ switch(type)
  { case PL_ATOM:
    { atom_t a = textToAtom(text);

      if ( a )
      { int rval = _PL_unify_atomic(term, a);

	PL_unregister_atom(a);
	return rval;
      }
      return FALSE;
    }
    case PL_STRING:
    { word w = textToString(text);

      if ( w )
	return _PL_unify_atomic(term, w);
      else
	return FALSE;
    }
    case PL_CODE_LIST:
    case PL_CHAR_LIST:
    { if ( text->length == 0 )
      { if ( tail )
	{ GET_LD
	  PL_put_term(tail, term);
	  return TRUE;
	} else
	{ return PL_unify_nil(term);
	}
      } else
      { GET_LD
	term_t l = PL_new_term_ref();
	Word p0, p;

	switch(text->encoding)
	{ case ENC_ISO_LATIN_1:
	  { const unsigned char *s = (const unsigned char *)text->text.t;
	    const unsigned char *e = &s[text->length];

	    if ( !(p0 = p = INIT_SEQ_STRING(text->length)) )
	      return FALSE;

	    if ( type == PL_CODE_LIST )
	    { for( ; s < e; s++)
		p = EXTEND_SEQ_CODES(p, *s);
	    } else
	    { for( ; s < e; s++)
		p = EXTEND_SEQ_CHARS(p, *s);
	    }
	    break;
	  }
	  case ENC_WCHAR:
	  { const pl_wchar_t *s = (const pl_wchar_t *)text->text.t;
	    const pl_wchar_t *e = &s[text->length];

	    if ( !(p0 = p = INIT_SEQ_STRING(text->length)) )
	      return FALSE;

	    if ( type == PL_CODE_LIST )
	    { for( ; s < e; s++)
		p = EXTEND_SEQ_CODES(p, *s);
	    } else
	    { for( ; s < e; s++)
		p = EXTEND_SEQ_CHARS(p, *s);
	    }
	    break;
	  }
	  case ENC_UTF8:
	  { const char *s = text->text.t;
	    const char *e = &s[text->length];
	    size_t len = utf8_strlen(s, text->length);

	    if ( !(p0 = p = INIT_SEQ_STRING(len)) )
	      return FALSE;

	    if ( type == PL_CODE_LIST )
	    { while (s < e)
	      { int chr;

		s = utf8_get_char(s, &chr);
		p = EXTEND_SEQ_CODES(p, chr);
	      }
	    } else
	    { while (s < e)
	      { int chr;

		s = utf8_get_char(s, &chr);
		p = EXTEND_SEQ_CHARS(p, chr);
	      }
	    }
	    break;
	  }
	  case ENC_ANSI:
	  { const char *s = text->text.t;
	    size_t rc, n = text->length;
	    size_t len = 0;
	    mbstate_t mbs;
	    wchar_t wc;

	    /* Pass 1: validate the multibyte text and count characters */
	    memset(&mbs, 0, sizeof(mbs));
	    while( n > 0 )
	    { if ( (rc=mbrtowc(&wc, s, n, &mbs)) == (size_t)-1 || rc == 0 )
		return PL_error(NULL, 0,
				"cannot represent text in current locale",
				ERR_REPRESENTATION, ATOM_encoding);

	      len++;
	      n -= rc;
	      s += rc;
	    }

	    if ( !(p0 = p = INIT_SEQ_STRING(len)) )
	      return FALSE;

	    /* Pass 2: decode again, now emitting the list cells */
	    n = text->length;
	    s = text->text.t;
	    memset(&mbs, 0, sizeof(mbs));
	    while(n > 0)
	    { rc = mbrtowc(&wc, s, n, &mbs);

	      if ( type == PL_CODE_LIST )
		p = EXTEND_SEQ_CODES(p, wc);
	      else
		p = EXTEND_SEQ_CHARS(p, wc);

	      s += rc;
	      n -= rc;
	    }
	    break;
	  }
	  default:
	  { assert(0);

	    return FALSE;
	  }
	}

	return CLOSE_SEQ_STRING(p, p0, tail, term, l );
      }
    }
    default:
    { assert(0);

      return FALSE;
    }
  }
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_unify_text_range() unifies `term` with the `len` characters of `text`
starting at `offset`, represented as `type`.  Returns FALSE if the range
does not lie inside the text.

The range test is written as `len > length-offset` rather than
`offset+len > length` because the latter can wrap around for very large
`len` and then wrongly accept the range.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_unify_text_range(term_t term, PL_chars_t *text,
		    size_t offset, size_t len, int type)
{ if ( offset == 0 && len == text->length )
  { return PL_unify_text(term, 0, text, type);
  } else
  { PL_chars_t sub;
    int rc;

    if ( offset > text->length || len > text->length - offset )
      return FALSE;

    if ( len == 1 && type == PL_ATOM )	/* single character atom */
    { GET_LD
      int c;

      if ( text->encoding == ENC_ISO_LATIN_1 )
	c = text->text.t[offset]&0xff;
      else
	c = text->text.w[offset];

      return PL_unify_atom(term, codeToAtom(c));
    }

    sub.length = len;
    sub.storage = PL_CHARS_HEAP;	/* points into `text`: not owned */
    if ( text->encoding == ENC_ISO_LATIN_1 )
    { sub.text.t    = text->text.t+offset;
      sub.encoding  = ENC_ISO_LATIN_1;
      sub.canonical = TRUE;
    } else
    { sub.text.w    = text->text.w+offset;
      sub.encoding  = ENC_WCHAR;
      sub.canonical = FALSE;
    }

    rc = PL_unify_text(term, 0, &sub, type);

    PL_free_text(&sub);

    return rc;
  }
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
int PL_promote_text(PL_chars_t *text)

Promote a text to UCS if it is currently 8-bit text.  Malloc'ed text is
replaced by a new malloc'ed wide copy, local text is widened in place if
it fits the local buffer and anything else is copied to a ring buffer.

NOTE(review): the bodies of the three copy loops were truncated in the
file under review and have been restored as plain element-wise widening
copies followed by a 0-terminator; confirm against upstream.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_promote_text(PL_chars_t *text)
{ if ( text->encoding != ENC_WCHAR )
  { if ( text->storage == PL_CHARS_MALLOC )
    { pl_wchar_t *new = PL_malloc(sizeof(pl_wchar_t)*(text->length+1));
      pl_wchar_t *t = new;
      const unsigned char *s = (const unsigned char *)text->text.t;
      const unsigned char *e = &s[text->length];

      while(s<e)
      { *t++ = *s++;
      }
      *t = EOS;

      PL_free(text->text.t);
      text->text.w = new;

      text->encoding = ENC_WCHAR;
    } else if ( text->storage == PL_CHARS_LOCAL &&
		(text->length+1)*sizeof(pl_wchar_t) < sizeof(text->buf) )
    { unsigned char buf[sizeof(text->buf)];
      unsigned char *f = buf;
      unsigned char *e = &buf[text->length];
      pl_wchar_t *t = (pl_wchar_t*)text->buf;

      /* widen in place: go through a scratch copy of the narrow data */
      memcpy(buf, text->buf, text->length*sizeof(char));
      while(f<e)
      { *t++ = *f++;
      }
      *t = EOS;

      text->encoding = ENC_WCHAR;
    } else
    { Buffer b = findBuffer(BUF_RING);
      const unsigned char *s = (const unsigned char *)text->text.t;
      const unsigned char *e = &s[text->length];

      for( ; s<e; s++)
	addBuffer(b, *s, pl_wchar_t);
      addBuffer(b, EOS, pl_wchar_t);

      text->text.w   = baseBuffer(b, pl_wchar_t);
      text->encoding = ENC_WCHAR;
      text->storage  = PL_CHARS_RING;
    }
  }

  succeed;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_demote_text() converts wide text to ISO Latin-1.  Returns FALSE,
leaving the text encoding unchanged, if the text holds a character above
0xff.

NOTE(review): the loop headers and the `> 0xff` tests were truncated in
the file under review and have been restored; confirm against upstream.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_demote_text(PL_chars_t *text)
{ if ( text->encoding != ENC_ISO_LATIN_1 )
  { if ( text->storage == PL_CHARS_MALLOC )
    { char *new = PL_malloc(sizeof(char)*(text->length+1));
      char *t = new;
      const pl_wchar_t *s = (const pl_wchar_t *)text->text.t;
      const pl_wchar_t *e = &s[text->length];

      while(s<e)
      { if ( *s > 0xff )
	{ PL_free(new);
	  return FALSE;
	}
	*t++ = *s++ & 0xff;
      }
      *t = EOS;

      PL_free(text->text.t);
      text->text.t = new;

      text->encoding = ENC_ISO_LATIN_1;
    } else if ( text->storage == PL_CHARS_LOCAL )
    { pl_wchar_t buf[sizeof(text->buf)/sizeof(pl_wchar_t)];
      pl_wchar_t *f = buf;
      pl_wchar_t *e = &buf[text->length];
      char *t = text->buf;

      /* narrow in place: go through a scratch copy of the wide data */
      memcpy(buf, text->buf, text->length*sizeof(pl_wchar_t));
      while(f<e)
      { if ( *f > 0xff )
	  return FALSE;
	*t++ = *f++ & 0xff;
      }
      *t = EOS;

      text->encoding = ENC_ISO_LATIN_1;
    } else
    { Buffer b = findBuffer(BUF_RING);
      const pl_wchar_t *s = (const pl_wchar_t*)text->text.w;
      const pl_wchar_t *e = &s[text->length];

      for( ; s<e; s++)
      { if ( *s > 0xff )
	{ unfindBuffer(BUF_RING);
	  return FALSE;
	}
	addBuffer(b, *s&0xff, char);
      }
      addBuffer(b, EOS, char);

      text->text.t   = baseBuffer(b, char);
      text->storage  = PL_CHARS_RING;
      text->encoding = ENC_ISO_LATIN_1;
    }
  }

  succeed;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
can_demote() is TRUE if `text` is ISO Latin-1 or, read as wide text,
holds no character above 0xff.  Note that every encoding other than
ENC_ISO_LATIN_1 is read through text.w.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
can_demote(PL_chars_t *text)
{ if ( text->encoding != ENC_ISO_LATIN_1 )
  { const pl_wchar_t *w = (const pl_wchar_t*)text->text.w;
    const pl_wchar_t *e = &w[text->length];

    for(; w<e; w++)
    { if ( *w > 0xff )
	return FALSE;
    }
  }

  return TRUE;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Convert text to 8-bit according to flags.   May hold REP_UTF8 to convert
to UTF-8, REP_MB to convert to  locale   8-bit  representation or nothing to
convert to ISO Latin-1. This predicate can   fail  if the text cannot be
represented.

NOTE(review): a large span starting inside wctobuffer() and running up
to the first test in PL_mb_text() was missing from the file under
review, as were the conversion loops inside PL_mb_text().  The rest of
wctobuffer(), the whole of utf8tobuffer() and the head and loops of
PL_mb_text() below are reconstructed from the surviving code (uses of
`flags`, `target`, `norep`, `b`, the rep_error label) and must be
checked against upstream.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Append the multibyte encoding of `c` in the current locale to `buf`.
   Returns FALSE if wcrtomb() reports that `c` cannot be represented.
*/

static int
wctobuffer(wchar_t c, mbstate_t *mbs, Buffer buf)
{ char b[PL_MB_LEN_MAX];
  size_t n;

  if ( (n=wcrtomb(b, c, mbs)) != (size_t)-1 )
  { size_t i;

    for(i=0; i<n; i++)
      addBuffer(buf, b[i], char);

    return TRUE;
  }

  return FALSE;				/* cannot represent */
}


/* Append the UTF-8 encoding of `c` to `buf` */

static void
utf8tobuffer(wchar_t c, Buffer buf)
{ if ( c <= 0x7f )
  { addBuffer(buf, (char)c, char);
  } else
  { char b[6];
    char *e = b;
    const char *s;

    e = utf8_put_char(e, c);
    for(s=b; s<e; s++)
      addBuffer(buf, *s, char);
  }
}


int
PL_mb_text(PL_chars_t *text, int flags)
{ int norep = -1;			/* character that cannot be encoded */
  IOENC target = ((flags&REP_UTF8) ? ENC_UTF8 :
		  (flags&REP_MB)   ? ENC_ANSI : ENC_ISO_LATIN_1);

  if ( text->encoding != target )
  { Buffer b = findBuffer(BUF_RING);

    switch(text->encoding)
    { case ENC_ISO_LATIN_1:
      { const unsigned char *s = (const unsigned char*)text->text.t;
	const unsigned char *e = &s[text->length];

	if ( target == ENC_UTF8 )
	{ for( ; s<e; s++)
	  { utf8tobuffer(*s, b);
	  }
	  addBuffer(b, 0, char);
	} else /* if ( target == ENC_MB ) */
	{ mbstate_t mbs;

	  memset(&mbs, 0, sizeof(mbs));
	  for( ; s<e; s++)
	  { if ( !wctobuffer(*s, &mbs, b) )
	    { unfindBuffer(BUF_RING);
	      norep = *s;
	      goto rep_error;
	    }
	  }
	  wctobuffer(0, &mbs, b);
	}

	break;
      }
      case ENC_WCHAR:
      { if ( target == ENC_ISO_LATIN_1 )
	{ return PL_demote_text(text);
	} else
	{ const pl_wchar_t *w = (const pl_wchar_t*)text->text.w;
	  const pl_wchar_t *e = &w[text->length];

	  if ( target == ENC_UTF8 )
	  { for( ; w<e; w++)
	    { utf8tobuffer(*w, b);
	    }
	    addBuffer(b, 0, char);
	  } else /* if ( target == ENC_MB ) */
	  { mbstate_t mbs;

	    memset(&mbs, 0, sizeof(mbs));
	    for( ; w<e; w++)
	    { if ( !wctobuffer(*w, &mbs, b) )
	      { unfindBuffer(BUF_RING);
		norep = *w;
		goto rep_error;
	      }
	    }
	    wctobuffer(0, &mbs, b);
	  }
	}

	break;
      }
      default:
      { assert(0);
	fail;
      }
    }

    text->length   = sizeOfBuffer(b)-1;	/* do not count the terminator */
    text->text.t   = baseBuffer(b, char);
    text->encoding = target;
    text->storage  = PL_CHARS_RING;
  }

  succeed;

rep_error:
  if ( (flags & CVT_EXCEPTION) )
  { char msg[128];			/* ample for the fixed format below */

    sprintf(msg,
	    "Cannot represent char U%04x using %s encoding",
	    norep,
	    target == ENC_ISO_LATIN_1 ? "ISO Latin-1" : "current locale");

    return PL_error(NULL, 0, msg, ERR_REPRESENTATION, ATOM_encoding);
  }

  fail;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_canonise_text() brings `text` into canonical form: ISO Latin-1 if all
characters fit in 8 bits and wide characters otherwise.  Fails on ANSI
(locale multibyte) text that cannot be decoded.

NOTE(review): the scanning/copy loops of the ENC_WCHAR and ENC_UTF8
cases were truncated in the file under review and have been restored
from the surviving statements; confirm against upstream.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_canonise_text(PL_chars_t *text)
{ if ( !text->canonical )
  { switch(text->encoding )
    { case ENC_ISO_LATIN_1:
	break;				/* nothing to do */
      case ENC_WCHAR:
      { const pl_wchar_t *w = (const pl_wchar_t*)text->text.w;
	const pl_wchar_t *e = &w[text->length];

	for(; w<e; w++)
	{ if ( *w > 0xff )		/* genuinely wide: keep as is */
	    return TRUE;
	}

	return PL_demote_text(text);
      }
      case ENC_UTF8:
      { const char *s = text->text.t;
	const char *e = &s[text->length];

	while(s<e && !(*s & 0x80))	/* skip the plain ASCII prefix */
	  s++;
	if ( s == e )
	{ text->encoding  = ENC_ISO_LATIN_1;
	  text->canonical = TRUE;
	} else
	{ int chr;
	  int wide = FALSE;
	  size_t len = s - text->text.t;

	  while(s<e)
	  { s = utf8_get_char(s, &chr);
	    if ( chr > 0xff )		/* requires wide characters */
	      wide = TRUE;
	    len++;
	  }

	  s = (const char *)text->text.t;
	  text->length = len;

	  if ( wide )
	  { pl_wchar_t *to = PL_malloc(sizeof(pl_wchar_t)*(len+1));

	    text->text.w = to;
	    while(s<e)
	    { s = utf8_get_char(s, &chr);
	      *to++ = chr;
	    }
	    *to = EOS;

	    text->encoding = ENC_WCHAR;
	    text->storage  = PL_CHARS_MALLOC;
	  } else
	  { char *to = PL_malloc(len+1);

	    text->text.t = to;
	    while(s<e)
	    { s = utf8_get_char(s, &chr);
	      *to++ = chr;
	    }
	    *to = EOS;

	    text->encoding = ENC_ISO_LATIN_1;
	    text->storage  = PL_CHARS_MALLOC;
	  }

	  text->canonical = TRUE;
	}

	succeed;
      }
      case ENC_ANSI:
      { mbstate_t mbs;
	size_t len = 0;
	int iso = TRUE;
	char *s = text->text.t;
	size_t rc, n = text->length;
	wchar_t wc;

	/* Pass 1: validate, count characters and see whether all fit
	   in ISO Latin-1
	*/
	memset(&mbs, 0, sizeof(mbs));
	while( n > 0 )
	{ if ( (rc=mbrtowc(&wc, s, n, &mbs)) == (size_t)-1 || rc == 0)
	    return FALSE;		/* encoding error */

	  if ( wc > 0xff )
	    iso = FALSE;
	  len++;
	  n -= rc;
	  s += rc;
	}

	if ( n == 0 )
	{ const char *from = text->text.t;
	  void *do_free;

	  n = text->length;
	  memset(&mbs, 0, sizeof(mbs));

	  /* remember malloc'ed input; it is released after conversion */
	  if ( text->storage == PL_CHARS_MALLOC )
	    do_free = text->text.t;
	  else
	    do_free = NULL;

	  if ( iso )
	  { char *to;

	    text->encoding = ENC_ISO_LATIN_1;
	    if ( len+1 < sizeof(text->buf) )
	    { text->text.t = text->buf;
	      text->storage = PL_CHARS_LOCAL;
	    } else
	    { text->text.t = PL_malloc(len+1);
	      text->storage = PL_CHARS_MALLOC;
	    }

	    to = text->text.t;
	    while( n > 0 )
	    { rc = mbrtowc(&wc, from, n, &mbs);

	      *to++ = (char)wc;
	      n -= rc;
	      from += rc;
	    }
	    *to = EOS;
	  } else
	  { wchar_t *to;
	    char b2[sizeof(text->buf)];

	    text->encoding = ENC_WCHAR;
	    if ( len+1 < sizeof(text->buf)/sizeof(wchar_t) )
	    { if ( text->text.t == text->buf )
	      { /* source and target overlap: decode from a copy */
		memcpy(b2, text->buf, sizeof(text->buf));
		from = b2;
	      }
	      text->text.w = (wchar_t*)text->buf;
	      /* The data now lives in text->buf.  Without updating the
		 storage, text that used to be malloc'ed would later have
		 PL_free() called on text->buf.
	      */
	      text->storage = PL_CHARS_LOCAL;
	    } else
	    { text->text.w = PL_malloc((len+1)*sizeof(wchar_t));
	      text->storage = PL_CHARS_MALLOC;
	    }

	    to = text->text.w;
	    while( n > 0 )
	    { rc = mbrtowc(&wc, from, n, &mbs);

	      *to++ = wc;
	      n -= rc;
	      from += rc;
	    }
	    *to = EOS;
	  }

	  text->length = len;
	  text->canonical = TRUE;
	  if ( do_free )
	    PL_free(do_free);

	  succeed;
	}

	fail;
      }
      default:
	assert(0);
    }
  }

  succeed;
}


/* PL_free_text() releases the character data of `text` if, and only if,
   it is marked as malloc'ed.
*/

void
PL_free_text(PL_chars_t *text)
{ if ( text->storage == PL_CHARS_MALLOC )
    PL_free(text->text.t);
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_text_recode() converts `text` to `encoding`.  Only ENC_UTF8 is
supported as target, from ENC_ASCII, ENC_ISO_LATIN_1 or ENC_WCHAR; any
other combination trips an assertion.  Converted text is placed in a
ring buffer.

NOTE(review): the conversion loops were truncated in the file under
review and have been restored from the surviving statements; in
particular the `*s&0x80` test of the Latin-1 case is reconstructed.
Confirm against upstream.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

void
PL_text_recode(PL_chars_t *text, IOENC encoding)
{ if ( text->encoding != encoding )
  { switch(encoding)
    { case ENC_UTF8:
      { switch(text->encoding)
	{ case ENC_ASCII:
	    text->encoding = ENC_UTF8;	/* ASCII is valid UTF-8 as is */
	    break;
	  case ENC_ISO_LATIN_1:
	  { Buffer b = findBuffer(BUF_RING);
	    const unsigned char *s = (const unsigned char *)text->text.t;
	    const unsigned char *e = &s[text->length];
	    char tmp[8];

	    for( ; s<e; s++)
	    { if ( *s&0x80 )
	      { const char *end = utf8_put_char(tmp, *s);
		const char *q = tmp;

		for(q=tmp; q<end; q++)
		  addBuffer(b, *q, char);
	      } else
	      { addBuffer(b, *s, char);
	      }
	    }
	    PL_free_text(text);
	    text->length   = entriesBuffer(b, char);
	    addBuffer(b, EOS, char);
	    text->text.t   = baseBuffer(b, char);
	    text->encoding = ENC_UTF8;
	    text->storage  = PL_CHARS_RING;

	    break;
	  }
	  case ENC_WCHAR:
	  { Buffer b = findBuffer(BUF_RING);
	    const pl_wchar_t *s = text->text.w;
	    const pl_wchar_t *e = &s[text->length];
	    char tmp[8];

	    for( ; s<e; s++)
	    { if ( *s > 0x7f )
	      { const char *end = utf8_put_char(tmp, (int)*s);
		const char *q = tmp;

		for(q=tmp; q<end; q++)
		  addBuffer(b, *q&0xff, char);
	      } else
	      { addBuffer(b, *s&0xff, char);
	      }
	    }
	    PL_free_text(text);
	    text->length   = entriesBuffer(b, char);
	    addBuffer(b, EOS, char);
	    text->text.t   = baseBuffer(b, char);
	    text->encoding = ENC_UTF8;
	    text->storage  = PL_CHARS_RING;

	    break;
	  }
	  default:
	    assert(0);
	}
	break;
      }
      default:
	assert(0);
    }
  }
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_cmp_text(PL_chars_t *t1, size_t o1, PL_chars_t *t2, size_t o2, size_t len)

Compares two substrings of two text representations.  At most `len`
characters are compared, starting at offset `o1` in `t1` and `o2` in
`t2`.  If the compared part is equal the result is 0, unless one of the
texts was too short to provide `len` characters: then it is CMP_LESS if
`t1` was short and CMP_GREATER if only `t2` was short.  Otherwise the
result is CMP_LESS or CMP_GREATER according to the first differing
character.  Both texts must be ISO Latin-1 or wide.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_cmp_text(PL_chars_t *t1, size_t o1, PL_chars_t *t2, size_t o2,
	    size_t len)
{ ssize_t l = len;
  int ifeq = 0;

  if ( l > (ssize_t)(t1->length - o1) )
  { l = t1->length - o1;
    ifeq = CMP_LESS;			/* first is short */
  }
  if ( l > (ssize_t)(t2->length - o2) )
  { l = t2->length - o2;
    if ( ifeq == 0 )
      ifeq = CMP_GREATER;
  }

  if ( l == 0 )				/* too long offsets */
    return ifeq;

  /* In each loop below l drops below 0 only if all characters matched */
  if ( t1->encoding == ENC_ISO_LATIN_1 && t2->encoding == ENC_ISO_LATIN_1 )
  { const unsigned char *s = (const unsigned char *)t1->text.t+o1;
    const unsigned char *q = (const unsigned char *)t2->text.t+o2;

    for(; l-- > 0 && *s == *q; s++, q++ )
      ;
    if ( l < 0 )
      return ifeq;
    else
      return *s > *q ? CMP_GREATER : CMP_LESS;
  } else if ( t1->encoding == ENC_WCHAR && t2->encoding == ENC_WCHAR )
  { const pl_wchar_t *s = t1->text.w+o1;
    const pl_wchar_t *q = t2->text.w+o2;

    for(; l-- > 0 && *s == *q; s++, q++ )
      ;
    if ( l < 0 )
      return ifeq;
    else
      return *s > *q ? CMP_GREATER : CMP_LESS;
  } else if ( t1->encoding == ENC_ISO_LATIN_1 && t2->encoding == ENC_WCHAR )
  { const unsigned char *s = (const unsigned char *)t1->text.t+o1;
    const pl_wchar_t *q = t2->text.w+o2;

    for(; l-- > 0 && *s == *q; s++, q++ )
      ;
    if ( l < 0 )
      return ifeq;
    else
      return *s > *q ? CMP_GREATER : CMP_LESS;
  } else
  { const pl_wchar_t *s = t1->text.w+o1;
    const unsigned char *q = (const unsigned char *)t2->text.t+o2;

    for(; l-- > 0 && *s == *q; s++, q++ )
      ;
    if ( l < 0 )
      return ifeq;
    else
      return *s > *q ? CMP_GREATER : CMP_LESS;
  }
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_concat_text() concatenates the `n` texts in `text` into `result`.
The result is ISO Latin-1 if all inputs can be demoted and wide
otherwise.  It is stored in the local buffer of `result` if it fits and
in malloc'ed memory otherwise.

can_demote() also accepts wide input that only holds characters up to
0xff.  Such input must be narrowed character by character in the Latin-1
branch; a plain byte copy would copy the wrong bytes.

NOTE(review): the loop headers, the can_demote() test and the widening
copy were truncated in the file under review and have been restored
from the surviving statements; confirm against upstream.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_concat_text(int n, PL_chars_t **text, PL_chars_t *result)
{ size_t total_length = 0;
  int latin = TRUE;
  int i;

  for(i=0; i<n; i++)
  { if ( latin && !can_demote(text[i]) )
      latin = FALSE;
    total_length += text[i]->length;
  }

  result->canonical = TRUE;
  result->length = total_length;

  if ( latin )
  { char *to;

    result->encoding = ENC_ISO_LATIN_1;
    if ( total_length+1 < sizeof(result->buf) )
    { result->text.t = result->buf;
      result->storage = PL_CHARS_LOCAL;
    } else
    { result->text.t = PL_malloc(total_length+1);
      result->storage = PL_CHARS_MALLOC;
    }

    for(to=result->text.t, i=0; i<n; i++)
    { if ( text[i]->encoding == ENC_WCHAR )
      { const pl_wchar_t *f = text[i]->text.w;
	const pl_wchar_t *e = &f[text[i]->length];

	while(f<e)
	  *to++ = (char)(*f++ & 0xff);
      } else
      { memcpy(to, text[i]->text.t, text[i]->length);
	to += text[i]->length;
      }
    }
    *to = EOS;
  } else
  { pl_wchar_t *to;

    result->encoding = ENC_WCHAR;
    if ( total_length+1 < sizeof(result->buf)/sizeof(pl_wchar_t) )
    { result->text.w = (pl_wchar_t*)result->buf;
      result->storage = PL_CHARS_LOCAL;
    } else
    { result->text.w = PL_malloc((total_length+1)*sizeof(pl_wchar_t));
      result->storage = PL_CHARS_MALLOC;
    }

    for(to=result->text.w, i=0; i<n; i++)
    { if ( text[i]->encoding == ENC_WCHAR )
      { memcpy(to, text[i]->text.w, text[i]->length*sizeof(pl_wchar_t));
	to += text[i]->length;
      } else
      { const unsigned char *f = (const unsigned char *)text[i]->text.t;
	const unsigned char *e = &f[text[i]->length];

	while(f<e)
	  *to++ = *f++;
      }
    }
    assert((size_t)(to-result->text.w) == total_length);
    *to = EOS;
  }

  return TRUE;
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Sopen_text() opens `txt` as a read-only stream that uses the encoding of
the text.  Only mode "r" is supported; any other mode returns NULL with
errno set to EINVAL.  Returns NULL as well if Sopen_string() fails.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

IOSTREAM *
Sopen_text(PL_chars_t *txt, const char *mode)
{ IOSTREAM *stream;

  if ( !streq(mode, "r") )
  { errno = EINVAL;
    return NULL;
  }

  stream = Sopen_string(NULL,
			txt->text.t,
			bufsize_text(txt, txt->length),
			mode);
  if ( stream )				/* do not dereference on failure */
    stream->encoding = txt->encoding;

  return stream;
}