Fix encoding

This commit is contained in:
Vítor Santos Costa 2016-02-18 12:10:58 +00:00
parent 05a978ce82
commit 484168b1ce
18 changed files with 1557 additions and 1522 deletions

View File

@ -921,7 +921,7 @@ cont_string_code3( USES_REGS1 )
s0 = UStringOfTerm( t2 );
i = IntOfTerm(EXTRA_CBACK_ARG(3,1)); // offset in coded string, increases by 1..6
j = IntOfTerm(EXTRA_CBACK_ARG(3,2)); // offset in UNICODE string, always increases by 1
s = (s0+i) + get_utf8( (unsigned char *)s0+i, &chr );
s = (s0+i) + get_utf8( (unsigned char *)s0+i, -1, &chr );
if (s[0]) {
EXTRA_CBACK_ARG(3,1) = MkIntTerm(s-s0);
EXTRA_CBACK_ARG(3,2) = MkIntTerm(j+1);
@ -983,7 +983,7 @@ string_code3( USES_REGS1 )
if (ns == NULL) {
cut_fail(); // silently fail?
}
get_utf8( (unsigned char *)ns, &chr);
get_utf8( (unsigned char *)ns, -1, &chr);
if ( chr == '\0') cut_fail();
if (Yap_unify(ARG3, MkIntegerTerm(chr))) cut_succeed();
cut_fail();
@ -1042,7 +1042,7 @@ get_string_code3( USES_REGS1 )
if (ns == NULL) {
return FALSE;
} else {
get_utf8( ns, &chr);
get_utf8( ns, -1, &chr);
if ( chr != '\0') return Yap_unify(ARG3, MkIntegerTerm(chr));
}
}
@ -1611,7 +1611,7 @@ build_new_atomic(int mask, wchar_t *wp, const unsigned char *p, size_t min, size
buf = buf_from_tstring(HR);
while (len) {
utf8proc_int32_t chr;
cp += get_utf8((unsigned char *)cp, &chr);
cp += get_utf8((unsigned char *)cp, -1, &chr);
buf += put_utf8((unsigned char *)buf, chr);
len--;
}
@ -1834,7 +1834,7 @@ cont_sub_atomic( USES_REGS1 )
/* found one, check if there is any left */
while (min <= sz-len) {
int chr;
p += get_utf8((unsigned char *)p, &chr);
p += get_utf8((unsigned char *)p, -1, &chr);
after--;
min++;
if (cmpn_utf8(p, UStringOfTerm(nat), len) == 0)

View File

@ -365,7 +365,7 @@ Yap_OpaqueTermToString(Term t, char *str, size_t max)
str_index += sprintf(& str[str_index], "\"");
do {
utf8proc_int32_t chr;
ptr += get_utf8(ptr, &chr);
ptr += get_utf8(ptr, -1, &chr);
if (chr == '\0') break;
str_index += sprintf(str+str_index, "%C", chr);
} while (TRUE);

View File

@ -195,15 +195,11 @@ static bool mkprompt(Term inp) {
static bool getenc(Term inp) {
CACHE_REGS
if (IsVarTerm(inp)) {
return Yap_unify(inp, MkAtomTerm(Yap_LookupAtom(enc_name(LOCAL_encoding))));
}
if (!IsAtomTerm(inp)) {
Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag");
if (!IsVarTerm(inp) && !IsAtomTerm(inp)) {
Yap_Error(TYPE_ERROR_ATOM, inp, "get_encoding");
return false;
}
enc_id((char *)RepAtom(AtomOfTerm(inp))->StrOfAE);
return true;
return Yap_unify(inp, MkAtomTerm(Yap_LookupAtom(enc_name(LOCAL_encoding))));
}
/*
@ -217,7 +213,7 @@ if (!IsAtomTerm(inp) ) {
Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag");
return false;
}
enc_id( RepAtom( AtomOfTerm( inp ) )->StrOfAE );
enc_id( RepAtom( AtomOfTerm( inp ) )->StrOfAE, ENC_OCTET );
return true;
}
*/

View File

@ -1232,7 +1232,12 @@ const char *Yap_tokRep(TokEntry *tokptr) {
switch (tokptr->Tok) {
case Name_tok:
return (char *)RepAtom((Atom)info)->StrOfAE;
if (IsWideAtom((Atom)info)) {
wchar_t *wc = RepAtom((Atom)info)->WStrOfAE;
Term s = Yap_WCharsToString(wc PASS_REGS);
return StringOfTerm(s);
}
return RepAtom((Atom)info)->StrOfAE;
case Number_tok:
if ((b = Yap_TermToString(info, buf, sze, &length, &LOCAL_encoding,
flags)) != buf) {
@ -2038,9 +2043,6 @@ TokEntry *Yap_tokenizer(struct stream_desc *inp_stream, bool store_comments,
char err[1024];
snprintf(err, 1023, "\n++++ token: unrecognised char %c (%d), type %c\n",
ch, ch, chtype(ch));
#if DEBUG
fprintf(stderr, "%s", err);
#endif
}
t->Tok = Ord(kind = eot_tok);
t->TokInfo = TermEof;

View File

@ -408,7 +408,7 @@ write_strings( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
buf = buf_from_tstring(HR);
while (*cp && cp < lim) {
utf8proc_int32_t chr;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
buf += put_utf8(buf, chr);
}
if (max >= min) *buf++ = '\0';
@ -496,7 +496,7 @@ write_atoms( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng U
while (cp < lim && *cp) {
utf8proc_int32_t chr;
CELL *cl;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
if (chr == '\0') break;
w[0] = chr;
cl = HR;
@ -581,7 +581,7 @@ write_codes( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng U
LOCAL_TERM_ERROR( t, 2*(lim-s) );
while (*cp && cp < lim) {
utf8proc_int32_t chr;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
HR[0] = MkIntTerm(chr);
HR[1] = AbsPair(HR+2);
HR += 2;
@ -659,7 +659,7 @@ write_atom( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng US
while (*s && s < lim) {
utf8proc_int32_t chr;
s += get_utf8(s, &chr);
s += get_utf8(s,-1, &chr);
*ptr++ = chr;
}
*ptr++ = '\0';
@ -743,14 +743,14 @@ write_wbuffer( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
return -1;
while (*cp && cp < lim) {
utf8proc_int32_t chr;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
*buf++ = chr;
}
if (max >= min) *buf++ = '\0';
else while (max < min) {
utf8proc_int32_t chr;
max++;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
*buf++ = chr;
}
*buf = '\0';
@ -890,14 +890,14 @@ write_buffer( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
return -1;
while (*cp && cp < lim) {
utf8proc_int32_t chr;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
*buf++ = chr;
}
if (max >= min) *buf++ = '\0';
else while (max < min) {
utf8proc_int32_t chr;
max++;
cp += get_utf8(cp, &chr);
cp += get_utf8(cp, -1, &chr);
*buf++ = chr;
}
sz_end = buf-out->val.uc;
@ -1104,7 +1104,7 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{
utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, &chr2); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, -1, &chr2); if (chr1-chr2) return chr1-chr2; }
}
return 0;
case ENC_WCHAR:
@ -1126,21 +1126,21 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{
utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, -1, &chr1); if (chr1-chr2) return chr1-chr2; }
}
return 0;
case ENC_ISO_UTF8:
{
utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2;
for (i = 0; i < l; i++) { w2 += get_utf8(w2, &chr2); w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { w2 += get_utf8(w2, -1, &chr2); w1 += get_utf8(w1,-1, &chr1); if (chr1-chr2) return chr1-chr2; }
}
return 0;
case ENC_WCHAR:
{
utf8proc_int32_t chr1, chr2;
wchar_t *w2 = s2;
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, -1, &chr1); if (chr1-chr2) return chr1-chr2; }
}
return 0;
default:
@ -1162,7 +1162,7 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{
utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, &chr2); if (chr1-chr2) return chr1-chr2; }
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, -1, &chr2); if (chr1-chr2) return chr1-chr2; }
}
return 0;
case ENC_WCHAR:
@ -1232,7 +1232,7 @@ concat( int n, seq_tv_t *out, void *sv[], encoding_t encv[], size_t lengv[] USES
} else {
unsigned char *ptr = sv[i];
utf8proc_int32_t chr;
while ( (ptr += get_utf8( ptr, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; }
while ( (ptr += get_utf8( ptr, -1, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; }
}
}
*buf++ = '\0';
@ -1276,7 +1276,7 @@ slice( size_t min, size_t max, void *buf, seq_tv_t *out, encoding_t enc USES_REG
unsigned char *ptr = skip_utf8 (buf, min );
utf8proc_int32_t chr;
if (!ptr) return NULL;
while ( min++ < max ) { ptr += get_utf8(ptr, & chr); nbuf += put_utf8(nbuf, chr); }
while ( min++ < max ) { ptr += get_utf8(ptr, -1, & chr); nbuf += put_utf8(nbuf, chr); }
}
*nbuf ++ = '\0';
close_tstring( nbuf PASS_REGS );
@ -1313,7 +1313,7 @@ slice( size_t min, size_t max, void *buf, seq_tv_t *out, encoding_t enc USES_REG
utf8proc_int32_t chr;
LOCAL_ERROR( MkAtomTerm(Yap_LookupAtom(buf)), max-min );
while ( min++ < max ) { ptr += get_utf8(ptr, & chr); *nbuf++ = chr; }
while ( min++ < max ) { ptr += get_utf8(ptr, -1, & chr); *nbuf++ = chr; }
nbuf[0] = '\0';
at = Yap_LookupMaybeWideAtom( (wchar_t*)HR );
}

View File

@ -572,7 +572,7 @@ static void write_string(const unsigned char *s,
qt = '"';
wrputc(qt, stream);
do {
ptr += get_utf8(ptr, &chr);
ptr += get_utf8(ptr, -1, &chr);
if (chr == '\0')
break;
write_quoted(chr, qt, stream);

View File

@ -141,9 +141,9 @@ INLINE_ONLY EXTERN inline char_kind_t chtype(Int ch) {
#define __android_log_print(...)
#endif
inline static utf8proc_ssize_t get_utf8(utf8proc_uint8_t *ptr,
inline static utf8proc_ssize_t get_utf8(utf8proc_uint8_t *ptr, size_t n,
utf8proc_int32_t *valp) {
return utf8proc_iterate(ptr, -1, valp);
return utf8proc_iterate(ptr, n, valp);
}
inline static utf8proc_ssize_t put_utf8(utf8proc_uint8_t *ptr,
@ -164,6 +164,11 @@ inline static utf8proc_uint8_t *skip_utf8(utf8proc_uint8_t *pt,
return pt;
}
/*
 * Return the value utf8proc_charwidth() reports for the code point `val`.
 *
 * NOTE(review): utf8proc documents utf8proc_charwidth() as a display
 * column width, not the number of bytes in the UTF-8 encoding; the name
 * of this helper suggests a byte count -- confirm what callers expect.
 */
inline static utf8proc_ssize_t utf8_nof(utf8proc_int32_t val) {
  utf8proc_ssize_t width = utf8proc_charwidth(val);

  return width;
}
inline static utf8proc_ssize_t strlen_utf8(const utf8proc_uint8_t *pt) {
utf8proc_ssize_t rc = 0;
utf8proc_int32_t b;
@ -259,6 +264,12 @@ inline static int cmpn_utf8(const utf8proc_uint8_t *pt1,
return 0;
}
// UTF16
// Constants for converting between a code point and a UTF-16 surrogate
// pair (see the Unicode UTF-16 FAQ):
//   lead  = LEAD_OFFSET + (code_point >> 10)
//   trail = 0xDC00 + (code_point & 0x3FF)
//   code_point = (lead << 10) + trail + SURROGATE_OFFSET
#define LEAD_OFFSET (0xD800 - (0x10000 >> 10))
#define SURROGATE_OFFSET ( 0x10000 - (0xD800 << 10) - 0xDC00 )
const char *Yap_tokRep(TokEntry *tokptr);
// standard strings

View File

@ -94,10 +94,6 @@ INLINE_ONLY inline EXTERN Int CharOfAtom(Atom at) {
}
}
static int plUnGetc(int sno, int ch) {
return ungetc(ch, GLOBAL_Stream[sno].file);
}
Int Yap_peek(int sno) {
CACHE_REGS
Int ocharcount, olinecount, olinepos;
@ -144,33 +140,41 @@ Int Yap_peek(int sno) {
}
} else if (s->encoding == ENC_UTF16_BE) {
/* do the ungetc as if a write .. */
unsigned long int c = ch;
if (c > ((1 << 16) - 1)) {
ungetc(c / 1 << 16, s->file);
c %= 1 << 16;
// computations
int lead = LEAD_OFFSET + (ch >> 10);
int trail = 0xDC00 + (ch & 0x3FF);
if (lead) {
ungetc(lead / 256, s->file);
ungetc(lead % 256, s->file);
}
ungetc(c, s->file);
} else if (s->encoding == ENC_UTF16_BE) {
/* do the ungetc as if a write .. */
unsigned long int c = ch;
if (c > ((1 << 16) - 1)) {
ungetc(c / 1 << 16, s->file);
c %= 1 << 16;
}
return c;
ungetc(trail / 256, s->file);
ungetc(trail % 256, s->file);
} else if (s->encoding == ENC_UTF16_LE) {
/* do the ungetc as if a write .. */
unsigned long int c = ch;
if (c > ((1 << 16) - 1)) {
ungetc(c % 1 << 16, s->file);
c /= 1 << 16;
// computations
uint16_t lead = LEAD_OFFSET + (ch >> 10);
uint16_t trail = 0xDC00 + (ch & 0x3FF);
lead = 0;
trail = ch;
if (lead) {
ungetc(lead / 256, s->file);
ungetc(lead % 256, s->file);
}
ungetc(c, s->file);
} else {
int (*f)(int, int) = s->stream_putc;
s->stream_putc = plUnGetc;
put_wchar(sno, ch);
s->stream_putc = f;
if (trail) {
ungetc(trail / 256, s->file);
ungetc(trail % 256, s->file);
}
} else if (s->encoding == ENC_ISO_UTF32_LE) {
ungetc( (ch >> 24) & 0xff, s->file);
ungetc( (ch >> 16) & 0xff, s->file);
ungetc( (ch >> 8) & 0xff, s->file);
return ungetc( ch & 0xff, s->file);
} else if (s->encoding == ENC_ISO_UTF32_BE) {
ungetc( ch & 0xff, s->file);
ungetc( (ch >> 8) & 0xff, s->file);
ungetc( (ch >> 16) & 0xff, s->file);
return ungetc( (ch >> 24) & 0xff, s->file);
}
return ch;
}
@ -421,7 +425,7 @@ static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */
//&& strictISOFlag()
) {
UNLOCK(GLOBAL_Stream[sno].streamlock);
Yap_Error(PERMISSION_ERROR_INPUT_TEXT_STREAM, ARG1, "get_byte/2");
Yap_Error(PERMISSION_ERROR_INPUT_STREAM, ARG1, "get_byte/2");
return (FALSE);
}
out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno));
@ -481,7 +485,7 @@ static Int put_code_1(USES_REGS1) { /* '$put'(,N) */
return FALSE;
}
LOCK(GLOBAL_Stream[sno].streamlock);
GLOBAL_Stream[sno].stream_wputc(sno, (int)IntegerOfTerm(Deref(ARG2)));
GLOBAL_Stream[sno].stream_wputc(sno, ch);
/*
* if (!(GLOBAL_Stream[sno].status & Null_Stream_f))
* yap_fflush(GLOBAL_Stream[sno].file);
@ -925,7 +929,6 @@ static Int flush_all_streams(USES_REGS1) { /* $flush_all_streams */
/** @pred peek_code(+ _S_, - _C_) is iso
If _C_ is unbound, or is the code for a character, and
the stream _S_ is a text stream, read the next character from the
current stream and unify its code with _C_, while

View File

@ -31,7 +31,7 @@ typedef enum {
ENC_ISO_UTF32_LE = 128, /// yes, nobody
} encoding_t;
#if defined(__BIG_ENDIAN__)
#if WORDS_BIGENDIAN
#define ENC_WCHAR ENC_ISO_UTF32_BE
#else
#define ENC_WCHAR ENC_ISO_UTF32_LE
@ -114,14 +114,27 @@ static inline const char *enc_name(encoding_t enc) {
}
}
static inline encoding_t enc_id(char *s) {
static inline encoding_t enc_id(const char *s, encoding_t enc_bom) {
{
if (!strcmp(s, "iso_utf8"))
return ENC_ISO_UTF8;
if (!strcmp(s, "utf8"))
return ENC_ISO_UTF8;
if (!strcmp(s, "UTF-8"))
return ENC_ISO_UTF8;
if (!strcmp(s, "utf16_le"))
return ENC_UTF16_LE;
if (!strcmp(s, "utf16_be"))
return ENC_UTF16_BE;
if (!strcmp(s, "UTF-16")) {
if (enc_bom == ENC_UTF16_LE)
return ENC_UTF16_LE;
return ENC_UTF16_BE;
}
if (!strcmp(s, "UTF-16LE"))
return ENC_UTF16_LE;
if (!strcmp(s, "UTF16-BE"))
return ENC_UTF16_BE;
if (!strcmp(s, "octet"))
return ENC_OCTET;
if (!strcmp(s, "iso_latin_1"))
@ -134,11 +147,31 @@ static inline encoding_t enc_id(char *s) {
return ENC_ISO_UTF32_BE;
if (!strcmp(s, "utf32_le"))
return ENC_ISO_UTF32_LE;
if (!strcmp(s, "default"))
if (!strcmp(s, "UTF-32")) {
if (enc_bom == ENC_ISO_UTF32_LE)
return ENC_ISO_UTF32_LE;
return ENC_ISO_UTF32_BE;
}
if (!strcmp(s, "UTF-32BE"))
return ENC_ISO_UTF32_BE;
if (!strcmp(s, "UTF-32LE"))
return ENC_ISO_UTF32_LE;
if (!strcmp(s, "ISO-8859-1"))
return ENC_ISO_LATIN1;
// just for SWI compat, this actually refers to
// UCS-2
if (!strcmp(s, "unicode_be"))
return ENC_UTF16_BE;
if (!strcmp(s, "unicode_le"))
return ENC_UTF16_LE;
if (!strcmp(s, "default")) {
if (enc_bom != ENC_OCTET)
return enc_bom;
return Yap_DefaultEncoding();
}
else {
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, 0, "bad encoding %s", s);
return ENC_OCTET;
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, MkAtomTerm(Yap_LookupAtom(s)), "bad encoding %s", s);
return Yap_DefaultEncoding();
}
}
}

View File

@ -328,7 +328,7 @@ static int format_print_str(Int sno, Int size, Int has_size, Term args,
const unsigned char *pt = UStringOfTerm(args);
while (*pt && (!has_size || size > 0)) {
utf8proc_int32_t ch;
pt += get_utf8((unsigned char *)pt, &ch);
pt += get_utf8((unsigned char *)pt, -1, &ch);
f_putc(sno, ch);
}
} else {

View File

@ -27,7 +27,8 @@ static char SccsId[] = "%W% %G%";
*
*/
/*
* This file includes the definition of a miscellany of standard predicates * for yap referring to: Files and GLOBAL_Streams, Simple Input/Output,
* This file includes the definition of a miscellany of standard predicates *
*for yap referring to: Files and GLOBAL_Streams, Simple Input/Output,
*
*/
@ -123,8 +124,7 @@ static Term gethdir(Term t) {
#endif
s[nsz + 1] = '\0';
}
return
MkAtomTerm(Yap_LookupAtom( s ) );
return MkAtomTerm(Yap_LookupAtom(s));
}
static bool issolutions(Term t) {
@ -188,10 +188,6 @@ void Yap_DefaultStreamOps(StreamDesc *st) {
st->stream_wgetc_for_read = ISOWGetc;
else
st->stream_wgetc_for_read = st->stream_wgetc;
if (st->encoding == ENC_ISO_UTF8)
st->stream_getc_for_utf8 = st->stream_getc;
else
st->stream_getc_for_utf8 = GetUTF8;
}
static void unix_upd_stream_info(StreamDesc *s) {
@ -631,7 +627,6 @@ int console_post_process_eof(StreamDesc *s) {
s->stream_getc = EOFGetc;
s->stream_wgetc = EOFWGetc;
s->stream_wgetc_for_read = EOFWGetc;
s->stream_getc_for_utf8 = EOFGetc;
LOCAL_newline = true;
}
return EOFCHAR;
@ -658,7 +653,6 @@ int post_process_eof(StreamDesc *s) {
s->stream_wgetc = EOFWGetc;
s->stream_getc = EOFGetc;
s->stream_wgetc_for_read = EOFWGetc;
s->stream_getc_for_utf8 = EOFGetc;
}
return EOFCHAR;
}
@ -669,7 +663,6 @@ int post_process_weof(StreamDesc *s) {
s->stream_wgetc = EOFWGetc;
s->stream_wgetc = EOFWGetc;
s->stream_wgetc_for_read = EOFWGetc;
s->stream_getc_for_utf8 = EOFGetc;
}
return EOFCHAR;
}
@ -693,7 +686,7 @@ int PlGetc(int sno) {
StreamDesc *s = &GLOBAL_Stream[sno];
Int ch;
ch = getc(s->file);
ch = fgetc(s->file);
if (ch == EOF) {
return post_process_eof(s);
}
@ -733,165 +726,178 @@ int DefaultGets(int sno, UInt size, char *buf) {
return (buf - pt) - 1;
}
/*
 * Return the next byte of the UTF-8 encoding of the text read from
 * stream `sno`.
 *
 * Characters below 128 (and negative end-of-file values) are returned
 * directly.  For any other character the UTF-8 encoding is written into
 * the stream's `utf8_buf`; bytes are then handed out one per call, each
 * byte being zeroed once returned, so an all-zero buffer means "nothing
 * pending, read a new character".
 *
 * The buffer is manipulated through a local copy that is stored back
 * into the stream descriptor before returning; without the store the
 * continuation bytes of a multi-byte character would be lost.
 */
int GetUTF8(int sno) {
  StreamDesc *s = &GLOBAL_Stream[sno];
  uint64_t bufi = s->utf8_buf;
  unsigned char *buf = (unsigned char *)&bufi;
  size_t i = 0;

  if (!bufi) {
    int32_t ch = get_wchar(sno);
    if (ch < 128)
      return ch;
    /* every byte of a non-ASCII encoding is >= 0x80, hence non-zero */
    put_utf8(buf, ch);
  } else {
    /* skip the bytes already consumed; bufi != 0 guarantees a hit */
    while (i < sizeof(bufi) - 1 && buf[i] == '\0')
      i++;
  }
  unsigned char c = buf[i];
  buf[i] = '\0';
  /* remember what is still pending for the next call */
  s->utf8_buf = bufi;
  return c;
}
/*
 * Classify a byte by the first clear bit among 0x20, 0x10, 0x08, 0x04
 * (tested in that order): the result is 1, 2, 3 or 4 respectively, or 5
 * when all four bits are set.
 */
static int utf8_nof(char ch) {
  static const int probe[] = {0x20, 0x10, 0x08, 0x04};
  int count;

  for (count = 0; count < 4; count++) {
    if ((ch & probe[count]) == 0)
      break;
  }
  return count + 1;
}
/*
 * wide_char(): fold one more input byte into a wide character according
 * to GLOBAL_Stream[sno].encoding.
 *
 * The macro is not self-contained: the expansion site must provide
 *   sno      - stream index,
 *   ch       - the byte just read,
 *   wch      - accumulator for the character being assembled,
 *   how_many - multi-byte state, 0 when no character is in progress.
 * When a character is complete the macro executes `return` in the
 * enclosing function; otherwise it `break`s out of its own switch so the
 * caller can fetch the next byte and expand the macro again.
 *
 * The UTF-16 cases combine exactly two bytes; no surrogate pairs are
 * decoded.  The switch has no default case.
 *
 * NOTE(review): the ENC_ISO_UTF32_LE case shifts the first byte by 24
 * bits and the ENC_ISO_UTF32_BE case shifts the first byte by 0 bits,
 * which looks swapped with respect to the case labels -- confirm.
 */
#define wide_char() \
switch (GLOBAL_Stream[sno].encoding) { \
case ENC_OCTET: \
return ch; \
case ENC_ISO_LATIN1: \
return ch; \
case ENC_ISO_ASCII: \
if (ch & 0x80) { \
/* error */ \
} \
return ch; \
case ENC_ISO_ANSI: { \
char buf[1]; \
int out; \
\
if (!how_many) { \
memset((void *)&(GLOBAL_Stream[sno].mbstate), 0, sizeof(mbstate_t)); \
} \
buf[0] = ch; \
if ((out = mbrtowc(&wch, buf, 1, &(GLOBAL_Stream[sno].mbstate))) == 1) \
return wch; \
if (out == -1) { \
/* error */ \
} \
how_many++; \
break; \
} \
case ENC_ISO_UTF8: { \
if (!how_many) { \
if (ch & 0x80) { \
how_many = utf8_nof(ch); \
/* \
keep a backup of the start character in case we meet an error, \
useful if we are scanning ISO files. \
*/ \
GLOBAL_Stream[sno].och = ch; \
wch = (ch & ((1 << (6 - how_many)) - 1)) << (6 * how_many); \
} else { \
return ch; \
} \
} else { \
how_many--; \
if ((ch & 0xc0) == 0x80) { \
wch += (ch & ~0xc0) << (how_many * 6); \
} else { \
/* error */ \
/* try to recover character, assume this is our first character */ \
wchar_t och = GLOBAL_Stream[sno].och; \
return och; \
} \
if (!how_many) { \
return wch; \
} \
} \
} break; \
case ENC_UTF16_BE: \
if (how_many) { \
return wch + ch; \
} \
how_many = 1; \
wch = ch << 8; \
break; \
case ENC_UTF16_LE: \
if (how_many) { \
return wch + (ch << 8); \
} \
how_many = 1; \
wch = ch; \
break; \
case ENC_ISO_UTF32_LE: \
if (!how_many) { \
how_many = 4; \
wch = 0; \
} \
how_many--; \
wch += ((unsigned char)(ch & 0xff)) << (how_many * 8); \
if (how_many == 0) \
return wch; \
break; \
case ENC_ISO_UTF32_BE: \
if (!how_many) { \
how_many = 4; \
wch = 0; \
} \
how_many--; \
wch += ((unsigned char)(ch & 0xff)) << ((3 - how_many) * 8); \
if (how_many == 0) \
return wch; \
break; \
}
/// compose a wide char from a sequence of getchars.
// This is a slow-lane routine, called if no specialised code
// is available.
static int get_wchar(int sno) {
int ch;
wchar_t wch;
int how_many = 0;
StreamDesc *st = GLOBAL_Stream + sno;
int ch = st->stream_getc(sno);
while (true) {
ch = GLOBAL_Stream[sno].stream_getc(sno);
if (ch == -1) {
if (how_many) {
if (ch == -1)
return post_process_weof(st);
switch (st->encoding) {
case ENC_OCTET:
return ch;
// no error detection, all characters are ok.
case ENC_ISO_LATIN1:
return ch;
// 7 bits code, anything above is bad news
case ENC_ISO_ASCII:
if (ch & 0x80) {
/* error */
}
return post_process_weof(GLOBAL_Stream + sno);
return ch;
// default OS encoding, depends on locale.
case ENC_ISO_ANSI: {
char buf[8];
int out;
int wch;
mbstate_t mbstate;
memset((void *)&(mbstate), 0, sizeof(mbstate_t));
buf[0] = ch;
while ((out = mbrtowc(&wch, buf, 1, &(mbstate))) != 1) {
int ch = buf[0] = st->stream_getc(sno);
if (ch == -1)
return post_process_weof(st);
}
return wch;
}
// UTF-8 works o 8 bits.
case ENC_ISO_UTF8: {
unsigned char buf[8];
if (ch < 0x80) {
return ch;
}
// if ((ch - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
if (ch < 0xe0) { // 2-byte sequence
// Must have valid continuation character
int c1 = buf[0] = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
// if (!utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
return ((ch & 0x1f)<<6) | (c1 & 0x3f);
}
if (ch < 0xf0) { // 3-byte sequence
//if ((str + 1 >= end) || !utf_cont(*str) || !utf_cont(str[1]))
// return UTF8PROC_ERROR_INVALIDUTF8;
// Check for surrogate chars
//if (ch == 0xed && *str > 0x9f)
// return UTF8PROC_ERROR_INVALIDUTF8;
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
int c2 = st->stream_getc(sno);
if (c2 == -1)
return post_process_weof(st);
return ((ch & 0xf)<<12) | ((c1 & 0x3f)<<6) | (c2 & 0x3f);
} else {
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
int c2 = st->stream_getc(sno);
if (c2 == -1)
return post_process_weof(st);
int c3 = st->stream_getc(sno);
if (c3 == -1)
return post_process_weof(st);
return ((ch & 7)<<18) | ((c1 & 0x3f)<<12) | ((c2 & 0x3f)<<6) | (c3 & 0x3f);
}
}
case ENC_UTF16_LE: // check http://unicode.org/faq/utf_bom.html#utf16-3
// little-endian: start with big shot
{
int wch;
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = (c1 << 8) + ch;
if (wch >= 0xEFFF) {
int c2 = st->stream_getc(sno);
if (c2 == -1)
return post_process_weof(st);
int c3 = st->stream_getc(sno);
if (c3 == -1)
return post_process_weof(st);
wch = wch + (((c3 << 8) + c2)<<wch) + SURROGATE_OFFSET;
}
return wch;
}
case ENC_UTF16_BE: // check http://unicode.org/faq/utf_bom.html#utf16-3
// little-endian: start with big shot
{
int wch;
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = (c1) + (ch<<8);
if (wch >= 0xEFFF) {
int c3 = st->stream_getc(sno);
if (c3 == -1)
return post_process_weof(st);
int c2 = st->stream_getc(sno);
if (c2 == -1)
return post_process_weof(st);
wch = (((c3 << 8) + c2) << 10) + wch + SURROGATE_OFFSET;
}
return wch;
}
case ENC_ISO_UTF32_BE: // check http://unicode.org/faq/utf_bom.html#utf16-3
// little-endian: start with big shot
{
int wch = ch;
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = wch + c1;
}
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = (wch << 8 )+c1;
}
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = (wch << 8) +c1;
}
return wch;
}
case ENC_ISO_UTF32_LE: // check http://unicode.org/faq/utf_bom.html#utf16-3
// little-endian: start with big shot
{
int wch = ch;
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch += c1<<8;
}
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch += c1<<16;
}
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch += c1<<24;
}
return wch;
}
wide_char();
}
return EOF;
}
// layered version
/*
 * Read one wide character from stream `sno`, taking bytes directly from
 * the stream's FILE with getc() and assembling them with wide_char().
 *
 * The locals ch, wch and how_many must keep these exact names: the
 * wide_char() macro refers to them (and to sno) by name.  wide_char()
 * returns from this function once a character is complete; otherwise the
 * loop reads another byte.  On end of file the result of
 * post_process_weof() is returned.
 */
static int get_wchar__(int sno) {
int ch;
wchar_t wch;
int how_many = 0;
StreamDesc *s = GLOBAL_Stream + sno;
while (TRUE) {
ch = getc(GLOBAL_Stream[sno].file);
if (ch == -1) {
/* end of file; how_many != 0 means it hit in the middle of a character */
if (how_many) {
/* error */
}
return post_process_weof(s);
}
/* may `return` the finished character from this function */
wide_char();
}
/* not reached: the loop above only exits through return */
return EOF;
}
/* Read one wide character from stream `sno` by delegating to get_wchar(). */
static int get_wchar__(int sno) {
  int wch = get_wchar(sno);

  return wch;
}
static int get_wchar_from_file(int sno) {
return post_process_read_char(get_wchar__(sno), GLOBAL_Stream + sno);
@ -953,10 +959,11 @@ int put_wchar(int sno, wchar_t ch) {
return GLOBAL_Stream[sno].stream_putc(sno, ch);
case ENC_ISO_ANSI: {
char buf[MB_LEN_MAX];
mbstate_t mbstate;
int n;
memset((void *)&(GLOBAL_Stream[sno].mbstate), 0, sizeof(mbstate_t));
if ((n = wcrtomb(buf, ch, &(GLOBAL_Stream[sno].mbstate))) < 0) {
memset((void *)&mbstate, 0, sizeof(mbstate_t));
if ((n = wcrtomb(buf, ch, &mbstate)) < 0) {
/* error */
GLOBAL_Stream[sno].stream_putc(sno, ch);
return -1;
@ -989,17 +996,41 @@ int put_wchar(int sno, wchar_t ch) {
}
break;
case ENC_UTF16_BE:
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8));
return GLOBAL_Stream[sno].stream_putc(sno, (ch & 0xff));
{
// computations
int lead = LEAD_OFFSET + (ch >> 10);
int trail = 0xDC00 + (ch & 0x3FF);
GLOBAL_Stream[sno].stream_putc(sno, (trail & 0xff));
GLOBAL_Stream[sno].stream_putc(sno, (trail >> 8));
if (trail) {
GLOBAL_Stream[sno].stream_putc(sno, (lead & 0xff));
GLOBAL_Stream[sno].stream_putc(sno, (lead >> 8));
}
return lead >> 8;
}
case ENC_UTF16_LE:
GLOBAL_Stream[sno].stream_putc(sno, (ch & 0xff));
return GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8));
case ENC_ISO_UTF32_BE:
{
// computations
int lead = LEAD_OFFSET + (ch >> 10);
int trail = 0xDC00 + (ch & 0x3FF);
if (lead) {
GLOBAL_Stream[sno].stream_putc(sno, (lead >> 8));
GLOBAL_Stream[sno].stream_putc(sno, (lead & 0xff));
}
GLOBAL_Stream[sno].stream_putc(sno, (trail >> 8));
GLOBAL_Stream[sno].stream_putc(sno, (trail & 0xff));
return lead >> 8;
}
case ENC_ISO_UTF32_LE:
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 24) & 0xff);
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 16) & 0xff);
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8) & 0xff);
return GLOBAL_Stream[sno].stream_putc(sno, ch & 0xff);
case ENC_ISO_UTF32_LE:
case ENC_ISO_UTF32_BE:
GLOBAL_Stream[sno].stream_putc(sno, ch & 0xff);
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8) & 0xff);
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 16) & 0xff);
@ -1012,8 +1043,8 @@ int put_wchar(int sno, wchar_t ch) {
/* used by user-code to read characters from the current input stream */
int Yap_PlGetchar(void) {
CACHE_REGS
return (
GLOBAL_Stream[LOCAL_c_input_stream].stream_getc(LOCAL_c_input_stream));
return (GLOBAL_Stream[LOCAL_c_input_stream].stream_getc(
LOCAL_c_input_stream));
}
int Yap_PlGetWchar(void) {
@ -1171,29 +1202,23 @@ static void check_bom(int sno, StreamDesc *st) {
} else {
ch3 = st->stream_getc(sno);
if (ch3 != 0x00) {
ungetc(ch1, st->file);
ungetc(ch2, st->file);
ungetc(ch3, st->file);
return;
} else {
ch4 = st->stream_getc(sno);
if (ch4 != 0x00) {
ungetc(ch1, st->file);
ungetc(ch2, st->file);
ungetc(ch3, st->file);
ungetc(ch4, st->file);
return;
} else {
if (ch4 == 0x00) {
st->status |= HAS_BOM_f;
st->encoding = ENC_ISO_UTF32_LE;
return;
} else {
ungetc(ch4, st->file);
ungetc(0x00, st->file);
}
}
}
st->status |= HAS_BOM_f;
st->encoding = ENC_UTF16_LE;
return;
}
}
case 0xEF:
ch2 = st->stream_getc(sno);
if (ch2 != 0xBB) {
@ -1257,9 +1282,7 @@ static void check_bom(int sno, StreamDesc *st) {
return true;
}
static bool
open_header( int sno, Atom open_mode)
{
static bool open_header(int sno, Atom open_mode) {
if (open_mode == AtomWrite) {
const char *ptr;
const char s[] = "#!";
@ -1281,13 +1304,13 @@ static void check_bom(int sno, StreamDesc *st) {
// skip header
int ch;
while ((ch = Yap_peek(sno)) == '#') {
while ((ch = GLOBAL_Stream[sno].stream_wgetc( sno )) != 10 && ch != -1 );
while ((ch = GLOBAL_Stream[sno].stream_wgetc(sno)) != 10 && ch != -1)
;
}
}
return true;
}
#define OPEN_DEFS() \
PAR("alias", isatom, OPEN_ALIAS), PAR("bom", booleanFlag, OPEN_BOM), \
PAR("buffer", isatom, OPEN_BUFFER), \
@ -1301,9 +1324,8 @@ static void check_bom(int sno, StreamDesc *st) {
PAR("mode", isatom, OPEN_MODE), PAR("output", ok, OPEN_OUTPUT), \
PAR("representation_errors", booleanFlag, OPEN_REPRESENTATION_ERRORS), \
PAR("reposition", booleanFlag, OPEN_REPOSITION), \
PAR("script", booleanFlag, OPEN_SCRIPT), \
PAR("type", isatom, OPEN_TYPE), PAR("wait", booleanFlag, OPEN_WAIT), \
PAR(NULL, ok, OPEN_END)
PAR("script", booleanFlag, OPEN_SCRIPT), PAR("type", isatom, OPEN_TYPE), \
PAR("wait", booleanFlag, OPEN_WAIT), PAR(NULL, ok, OPEN_END)
#define PAR(x, y, z) z
typedef enum open_enum_choices { OPEN_DEFS() } open_choices_t;
@ -1316,10 +1338,8 @@ typedef enum open_enum_choices { OPEN_DEFS() } open_choices_t;
static const param_t open_defs[] = {OPEN_DEFS()};
#undef PAR
static Int
do_open(Term file_name, Term t2,
static Int do_open(
Term file_name, Term t2,
Term tlist USES_REGS) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
Atom open_mode;
int sno;
@ -1330,6 +1350,7 @@ do_open(Term file_name, Term t2,
const char *fname;
stream_flags_t flags;
FILE *fd;
const char *s_encoding;
encoding_t encoding;
Term tenc;
@ -1383,7 +1404,8 @@ do_open(Term file_name, Term t2,
if (LOCAL_Error_TYPE != YAP_NO_ERROR) {
if (LOCAL_Error_TYPE == DOMAIN_ERROR_PROLOG_FLAG)
LOCAL_Error_TYPE = DOMAIN_ERROR_OPEN_OPTION;
Yap_Error( LOCAL_Error_TYPE, LOCAL_Error_Term, "option handling in open/3" );
Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term,
"option handling in open/3");
}
return false;
}
@ -1402,10 +1424,13 @@ do_open(Term file_name, Term t2,
}
if (args[OPEN_ENCODING].used) {
tenc = args[OPEN_ENCODING].tvalue;
encoding = enc_id(RepAtom(AtomOfTerm(tenc))->StrOfAE);
s_encoding = RepAtom(AtomOfTerm(tenc))->StrOfAE;
} else {
encoding = LOCAL_encoding;
s_encoding = "default";
}
// default encoding, no bom yet
encoding = enc_id( s_encoding, ENC_OCTET);
// only set encoding after getting BOM
bool ok = (args[OPEN_EXPAND_FILENAME].used
? args[OPEN_EXPAND_FILENAME].tvalue == TermTrue
: false) ||
@ -1419,8 +1444,8 @@ do_open(Term file_name, Term t2,
}
// Skip scripts that start with !#/.. or similar
bool script = (args[OPEN_SCRIPT].used
? args[OPEN_SCRIPT].tvalue == TermTrue
bool script =
(args[OPEN_SCRIPT].used ? args[OPEN_SCRIPT].tvalue == TermTrue
: false);
// binary type
if (args[OPEN_TYPE].used) {
@ -1468,11 +1493,11 @@ do_open(Term file_name, Term t2,
fname = LOCAL_FileNameBuf;
UNLOCK(st->streamlock);
if (errno == ENOENT)
return (PlIOError(EXISTENCE_ERROR_SOURCE_SINK, file_name, "%s: %s", fname,
strerror(errno)));
else {
return (PlIOError(PERMISSION_ERROR_OPEN_SOURCE_SINK, file_name, "%s: %s",
return (PlIOError(EXISTENCE_ERROR_SOURCE_SINK, file_name, "%s: %s",
fname, strerror(errno)));
else {
return (PlIOError(PERMISSION_ERROR_OPEN_SOURCE_SINK, file_name,
"%s: %s", fname, strerror(errno)));
}
}
#if MAC
@ -1481,7 +1506,11 @@ do_open(Term file_name, Term t2,
}
#endif
flags &= ~(Free_Stream_f);
if (!Yap_initStream(sno, fd, fname, file_name, encoding, flags, open_mode))
if (!Yap_initStream(sno, fd, fname, file_name, encoding, flags,
open_mode))
return false;
if (!Yap_initStream(sno, fd, fname, file_name, encoding, flags,
open_mode))
return false;
if (open_mode == AtomWrite) {
if (needs_bom && !write_bom(sno, st))
@ -1489,11 +1518,14 @@ do_open(Term file_name, Term t2,
} else if (open_mode == AtomRead && !avoid_bom) {
check_bom(sno, st); // can change encoding
}
// follow declaration unless there is v
if (st->status & HAS_BOM_f)
st->encoding = enc_id( s_encoding, st->encoding);
else
st->encoding = encoding;
if (script)
open_header(sno, open_mode);
UNLOCK(st->streamlock);
{
Term t = Yap_MkStream(sno);
@ -1521,8 +1553,8 @@ writable.
*/
static Int open3(USES_REGS1) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
static Int open3(
USES_REGS1) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
return do_open(Deref(ARG1), Deref(ARG2), TermNil PASS_REGS);
}
@ -1554,7 +1586,8 @@ which generates a new `end_of_file` (default for non-tty files).
+ `alias(+ _Name_)` is iso
Specify an alias to the stream. The alias <tt>Name</tt> must be an atom. The
Specify an alias to the stream. The alias <tt>Name</tt> must be an atom.
The
alias can be used instead of the stream descriptor for every operation
concerning the stream.
@ -1603,11 +1636,13 @@ open_expands_filename.
*/
static Int open4(USES_REGS1) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
static Int open4(
USES_REGS1) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
return do_open(Deref(ARG1), Deref(ARG2), Deref(ARG4) PASS_REGS);
}
static Int p_file_expansion(USES_REGS1) { /* '$file_expansion'(+File,-Name) */
static Int p_file_expansion(
USES_REGS1) { /* '$file_expansion'(+File,-Name) */
Term file_name = Deref(ARG1);
/* we know file_name is bound */
@ -1650,10 +1685,6 @@ static Int p_open_null_stream(USES_REGS1) {
st->stream_gets = PlGets;
st->stream_wgetc = get_wchar;
st->stream_wgetc_for_read = get_wchar;
if (st->encoding == ENC_ISO_UTF8)
st->stream_getc_for_utf8 = st->stream_getc;
else
st->stream_getc_for_utf8 = GetUTF8;
st->user_name = MkAtomTerm(st->name = AtomDevNull);
UNLOCK(st->streamlock);
t = Yap_MkStream(sno);
@ -1683,8 +1714,8 @@ int Yap_OpenStream(FILE *fd, char *name, Term file_name, int flags) {
#define CheckStream(arg, kind, msg) \
CheckStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
static int CheckStream__(const char *file, const char *f, int line, Term arg,
int kind, const char *msg) {
static int CheckStream__(const char *file, const char *f, int line,
Term arg, int kind, const char *msg) {
int sno = -1;
arg = Deref(arg);
if (IsVarTerm(arg)) {
@ -1746,8 +1777,8 @@ int Yap_CheckStream__(const char *file, const char *f, int line, Term arg,
return CheckStream__(file, f, line, arg, kind, msg);
}
int Yap_CheckTextStream__(const char *file, const char *f, int line, Term arg,
int kind, const char *msg) {
int Yap_CheckTextStream__(const char *file, const char *f, int line,
Term arg, int kind, const char *msg) {
int sno;
if ((sno = CheckStream__(file, f, line, arg, kind, msg)) < 0)
return -1;
@ -1784,7 +1815,8 @@ int Yap_GetFreeStreamDForReading(void) {
/**
* @pred always_prompt_user
*
* Ensure that the stream always prompts before asking the standard input stream for data.
* Ensure that the stream always prompts before asking the standard input
stream for data.
*/
static Int always_prompt_user(USES_REGS1) {
@ -1815,7 +1847,8 @@ user_output, and user_error can never be closed.
(USES_REGS1) { /* '$close'(+GLOBAL_Stream) */
Int sno = CheckStream(
ARG1, (Input_Stream_f | Output_Stream_f | Socket_Stream_f), "close/2");
ARG1, (Input_Stream_f | Output_Stream_f | Socket_Stream_f),
"close/2");
if (sno < 0)
return (FALSE);
if (sno <= StdErrStream) {
@ -1853,7 +1886,8 @@ YAP currently ignores these options.
*/
static Int close2(USES_REGS1) { /* '$close'(+GLOBAL_Stream) */
Int sno = CheckStream(
ARG1, (Input_Stream_f | Output_Stream_f | Socket_Stream_f), "close/2");
ARG1, (Input_Stream_f | Output_Stream_f | Socket_Stream_f),
"close/2");
Term tlist;
if (sno < 0)
return (FALSE);
@ -1944,11 +1978,11 @@ static Int abs_file_parameters(USES_REGS1) {
t[ABSOLUTE_FILE_NAME_RELATIVE_TO] =
gethdir(args[ABSOLUTE_FILE_NAME_RELATIVE_TO].tvalue);
} else {
t[ABSOLUTE_FILE_NAME_RELATIVE_TO] =
gethdir( TermDot );
t[ABSOLUTE_FILE_NAME_RELATIVE_TO] = gethdir(TermDot);
}
if (args[ABSOLUTE_FILE_NAME_FILE_TYPE].used)
t[ABSOLUTE_FILE_NAME_FILE_TYPE] = args[ABSOLUTE_FILE_NAME_FILE_TYPE].tvalue;
t[ABSOLUTE_FILE_NAME_FILE_TYPE] =
args[ABSOLUTE_FILE_NAME_FILE_TYPE].tvalue;
else
t[ABSOLUTE_FILE_NAME_FILE_TYPE] = TermTxt;
if (args[ABSOLUTE_FILE_NAME_ACCESS].used)
@ -1961,7 +1995,8 @@ static Int abs_file_parameters(USES_REGS1) {
else
t[ABSOLUTE_FILE_NAME_FILE_ERRORS] = TermError;
if (args[ABSOLUTE_FILE_NAME_SOLUTIONS].used)
t[ABSOLUTE_FILE_NAME_SOLUTIONS] = args[ABSOLUTE_FILE_NAME_SOLUTIONS].tvalue;
t[ABSOLUTE_FILE_NAME_SOLUTIONS] =
args[ABSOLUTE_FILE_NAME_SOLUTIONS].tvalue;
else
t[ABSOLUTE_FILE_NAME_SOLUTIONS] = TermFirst;
if (args[ABSOLUTE_FILE_NAME_EXPAND].used)
@ -1977,7 +2012,8 @@ static Int abs_file_parameters(USES_REGS1) {
args[ABSOLUTE_FILE_NAME_VERBOSE_FILE_SEARCH].tvalue;
else
t[ABSOLUTE_FILE_NAME_VERBOSE_FILE_SEARCH] =
(trueGlobalPrologFlag(VERBOSE_FILE_SEARCH_FLAG) ? TermTrue : TermFalse);
(trueGlobalPrologFlag(VERBOSE_FILE_SEARCH_FLAG) ? TermTrue
: TermFalse);
tf = Yap_MkApplTerm(Yap_MkFunctor(AtomOpt, ABSOLUTE_FILE_NAME_END),
ABSOLUTE_FILE_NAME_END, t);
return (Yap_unify(ARG2, tf));

View File

@ -37,10 +37,14 @@ extern size_t Yap_page_size;
#include <wchar.h>
/* Yap_CheckStream / Yap_CheckTextStream: public wrappers that forward to the
 * corresponding `__` functions, prepending the call site's __FILE__,
 * __FUNCTION__ and __LINE__ to (arg, kind, msg). Both functions return int;
 * callers visible in this code base treat a negative result as failure.
 * NOTE(review): each macro and prototype appears twice below -- the one-line
 * forms and the wrapped forms are the two sides of a rendered diff, not two
 * separate declarations. */
#define Yap_CheckStream( arg, kind, msg) Yap_CheckStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
extern int Yap_CheckStream__(const char *, const char *, int , Term, int, const char *);
#define Yap_CheckTextStream( arg, kind, msg) Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
extern int Yap_CheckTextStream__(const char *, const char *, int , Term, int, const char *);
#define Yap_CheckStream(arg, kind, msg) \
Yap_CheckStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
extern int Yap_CheckStream__(const char *, const char *, int, Term, int,
const char *);
#define Yap_CheckTextStream(arg, kind, msg) \
Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int,
const char *);
extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name,
encoding_t encoding, stream_flags_t flags,
@ -76,8 +80,8 @@ Int Yap_CloseSocket(int, socket_info, socket_domain);
#endif /* USE_SOCKET */
/************ SWI compatible support for unicode representations ************/
typedef struct yap_io_position
{ int64_t byteno; /* byte-position in file */
typedef struct yap_io_position {
int64_t byteno; /* byte-position in file */
int64_t charno; /* character position in file */
long int lineno; /* lineno in file */
long int linepos; /* position in line */
@ -85,15 +89,14 @@ typedef struct yap_io_position
} yapIOPOS;
/* yapSourceLocation: pairs the atom naming a source file with a yapIOPOS
 * position record. It is only declared when _PL_STREAM_H is not defined --
 * presumably because that header supplies an equivalent type; confirm.
 * NOTE(review): the two struct openers below are the old and new brace
 * styles from a rendered diff. */
#ifndef _PL_STREAM_H
typedef struct
{ Atom file; /* current source file */
typedef struct {
Atom file; /* current source file */
yapIOPOS position; /* Line, line pos, char and byte */
} yapSourceLocation;
#endif
#define RD_MAGIC 0xefebe128
typedef struct vlist_struct_t {
struct VARSTRUCT *ve;
struct vlist_struct_t *next;
@ -106,9 +109,8 @@ typedef struct qq_struct_t {
struct qq_struct_t *next;
} qq_t;
typedef struct read_data_t
{ unsigned char *here; /* current character */
typedef struct read_data_t {
unsigned char *here; /* current character */
unsigned char *base; /* base of clause */
unsigned char *end; /* end of the clause */
unsigned char *token_start; /* start of most recent read token */
@ -163,8 +165,7 @@ typedef int (*GetsFunc)(int, UInt, char *);
#include <sys/socket.h>
#endif
typedef
struct mem_desc {
typedef struct mem_desc {
char *buf; /* where the file is being read from/written to */
int src; /* where the space comes from, 0 code space, 1 malloc */
Int max_size; /* maximum buffer size (may be changed dynamically) */
@ -172,8 +173,7 @@ struct mem_desc {
volatile void *error_handler;
} memHandle;
typedef struct stream_desc
{
typedef struct stream_desc {
Atom name;
Term user_name;
FILE *file;
@ -183,7 +183,7 @@ typedef struct stream_desc
union {
struct {
#define PLGETC_BUF_SIZE 4096
char *buf, *ptr;
unsigned char *buf, *ptr;
int left;
} file;
memHandle mem_string;
@ -198,7 +198,7 @@ typedef struct stream_desc
} socket;
#endif
struct {
const char *buf, *ptr;
const unsigned char *buf, *ptr;
} irl;
} u;
Int charcount, linecount, linepos;
@ -209,37 +209,29 @@ typedef struct stream_desc
#endif
int (*stream_putc)(int, int); /* function the stream uses for writing */
int (*stream_getc)(int); /* function the stream uses for reading */
GetsFunc stream_gets; /* function the stream uses for reading a sequence of characters */
GetsFunc stream_gets; /* function the stream uses for reading a sequence of
characters */
/* function the stream uses for parser. It may be different if the ISO
character conversion is on */
int (*stream_wgetc_for_read)(int);
int (*stream_wgetc)(int);
int (* stream_getc_for_utf8)(int);
int (*stream_wputc)(int, wchar_t);
uint64_t utf8_buf; /* used to translate to utf-8 */
encoding_t encoding;
mbstate_t mbstate;
}
StreamDesc;
} StreamDesc;
/* IsStreamTerm: true when t is not an unbound variable and is either an atom
 * or a compound term whose functor is FunctorStream. Any atom is accepted
 * here without further checks (presumably a stream alias -- confirm).
 * NOTE(review): the signature and the condition each appear in two layouts;
 * they are the two sides of a rendered diff with identical logic. */
static inline bool
IsStreamTerm(Term t)
{
static inline bool IsStreamTerm(Term t) {
return !IsVarTerm(t) &&
(IsAtomTerm(t) || (IsApplTerm(t) && (FunctorOfTerm(t) == FunctorStream)));
(IsAtomTerm(t) ||
(IsApplTerm(t) && (FunctorOfTerm(t) == FunctorStream)));
}
/* Yap_GetStreamHandle: resolves a stream term to its descriptor.
 * Runs Yap_CheckStream(t, 0, "stream search") to obtain a stream number and
 * returns the address of that entry in GLOBAL_Stream, or NULL when the
 * check yields a negative number.
 * NOTE(review): the duplicated signature is a rendered-diff artefact. */
static inline StreamDesc *
Yap_GetStreamHandle(Term t)
{
static inline StreamDesc *Yap_GetStreamHandle(Term t) {
int sno = Yap_CheckStream(t, 0, "stream search");
if (sno < 0)
return NULL;
return GLOBAL_Stream + sno;
}
#define YAP_ERROR NIL
#define MaxStreams 64
@ -255,13 +247,12 @@ Yap_GetStreamHandle(Term t)
void Yap_InitStdStreams(void);
Term Yap_StreamPosition(int);
/* GetCurInpPos: returns the linecount field of the given stream descriptor.
 * NOTE(review): StreamDesc declares linecount as Int while this returns int;
 * if Int is wider than int the value may be narrowed -- confirm.
 * NOTE(review): the duplicated signature is a rendered-diff artefact. */
static inline int
GetCurInpPos (StreamDesc * inp_stream)
{
static inline int GetCurInpPos(StreamDesc *inp_stream) {
return (inp_stream->linecount);
}
#define PlIOError(type, culprit,...) PlIOError__(__FILE__, __FUNCTION__, __LINE__, type, culprit, __VA_ARGS__)
#define PlIOError(type, culprit, ...) \
PlIOError__(__FILE__, __FUNCTION__, __LINE__, type, culprit, __VA_ARGS__)
Int PlIOError__(const char *, const char *, int, yap_error_number, Term, ...);
@ -270,7 +261,6 @@ Term Yap_MkStream (int n);
bool Yap_PrintWarning(Term twarning);
void Yap_plwrite(Term, struct stream_desc *, int, int, int);
void Yap_WriteAtom(struct stream_desc *s, Atom atom);
@ -353,16 +343,11 @@ INLINE_ONLY inline EXTERN void count_output_char(int ch, StreamDesc *s);
Term Yap_StreamUserName(int sno);
INLINE_ONLY inline EXTERN void
count_output_char(int ch, StreamDesc *s)
{
if (ch == '\n')
{
INLINE_ONLY inline EXTERN void count_output_char(int ch, StreamDesc *s) {
if (ch == '\n') {
#if MPWSHELL
if (mpwshell && (sno == StdOutStream || sno ==
StdErrStream) &&
!(s->status & Null_Stream_f))
{
if (mpwshell && (sno == StdOutStream || sno == StdErrStream) &&
!(s->status & Null_Stream_f)) {
putc(MPWSEP, s->file);
if (!(GLOBAL_Stream[LOCAL_output_stream].status & Null_Stream_f))
fflush(stdout);
@ -371,11 +356,10 @@ count_output_char(int ch, StreamDesc *s)
/* Inform that we have written a newline */
++s->charcount;
++s->linecount;
s->linepos = 0; }
else {
s->linepos = 0;
} else {
#if MAC
if ((sno == StdOutStream || sno == StdErrStream)
&& s->linepos > 200)
if ((sno == StdOutStream || sno == StdErrStream) && s->linepos > 200)
sno->stream_putc(sno, '\n');
#endif
++s->charcount;
@ -383,29 +367,16 @@ count_output_char(int ch, StreamDesc *s)
}
}
/* StreamName: returns the user_name term stored in GLOBAL_Stream[i].
 * The index i is used as-is; no range check is performed here.
 * NOTE(review): the multi-line and one-line definitions are the two sides of
 * a rendered diff. */
inline static Term
StreamName(int i)
{
return(GLOBAL_Stream[i].user_name);
}
inline static Term StreamName(int i) { return (GLOBAL_Stream[i].user_name); }
/* StreamFullName: returns the name atom stored in GLOBAL_Stream[i].
 * The index i is used as-is; no range check is performed here.
 * NOTE(review): the multi-line and one-line definitions are the two sides of
 * a rendered diff. */
inline static Atom
StreamFullName(int i)
{
return(GLOBAL_Stream[i].name);
}
inline static Atom StreamFullName(int i) { return (GLOBAL_Stream[i].name); }
inline static void
console_count_output_char(int ch, StreamDesc *s)
{
inline static void console_count_output_char(int ch, StreamDesc *s) {
CACHE_REGS
if (ch == '\n')
{
if (ch == '\n') {
#if MPWSHELL
if (mpwshell && (sno == StdOutStream || sno ==
StdErrStream) &&
!(s->status & Null_Stream_f))
{
if (mpwshell && (sno == StdOutStream || sno == StdErrStream) &&
!(s->status & Null_Stream_f)) {
putc(MPWSEP, s->file);
if (!(GLOBAL_Stream[LOCAL_output_stream].status & Null_Stream_f))
fflush(stdout);
@ -416,12 +387,10 @@ console_count_output_char(int ch, StreamDesc *s)
s->linepos = 0;
LOCAL_newline = TRUE;
/* Inform we are not at the start of a newline */
}
else {
} else {
LOCAL_newline = FALSE;
#if MAC
if ((sno == StdOutStream || sno == StdErrStream)
&& s->linepos > 200)
if ((sno == StdOutStream || sno == StdErrStream) && s->linepos > 200)
sno->stream_putc(sno, '\n');
#endif
++s->charcount;
@ -429,9 +398,7 @@ console_count_output_char(int ch, StreamDesc *s)
}
}
inline static Term
StreamPosition(int sno)
{
inline static Term StreamPosition(int sno) {
CACHE_REGS
Term sargs[5];
Int cpos;
@ -443,10 +410,7 @@ StreamPosition(int sno)
return Yap_MkApplTerm(FunctorStreamPos, 5, sargs);
}
inline static Term
CurrentPositionToTerm(void)
{
inline static Term CurrentPositionToTerm(void) {
CACHE_REGS
Term sargs[5];
sargs[0] = MkIntegerTerm(LOCAL_StartCharCount);
@ -456,10 +420,6 @@ CurrentPositionToTerm(void)
return Yap_MkApplTerm(FunctorStreamPos, 5, sargs);
}
extern FILE *Yap_stdin;
extern FILE *Yap_stdout;
extern FILE *Yap_stderr;

View File

@ -227,21 +227,19 @@ static bool getLine(int inp, int out) {
CACHE_REGS
rl_instream = GLOBAL_Stream[inp].file;
rl_outstream = GLOBAL_Stream[out].file;
const char *myrl_line;
const unsigned char *myrl_line;
StreamDesc *s = GLOBAL_Stream + inp;
if (!(s->status & Tty_Stream_f))
return false;
/* window of vulnerability opened */
LOCAL_PrologMode |= ConsoleGetcMode;
fflush(NULL);
LOCAL_PrologMode |= ConsoleGetcMode;
if (LOCAL_newline) { // no output so far
myrl_line = readline(LOCAL_Prompt);
myrl_line = (unsigned char *)readline(LOCAL_Prompt);
} else {
myrl_line = readline(NULL);
myrl_line = (unsigned char *)readline(NULL);
}
/* Do it the gnu way */
if (LOCAL_PrologMode & InterruptMode) {
@ -261,7 +259,7 @@ static bool getLine(int inp, int out) {
if (myrl_line == NULL)
return false;
if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
add_history(myrl_line);
add_history((char *)myrl_line);
append_history(1, history_file);
}
s->u.irl.ptr = s->u.irl.buf = myrl_line;
@ -296,7 +294,7 @@ static int ReadlineGetc(int sno) {
bool fetch = (s->u.irl.buf == NULL);
if (!fetch || getLine(sno, StdErrStream)) {
const char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf;
const unsigned char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf;
ch = *ttyptr;
if (ch == '\0') {
ch = '\n';
@ -322,7 +320,7 @@ Int Yap_ReadlinePeekChar(int sno) {
int ch;
if (s->u.irl.buf) {
const char *ttyptr = s->u.irl.ptr;
const unsigned char *ttyptr = s->u.irl.ptr;
ch = *ttyptr;
if (ch == '\0') {
ch = '\n';
@ -350,15 +348,15 @@ int Yap_ReadlineForSIGINT(void) {
CACHE_REGS
int ch;
StreamDesc *s = &GLOBAL_Stream[StdInStream];
const char *myrl_line = s->u.irl.buf;
const unsigned char *myrl_line = s->u.irl.buf;
if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != (char *)NULL) {
if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != NULL) {
ch = myrl_line[0];
free((void *)myrl_line);
myrl_line = NULL;
return ch;
} else {
myrl_line = readline("Action (h for help): ");
myrl_line = (const unsigned char *)readline("Action (h for help): ");
if (!myrl_line) {
ch = EOF;
return ch;

View File

@ -976,6 +976,7 @@ static Int read_term(
Int out;
/* needs to change LOCAL_output_stream for write */
yhandle_t h = Yap_PushHandle(ARG2);
inp_stream = Yap_CheckTextStream(ARG1, Input_Stream_f, "read/3");
if (inp_stream == -1) {

View File

@ -295,18 +295,10 @@ static Int has_close_on_abort(
/* has_encoding: reports a stream's encoding by unifying t2 with the atom
 * whose text is enc_name() of that encoding; the result of Yap_unify() is
 * returned.
 *
 * NOTE(review): this span interleaves two revisions of the body (rendered
 * diff). Judging by the hunk header, the earlier revision rejected a t2 that
 * was neither a variable nor an atom, carried a setter branch disabled by
 * `0 &&`, and looked up LOCAL_encoding; the later revision keeps only the
 * lookup of GLOBAL_Stream[sno].encoding followed by the unification.
 * NOTE(review): the inline '$set_output' signature comment does not match
 * this function's name or arguments -- looks copied from elsewhere; confirm
 * and correct. */
static bool
has_encoding(int sno,
Term t2 USES_REGS) { /* '$set_output'(+Stream,-ErrorMessage) */
if (!IsVarTerm(t2) && !(isatom(t2))) {
return FALSE;
}
if (0 && IsAtomTerm(t2)) {
encoding_t e = enc_id(RepAtom(AtomOfTerm(t2))->StrOfAE);
GLOBAL_Stream[sno].encoding = e;
return true;
} else {
const char *s = enc_name(LOCAL_encoding);
const char *s = enc_name(GLOBAL_Stream[sno].encoding);
return Yap_unify(t2, MkAtomTerm(Yap_LookupAtom(s)));
}
}
static bool
found_eof(int sno,
@ -719,9 +711,12 @@ static bool do_set_stream(int sno,
sno, (args[SET_STREAM_CLOSE_ON_ABORT].tvalue == TermTrue));
break;
case SET_STREAM_ENCODING:
{
Term t2 = args[SET_STREAM_ENCODING].tvalue;
Atom atEnc = AtomOfTerm(t2);
GLOBAL_Stream[sno].encoding =
enc_id(AtomOfTerm(args[SET_STREAM_ENCODING].tvalue)->StrOfAE);
has_encoding(sno, args[SET_STREAM_ENCODING].tvalue PASS_REGS);
enc_id(atEnc->StrOfAE, (GLOBAL_Stream[sno].status & HAS_BOM_f ? GLOBAL_Stream[sno].encoding :ENC_OCTET ) );
}
break;
case SET_STREAM_EOF_ACTION: {
Term t2 = args[SET_STREAM_EOF_ACTION].tvalue;

View File

@ -423,7 +423,7 @@ stream_position_data(Prop, Term, Value) :-
'$stream_position_field'(byte_count, 4).
% '$set_encoding'(+Enc): applies encoding Enc to the stream named loop_stream.
% NOTE(review): two body lines follow the clause head because this span is a
% rendered diff. The stream_property/2 line (which only queries the stream)
% appears to be the pre-change body and the set_stream/2 line its
% replacement; as written, the first '.' ends the clause -- confirm that only
% the set_stream/2 body is live.
'$set_encoding'(Enc) :-
stream_property(loop_stream, Enc).
set_stream(loop_stream, encoding(Enc)).
%! @}