Fix encoding

This commit is contained in:
Vítor Santos Costa 2016-02-18 12:10:58 +00:00
parent 05a978ce82
commit 484168b1ce
18 changed files with 1557 additions and 1522 deletions

View File

@ -921,7 +921,7 @@ cont_string_code3( USES_REGS1 )
s0 = UStringOfTerm( t2 ); s0 = UStringOfTerm( t2 );
i = IntOfTerm(EXTRA_CBACK_ARG(3,1)); // offset in coded string, increases by 1..6 i = IntOfTerm(EXTRA_CBACK_ARG(3,1)); // offset in coded string, increases by 1..6
j = IntOfTerm(EXTRA_CBACK_ARG(3,2)); // offset in UNICODE string, always increases by 1 j = IntOfTerm(EXTRA_CBACK_ARG(3,2)); // offset in UNICODE string, always increases by 1
s = (s0+i) + get_utf8( (unsigned char *)s0+i, &chr ); s = (s0+i) + get_utf8( (unsigned char *)s0+i, -1, &chr );
if (s[0]) { if (s[0]) {
EXTRA_CBACK_ARG(3,1) = MkIntTerm(s-s0); EXTRA_CBACK_ARG(3,1) = MkIntTerm(s-s0);
EXTRA_CBACK_ARG(3,2) = MkIntTerm(j+1); EXTRA_CBACK_ARG(3,2) = MkIntTerm(j+1);
@ -983,7 +983,7 @@ string_code3( USES_REGS1 )
if (ns == NULL) { if (ns == NULL) {
cut_fail(); // silently fail? cut_fail(); // silently fail?
} }
get_utf8( (unsigned char *)ns, &chr); get_utf8( (unsigned char *)ns, -1, &chr);
if ( chr == '\0') cut_fail(); if ( chr == '\0') cut_fail();
if (Yap_unify(ARG3, MkIntegerTerm(chr))) cut_succeed(); if (Yap_unify(ARG3, MkIntegerTerm(chr))) cut_succeed();
cut_fail(); cut_fail();
@ -1042,7 +1042,7 @@ get_string_code3( USES_REGS1 )
if (ns == NULL) { if (ns == NULL) {
return FALSE; return FALSE;
} else { } else {
get_utf8( ns, &chr); get_utf8( ns, -1, &chr);
if ( chr != '\0') return Yap_unify(ARG3, MkIntegerTerm(chr)); if ( chr != '\0') return Yap_unify(ARG3, MkIntegerTerm(chr));
} }
} }
@ -1611,7 +1611,7 @@ build_new_atomic(int mask, wchar_t *wp, const unsigned char *p, size_t min, size
buf = buf_from_tstring(HR); buf = buf_from_tstring(HR);
while (len) { while (len) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
cp += get_utf8((unsigned char *)cp, &chr); cp += get_utf8((unsigned char *)cp, -1, &chr);
buf += put_utf8((unsigned char *)buf, chr); buf += put_utf8((unsigned char *)buf, chr);
len--; len--;
} }
@ -1834,7 +1834,7 @@ cont_sub_atomic( USES_REGS1 )
/* found one, check if there is any left */ /* found one, check if there is any left */
while (min <= sz-len) { while (min <= sz-len) {
int chr; int chr;
p += get_utf8((unsigned char *)p, &chr); p += get_utf8((unsigned char *)p, -1, &chr);
after--; after--;
min++; min++;
if (cmpn_utf8(p, UStringOfTerm(nat), len) == 0) if (cmpn_utf8(p, UStringOfTerm(nat), len) == 0)

View File

@ -365,7 +365,7 @@ Yap_OpaqueTermToString(Term t, char *str, size_t max)
str_index += sprintf(& str[str_index], "\""); str_index += sprintf(& str[str_index], "\"");
do { do {
utf8proc_int32_t chr; utf8proc_int32_t chr;
ptr += get_utf8(ptr, &chr); ptr += get_utf8(ptr, -1, &chr);
if (chr == '\0') break; if (chr == '\0') break;
str_index += sprintf(str+str_index, "%C", chr); str_index += sprintf(str+str_index, "%C", chr);
} while (TRUE); } while (TRUE);

View File

@ -195,15 +195,11 @@ static bool mkprompt(Term inp) {
static bool getenc(Term inp) { static bool getenc(Term inp) {
CACHE_REGS CACHE_REGS
if (IsVarTerm(inp)) { if (!IsVarTerm(inp) && !IsAtomTerm(inp)) {
return Yap_unify(inp, MkAtomTerm(Yap_LookupAtom(enc_name(LOCAL_encoding)))); Yap_Error(TYPE_ERROR_ATOM, inp, "get_encoding");
}
if (!IsAtomTerm(inp)) {
Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag");
return false; return false;
} }
enc_id((char *)RepAtom(AtomOfTerm(inp))->StrOfAE); return Yap_unify(inp, MkAtomTerm(Yap_LookupAtom(enc_name(LOCAL_encoding))));
return true;
} }
/* /*
@ -217,7 +213,7 @@ if (!IsAtomTerm(inp) ) {
Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag"); Yap_Error(TYPE_ERROR_ATOM, inp, "set_prolog_flag");
return false; return false;
} }
enc_id( RepAtom( AtomOfTerm( inp ) )->StrOfAE ); enc_id( RepAtom( AtomOfTerm( inp ) )->StrOfAE, ENC_OCTET );
return true; return true;
} }
*/ */

View File

@ -1232,7 +1232,12 @@ const char *Yap_tokRep(TokEntry *tokptr) {
switch (tokptr->Tok) { switch (tokptr->Tok) {
case Name_tok: case Name_tok:
return (char *)RepAtom((Atom)info)->StrOfAE; if (IsWideAtom((Atom)info)) {
wchar_t *wc = RepAtom((Atom)info)->WStrOfAE;
Term s = Yap_WCharsToString(wc PASS_REGS);
return StringOfTerm(s);
}
return RepAtom((Atom)info)->StrOfAE;
case Number_tok: case Number_tok:
if ((b = Yap_TermToString(info, buf, sze, &length, &LOCAL_encoding, if ((b = Yap_TermToString(info, buf, sze, &length, &LOCAL_encoding,
flags)) != buf) { flags)) != buf) {
@ -2038,9 +2043,6 @@ TokEntry *Yap_tokenizer(struct stream_desc *inp_stream, bool store_comments,
char err[1024]; char err[1024];
snprintf(err, 1023, "\n++++ token: unrecognised char %c (%d), type %c\n", snprintf(err, 1023, "\n++++ token: unrecognised char %c (%d), type %c\n",
ch, ch, chtype(ch)); ch, ch, chtype(ch));
#if DEBUG
fprintf(stderr, "%s", err);
#endif
} }
t->Tok = Ord(kind = eot_tok); t->Tok = Ord(kind = eot_tok);
t->TokInfo = TermEof; t->TokInfo = TermEof;

View File

@ -408,7 +408,7 @@ write_strings( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
buf = buf_from_tstring(HR); buf = buf_from_tstring(HR);
while (*cp && cp < lim) { while (*cp && cp < lim) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
cp += get_utf8(cp, &chr); cp += get_utf8(cp, -1, &chr);
buf += put_utf8(buf, chr); buf += put_utf8(buf, chr);
} }
if (max >= min) *buf++ = '\0'; if (max >= min) *buf++ = '\0';
@ -496,7 +496,7 @@ write_atoms( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng U
while (cp < lim && *cp) { while (cp < lim && *cp) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
CELL *cl; CELL *cl;
cp += get_utf8(cp, &chr); cp += get_utf8(cp, -1, &chr);
if (chr == '\0') break; if (chr == '\0') break;
w[0] = chr; w[0] = chr;
cl = HR; cl = HR;
@ -581,7 +581,7 @@ write_codes( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng U
LOCAL_TERM_ERROR( t, 2*(lim-s) ); LOCAL_TERM_ERROR( t, 2*(lim-s) );
while (*cp && cp < lim) { while (*cp && cp < lim) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
cp += get_utf8(cp, &chr); cp += get_utf8(cp, -1, &chr);
HR[0] = MkIntTerm(chr); HR[0] = MkIntTerm(chr);
HR[1] = AbsPair(HR+2); HR[1] = AbsPair(HR+2);
HR += 2; HR += 2;
@ -659,7 +659,7 @@ write_atom( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng US
while (*s && s < lim) { while (*s && s < lim) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
s += get_utf8(s, &chr); s += get_utf8(s,-1, &chr);
*ptr++ = chr; *ptr++ = chr;
} }
*ptr++ = '\0'; *ptr++ = '\0';
@ -743,14 +743,14 @@ write_wbuffer( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
return -1; return -1;
while (*cp && cp < lim) { while (*cp && cp < lim) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
cp += get_utf8(cp, &chr); cp += get_utf8(cp, -1, &chr);
*buf++ = chr; *buf++ = chr;
} }
if (max >= min) *buf++ = '\0'; if (max >= min) *buf++ = '\0';
else while (max < min) { else while (max < min) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
max++; max++;
cp += get_utf8(cp, &chr); cp += get_utf8(cp, -1, &chr);
*buf++ = chr; *buf++ = chr;
} }
*buf = '\0'; *buf = '\0';
@ -890,14 +890,14 @@ write_buffer( void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng
return -1; return -1;
while (*cp && cp < lim) { while (*cp && cp < lim) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
cp += get_utf8(cp, &chr); cp += get_utf8(cp, -1, &chr);
*buf++ = chr; *buf++ = chr;
} }
if (max >= min) *buf++ = '\0'; if (max >= min) *buf++ = '\0';
else while (max < min) { else while (max < min) {
utf8proc_int32_t chr; utf8proc_int32_t chr;
max++; max++;
cp += get_utf8(cp, &chr); cp += get_utf8(cp, -1, &chr);
*buf++ = chr; *buf++ = chr;
} }
sz_end = buf-out->val.uc; sz_end = buf-out->val.uc;
@ -1104,7 +1104,7 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{ {
utf8proc_int32_t chr1, chr2; utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2; unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, &chr2); if (chr1-chr2) return chr1-chr2; } for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, -1, &chr2); if (chr1-chr2) return chr1-chr2; }
} }
return 0; return 0;
case ENC_WCHAR: case ENC_WCHAR:
@ -1126,21 +1126,21 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{ {
utf8proc_int32_t chr1, chr2; utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2; unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; } for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, -1, &chr1); if (chr1-chr2) return chr1-chr2; }
} }
return 0; return 0;
case ENC_ISO_UTF8: case ENC_ISO_UTF8:
{ {
utf8proc_int32_t chr1, chr2; utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2; unsigned char *w2 = s2;
for (i = 0; i < l; i++) { w2 += get_utf8(w2, &chr2); w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; } for (i = 0; i < l; i++) { w2 += get_utf8(w2, -1, &chr2); w1 += get_utf8(w1,-1, &chr1); if (chr1-chr2) return chr1-chr2; }
} }
return 0; return 0;
case ENC_WCHAR: case ENC_WCHAR:
{ {
utf8proc_int32_t chr1, chr2; utf8proc_int32_t chr1, chr2;
wchar_t *w2 = s2; wchar_t *w2 = s2;
for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, &chr1); if (chr1-chr2) return chr1-chr2; } for (i = 0; i < l; i++) { chr2 = *w2++; w1 += get_utf8(w1, -1, &chr1); if (chr1-chr2) return chr1-chr2; }
} }
return 0; return 0;
default: default:
@ -1162,7 +1162,7 @@ cmp_Text( void *s1, void *s2, int l, encoding_t enc1, encoding_t enc2 )
{ {
utf8proc_int32_t chr1, chr2; utf8proc_int32_t chr1, chr2;
unsigned char *w2 = s2; unsigned char *w2 = s2;
for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, &chr2); if (chr1-chr2) return chr1-chr2; } for (i = 0; i < l; i++) { chr1 = *w1++; w2 += get_utf8(w2, -1, &chr2); if (chr1-chr2) return chr1-chr2; }
} }
return 0; return 0;
case ENC_WCHAR: case ENC_WCHAR:
@ -1232,7 +1232,7 @@ concat( int n, seq_tv_t *out, void *sv[], encoding_t encv[], size_t lengv[] USES
} else { } else {
unsigned char *ptr = sv[i]; unsigned char *ptr = sv[i];
utf8proc_int32_t chr; utf8proc_int32_t chr;
while ( (ptr += get_utf8( ptr, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; } while ( (ptr += get_utf8( ptr, -1, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; }
} }
} }
*buf++ = '\0'; *buf++ = '\0';
@ -1276,7 +1276,7 @@ slice( size_t min, size_t max, void *buf, seq_tv_t *out, encoding_t enc USES_REG
unsigned char *ptr = skip_utf8 (buf, min ); unsigned char *ptr = skip_utf8 (buf, min );
utf8proc_int32_t chr; utf8proc_int32_t chr;
if (!ptr) return NULL; if (!ptr) return NULL;
while ( min++ < max ) { ptr += get_utf8(ptr, & chr); nbuf += put_utf8(nbuf, chr); } while ( min++ < max ) { ptr += get_utf8(ptr, -1, & chr); nbuf += put_utf8(nbuf, chr); }
} }
*nbuf ++ = '\0'; *nbuf ++ = '\0';
close_tstring( nbuf PASS_REGS ); close_tstring( nbuf PASS_REGS );
@ -1313,7 +1313,7 @@ slice( size_t min, size_t max, void *buf, seq_tv_t *out, encoding_t enc USES_REG
utf8proc_int32_t chr; utf8proc_int32_t chr;
LOCAL_ERROR( MkAtomTerm(Yap_LookupAtom(buf)), max-min ); LOCAL_ERROR( MkAtomTerm(Yap_LookupAtom(buf)), max-min );
while ( min++ < max ) { ptr += get_utf8(ptr, & chr); *nbuf++ = chr; } while ( min++ < max ) { ptr += get_utf8(ptr, -1, & chr); *nbuf++ = chr; }
nbuf[0] = '\0'; nbuf[0] = '\0';
at = Yap_LookupMaybeWideAtom( (wchar_t*)HR ); at = Yap_LookupMaybeWideAtom( (wchar_t*)HR );
} }

View File

@ -572,7 +572,7 @@ static void write_string(const unsigned char *s,
qt = '"'; qt = '"';
wrputc(qt, stream); wrputc(qt, stream);
do { do {
ptr += get_utf8(ptr, &chr); ptr += get_utf8(ptr, -1, &chr);
if (chr == '\0') if (chr == '\0')
break; break;
write_quoted(chr, qt, stream); write_quoted(chr, qt, stream);

View File

@ -141,9 +141,9 @@ INLINE_ONLY EXTERN inline char_kind_t chtype(Int ch) {
#define __android_log_print(...) #define __android_log_print(...)
#endif #endif
inline static utf8proc_ssize_t get_utf8(utf8proc_uint8_t *ptr, inline static utf8proc_ssize_t get_utf8(utf8proc_uint8_t *ptr, size_t n,
utf8proc_int32_t *valp) { utf8proc_int32_t *valp) {
return utf8proc_iterate(ptr, -1, valp); return utf8proc_iterate(ptr, n, valp);
} }
inline static utf8proc_ssize_t put_utf8(utf8proc_uint8_t *ptr, inline static utf8proc_ssize_t put_utf8(utf8proc_uint8_t *ptr,
@ -164,6 +164,11 @@ inline static utf8proc_uint8_t *skip_utf8(utf8proc_uint8_t *pt,
return pt; return pt;
} }
inline static utf8proc_ssize_t utf8_nof( utf8proc_int32_t val) {
return utf8proc_charwidth(val);
}
inline static utf8proc_ssize_t strlen_utf8(const utf8proc_uint8_t *pt) { inline static utf8proc_ssize_t strlen_utf8(const utf8proc_uint8_t *pt) {
utf8proc_ssize_t rc = 0; utf8proc_ssize_t rc = 0;
utf8proc_int32_t b; utf8proc_int32_t b;
@ -259,6 +264,12 @@ inline static int cmpn_utf8(const utf8proc_uint8_t *pt1,
return 0; return 0;
} }
// UTF16
#define LEAD_OFFSET (0xD800 - (0x10000 >> 10))
#define SURROGATE_OFFSET ( 0x10000 - (0xD800 << 10) - 0xDC00 )
const char *Yap_tokRep(TokEntry *tokptr); const char *Yap_tokRep(TokEntry *tokptr);
// standard strings // standard strings

View File

@ -94,10 +94,6 @@ INLINE_ONLY inline EXTERN Int CharOfAtom(Atom at) {
} }
} }
static int plUnGetc(int sno, int ch) {
return ungetc(ch, GLOBAL_Stream[sno].file);
}
Int Yap_peek(int sno) { Int Yap_peek(int sno) {
CACHE_REGS CACHE_REGS
Int ocharcount, olinecount, olinepos; Int ocharcount, olinecount, olinepos;
@ -144,33 +140,41 @@ Int Yap_peek(int sno) {
} }
} else if (s->encoding == ENC_UTF16_BE) { } else if (s->encoding == ENC_UTF16_BE) {
/* do the ungetc as if a write .. */ /* do the ungetc as if a write .. */
unsigned long int c = ch; // computations
if (c > ((1 << 16) - 1)) { int lead = LEAD_OFFSET + (ch >> 10);
ungetc(c / 1 << 16, s->file); int trail = 0xDC00 + (ch & 0x3FF);
c %= 1 << 16;
if (lead) {
ungetc(lead / 256, s->file);
ungetc(lead % 256, s->file);
} }
ungetc(c, s->file); ungetc(trail / 256, s->file);
} else if (s->encoding == ENC_UTF16_BE) { ungetc(trail % 256, s->file);
/* do the ungetc as if a write .. */
unsigned long int c = ch;
if (c > ((1 << 16) - 1)) {
ungetc(c / 1 << 16, s->file);
c %= 1 << 16;
}
return c;
} else if (s->encoding == ENC_UTF16_LE) { } else if (s->encoding == ENC_UTF16_LE) {
/* do the ungetc as if a write .. */ /* do the ungetc as if a write .. */
unsigned long int c = ch; // computations
if (c > ((1 << 16) - 1)) { uint16_t lead = LEAD_OFFSET + (ch >> 10);
ungetc(c % 1 << 16, s->file); uint16_t trail = 0xDC00 + (ch & 0x3FF);
c /= 1 << 16; lead = 0;
trail = ch;
if (lead) {
ungetc(lead / 256, s->file);
ungetc(lead % 256, s->file);
} }
ungetc(c, s->file); if (trail) {
} else { ungetc(trail / 256, s->file);
int (*f)(int, int) = s->stream_putc; ungetc(trail % 256, s->file);
s->stream_putc = plUnGetc; }
put_wchar(sno, ch); } else if (s->encoding == ENC_ISO_UTF32_LE) {
s->stream_putc = f; ungetc( (ch >> 24) & 0xff, s->file);
ungetc( (ch >> 16) & 0xff, s->file);
ungetc( (ch >> 8) & 0xff, s->file);
return ungetc( ch & 0xff, s->file);
} else if (s->encoding == ENC_ISO_UTF32_BE) {
ungetc( ch & 0xff, s->file);
ungetc( (ch >> 8) & 0xff, s->file);
ungetc( (ch >> 16) & 0xff, s->file);
return ungetc( (ch >> 24) & 0xff, s->file);
} }
return ch; return ch;
} }
@ -421,7 +425,7 @@ static Int get_byte(USES_REGS1) { /* '$get_byte'(Stream,-N) */
//&& strictISOFlag() //&& strictISOFlag()
) { ) {
UNLOCK(GLOBAL_Stream[sno].streamlock); UNLOCK(GLOBAL_Stream[sno].streamlock);
Yap_Error(PERMISSION_ERROR_INPUT_TEXT_STREAM, ARG1, "get_byte/2"); Yap_Error(PERMISSION_ERROR_INPUT_STREAM, ARG1, "get_byte/2");
return (FALSE); return (FALSE);
} }
out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno)); out = MkIntTerm(GLOBAL_Stream[sno].stream_getc(sno));
@ -481,7 +485,7 @@ static Int put_code_1(USES_REGS1) { /* '$put'(,N) */
return FALSE; return FALSE;
} }
LOCK(GLOBAL_Stream[sno].streamlock); LOCK(GLOBAL_Stream[sno].streamlock);
GLOBAL_Stream[sno].stream_wputc(sno, (int)IntegerOfTerm(Deref(ARG2))); GLOBAL_Stream[sno].stream_wputc(sno, ch);
/* /*
* if (!(GLOBAL_Stream[sno].status & Null_Stream_f)) * if (!(GLOBAL_Stream[sno].status & Null_Stream_f))
* yap_fflush(GLOBAL_Stream[sno].file); * yap_fflush(GLOBAL_Stream[sno].file);
@ -925,7 +929,6 @@ static Int flush_all_streams(USES_REGS1) { /* $flush_all_streams */
/** @pred peek_code(+ _S_, - _C_) is iso /** @pred peek_code(+ _S_, - _C_) is iso
If _C_ is unbound, or is the code for a character, and If _C_ is unbound, or is the code for a character, and
the stream _S_ is a text stream, read the next character from the the stream _S_ is a text stream, read the next character from the
current stream and unify its code with _C_, while current stream and unify its code with _C_, while

View File

@ -31,7 +31,7 @@ typedef enum {
ENC_ISO_UTF32_LE = 128, /// yes, nobody ENC_ISO_UTF32_LE = 128, /// yes, nobody
} encoding_t; } encoding_t;
#if defined(__BIG_ENDIAN__) #if WORDS_BIGENDIAN
#define ENC_WCHAR ENC_ISO_UTF32_BE #define ENC_WCHAR ENC_ISO_UTF32_BE
#else #else
#define ENC_WCHAR ENC_ISO_UTF32_LE #define ENC_WCHAR ENC_ISO_UTF32_LE
@ -114,14 +114,27 @@ static inline const char *enc_name(encoding_t enc) {
} }
} }
static inline encoding_t enc_id(char *s) { static inline encoding_t enc_id(const char *s, encoding_t enc_bom) {
{ {
if (!strcmp(s, "iso_utf8")) if (!strcmp(s, "iso_utf8"))
return ENC_ISO_UTF8; return ENC_ISO_UTF8;
if (!strcmp(s, "utf8")) if (!strcmp(s, "utf8"))
return ENC_ISO_UTF8; return ENC_ISO_UTF8;
if (!strcmp(s, "UTF-8"))
return ENC_ISO_UTF8;
if (!strcmp(s, "utf16_le")) if (!strcmp(s, "utf16_le"))
return ENC_UTF16_LE; return ENC_UTF16_LE;
if (!strcmp(s, "utf16_be"))
return ENC_UTF16_BE;
if (!strcmp(s, "UTF-16")) {
if (enc_bom == ENC_UTF16_LE)
return ENC_UTF16_LE;
return ENC_UTF16_BE;
}
if (!strcmp(s, "UTF-16LE"))
return ENC_UTF16_LE;
if (!strcmp(s, "UTF16-BE"))
return ENC_UTF16_BE;
if (!strcmp(s, "octet")) if (!strcmp(s, "octet"))
return ENC_OCTET; return ENC_OCTET;
if (!strcmp(s, "iso_latin_1")) if (!strcmp(s, "iso_latin_1"))
@ -134,11 +147,31 @@ static inline encoding_t enc_id(char *s) {
return ENC_ISO_UTF32_BE; return ENC_ISO_UTF32_BE;
if (!strcmp(s, "utf32_le")) if (!strcmp(s, "utf32_le"))
return ENC_ISO_UTF32_LE; return ENC_ISO_UTF32_LE;
if (!strcmp(s, "default")) if (!strcmp(s, "UTF-32")) {
if (enc_bom == ENC_ISO_UTF32_LE)
return ENC_ISO_UTF32_LE;
return ENC_ISO_UTF32_BE;
}
if (!strcmp(s, "UTF-32BE"))
return ENC_ISO_UTF32_BE;
if (!strcmp(s, "UTF-32LE"))
return ENC_ISO_UTF32_LE;
if (!strcmp(s, "ISO-8859-1"))
return ENC_ISO_LATIN1;
// just for SWI compat, this actually refers to
// UCS-2
if (!strcmp(s, "unicode_be"))
return ENC_UTF16_BE;
if (!strcmp(s, "unicode_le"))
return ENC_UTF16_LE;
if (!strcmp(s, "default")) {
if (enc_bom != ENC_OCTET)
return enc_bom;
return Yap_DefaultEncoding(); return Yap_DefaultEncoding();
}
else { else {
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, 0, "bad encoding %s", s); Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, MkAtomTerm(Yap_LookupAtom(s)), "bad encoding %s", s);
return ENC_OCTET; return Yap_DefaultEncoding();
} }
} }
} }

View File

@ -328,7 +328,7 @@ static int format_print_str(Int sno, Int size, Int has_size, Term args,
const unsigned char *pt = UStringOfTerm(args); const unsigned char *pt = UStringOfTerm(args);
while (*pt && (!has_size || size > 0)) { while (*pt && (!has_size || size > 0)) {
utf8proc_int32_t ch; utf8proc_int32_t ch;
pt += get_utf8((unsigned char *)pt, &ch); pt += get_utf8((unsigned char *)pt, -1, &ch);
f_putc(sno, ch); f_putc(sno, ch);
} }
} else { } else {

View File

@ -27,7 +27,8 @@ static char SccsId[] = "%W% %G%";
* *
*/ */
/* /*
* This file includes the definition of a miscellania of standard predicates * for yap refering to: Files and GLOBAL_Streams, Simple Input/Output, * This file includes the definition of a miscellania of standard predicates *
*for yap refering to: Files and GLOBAL_Streams, Simple Input/Output,
* *
*/ */
@ -123,8 +124,7 @@ static Term gethdir(Term t) {
#endif #endif
s[nsz + 1] = '\0'; s[nsz + 1] = '\0';
} }
return return MkAtomTerm(Yap_LookupAtom(s));
MkAtomTerm(Yap_LookupAtom( s ) );
} }
static bool issolutions(Term t) { static bool issolutions(Term t) {
@ -188,10 +188,6 @@ void Yap_DefaultStreamOps(StreamDesc *st) {
st->stream_wgetc_for_read = ISOWGetc; st->stream_wgetc_for_read = ISOWGetc;
else else
st->stream_wgetc_for_read = st->stream_wgetc; st->stream_wgetc_for_read = st->stream_wgetc;
if (st->encoding == ENC_ISO_UTF8)
st->stream_getc_for_utf8 = st->stream_getc;
else
st->stream_getc_for_utf8 = GetUTF8;
} }
static void unix_upd_stream_info(StreamDesc *s) { static void unix_upd_stream_info(StreamDesc *s) {
@ -631,7 +627,6 @@ int console_post_process_eof(StreamDesc *s) {
s->stream_getc = EOFGetc; s->stream_getc = EOFGetc;
s->stream_wgetc = EOFWGetc; s->stream_wgetc = EOFWGetc;
s->stream_wgetc_for_read = EOFWGetc; s->stream_wgetc_for_read = EOFWGetc;
s->stream_getc_for_utf8 = EOFGetc;
LOCAL_newline = true; LOCAL_newline = true;
} }
return EOFCHAR; return EOFCHAR;
@ -658,7 +653,6 @@ int post_process_eof(StreamDesc *s) {
s->stream_wgetc = EOFWGetc; s->stream_wgetc = EOFWGetc;
s->stream_getc = EOFGetc; s->stream_getc = EOFGetc;
s->stream_wgetc_for_read = EOFWGetc; s->stream_wgetc_for_read = EOFWGetc;
s->stream_getc_for_utf8 = EOFGetc;
} }
return EOFCHAR; return EOFCHAR;
} }
@ -669,7 +663,6 @@ int post_process_weof(StreamDesc *s) {
s->stream_wgetc = EOFWGetc; s->stream_wgetc = EOFWGetc;
s->stream_wgetc = EOFWGetc; s->stream_wgetc = EOFWGetc;
s->stream_wgetc_for_read = EOFWGetc; s->stream_wgetc_for_read = EOFWGetc;
s->stream_getc_for_utf8 = EOFGetc;
} }
return EOFCHAR; return EOFCHAR;
} }
@ -693,7 +686,7 @@ int PlGetc(int sno) {
StreamDesc *s = &GLOBAL_Stream[sno]; StreamDesc *s = &GLOBAL_Stream[sno];
Int ch; Int ch;
ch = getc(s->file); ch = fgetc(s->file);
if (ch == EOF) { if (ch == EOF) {
return post_process_eof(s); return post_process_eof(s);
} }
@ -733,165 +726,178 @@ int DefaultGets(int sno, UInt size, char *buf) {
return (buf - pt) - 1; return (buf - pt) - 1;
} }
int GetUTF8(int sno) { /// compose a wide char from a sequence of getchars \
StreamDesc *s = &GLOBAL_Stream[sno]; // this is a slow lane routine, called if no specialised code
uint64_t bufi = s->utf8_buf; // isavailable.
unsigned char *buf = (unsigned char *)&bufi;
if (!bufi) {
int32_t ch = get_wchar(sno);
if (ch < 128)
return ch;
put_utf8((unsigned char *)&bufi, ch);
} else {
while (*buf++ == '\0')
;
}
unsigned char c = *buf;
buf[0] = '\0';
return c;
}
static int utf8_nof(char ch) {
if (!(ch & 0x20))
return 1;
if (!(ch & 0x10))
return 2;
if (!(ch & 0x08))
return 3;
if (!(ch & 0x04))
return 4;
return 5;
}
#define wide_char() \
switch (GLOBAL_Stream[sno].encoding) { \
case ENC_OCTET: \
return ch; \
case ENC_ISO_LATIN1: \
return ch; \
case ENC_ISO_ASCII: \
if (ch & 0x80) { \
/* error */ \
} \
return ch; \
case ENC_ISO_ANSI: { \
char buf[1]; \
int out; \
\
if (!how_many) { \
memset((void *)&(GLOBAL_Stream[sno].mbstate), 0, sizeof(mbstate_t)); \
} \
buf[0] = ch; \
if ((out = mbrtowc(&wch, buf, 1, &(GLOBAL_Stream[sno].mbstate))) == 1) \
return wch; \
if (out == -1) { \
/* error */ \
} \
how_many++; \
break; \
} \
case ENC_ISO_UTF8: { \
if (!how_many) { \
if (ch & 0x80) { \
how_many = utf8_nof(ch); \
/* \
keep a backup of the start character in case we meet an error, \
useful if we are scanning ISO files. \
*/ \
GLOBAL_Stream[sno].och = ch; \
wch = (ch & ((1 << (6 - how_many)) - 1)) << (6 * how_many); \
} else { \
return ch; \
} \
} else { \
how_many--; \
if ((ch & 0xc0) == 0x80) { \
wch += (ch & ~0xc0) << (how_many * 6); \
} else { \
/* error */ \
/* try to recover character, assume this is our first character */ \
wchar_t och = GLOBAL_Stream[sno].och; \
return och; \
} \
if (!how_many) { \
return wch; \
} \
} \
} break; \
case ENC_UTF16_BE: \
if (how_many) { \
return wch + ch; \
} \
how_many = 1; \
wch = ch << 8; \
break; \
case ENC_UTF16_LE: \
if (how_many) { \
return wch + (ch << 8); \
} \
how_many = 1; \
wch = ch; \
break; \
case ENC_ISO_UTF32_LE: \
if (!how_many) { \
how_many = 4; \
wch = 0; \
} \
how_many--; \
wch += ((unsigned char)(ch & 0xff)) << (how_many * 8); \
if (how_many == 0) \
return wch; \
break; \
case ENC_ISO_UTF32_BE: \
if (!how_many) { \
how_many = 4; \
wch = 0; \
} \
how_many--; \
wch += ((unsigned char)(ch & 0xff)) << ((3 - how_many) * 8); \
if (how_many == 0) \
return wch; \
break; \
}
static int get_wchar(int sno) { static int get_wchar(int sno) {
int ch; StreamDesc *st = GLOBAL_Stream + sno;
wchar_t wch; int ch = st->stream_getc(sno);
int how_many = 0;
while (true) { if (ch == -1)
ch = GLOBAL_Stream[sno].stream_getc(sno); return post_process_weof(st);
if (ch == -1) {
if (how_many) { switch (st->encoding) {
case ENC_OCTET:
return ch;
// no error detection, all characters are ok.
case ENC_ISO_LATIN1:
return ch;
// 7 bits code, anything above is bad news
case ENC_ISO_ASCII:
if (ch & 0x80) {
/* error */ /* error */
} }
return post_process_weof(GLOBAL_Stream + sno); return ch;
// default OS encoding, depends on locale.
case ENC_ISO_ANSI: {
char buf[8];
int out;
int wch;
mbstate_t mbstate;
memset((void *)&(mbstate), 0, sizeof(mbstate_t));
buf[0] = ch;
while ((out = mbrtowc(&wch, buf, 1, &(mbstate))) != 1) {
int ch = buf[0] = st->stream_getc(sno);
if (ch == -1)
return post_process_weof(st);
}
return wch;
}
// UTF-8 works o 8 bits.
case ENC_ISO_UTF8: {
unsigned char buf[8];
if (ch < 0x80) {
return ch;
}
// if ((ch - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
if (ch < 0xe0) { // 2-byte sequence
// Must have valid continuation character
int c1 = buf[0] = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
// if (!utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
return ((ch & 0x1f)<<6) | (c1 & 0x3f);
}
if (ch < 0xf0) { // 3-byte sequence
//if ((str + 1 >= end) || !utf_cont(*str) || !utf_cont(str[1]))
// return UTF8PROC_ERROR_INVALIDUTF8;
// Check for surrogate chars
//if (ch == 0xed && *str > 0x9f)
// return UTF8PROC_ERROR_INVALIDUTF8;
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
int c2 = st->stream_getc(sno);
if (c2 == -1)
return post_process_weof(st);
return ((ch & 0xf)<<12) | ((c1 & 0x3f)<<6) | (c2 & 0x3f);
} else {
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
int c2 = st->stream_getc(sno);
if (c2 == -1)
return post_process_weof(st);
int c3 = st->stream_getc(sno);
if (c3 == -1)
return post_process_weof(st);
return ((ch & 7)<<18) | ((c1 & 0x3f)<<12) | ((c2 & 0x3f)<<6) | (c3 & 0x3f);
}
}
case ENC_UTF16_LE: // check http://unicode.org/faq/utf_bom.html#utf16-3
// little-endian: start with big shot
{
int wch;
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = (c1 << 8) + ch;
if (wch >= 0xEFFF) {
int c2 = st->stream_getc(sno);
if (c2 == -1)
return post_process_weof(st);
int c3 = st->stream_getc(sno);
if (c3 == -1)
return post_process_weof(st);
wch = wch + (((c3 << 8) + c2)<<wch) + SURROGATE_OFFSET;
}
return wch;
}
case ENC_UTF16_BE: // check http://unicode.org/faq/utf_bom.html#utf16-3
// little-endian: start with big shot
{
int wch;
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = (c1) + (ch<<8);
if (wch >= 0xEFFF) {
int c3 = st->stream_getc(sno);
if (c3 == -1)
return post_process_weof(st);
int c2 = st->stream_getc(sno);
if (c2 == -1)
return post_process_weof(st);
wch = (((c3 << 8) + c2) << 10) + wch + SURROGATE_OFFSET;
}
return wch;
}
case ENC_ISO_UTF32_BE: // check http://unicode.org/faq/utf_bom.html#utf16-3
// little-endian: start with big shot
{
int wch = ch;
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = wch + c1;
}
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = (wch << 8 )+c1;
}
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch = (wch << 8) +c1;
}
return wch;
}
case ENC_ISO_UTF32_LE: // check http://unicode.org/faq/utf_bom.html#utf16-3
// little-endian: start with big shot
{
int wch = ch;
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch += c1<<8;
}
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch += c1<<16;
}
{
int c1 = st->stream_getc(sno);
if (c1 == -1)
return post_process_weof(st);
wch += c1<<24;
}
return wch;
} }
wide_char();
} }
return EOF;
} }
// layered version // layered version
static int get_wchar__(int sno) { static int get_wchar__(int sno) { return get_wchar(sno); }
int ch;
wchar_t wch;
int how_many = 0;
StreamDesc *s = GLOBAL_Stream + sno;
while (TRUE) {
ch = getc(GLOBAL_Stream[sno].file);
if (ch == -1) {
if (how_many) {
/* error */
}
return post_process_weof(s);
}
wide_char();
}
return EOF;
}
static int get_wchar_from_file(int sno) { static int get_wchar_from_file(int sno) {
return post_process_read_char(get_wchar__(sno), GLOBAL_Stream + sno); return post_process_read_char(get_wchar__(sno), GLOBAL_Stream + sno);
@ -953,10 +959,11 @@ int put_wchar(int sno, wchar_t ch) {
return GLOBAL_Stream[sno].stream_putc(sno, ch); return GLOBAL_Stream[sno].stream_putc(sno, ch);
case ENC_ISO_ANSI: { case ENC_ISO_ANSI: {
char buf[MB_LEN_MAX]; char buf[MB_LEN_MAX];
mbstate_t mbstate;
int n; int n;
memset((void *)&(GLOBAL_Stream[sno].mbstate), 0, sizeof(mbstate_t)); memset((void *)&mbstate, 0, sizeof(mbstate_t));
if ((n = wcrtomb(buf, ch, &(GLOBAL_Stream[sno].mbstate))) < 0) { if ((n = wcrtomb(buf, ch, &mbstate)) < 0) {
/* error */ /* error */
GLOBAL_Stream[sno].stream_putc(sno, ch); GLOBAL_Stream[sno].stream_putc(sno, ch);
return -1; return -1;
@ -989,17 +996,41 @@ int put_wchar(int sno, wchar_t ch) {
} }
break; break;
case ENC_UTF16_BE: case ENC_UTF16_BE:
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8)); {
return GLOBAL_Stream[sno].stream_putc(sno, (ch & 0xff)); // computations
int lead = LEAD_OFFSET + (ch >> 10);
int trail = 0xDC00 + (ch & 0x3FF);
GLOBAL_Stream[sno].stream_putc(sno, (trail & 0xff));
GLOBAL_Stream[sno].stream_putc(sno, (trail >> 8));
if (trail) {
GLOBAL_Stream[sno].stream_putc(sno, (lead & 0xff));
GLOBAL_Stream[sno].stream_putc(sno, (lead >> 8));
}
return lead >> 8;
}
case ENC_UTF16_LE: case ENC_UTF16_LE:
GLOBAL_Stream[sno].stream_putc(sno, (ch & 0xff)); {
return GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8)); // computations
case ENC_ISO_UTF32_BE: int lead = LEAD_OFFSET + (ch >> 10);
int trail = 0xDC00 + (ch & 0x3FF);
if (lead) {
GLOBAL_Stream[sno].stream_putc(sno, (lead >> 8));
GLOBAL_Stream[sno].stream_putc(sno, (lead & 0xff));
}
GLOBAL_Stream[sno].stream_putc(sno, (trail >> 8));
GLOBAL_Stream[sno].stream_putc(sno, (trail & 0xff));
return lead >> 8;
}
case ENC_ISO_UTF32_LE:
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 24) & 0xff); GLOBAL_Stream[sno].stream_putc(sno, (ch >> 24) & 0xff);
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 16) & 0xff); GLOBAL_Stream[sno].stream_putc(sno, (ch >> 16) & 0xff);
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8) & 0xff); GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8) & 0xff);
return GLOBAL_Stream[sno].stream_putc(sno, ch & 0xff); return GLOBAL_Stream[sno].stream_putc(sno, ch & 0xff);
case ENC_ISO_UTF32_LE: case ENC_ISO_UTF32_BE:
GLOBAL_Stream[sno].stream_putc(sno, ch & 0xff); GLOBAL_Stream[sno].stream_putc(sno, ch & 0xff);
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8) & 0xff); GLOBAL_Stream[sno].stream_putc(sno, (ch >> 8) & 0xff);
GLOBAL_Stream[sno].stream_putc(sno, (ch >> 16) & 0xff); GLOBAL_Stream[sno].stream_putc(sno, (ch >> 16) & 0xff);
@ -1012,8 +1043,8 @@ int put_wchar(int sno, wchar_t ch) {
/* used by user-code to read characters from the current input stream */ /* used by user-code to read characters from the current input stream */
int Yap_PlGetchar(void) { int Yap_PlGetchar(void) {
CACHE_REGS CACHE_REGS
return ( return (GLOBAL_Stream[LOCAL_c_input_stream].stream_getc(
GLOBAL_Stream[LOCAL_c_input_stream].stream_getc(LOCAL_c_input_stream)); LOCAL_c_input_stream));
} }
int Yap_PlGetWchar(void) { int Yap_PlGetWchar(void) {
@ -1171,29 +1202,23 @@ static void check_bom(int sno, StreamDesc *st) {
} else { } else {
ch3 = st->stream_getc(sno); ch3 = st->stream_getc(sno);
if (ch3 != 0x00) { if (ch3 != 0x00) {
ungetc(ch1, st->file);
ungetc(ch2, st->file);
ungetc(ch3, st->file); ungetc(ch3, st->file);
return;
} else { } else {
ch4 = st->stream_getc(sno); ch4 = st->stream_getc(sno);
if (ch4 != 0x00) { if (ch4 == 0x00) {
ungetc(ch1, st->file);
ungetc(ch2, st->file);
ungetc(ch3, st->file);
ungetc(ch4, st->file);
return;
} else {
st->status |= HAS_BOM_f; st->status |= HAS_BOM_f;
st->encoding = ENC_ISO_UTF32_LE; st->encoding = ENC_ISO_UTF32_LE;
return; return;
} else {
ungetc(ch4, st->file);
ungetc(0x00, st->file);
}
} }
} }
st->status |= HAS_BOM_f; st->status |= HAS_BOM_f;
st->encoding = ENC_UTF16_LE; st->encoding = ENC_UTF16_LE;
return; return;
} }
}
case 0xEF: case 0xEF:
ch2 = st->stream_getc(sno); ch2 = st->stream_getc(sno);
if (ch2 != 0xBB) { if (ch2 != 0xBB) {
@ -1257,9 +1282,7 @@ static void check_bom(int sno, StreamDesc *st) {
return true; return true;
} }
static bool static bool open_header(int sno, Atom open_mode) {
open_header( int sno, Atom open_mode)
{
if (open_mode == AtomWrite) { if (open_mode == AtomWrite) {
const char *ptr; const char *ptr;
const char s[] = "#!"; const char s[] = "#!";
@ -1281,13 +1304,13 @@ static void check_bom(int sno, StreamDesc *st) {
// skip header // skip header
int ch; int ch;
while ((ch = Yap_peek(sno)) == '#') { while ((ch = Yap_peek(sno)) == '#') {
while ((ch = GLOBAL_Stream[sno].stream_wgetc( sno )) != 10 && ch != -1 ); while ((ch = GLOBAL_Stream[sno].stream_wgetc(sno)) != 10 && ch != -1)
;
} }
} }
return true; return true;
} }
#define OPEN_DEFS() \ #define OPEN_DEFS() \
PAR("alias", isatom, OPEN_ALIAS), PAR("bom", booleanFlag, OPEN_BOM), \ PAR("alias", isatom, OPEN_ALIAS), PAR("bom", booleanFlag, OPEN_BOM), \
PAR("buffer", isatom, OPEN_BUFFER), \ PAR("buffer", isatom, OPEN_BUFFER), \
@ -1301,9 +1324,8 @@ static void check_bom(int sno, StreamDesc *st) {
PAR("mode", isatom, OPEN_MODE), PAR("output", ok, OPEN_OUTPUT), \ PAR("mode", isatom, OPEN_MODE), PAR("output", ok, OPEN_OUTPUT), \
PAR("representation_errors", booleanFlag, OPEN_REPRESENTATION_ERRORS), \ PAR("representation_errors", booleanFlag, OPEN_REPRESENTATION_ERRORS), \
PAR("reposition", booleanFlag, OPEN_REPOSITION), \ PAR("reposition", booleanFlag, OPEN_REPOSITION), \
PAR("script", booleanFlag, OPEN_SCRIPT), \ PAR("script", booleanFlag, OPEN_SCRIPT), PAR("type", isatom, OPEN_TYPE), \
PAR("type", isatom, OPEN_TYPE), PAR("wait", booleanFlag, OPEN_WAIT), \ PAR("wait", booleanFlag, OPEN_WAIT), PAR(NULL, ok, OPEN_END)
PAR(NULL, ok, OPEN_END)
#define PAR(x, y, z) z #define PAR(x, y, z) z
typedef enum open_enum_choices { OPEN_DEFS() } open_choices_t; typedef enum open_enum_choices { OPEN_DEFS() } open_choices_t;
@ -1316,10 +1338,8 @@ typedef enum open_enum_choices { OPEN_DEFS() } open_choices_t;
static const param_t open_defs[] = {OPEN_DEFS()}; static const param_t open_defs[] = {OPEN_DEFS()};
#undef PAR #undef PAR
static Int do_open(
Term file_name, Term t2,
static Int
do_open(Term file_name, Term t2,
Term tlist USES_REGS) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */ Term tlist USES_REGS) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
Atom open_mode; Atom open_mode;
int sno; int sno;
@ -1330,6 +1350,7 @@ do_open(Term file_name, Term t2,
const char *fname; const char *fname;
stream_flags_t flags; stream_flags_t flags;
FILE *fd; FILE *fd;
const char *s_encoding;
encoding_t encoding; encoding_t encoding;
Term tenc; Term tenc;
@ -1383,7 +1404,8 @@ do_open(Term file_name, Term t2,
if (LOCAL_Error_TYPE != YAP_NO_ERROR) { if (LOCAL_Error_TYPE != YAP_NO_ERROR) {
if (LOCAL_Error_TYPE == DOMAIN_ERROR_PROLOG_FLAG) if (LOCAL_Error_TYPE == DOMAIN_ERROR_PROLOG_FLAG)
LOCAL_Error_TYPE = DOMAIN_ERROR_OPEN_OPTION; LOCAL_Error_TYPE = DOMAIN_ERROR_OPEN_OPTION;
Yap_Error( LOCAL_Error_TYPE, LOCAL_Error_Term, "option handling in open/3" ); Yap_Error(LOCAL_Error_TYPE, LOCAL_Error_Term,
"option handling in open/3");
} }
return false; return false;
} }
@ -1402,10 +1424,13 @@ do_open(Term file_name, Term t2,
} }
if (args[OPEN_ENCODING].used) { if (args[OPEN_ENCODING].used) {
tenc = args[OPEN_ENCODING].tvalue; tenc = args[OPEN_ENCODING].tvalue;
encoding = enc_id(RepAtom(AtomOfTerm(tenc))->StrOfAE); s_encoding = RepAtom(AtomOfTerm(tenc))->StrOfAE;
} else { } else {
encoding = LOCAL_encoding; s_encoding = "default";
} }
// default encoding, no bom yet
encoding = enc_id( s_encoding, ENC_OCTET);
// only set encoding after getting BOM
bool ok = (args[OPEN_EXPAND_FILENAME].used bool ok = (args[OPEN_EXPAND_FILENAME].used
? args[OPEN_EXPAND_FILENAME].tvalue == TermTrue ? args[OPEN_EXPAND_FILENAME].tvalue == TermTrue
: false) || : false) ||
@ -1419,8 +1444,8 @@ do_open(Term file_name, Term t2,
} }
// Skip scripts that start with !#/.. or similar // Skip scripts that start with !#/.. or similar
bool script = (args[OPEN_SCRIPT].used bool script =
? args[OPEN_SCRIPT].tvalue == TermTrue (args[OPEN_SCRIPT].used ? args[OPEN_SCRIPT].tvalue == TermTrue
: false); : false);
// binary type // binary type
if (args[OPEN_TYPE].used) { if (args[OPEN_TYPE].used) {
@ -1468,11 +1493,11 @@ do_open(Term file_name, Term t2,
fname = LOCAL_FileNameBuf; fname = LOCAL_FileNameBuf;
UNLOCK(st->streamlock); UNLOCK(st->streamlock);
if (errno == ENOENT) if (errno == ENOENT)
return (PlIOError(EXISTENCE_ERROR_SOURCE_SINK, file_name, "%s: %s", fname, return (PlIOError(EXISTENCE_ERROR_SOURCE_SINK, file_name, "%s: %s",
strerror(errno)));
else {
return (PlIOError(PERMISSION_ERROR_OPEN_SOURCE_SINK, file_name, "%s: %s",
fname, strerror(errno))); fname, strerror(errno)));
else {
return (PlIOError(PERMISSION_ERROR_OPEN_SOURCE_SINK, file_name,
"%s: %s", fname, strerror(errno)));
} }
} }
#if MAC #if MAC
@ -1481,7 +1506,11 @@ do_open(Term file_name, Term t2,
} }
#endif #endif
flags &= ~(Free_Stream_f); flags &= ~(Free_Stream_f);
if (!Yap_initStream(sno, fd, fname, file_name, encoding, flags, open_mode)) if (!Yap_initStream(sno, fd, fname, file_name, encoding, flags,
open_mode))
return false;
if (!Yap_initStream(sno, fd, fname, file_name, encoding, flags,
open_mode))
return false; return false;
if (open_mode == AtomWrite) { if (open_mode == AtomWrite) {
if (needs_bom && !write_bom(sno, st)) if (needs_bom && !write_bom(sno, st))
@ -1489,11 +1518,14 @@ do_open(Term file_name, Term t2,
} else if (open_mode == AtomRead && !avoid_bom) { } else if (open_mode == AtomRead && !avoid_bom) {
check_bom(sno, st); // can change encoding check_bom(sno, st); // can change encoding
} }
// follow declaration unless there is v
if (st->status & HAS_BOM_f)
st->encoding = enc_id( s_encoding, st->encoding);
else
st->encoding = encoding;
if (script) if (script)
open_header(sno, open_mode); open_header(sno, open_mode);
UNLOCK(st->streamlock); UNLOCK(st->streamlock);
{ {
Term t = Yap_MkStream(sno); Term t = Yap_MkStream(sno);
@ -1521,8 +1553,8 @@ writable.
*/ */
static Int open3(
static Int open3(USES_REGS1) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */ USES_REGS1) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
return do_open(Deref(ARG1), Deref(ARG2), TermNil PASS_REGS); return do_open(Deref(ARG1), Deref(ARG2), TermNil PASS_REGS);
} }
@ -1554,7 +1586,8 @@ which generates a new `end_of_file` (default for non-tty files).
+ `alias(+ _Name_)` is iso + `alias(+ _Name_)` is iso
Specify an alias to the stream. The alias <tt>Name</tt> must be an atom. The Specify an alias to the stream. The alias <tt>Name</tt> must be an atom.
The
alias can be used instead of the stream descriptor for every operation alias can be used instead of the stream descriptor for every operation
concerning the stream. concerning the stream.
@ -1603,11 +1636,13 @@ open_expands_filename.
*/ */
static Int open4(USES_REGS1) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */ static Int open4(
USES_REGS1) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
return do_open(Deref(ARG1), Deref(ARG2), Deref(ARG4) PASS_REGS); return do_open(Deref(ARG1), Deref(ARG2), Deref(ARG4) PASS_REGS);
} }
static Int p_file_expansion(USES_REGS1) { /* '$file_expansion'(+File,-Name) */ static Int p_file_expansion(
USES_REGS1) { /* '$file_expansion'(+File,-Name) */
Term file_name = Deref(ARG1); Term file_name = Deref(ARG1);
/* we know file_name is bound */ /* we know file_name is bound */
@ -1650,10 +1685,6 @@ static Int p_open_null_stream(USES_REGS1) {
st->stream_gets = PlGets; st->stream_gets = PlGets;
st->stream_wgetc = get_wchar; st->stream_wgetc = get_wchar;
st->stream_wgetc_for_read = get_wchar; st->stream_wgetc_for_read = get_wchar;
if (st->encoding == ENC_ISO_UTF8)
st->stream_getc_for_utf8 = st->stream_getc;
else
st->stream_getc_for_utf8 = GetUTF8;
st->user_name = MkAtomTerm(st->name = AtomDevNull); st->user_name = MkAtomTerm(st->name = AtomDevNull);
UNLOCK(st->streamlock); UNLOCK(st->streamlock);
t = Yap_MkStream(sno); t = Yap_MkStream(sno);
@ -1683,8 +1714,8 @@ int Yap_OpenStream(FILE *fd, char *name, Term file_name, int flags) {
#define CheckStream(arg, kind, msg) \ #define CheckStream(arg, kind, msg) \
CheckStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg) CheckStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
static int CheckStream__(const char *file, const char *f, int line, Term arg, static int CheckStream__(const char *file, const char *f, int line,
int kind, const char *msg) { Term arg, int kind, const char *msg) {
int sno = -1; int sno = -1;
arg = Deref(arg); arg = Deref(arg);
if (IsVarTerm(arg)) { if (IsVarTerm(arg)) {
@ -1746,8 +1777,8 @@ int Yap_CheckStream__(const char *file, const char *f, int line, Term arg,
return CheckStream__(file, f, line, arg, kind, msg); return CheckStream__(file, f, line, arg, kind, msg);
} }
int Yap_CheckTextStream__(const char *file, const char *f, int line, Term arg, int Yap_CheckTextStream__(const char *file, const char *f, int line,
int kind, const char *msg) { Term arg, int kind, const char *msg) {
int sno; int sno;
if ((sno = CheckStream__(file, f, line, arg, kind, msg)) < 0) if ((sno = CheckStream__(file, f, line, arg, kind, msg)) < 0)
return -1; return -1;
@ -1784,7 +1815,8 @@ int Yap_GetFreeStreamDForReading(void) {
/** /**
* @pred always_prompt_user * @pred always_prompt_user
* *
* Ensure that the stream always prompts before asking the standard input stream for data. * Ensure that the stream always prompts before asking the standard input
stream for data.
*/ */
static Int always_prompt_user(USES_REGS1) { static Int always_prompt_user(USES_REGS1) {
@ -1815,7 +1847,8 @@ user_output, and user_error can never be closed.
(USES_REGS1) { /* '$close'(+GLOBAL_Stream) */ (USES_REGS1) { /* '$close'(+GLOBAL_Stream) */
Int sno = CheckStream( Int sno = CheckStream(
ARG1, (Input_Stream_f | Output_Stream_f | Socket_Stream_f), "close/2"); ARG1, (Input_Stream_f | Output_Stream_f | Socket_Stream_f),
"close/2");
if (sno < 0) if (sno < 0)
return (FALSE); return (FALSE);
if (sno <= StdErrStream) { if (sno <= StdErrStream) {
@ -1853,7 +1886,8 @@ YAP currently ignores these options.
*/ */
static Int close2(USES_REGS1) { /* '$close'(+GLOBAL_Stream) */ static Int close2(USES_REGS1) { /* '$close'(+GLOBAL_Stream) */
Int sno = CheckStream( Int sno = CheckStream(
ARG1, (Input_Stream_f | Output_Stream_f | Socket_Stream_f), "close/2"); ARG1, (Input_Stream_f | Output_Stream_f | Socket_Stream_f),
"close/2");
Term tlist; Term tlist;
if (sno < 0) if (sno < 0)
return (FALSE); return (FALSE);
@ -1944,11 +1978,11 @@ static Int abs_file_parameters(USES_REGS1) {
t[ABSOLUTE_FILE_NAME_RELATIVE_TO] = t[ABSOLUTE_FILE_NAME_RELATIVE_TO] =
gethdir(args[ABSOLUTE_FILE_NAME_RELATIVE_TO].tvalue); gethdir(args[ABSOLUTE_FILE_NAME_RELATIVE_TO].tvalue);
} else { } else {
t[ABSOLUTE_FILE_NAME_RELATIVE_TO] = t[ABSOLUTE_FILE_NAME_RELATIVE_TO] = gethdir(TermDot);
gethdir( TermDot );
} }
if (args[ABSOLUTE_FILE_NAME_FILE_TYPE].used) if (args[ABSOLUTE_FILE_NAME_FILE_TYPE].used)
t[ABSOLUTE_FILE_NAME_FILE_TYPE] = args[ABSOLUTE_FILE_NAME_FILE_TYPE].tvalue; t[ABSOLUTE_FILE_NAME_FILE_TYPE] =
args[ABSOLUTE_FILE_NAME_FILE_TYPE].tvalue;
else else
t[ABSOLUTE_FILE_NAME_FILE_TYPE] = TermTxt; t[ABSOLUTE_FILE_NAME_FILE_TYPE] = TermTxt;
if (args[ABSOLUTE_FILE_NAME_ACCESS].used) if (args[ABSOLUTE_FILE_NAME_ACCESS].used)
@ -1961,7 +1995,8 @@ static Int abs_file_parameters(USES_REGS1) {
else else
t[ABSOLUTE_FILE_NAME_FILE_ERRORS] = TermError; t[ABSOLUTE_FILE_NAME_FILE_ERRORS] = TermError;
if (args[ABSOLUTE_FILE_NAME_SOLUTIONS].used) if (args[ABSOLUTE_FILE_NAME_SOLUTIONS].used)
t[ABSOLUTE_FILE_NAME_SOLUTIONS] = args[ABSOLUTE_FILE_NAME_SOLUTIONS].tvalue; t[ABSOLUTE_FILE_NAME_SOLUTIONS] =
args[ABSOLUTE_FILE_NAME_SOLUTIONS].tvalue;
else else
t[ABSOLUTE_FILE_NAME_SOLUTIONS] = TermFirst; t[ABSOLUTE_FILE_NAME_SOLUTIONS] = TermFirst;
if (args[ABSOLUTE_FILE_NAME_EXPAND].used) if (args[ABSOLUTE_FILE_NAME_EXPAND].used)
@ -1977,7 +2012,8 @@ static Int abs_file_parameters(USES_REGS1) {
args[ABSOLUTE_FILE_NAME_VERBOSE_FILE_SEARCH].tvalue; args[ABSOLUTE_FILE_NAME_VERBOSE_FILE_SEARCH].tvalue;
else else
t[ABSOLUTE_FILE_NAME_VERBOSE_FILE_SEARCH] = t[ABSOLUTE_FILE_NAME_VERBOSE_FILE_SEARCH] =
(trueGlobalPrologFlag(VERBOSE_FILE_SEARCH_FLAG) ? TermTrue : TermFalse); (trueGlobalPrologFlag(VERBOSE_FILE_SEARCH_FLAG) ? TermTrue
: TermFalse);
tf = Yap_MkApplTerm(Yap_MkFunctor(AtomOpt, ABSOLUTE_FILE_NAME_END), tf = Yap_MkApplTerm(Yap_MkFunctor(AtomOpt, ABSOLUTE_FILE_NAME_END),
ABSOLUTE_FILE_NAME_END, t); ABSOLUTE_FILE_NAME_END, t);
return (Yap_unify(ARG2, tf)); return (Yap_unify(ARG2, tf));

View File

@ -37,10 +37,14 @@ extern size_t Yap_page_size;
#include <wchar.h> #include <wchar.h>
#define Yap_CheckStream( arg, kind, msg) Yap_CheckStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg) #define Yap_CheckStream(arg, kind, msg) \
extern int Yap_CheckStream__(const char *, const char *, int , Term, int, const char *); Yap_CheckStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
#define Yap_CheckTextStream( arg, kind, msg) Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg) extern int Yap_CheckStream__(const char *, const char *, int, Term, int,
extern int Yap_CheckTextStream__(const char *, const char *, int , Term, int, const char *); const char *);
#define Yap_CheckTextStream(arg, kind, msg) \
Yap_CheckTextStream__(__FILE__, __FUNCTION__, __LINE__, arg, kind, msg)
extern int Yap_CheckTextStream__(const char *, const char *, int, Term, int,
const char *);
extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name, extern bool Yap_initStream(int sno, FILE *fd, const char *name, Term file_name,
encoding_t encoding, stream_flags_t flags, encoding_t encoding, stream_flags_t flags,
@ -76,8 +80,8 @@ Int Yap_CloseSocket(int, socket_info, socket_domain);
#endif /* USE_SOCKET */ #endif /* USE_SOCKET */
/************ SWI compatible support for unicode representations ************/ /************ SWI compatible support for unicode representations ************/
typedef struct yap_io_position typedef struct yap_io_position {
{ int64_t byteno; /* byte-position in file */ int64_t byteno; /* byte-position in file */
int64_t charno; /* character position in file */ int64_t charno; /* character position in file */
long int lineno; /* lineno in file */ long int lineno; /* lineno in file */
long int linepos; /* position in line */ long int linepos; /* position in line */
@ -85,15 +89,14 @@ typedef struct yap_io_position
} yapIOPOS; } yapIOPOS;
#ifndef _PL_STREAM_H #ifndef _PL_STREAM_H
typedef struct typedef struct {
{ Atom file; /* current source file */ Atom file; /* current source file */
yapIOPOS position; /* Line, line pos, char and byte */ yapIOPOS position; /* Line, line pos, char and byte */
} yapSourceLocation; } yapSourceLocation;
#endif #endif
#define RD_MAGIC 0xefebe128 #define RD_MAGIC 0xefebe128
typedef struct vlist_struct_t { typedef struct vlist_struct_t {
struct VARSTRUCT *ve; struct VARSTRUCT *ve;
struct vlist_struct_t *next; struct vlist_struct_t *next;
@ -106,9 +109,8 @@ typedef struct qq_struct_t {
struct qq_struct_t *next; struct qq_struct_t *next;
} qq_t; } qq_t;
typedef struct read_data_t {
typedef struct read_data_t unsigned char *here; /* current character */
{ unsigned char *here; /* current character */
unsigned char *base; /* base of clause */ unsigned char *base; /* base of clause */
unsigned char *end; /* end of the clause */ unsigned char *end; /* end of the clause */
unsigned char *token_start; /* start of most recent read token */ unsigned char *token_start; /* start of most recent read token */
@ -163,8 +165,7 @@ typedef int (*GetsFunc)(int, UInt, char *);
#include <sys/socket.h> #include <sys/socket.h>
#endif #endif
typedef typedef struct mem_desc {
struct mem_desc {
char *buf; /* where the file is being read from/written to */ char *buf; /* where the file is being read from/written to */
int src; /* where the space comes from, 0 code space, 1 malloc */ int src; /* where the space comes from, 0 code space, 1 malloc */
Int max_size; /* maximum buffer size (may be changed dynamically) */ Int max_size; /* maximum buffer size (may be changed dynamically) */
@ -172,8 +173,7 @@ struct mem_desc {
volatile void *error_handler; volatile void *error_handler;
} memHandle; } memHandle;
typedef struct stream_desc typedef struct stream_desc {
{
Atom name; Atom name;
Term user_name; Term user_name;
FILE *file; FILE *file;
@ -183,7 +183,7 @@ typedef struct stream_desc
union { union {
struct { struct {
#define PLGETC_BUF_SIZE 4096 #define PLGETC_BUF_SIZE 4096
char *buf, *ptr; unsigned char *buf, *ptr;
int left; int left;
} file; } file;
memHandle mem_string; memHandle mem_string;
@ -198,7 +198,7 @@ typedef struct stream_desc
} socket; } socket;
#endif #endif
struct { struct {
const char *buf, *ptr; const unsigned char *buf, *ptr;
} irl; } irl;
} u; } u;
Int charcount, linecount, linepos; Int charcount, linecount, linepos;
@ -209,37 +209,29 @@ typedef struct stream_desc
#endif #endif
int (*stream_putc)(int, int); /* function the stream uses for writing */ int (*stream_putc)(int, int); /* function the stream uses for writing */
int (*stream_getc)(int); /* function the stream uses for reading */ int (*stream_getc)(int); /* function the stream uses for reading */
GetsFunc stream_gets; /* function the stream uses for reading a sequence of characters */ GetsFunc stream_gets; /* function the stream uses for reading a sequence of
characters */
/* function the stream uses for parser. It may be different if the ISO /* function the stream uses for parser. It may be different if the ISO
character conversion is on */ character conversion is on */
int (*stream_wgetc_for_read)(int); int (*stream_wgetc_for_read)(int);
int (*stream_wgetc)(int); int (*stream_wgetc)(int);
int (* stream_getc_for_utf8)(int);
int (*stream_wputc)(int, wchar_t); int (*stream_wputc)(int, wchar_t);
uint64_t utf8_buf; /* used to translate to utf-8 */
encoding_t encoding; encoding_t encoding;
mbstate_t mbstate; } StreamDesc;
}
StreamDesc;
static inline bool IsStreamTerm(Term t) {
static inline bool
IsStreamTerm(Term t)
{
return !IsVarTerm(t) && return !IsVarTerm(t) &&
(IsAtomTerm(t) || (IsApplTerm(t) && (FunctorOfTerm(t) == FunctorStream))); (IsAtomTerm(t) ||
(IsApplTerm(t) && (FunctorOfTerm(t) == FunctorStream)));
} }
static inline StreamDesc * static inline StreamDesc *Yap_GetStreamHandle(Term t) {
Yap_GetStreamHandle(Term t)
{
int sno = Yap_CheckStream(t, 0, "stream search"); int sno = Yap_CheckStream(t, 0, "stream search");
if (sno < 0) if (sno < 0)
return NULL; return NULL;
return GLOBAL_Stream + sno; return GLOBAL_Stream + sno;
} }
#define YAP_ERROR NIL #define YAP_ERROR NIL
#define MaxStreams 64 #define MaxStreams 64
@ -255,13 +247,12 @@ Yap_GetStreamHandle(Term t)
void Yap_InitStdStreams(void); void Yap_InitStdStreams(void);
Term Yap_StreamPosition(int); Term Yap_StreamPosition(int);
static inline int static inline int GetCurInpPos(StreamDesc *inp_stream) {
GetCurInpPos (StreamDesc * inp_stream)
{
return (inp_stream->linecount); return (inp_stream->linecount);
} }
#define PlIOError(type, culprit,...) PlIOError__(__FILE__, __FUNCTION__, __LINE__, type, culprit, __VA_ARGS__) #define PlIOError(type, culprit, ...) \
PlIOError__(__FILE__, __FUNCTION__, __LINE__, type, culprit, __VA_ARGS__)
Int PlIOError__(const char *, const char *, int, yap_error_number, Term, ...); Int PlIOError__(const char *, const char *, int, yap_error_number, Term, ...);
@ -270,7 +261,6 @@ Term Yap_MkStream (int n);
bool Yap_PrintWarning(Term twarning); bool Yap_PrintWarning(Term twarning);
void Yap_plwrite(Term, struct stream_desc *, int, int, int); void Yap_plwrite(Term, struct stream_desc *, int, int, int);
void Yap_WriteAtom(struct stream_desc *s, Atom atom); void Yap_WriteAtom(struct stream_desc *s, Atom atom);
@ -353,16 +343,11 @@ INLINE_ONLY inline EXTERN void count_output_char(int ch, StreamDesc *s);
Term Yap_StreamUserName(int sno); Term Yap_StreamUserName(int sno);
INLINE_ONLY inline EXTERN void INLINE_ONLY inline EXTERN void count_output_char(int ch, StreamDesc *s) {
count_output_char(int ch, StreamDesc *s) if (ch == '\n') {
{
if (ch == '\n')
{
#if MPWSHELL #if MPWSHELL
if (mpwshell && (sno == StdOutStream || sno == if (mpwshell && (sno == StdOutStream || sno == StdErrStream) &&
StdErrStream) && !(s->status & Null_Stream_f)) {
!(s->status & Null_Stream_f))
{
putc(MPWSEP, s->file); putc(MPWSEP, s->file);
if (!(GLOBAL_Stream[LOCAL_output_stream].status & Null_Stream_f)) if (!(GLOBAL_Stream[LOCAL_output_stream].status & Null_Stream_f))
fflush(stdout); fflush(stdout);
@ -371,11 +356,10 @@ count_output_char(int ch, StreamDesc *s)
/* Inform that we have written a newline */ /* Inform that we have written a newline */
++s->charcount; ++s->charcount;
++s->linecount; ++s->linecount;
s->linepos = 0; } s->linepos = 0;
else { } else {
#if MAC #if MAC
if ((sno == StdOutStream || sno == StdErrStream) if ((sno == StdOutStream || sno == StdErrStream) && s->linepos > 200)
&& s->linepos > 200)
sno->stream_putc(sno, '\n'); sno->stream_putc(sno, '\n');
#endif #endif
++s->charcount; ++s->charcount;
@ -383,29 +367,16 @@ count_output_char(int ch, StreamDesc *s)
} }
} }
inline static Term inline static Term StreamName(int i) { return (GLOBAL_Stream[i].user_name); }
StreamName(int i)
{
return(GLOBAL_Stream[i].user_name);
}
inline static Atom inline static Atom StreamFullName(int i) { return (GLOBAL_Stream[i].name); }
StreamFullName(int i)
{
return(GLOBAL_Stream[i].name);
}
inline static void inline static void console_count_output_char(int ch, StreamDesc *s) {
console_count_output_char(int ch, StreamDesc *s)
{
CACHE_REGS CACHE_REGS
if (ch == '\n') if (ch == '\n') {
{
#if MPWSHELL #if MPWSHELL
if (mpwshell && (sno == StdOutStream || sno == if (mpwshell && (sno == StdOutStream || sno == StdErrStream) &&
StdErrStream) && !(s->status & Null_Stream_f)) {
!(s->status & Null_Stream_f))
{
putc(MPWSEP, s->file); putc(MPWSEP, s->file);
if (!(GLOBAL_Stream[LOCAL_output_stream].status & Null_Stream_f)) if (!(GLOBAL_Stream[LOCAL_output_stream].status & Null_Stream_f))
fflush(stdout); fflush(stdout);
@ -416,12 +387,10 @@ console_count_output_char(int ch, StreamDesc *s)
s->linepos = 0; s->linepos = 0;
LOCAL_newline = TRUE; LOCAL_newline = TRUE;
/* Inform we are not at the start of a newline */ /* Inform we are not at the start of a newline */
} } else {
else {
LOCAL_newline = FALSE; LOCAL_newline = FALSE;
#if MAC #if MAC
if ((sno == StdOutStream || sno == StdErrStream) if ((sno == StdOutStream || sno == StdErrStream) && s->linepos > 200)
&& s->linepos > 200)
sno->stream_putc(sno, '\n'); sno->stream_putc(sno, '\n');
#endif #endif
++s->charcount; ++s->charcount;
@ -429,9 +398,7 @@ console_count_output_char(int ch, StreamDesc *s)
} }
} }
inline static Term inline static Term StreamPosition(int sno) {
StreamPosition(int sno)
{
CACHE_REGS CACHE_REGS
Term sargs[5]; Term sargs[5];
Int cpos; Int cpos;
@ -443,10 +410,7 @@ StreamPosition(int sno)
return Yap_MkApplTerm(FunctorStreamPos, 5, sargs); return Yap_MkApplTerm(FunctorStreamPos, 5, sargs);
} }
inline static Term CurrentPositionToTerm(void) {
inline static Term
CurrentPositionToTerm(void)
{
CACHE_REGS CACHE_REGS
Term sargs[5]; Term sargs[5];
sargs[0] = MkIntegerTerm(LOCAL_StartCharCount); sargs[0] = MkIntegerTerm(LOCAL_StartCharCount);
@ -456,10 +420,6 @@ CurrentPositionToTerm(void)
return Yap_MkApplTerm(FunctorStreamPos, 5, sargs); return Yap_MkApplTerm(FunctorStreamPos, 5, sargs);
} }
extern FILE *Yap_stdin; extern FILE *Yap_stdin;
extern FILE *Yap_stdout; extern FILE *Yap_stdout;
extern FILE *Yap_stderr; extern FILE *Yap_stderr;

View File

@ -227,21 +227,19 @@ static bool getLine(int inp, int out) {
CACHE_REGS CACHE_REGS
rl_instream = GLOBAL_Stream[inp].file; rl_instream = GLOBAL_Stream[inp].file;
rl_outstream = GLOBAL_Stream[out].file; rl_outstream = GLOBAL_Stream[out].file;
const char *myrl_line; const unsigned char *myrl_line;
StreamDesc *s = GLOBAL_Stream + inp; StreamDesc *s = GLOBAL_Stream + inp;
if (!(s->status & Tty_Stream_f)) if (!(s->status & Tty_Stream_f))
return false; return false;
/* window of vulnerability opened */ /* window of vulnerability opened */
LOCAL_PrologMode |= ConsoleGetcMode;
fflush(NULL); fflush(NULL);
LOCAL_PrologMode |= ConsoleGetcMode; LOCAL_PrologMode |= ConsoleGetcMode;
if (LOCAL_newline) { // no output so far if (LOCAL_newline) { // no output so far
myrl_line = readline(LOCAL_Prompt); myrl_line = (unsigned char *)readline(LOCAL_Prompt);
} else { } else {
myrl_line = readline(NULL); myrl_line = (unsigned char *)readline(NULL);
} }
/* Do it the gnu way */ /* Do it the gnu way */
if (LOCAL_PrologMode & InterruptMode) { if (LOCAL_PrologMode & InterruptMode) {
@ -261,7 +259,7 @@ static bool getLine(int inp, int out) {
if (myrl_line == NULL) if (myrl_line == NULL)
return false; return false;
if (myrl_line[0] != '\0' && myrl_line[1] != '\0') { if (myrl_line[0] != '\0' && myrl_line[1] != '\0') {
add_history(myrl_line); add_history((char *)myrl_line);
append_history(1, history_file); append_history(1, history_file);
} }
s->u.irl.ptr = s->u.irl.buf = myrl_line; s->u.irl.ptr = s->u.irl.buf = myrl_line;
@ -296,7 +294,7 @@ static int ReadlineGetc(int sno) {
bool fetch = (s->u.irl.buf == NULL); bool fetch = (s->u.irl.buf == NULL);
if (!fetch || getLine(sno, StdErrStream)) { if (!fetch || getLine(sno, StdErrStream)) {
const char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf; const unsigned char *ttyptr = s->u.irl.ptr++, *myrl_line = s->u.irl.buf;
ch = *ttyptr; ch = *ttyptr;
if (ch == '\0') { if (ch == '\0') {
ch = '\n'; ch = '\n';
@ -322,7 +320,7 @@ Int Yap_ReadlinePeekChar(int sno) {
int ch; int ch;
if (s->u.irl.buf) { if (s->u.irl.buf) {
const char *ttyptr = s->u.irl.ptr; const unsigned char *ttyptr = s->u.irl.ptr;
ch = *ttyptr; ch = *ttyptr;
if (ch == '\0') { if (ch == '\0') {
ch = '\n'; ch = '\n';
@ -350,15 +348,15 @@ int Yap_ReadlineForSIGINT(void) {
CACHE_REGS CACHE_REGS
int ch; int ch;
StreamDesc *s = &GLOBAL_Stream[StdInStream]; StreamDesc *s = &GLOBAL_Stream[StdInStream];
const char *myrl_line = s->u.irl.buf; const unsigned char *myrl_line = s->u.irl.buf;
if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != (char *)NULL) { if ((LOCAL_PrologMode & ConsoleGetcMode) && myrl_line != NULL) {
ch = myrl_line[0]; ch = myrl_line[0];
free((void *)myrl_line); free((void *)myrl_line);
myrl_line = NULL; myrl_line = NULL;
return ch; return ch;
} else { } else {
myrl_line = readline("Action (h for help): "); myrl_line = (const unsigned char *)readline("Action (h for help): ");
if (!myrl_line) { if (!myrl_line) {
ch = EOF; ch = EOF;
return ch; return ch;

View File

@ -976,6 +976,7 @@ static Int read_term(
Int out; Int out;
/* needs to change LOCAL_output_stream for write */ /* needs to change LOCAL_output_stream for write */
yhandle_t h = Yap_PushHandle(ARG2); yhandle_t h = Yap_PushHandle(ARG2);
inp_stream = Yap_CheckTextStream(ARG1, Input_Stream_f, "read/3"); inp_stream = Yap_CheckTextStream(ARG1, Input_Stream_f, "read/3");
if (inp_stream == -1) { if (inp_stream == -1) {

View File

@ -295,18 +295,10 @@ static Int has_close_on_abort(
static bool static bool
has_encoding(int sno, has_encoding(int sno,
Term t2 USES_REGS) { /* '$set_output'(+Stream,-ErrorMessage) */ Term t2 USES_REGS) { /* '$set_output'(+Stream,-ErrorMessage) */
if (!IsVarTerm(t2) && !(isatom(t2))) { const char *s = enc_name(GLOBAL_Stream[sno].encoding);
return FALSE;
}
if (0 && IsAtomTerm(t2)) {
encoding_t e = enc_id(RepAtom(AtomOfTerm(t2))->StrOfAE);
GLOBAL_Stream[sno].encoding = e;
return true;
} else {
const char *s = enc_name(LOCAL_encoding);
return Yap_unify(t2, MkAtomTerm(Yap_LookupAtom(s))); return Yap_unify(t2, MkAtomTerm(Yap_LookupAtom(s)));
} }
}
static bool static bool
found_eof(int sno, found_eof(int sno,
@ -719,9 +711,12 @@ static bool do_set_stream(int sno,
sno, (args[SET_STREAM_CLOSE_ON_ABORT].tvalue == TermTrue)); sno, (args[SET_STREAM_CLOSE_ON_ABORT].tvalue == TermTrue));
break; break;
case SET_STREAM_ENCODING: case SET_STREAM_ENCODING:
{
Term t2 = args[SET_STREAM_ENCODING].tvalue;
Atom atEnc = AtomOfTerm(t2);
GLOBAL_Stream[sno].encoding = GLOBAL_Stream[sno].encoding =
enc_id(AtomOfTerm(args[SET_STREAM_ENCODING].tvalue)->StrOfAE); enc_id(atEnc->StrOfAE, (GLOBAL_Stream[sno].status & HAS_BOM_f ? GLOBAL_Stream[sno].encoding :ENC_OCTET ) );
has_encoding(sno, args[SET_STREAM_ENCODING].tvalue PASS_REGS); }
break; break;
case SET_STREAM_EOF_ACTION: { case SET_STREAM_EOF_ACTION: {
Term t2 = args[SET_STREAM_EOF_ACTION].tvalue; Term t2 = args[SET_STREAM_EOF_ACTION].tvalue;

View File

@ -423,7 +423,7 @@ stream_position_data(Prop, Term, Value) :-
'$stream_position_field'(byte_count, 4). '$stream_position_field'(byte_count, 4).
'$set_encoding'(Enc) :- '$set_encoding'(Enc) :-
stream_property(loop_stream, Enc). set_stream(loop_stream, encoding(Enc)).
%! @} %! @}