diff --git a/C/iopreds.c b/C/iopreds.c index 5fbbe7695..c601f24b5 100644 --- a/C/iopreds.c +++ b/C/iopreds.c @@ -378,20 +378,8 @@ PlGetsFunc(void) } static void -InitStdStream (int sno, SMALLUNSGN flags, YP_File file) +InitFileIO(StreamDesc *s) { - StreamDesc *s = &Stream[sno]; - s->u.file.file = file; - s->status = flags; - s->linepos = 0; - s->linecount = 1; - s->charcount = 0; - s->encoding = DefaultEncoding(); - INIT_LOCK(s->streamlock); - unix_upd_stream_info (s); - /* Getting streams to prompt is a mess because we need for cooperation - between readers and writers to the stream :-( - */ s->stream_gets = PlGetsFunc(); #if USE_SOCKET if (s->status & Socket_Stream_f) { @@ -437,20 +425,39 @@ InitStdStream (int sno, SMALLUNSGN flags, YP_File file) s->stream_getc = PlGetc; s->stream_gets = PlGetsFunc(); } - switch(sno) { - case 0: - s->u.file.name=Yap_LookupAtom("user_input"); - break; - case 1: - s->u.file.name=Yap_LookupAtom("user_output"); - break; - default: - s->u.file.name=Yap_LookupAtom("user_error"); - break; - } - s->u.file.user_name = MkAtomTerm (s->u.file.name); } s->stream_wgetc = get_wchar; +} + + +static void +InitStdStream (int sno, SMALLUNSGN flags, YP_File file) +{ + StreamDesc *s = &Stream[sno]; + s->u.file.file = file; + s->status = flags; + s->linepos = 0; + s->linecount = 1; + s->charcount = 0; + s->encoding = DefaultEncoding(); + INIT_LOCK(s->streamlock); + unix_upd_stream_info (s); + /* Getting streams to prompt is a mess because we need for cooperation + between readers and writers to the stream :-( + */ + InitFileIO(s); + switch(sno) { + case 0: + s->u.file.name=Yap_LookupAtom("user_input"); + break; + case 1: + s->u.file.name=Yap_LookupAtom("user_output"); + break; + default: + s->u.file.name=Yap_LookupAtom("user_error"); + break; + } + s->u.file.user_name = MkAtomTerm (s->u.file.name); if (CharConversionTable != NULL) s->stream_wgetc_for_read = ISOWGetc; else @@ -1560,6 +1567,66 @@ PlUnGetc (int sno) return(post_process_read_char(ch, s)); } +/* give back 0376+ch */ +static int +PlUnGetc376 (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc376) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc; + ch = s->och; + s->och = 0xFE; + return ch; +} + +/* give back 0377+ch */ +static int +PlUnGetc377 (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc377) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc; + ch = s->och; + s->och = 0xFF; + return ch; +} + +/* give back 0357+ch */ +static int +PlUnGetc357 (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc357) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc; + ch = s->och; + s->och = 0xEF; + return ch; +} + +/* give back 0357+0273+ch */ +static int +PlUnGetc357273 (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc357273) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc357; + ch = s->och; + s->och = 0xBB; + return ch; +} + static int utf8_nof(char ch) { @@ -1662,7 +1729,7 @@ get_wchar(int sno) return wch+(ch<<8); } how_many=1; - ch = ch; + wch = ch; break; } } @@ -1678,8 +1745,6 @@ put_wchar(int sno, wchar_t ch) { /* pass the bug if we can */ - if (ch < 0x80) - return Stream[sno].stream_putc(sno, ch); switch (Stream[sno].encoding) { case ENC_OCTET: return Stream[sno].stream_putc(sno, ch); @@ -1712,25 +1777,24 @@ put_wchar(int sno, wchar_t ch) return ch; } case ENC_ISO_UTF8: - { - if (ch < 0x800) { - Stream[sno].stream_putc(sno, 0xC0 | ch>>6); - return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); - } - else if (ch < 0x10000) { - Stream[sno].stream_putc(sno, 0xE0 | ch>>12); - Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F)); - return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); - } else if (ch < 0x200000) { - Stream[sno].stream_putc(sno, 0xF0 | ch>>18); - Stream[sno].stream_putc(sno, 0x80 | (ch>>12 & 0x3F)); - Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F)); - return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); - } - else { - /* should never happen */ - return -1; - } + if (ch < 0x80) { + return Stream[sno].stream_putc(sno, ch); + } else if (ch < 0x800) { + Stream[sno].stream_putc(sno, 0xC0 | ch>>6); + return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); + } + else if (ch < 0x10000) { + Stream[sno].stream_putc(sno, 0xE0 | ch>>12); + Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F)); + return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); + } else if (ch < 0x200000) { + Stream[sno].stream_putc(sno, 0xF0 | ch>>18); + Stream[sno].stream_putc(sno, 0x80 | (ch>>12 & 0x3F)); + Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F)); + return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); + } else { + /* should never happen */ + return -1; } break; case ENC_UNICODE_BE: @@ -1939,6 +2003,103 @@ binary_file(char *file_name) #endif } +static int +write_bom(int sno, StreamDesc *st) +{ + /* dump encoding */ + switch (st->encoding) { + case ENC_ISO_UTF8: + if (st->stream_putc(sno,0xEF)<0) + return FALSE; + if (st->stream_putc(sno,0xBB)<0) + return FALSE; + if (st->stream_putc(sno,0xBF)<0) + return FALSE; + st->status |= HAS_BOM_f; + return TRUE; + case ENC_UNICODE_BE: + if (st->stream_putc(sno,0xFE)<0) + return FALSE; + if (st->stream_putc(sno,0xFF)<0) + return FALSE; + st->status |= HAS_BOM_f; + return TRUE; + case ENC_UNICODE_LE: + if (st->stream_putc(sno,0xFF)<0) + return FALSE; + if (st->stream_putc(sno,0xFE)<0) + return FALSE; + default: + return TRUE; + } +} + + +static int +check_bom(int sno, StreamDesc *st) +{ + + int ch; + + ch = st->stream_getc(sno); + switch(ch) { + case 0xFE: + { + ch = st->stream_getc(sno); + if (ch != 0xFF) { + st->och = ch; + st->stream_getc = PlUnGetc376; + st->stream_wgetc = get_wchar; + return TRUE; + } else { + st->status |= HAS_BOM_f; + st->encoding = ENC_UNICODE_BE; + return TRUE; + } + } + case 0xFF: + { + ch = st->stream_getc(sno); + if (ch != 0xFE) { + st->och = ch; + st->stream_getc = PlUnGetc377; + st->stream_wgetc = get_wchar; + return TRUE; + } else { + st->status |= HAS_BOM_f; + st->encoding = ENC_UNICODE_LE; + return TRUE; + } + } + case 0xEF: + ch = st->stream_getc(sno); + if (ch != 0xBB) { + st->och = ch; + st->stream_getc = PlUnGetc357; + st->stream_wgetc = get_wchar; + return TRUE; + } else { + ch = st->stream_getc(sno); + if (ch != 0xBF) { + st->och = ch; + st->stream_getc = PlUnGetc357273; + st->stream_wgetc = get_wchar; + return TRUE; + } else { + st->status |= HAS_BOM_f; + st->encoding = ENC_ISO_UTF8; + return TRUE; + } + } + default: + st->och = ch; + st->stream_getc = PlUnGetc; + st->stream_wgetc = get_wchar; + return TRUE; + } +} + + static Int p_open (void) { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */ @@ -1950,6 +2111,7 @@ p_open (void) StreamDesc *st; Int opts; UInt encoding; + int needs_bom = FALSE, avoid_bom = FALSE; file_name = Deref(ARG1); /* we know file_name is bound */ @@ -1987,14 +2149,14 @@ p_open (void) st = &Stream[sno]; /* can never happen */ topts = Deref(ARG4); - if (IsVarTerm(topts) || !IsIntTerm(topts)) + if (IsVarTerm(topts) || !IsIntegerTerm(topts)) return(FALSE); - opts = IntOfTerm(topts); + opts = IntegerOfTerm(topts); /* can never happen */ tenc = Deref(ARG5); - if (IsVarTerm(tenc) || !IsIntTerm(tenc)) + if (IsVarTerm(tenc) || !IsIntegerTerm(tenc)) return FALSE; - encoding = IntOfTerm(tenc); + encoding = IntegerOfTerm(tenc); #ifdef _WIN32 if (st->status & Binary_Stream_f) { strncat(io_mode, "b", 8); @@ -2096,6 +2258,12 @@ p_open (void) st->status &= ~Eof_Error_Stream_f; st->status |= Reset_Eof_Stream_f; } + if (opts & 128) { + needs_bom = TRUE; + } + if (opts & 256) { + avoid_bom = TRUE; + } } st->stream_wgetc = get_wchar; if (CharConversionTable != NULL) @@ -2103,6 +2271,15 @@ p_open (void) else st->stream_wgetc_for_read = st->stream_wgetc; t = MkStream (sno); + if (open_mode == AtomWrite ) { + if (!avoid_bom && !write_bom(sno,st)) + return FALSE; + } else if (open_mode == AtomRead && + !avoid_bom && + (needs_bom || (st->status & Seekable_Stream_f))) { + if (!check_bom(sno, st)) + return FALSE; + } st->status &= ~(Free_Stream_f); return (Yap_unify (ARG3, t)); } @@ -3128,6 +3305,15 @@ p_set_output (void) return (TRUE); } +static Int +p_has_bom (void) +{ /* '$set_output'(+Stream,-ErrorMessage) */ + Int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "has?bom/1"); + if (sno < 0) + return (FALSE); + return ((Stream[sno].status & HAS_BOM_f)); +} + static Int p_current_input (void) { /* current_input(?Stream) */ @@ -5642,6 +5828,7 @@ Yap_InitIOPreds(void) Yap_InitCPred ("$past_eof", 1, p_past_eof, SafePredFlag|SyncPredFlag), Yap_InitCPred ("$peek", 2, p_peek, SafePredFlag|SyncPredFlag), Yap_InitCPred ("$peek_byte", 2, p_peek_byte, SafePredFlag|SyncPredFlag), + Yap_InitCPred ("$has_bom", 1, p_has_bom, SafePredFlag); Yap_InitCPred ("current_input", 1, p_current_input, SafePredFlag|SyncPredFlag); Yap_InitCPred ("current_output", 1, p_current_output, SafePredFlag|SyncPredFlag); Yap_InitCPred ("prompt", 1, p_setprompt, SafePredFlag|SyncPredFlag); diff --git a/H/iopreds.h b/H/iopreds.h index 28efd9343..7d023405d 100644 --- a/H/iopreds.h +++ b/H/iopreds.h @@ -78,7 +78,7 @@ typedef struct stream_desc } u; Int charcount, linecount, linepos; Int status; - wchar_t och; + int och; #if defined(YAPOR) || defined(THREADS) lockvar streamlock; /* protect stream access */ #endif @@ -91,7 +91,6 @@ typedef struct stream_desc int (* stream_wgetc)(int); int (* stream_wputc)(int,wchar_t); encoding_t encoding; - int use_bom; mbstate_t mbstate; } StreamDesc; @@ -123,6 +122,7 @@ StreamDesc; #define Pipe_Stream_f 0x040000 #define Popen_Stream_f 0x080000 #define User_Stream_f 0x100000 +#define HAS_BOM_f 0x200000 #define StdInStream 0 #define StdOutStream 1 diff --git a/changes-5.1.html b/changes-5.1.html index fd3c9f531..fd742ae20 100644 --- a/changes-5.1.html +++ b/changes-5.1.html @@ -16,6 +16,8 @@

Yap-5.1.2: