fix unicode16 and add BOM
git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@1862 b08c6af1-5177-4d33-ba66-4b1c6b8b522a
This commit is contained in:
parent
f1411c368e
commit
917c777381
289
C/iopreds.c
289
C/iopreds.c
@ -378,20 +378,8 @@ PlGetsFunc(void)
|
||||
}
|
||||
|
||||
static void
|
||||
InitStdStream (int sno, SMALLUNSGN flags, YP_File file)
|
||||
InitFileIO(StreamDesc *s)
|
||||
{
|
||||
StreamDesc *s = &Stream[sno];
|
||||
s->u.file.file = file;
|
||||
s->status = flags;
|
||||
s->linepos = 0;
|
||||
s->linecount = 1;
|
||||
s->charcount = 0;
|
||||
s->encoding = DefaultEncoding();
|
||||
INIT_LOCK(s->streamlock);
|
||||
unix_upd_stream_info (s);
|
||||
/* Getting streams to prompt is a mess because we need for cooperation
|
||||
between readers and writers to the stream :-(
|
||||
*/
|
||||
s->stream_gets = PlGetsFunc();
|
||||
#if USE_SOCKET
|
||||
if (s->status & Socket_Stream_f) {
|
||||
@ -437,20 +425,39 @@ InitStdStream (int sno, SMALLUNSGN flags, YP_File file)
|
||||
s->stream_getc = PlGetc;
|
||||
s->stream_gets = PlGetsFunc();
|
||||
}
|
||||
switch(sno) {
|
||||
case 0:
|
||||
s->u.file.name=Yap_LookupAtom("user_input");
|
||||
break;
|
||||
case 1:
|
||||
s->u.file.name=Yap_LookupAtom("user_output");
|
||||
break;
|
||||
default:
|
||||
s->u.file.name=Yap_LookupAtom("user_error");
|
||||
break;
|
||||
}
|
||||
s->u.file.user_name = MkAtomTerm (s->u.file.name);
|
||||
}
|
||||
s->stream_wgetc = get_wchar;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
InitStdStream (int sno, SMALLUNSGN flags, YP_File file)
|
||||
{
|
||||
StreamDesc *s = &Stream[sno];
|
||||
s->u.file.file = file;
|
||||
s->status = flags;
|
||||
s->linepos = 0;
|
||||
s->linecount = 1;
|
||||
s->charcount = 0;
|
||||
s->encoding = DefaultEncoding();
|
||||
INIT_LOCK(s->streamlock);
|
||||
unix_upd_stream_info (s);
|
||||
/* Getting streams to prompt is a mess because we need for cooperation
|
||||
between readers and writers to the stream :-(
|
||||
*/
|
||||
InitFileIO(s);
|
||||
switch(sno) {
|
||||
case 0:
|
||||
s->u.file.name=Yap_LookupAtom("user_input");
|
||||
break;
|
||||
case 1:
|
||||
s->u.file.name=Yap_LookupAtom("user_output");
|
||||
break;
|
||||
default:
|
||||
s->u.file.name=Yap_LookupAtom("user_error");
|
||||
break;
|
||||
}
|
||||
s->u.file.user_name = MkAtomTerm (s->u.file.name);
|
||||
if (CharConversionTable != NULL)
|
||||
s->stream_wgetc_for_read = ISOWGetc;
|
||||
else
|
||||
@ -1560,6 +1567,66 @@ PlUnGetc (int sno)
|
||||
return(post_process_read_char(ch, s));
|
||||
}
|
||||
|
||||
/* give back 0376+ch */
|
||||
static int
|
||||
PlUnGetc376 (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc376)
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc;
|
||||
ch = s->och;
|
||||
s->och = 0xFE;
|
||||
return ch;
|
||||
}
|
||||
|
||||
/* give back 0377+ch */
|
||||
static int
|
||||
PlUnGetc377 (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc377)
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc;
|
||||
ch = s->och;
|
||||
s->och = 0xFF;
|
||||
return ch;
|
||||
}
|
||||
|
||||
/* give back 0357+ch */
|
||||
static int
|
||||
PlUnGetc357 (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc357)
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc;
|
||||
ch = s->och;
|
||||
s->och = 0xEF;
|
||||
return ch;
|
||||
}
|
||||
|
||||
/* give back 0357+0273+ch */
|
||||
static int
|
||||
PlUnGetc357273 (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc357273)
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc357;
|
||||
ch = s->och;
|
||||
s->och = 0xBB;
|
||||
return ch;
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_nof(char ch)
|
||||
{
|
||||
@ -1662,7 +1729,7 @@ get_wchar(int sno)
|
||||
return wch+(ch<<8);
|
||||
}
|
||||
how_many=1;
|
||||
ch = ch;
|
||||
wch = ch;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1678,8 +1745,6 @@ put_wchar(int sno, wchar_t ch)
|
||||
{
|
||||
|
||||
/* pass the bug if we can */
|
||||
if (ch < 0x80)
|
||||
return Stream[sno].stream_putc(sno, ch);
|
||||
switch (Stream[sno].encoding) {
|
||||
case ENC_OCTET:
|
||||
return Stream[sno].stream_putc(sno, ch);
|
||||
@ -1712,25 +1777,24 @@ put_wchar(int sno, wchar_t ch)
|
||||
return ch;
|
||||
}
|
||||
case ENC_ISO_UTF8:
|
||||
{
|
||||
if (ch < 0x800) {
|
||||
Stream[sno].stream_putc(sno, 0xC0 | ch>>6);
|
||||
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
|
||||
}
|
||||
else if (ch < 0x10000) {
|
||||
Stream[sno].stream_putc(sno, 0xE0 | ch>>12);
|
||||
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
|
||||
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
|
||||
} else if (ch < 0x200000) {
|
||||
Stream[sno].stream_putc(sno, 0xF0 | ch>>18);
|
||||
Stream[sno].stream_putc(sno, 0x80 | (ch>>12 & 0x3F));
|
||||
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
|
||||
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
|
||||
}
|
||||
else {
|
||||
/* should never happen */
|
||||
return -1;
|
||||
}
|
||||
if (ch < 0x80) {
|
||||
return Stream[sno].stream_putc(sno, ch);
|
||||
} else if (ch < 0x800) {
|
||||
Stream[sno].stream_putc(sno, 0xC0 | ch>>6);
|
||||
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
|
||||
}
|
||||
else if (ch < 0x10000) {
|
||||
Stream[sno].stream_putc(sno, 0xE0 | ch>>12);
|
||||
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
|
||||
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
|
||||
} else if (ch < 0x200000) {
|
||||
Stream[sno].stream_putc(sno, 0xF0 | ch>>18);
|
||||
Stream[sno].stream_putc(sno, 0x80 | (ch>>12 & 0x3F));
|
||||
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
|
||||
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
|
||||
} else {
|
||||
/* should never happen */
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case ENC_UNICODE_BE:
|
||||
@ -1939,6 +2003,103 @@ binary_file(char *file_name)
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
write_bom(int sno, StreamDesc *st)
|
||||
{
|
||||
/* dump encoding */
|
||||
switch (st->encoding) {
|
||||
case ENC_ISO_UTF8:
|
||||
if (st->stream_putc(sno,0xEF)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0xBB)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0xBF)<0)
|
||||
return FALSE;
|
||||
st->status |= HAS_BOM_f;
|
||||
return TRUE;
|
||||
case ENC_UNICODE_BE:
|
||||
if (st->stream_putc(sno,0xFE)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0xFF)<0)
|
||||
return FALSE;
|
||||
st->status |= HAS_BOM_f;
|
||||
return TRUE;
|
||||
case ENC_UNICODE_LE:
|
||||
if (st->stream_putc(sno,0xFF)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0xFE)<0)
|
||||
return FALSE;
|
||||
default:
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
check_bom(int sno, StreamDesc *st)
|
||||
{
|
||||
|
||||
int ch;
|
||||
|
||||
ch = st->stream_getc(sno);
|
||||
switch(ch) {
|
||||
case 0xFE:
|
||||
{
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch != 0xFF) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc376;
|
||||
st->stream_wgetc = get_wchar;
|
||||
return TRUE;
|
||||
} else {
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_UNICODE_BE;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
case 0xFF:
|
||||
{
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch != 0xFE) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc377;
|
||||
st->stream_wgetc = get_wchar;
|
||||
return TRUE;
|
||||
} else {
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_UNICODE_LE;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
case 0xEF:
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch != 0xBB) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc357;
|
||||
st->stream_wgetc = get_wchar;
|
||||
return TRUE;
|
||||
} else {
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch != 0xBF) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc357273;
|
||||
st->stream_wgetc = get_wchar;
|
||||
return TRUE;
|
||||
} else {
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_ISO_UTF8;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
default:
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc;
|
||||
st->stream_wgetc = get_wchar;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static Int
|
||||
p_open (void)
|
||||
{ /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
|
||||
@ -1950,6 +2111,7 @@ p_open (void)
|
||||
StreamDesc *st;
|
||||
Int opts;
|
||||
UInt encoding;
|
||||
int needs_bom = FALSE, avoid_bom = FALSE;
|
||||
|
||||
file_name = Deref(ARG1);
|
||||
/* we know file_name is bound */
|
||||
@ -1987,14 +2149,14 @@ p_open (void)
|
||||
st = &Stream[sno];
|
||||
/* can never happen */
|
||||
topts = Deref(ARG4);
|
||||
if (IsVarTerm(topts) || !IsIntTerm(topts))
|
||||
if (IsVarTerm(topts) || !IsIntegerTerm(topts))
|
||||
return(FALSE);
|
||||
opts = IntOfTerm(topts);
|
||||
opts = IntegerOfTerm(topts);
|
||||
/* can never happen */
|
||||
tenc = Deref(ARG5);
|
||||
if (IsVarTerm(tenc) || !IsIntTerm(tenc))
|
||||
if (IsVarTerm(tenc) || !IsIntegerTerm(tenc))
|
||||
return FALSE;
|
||||
encoding = IntOfTerm(tenc);
|
||||
encoding = IntegerOfTerm(tenc);
|
||||
#ifdef _WIN32
|
||||
if (st->status & Binary_Stream_f) {
|
||||
strncat(io_mode, "b", 8);
|
||||
@ -2096,6 +2258,12 @@ p_open (void)
|
||||
st->status &= ~Eof_Error_Stream_f;
|
||||
st->status |= Reset_Eof_Stream_f;
|
||||
}
|
||||
if (opts & 128) {
|
||||
needs_bom = TRUE;
|
||||
}
|
||||
if (opts & 256) {
|
||||
avoid_bom = TRUE;
|
||||
}
|
||||
}
|
||||
st->stream_wgetc = get_wchar;
|
||||
if (CharConversionTable != NULL)
|
||||
@ -2103,6 +2271,15 @@ p_open (void)
|
||||
else
|
||||
st->stream_wgetc_for_read = st->stream_wgetc;
|
||||
t = MkStream (sno);
|
||||
if (open_mode == AtomWrite ) {
|
||||
if (!avoid_bom && !write_bom(sno,st))
|
||||
return FALSE;
|
||||
} else if (open_mode == AtomRead &&
|
||||
!avoid_bom &&
|
||||
(needs_bom || (st->status & Seekable_Stream_f))) {
|
||||
if (!check_bom(sno, st))
|
||||
return FALSE;
|
||||
}
|
||||
st->status &= ~(Free_Stream_f);
|
||||
return (Yap_unify (ARG3, t));
|
||||
}
|
||||
@ -3128,6 +3305,15 @@ p_set_output (void)
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static Int
|
||||
p_has_bom (void)
|
||||
{ /* '$set_output'(+Stream,-ErrorMessage) */
|
||||
Int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "has?bom/1");
|
||||
if (sno < 0)
|
||||
return (FALSE);
|
||||
return ((Stream[sno].status & HAS_BOM_f));
|
||||
}
|
||||
|
||||
static Int
|
||||
p_current_input (void)
|
||||
{ /* current_input(?Stream) */
|
||||
@ -5642,6 +5828,7 @@ Yap_InitIOPreds(void)
|
||||
Yap_InitCPred ("$past_eof", 1, p_past_eof, SafePredFlag|SyncPredFlag),
|
||||
Yap_InitCPred ("$peek", 2, p_peek, SafePredFlag|SyncPredFlag),
|
||||
Yap_InitCPred ("$peek_byte", 2, p_peek_byte, SafePredFlag|SyncPredFlag),
|
||||
Yap_InitCPred ("$has_bom", 1, p_has_bom, SafePredFlag);
|
||||
Yap_InitCPred ("current_input", 1, p_current_input, SafePredFlag|SyncPredFlag);
|
||||
Yap_InitCPred ("current_output", 1, p_current_output, SafePredFlag|SyncPredFlag);
|
||||
Yap_InitCPred ("prompt", 1, p_setprompt, SafePredFlag|SyncPredFlag);
|
||||
|
@ -78,7 +78,7 @@ typedef struct stream_desc
|
||||
} u;
|
||||
Int charcount, linecount, linepos;
|
||||
Int status;
|
||||
wchar_t och;
|
||||
int och;
|
||||
#if defined(YAPOR) || defined(THREADS)
|
||||
lockvar streamlock; /* protect stream access */
|
||||
#endif
|
||||
@ -91,7 +91,6 @@ typedef struct stream_desc
|
||||
int (* stream_wgetc)(int);
|
||||
int (* stream_wputc)(int,wchar_t);
|
||||
encoding_t encoding;
|
||||
int use_bom;
|
||||
mbstate_t mbstate;
|
||||
}
|
||||
StreamDesc;
|
||||
@ -123,6 +122,7 @@ StreamDesc;
|
||||
#define Pipe_Stream_f 0x040000
|
||||
#define Popen_Stream_f 0x080000
|
||||
#define User_Stream_f 0x100000
|
||||
#define HAS_BOM_f 0x200000
|
||||
|
||||
#define StdInStream 0
|
||||
#define StdOutStream 1
|
||||
|
@ -16,6 +16,8 @@
|
||||
|
||||
<h2>Yap-5.1.2:</h2>
|
||||
<ul>
|
||||
<li> FIXED: UNICODE16 was broken.</li>
|
||||
<li> NEW: support for BOM.</li>
|
||||
<li> FIXED: debugging and clause/3 over tabled predicates would kill YAP.</li>
|
||||
<li> FIXED: memory expansion in deallocate would lose cut pointer.</li>
|
||||
<li> FIXED: make growtrail do more than grow chunks of 64KB.</li>
|
||||
|
31
pl/yio.yap
31
pl/yio.yap
@ -91,7 +91,11 @@ open(F,T,S,Opts) :-
|
||||
'$process_open_opts'(L,N2,N, Aliases, Encoding).
|
||||
'$process_open_opts'([encoding(Enc)|L], N0, N, Aliases, EncCode) :-
|
||||
'$valid_encoding'(Enc, EncCode),
|
||||
'$process_open_opts'(L, N0, N, Aliases, EncCode).
|
||||
'$process_open_opts'(L, N0, N, Aliases, _).
|
||||
'$process_open_opts'([bom(BOM)|L], N0, N, Aliases, EncCode) :-
|
||||
'$valid_bom'(BOM, Flag),
|
||||
NI is N0 \/ Flag,
|
||||
'$process_open_opts'(L, NI, N, Aliases, EncCode).
|
||||
'$process_open_opts'([eof_action(T)|L], N0, N, Aliases, Encoding) :-
|
||||
'$value_open_opt'(T,eof_action,I1,I2),
|
||||
N1 is I1\/N0,
|
||||
@ -108,6 +112,11 @@ open(F,T,S,Opts) :-
|
||||
'$value_open_opt'(error,_,16, X) :- X is 128-32-64.
|
||||
'$value_open_opt'(eof_code,_,32, X) :- X is 128-16-64.
|
||||
'$value_open_opt'(reset,64, X) :- X is 128-32-16.
|
||||
%128 -> use bom
|
||||
%256 -> do not use bom
|
||||
|
||||
'$valid_bom'(true, 128).
|
||||
'$valid_bom'(false, 256).
|
||||
|
||||
/* check whether a list of options is valid */
|
||||
'$check_io_opts'(V,G) :- var(V), !,
|
||||
@ -148,6 +157,8 @@ open(F,T,S,Opts) :-
|
||||
'$check_open_eof_action_arg'(T, G).
|
||||
'$check_opt_open'(encoding(T), G) :- !,
|
||||
'$check_open_encoding'(T, G).
|
||||
'$check_opt_open'(bom(T), G) :- !,
|
||||
'$check_open_bom_arg'(T, G).
|
||||
'$check_opt_open'(A, G) :-
|
||||
'$do_error'(domain_error(stream_option,A),G).
|
||||
|
||||
@ -171,6 +182,7 @@ open(F,T,S,Opts) :-
|
||||
'$check_opt_sp'(eof_action(_), _) :- !.
|
||||
'$check_opt_sp'(reposition(_), _) :- !.
|
||||
'$check_opt_sp'(type(_), _) :- !.
|
||||
'$check_opt_sp'(bom(_), _) :- !.
|
||||
'$check_opt_sp'(A, G) :-
|
||||
'$do_error'(domain_error(stream_property,A),G).
|
||||
|
||||
@ -211,6 +223,13 @@ open(F,T,S,Opts) :-
|
||||
'$check_open_reposition_arg'(X,G) :-
|
||||
'$do_error'(domain_error(io_mode,reposition(X)),G).
|
||||
|
||||
'$check_open_bom_arg'(X, G) :- var(X), !,
|
||||
'$do_error'(instantiation_error,G).
|
||||
'$check_open_bom_arg'(true,_) :- !.
|
||||
'$check_open_bom_arg'(false,_) :- !.
|
||||
'$check_open_bom_arg'(X,G) :-
|
||||
'$do_error'(domain_error(io_mode,bom(X)),G).
|
||||
|
||||
'$check_open_alias_arg'(X, G) :- var(X), !,
|
||||
'$do_error'(instantiation_error,G).
|
||||
'$check_open_alias_arg'(X,G) :- atom(X), !,
|
||||
@ -816,6 +835,7 @@ stream_property(Stream, Props) :-
|
||||
%'$generate_prop'(reposition(_R)).
|
||||
'$generate_prop'(type(_T)).
|
||||
'$generate_prop'(alias(_A)).
|
||||
'$generate_prop'(bom(_B)).
|
||||
|
||||
'$stream_property'(Stream, Props) :-
|
||||
var(Props), !,
|
||||
@ -843,7 +863,10 @@ stream_property(Stream, Props) :-
|
||||
'$process_stream_properties'([output|Props], Stream, F, write) :-
|
||||
'$process_stream_properties'(Props, Stream, F, write).
|
||||
'$process_stream_properties'([position(P)|Props], Stream, F, Mode) :-
|
||||
'$show_stream_position'(Stream, P),
|
||||
'$show_stream_bom'(Stream, P),
|
||||
'$process_stream_properties'(Props, Stream, F, Mode).
|
||||
'$process_stream_properties'([bom(B)|Props], Stream, F, Mode) :-
|
||||
'$show_stream_bom'(Stream, B),
|
||||
'$process_stream_properties'(Props, Stream, F, Mode).
|
||||
'$process_stream_properties'([end_of_stream(P)|Props], Stream, F, Mode) :-
|
||||
'$show_stream_eof'(Stream, P),
|
||||
@ -880,6 +903,10 @@ stream_property(Stream, Props) :-
|
||||
Fl /\ 0x2000 =:= 0x2000, !.
|
||||
'$show_stream_reposition'(_, false).
|
||||
|
||||
'$show_stream_bom'(Fl, true) :-
|
||||
'$has_bom'(Fl), !.
|
||||
'$show_stream_bom'(_, false).
|
||||
|
||||
'$show_stream_type'(Fl, binary) :-
|
||||
Fl /\ 0x0100 =:= 0x0100, !.
|
||||
'$show_stream_type'(_, text).
|
||||
|
Reference in New Issue
Block a user