fix unicode16 and add BOM

git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@1862 b08c6af1-5177-4d33-ba66-4b1c6b8b522a
This commit is contained in:
vsc 2007-04-02 23:04:48 +00:00
parent f1411c368e
commit 917c777381
4 changed files with 271 additions and 55 deletions

View File

@ -378,20 +378,8 @@ PlGetsFunc(void)
} }
static void static void
InitStdStream (int sno, SMALLUNSGN flags, YP_File file) InitFileIO(StreamDesc *s)
{ {
StreamDesc *s = &Stream[sno];
s->u.file.file = file;
s->status = flags;
s->linepos = 0;
s->linecount = 1;
s->charcount = 0;
s->encoding = DefaultEncoding();
INIT_LOCK(s->streamlock);
unix_upd_stream_info (s);
/* Getting streams to prompt is a mess because we need for cooperation
between readers and writers to the stream :-(
*/
s->stream_gets = PlGetsFunc(); s->stream_gets = PlGetsFunc();
#if USE_SOCKET #if USE_SOCKET
if (s->status & Socket_Stream_f) { if (s->status & Socket_Stream_f) {
@ -437,20 +425,39 @@ InitStdStream (int sno, SMALLUNSGN flags, YP_File file)
s->stream_getc = PlGetc; s->stream_getc = PlGetc;
s->stream_gets = PlGetsFunc(); s->stream_gets = PlGetsFunc();
} }
switch(sno) {
case 0:
s->u.file.name=Yap_LookupAtom("user_input");
break;
case 1:
s->u.file.name=Yap_LookupAtom("user_output");
break;
default:
s->u.file.name=Yap_LookupAtom("user_error");
break;
}
s->u.file.user_name = MkAtomTerm (s->u.file.name);
} }
s->stream_wgetc = get_wchar; s->stream_wgetc = get_wchar;
}
static void
InitStdStream (int sno, SMALLUNSGN flags, YP_File file)
{
StreamDesc *s = &Stream[sno];
s->u.file.file = file;
s->status = flags;
s->linepos = 0;
s->linecount = 1;
s->charcount = 0;
s->encoding = DefaultEncoding();
INIT_LOCK(s->streamlock);
unix_upd_stream_info (s);
/* Getting streams to prompt is a mess because we need for cooperation
between readers and writers to the stream :-(
*/
InitFileIO(s);
switch(sno) {
case 0:
s->u.file.name=Yap_LookupAtom("user_input");
break;
case 1:
s->u.file.name=Yap_LookupAtom("user_output");
break;
default:
s->u.file.name=Yap_LookupAtom("user_error");
break;
}
s->u.file.user_name = MkAtomTerm (s->u.file.name);
if (CharConversionTable != NULL) if (CharConversionTable != NULL)
s->stream_wgetc_for_read = ISOWGetc; s->stream_wgetc_for_read = ISOWGetc;
else else
@ -1560,6 +1567,66 @@ PlUnGetc (int sno)
return(post_process_read_char(ch, s)); return(post_process_read_char(ch, s));
} }
/* give back 0376+ch */
static int
PlUnGetc376 (int sno)
{
register StreamDesc *s = &Stream[sno];
Int ch;
if (s->stream_getc != PlUnGetc376)
return(s->stream_getc(sno));
s->stream_getc = PlUnGetc;
ch = s->och;
s->och = 0xFE;
return ch;
}
/* give back 0377+ch */
static int
PlUnGetc377 (int sno)
{
register StreamDesc *s = &Stream[sno];
Int ch;
if (s->stream_getc != PlUnGetc377)
return(s->stream_getc(sno));
s->stream_getc = PlUnGetc;
ch = s->och;
s->och = 0xFF;
return ch;
}
/* give back 0357+ch */
static int
PlUnGetc357 (int sno)
{
register StreamDesc *s = &Stream[sno];
Int ch;
if (s->stream_getc != PlUnGetc357)
return(s->stream_getc(sno));
s->stream_getc = PlUnGetc;
ch = s->och;
s->och = 0xEF;
return ch;
}
/* give back 0357+0273+ch */
static int
PlUnGetc357273 (int sno)
{
register StreamDesc *s = &Stream[sno];
Int ch;
if (s->stream_getc != PlUnGetc357273)
return(s->stream_getc(sno));
s->stream_getc = PlUnGetc357;
ch = s->och;
s->och = 0xBB;
return ch;
}
static int static int
utf8_nof(char ch) utf8_nof(char ch)
{ {
@ -1662,7 +1729,7 @@ get_wchar(int sno)
return wch+(ch<<8); return wch+(ch<<8);
} }
how_many=1; how_many=1;
ch = ch; wch = ch;
break; break;
} }
} }
@ -1678,8 +1745,6 @@ put_wchar(int sno, wchar_t ch)
{ {
/* pass the bug if we can */ /* pass the bug if we can */
if (ch < 0x80)
return Stream[sno].stream_putc(sno, ch);
switch (Stream[sno].encoding) { switch (Stream[sno].encoding) {
case ENC_OCTET: case ENC_OCTET:
return Stream[sno].stream_putc(sno, ch); return Stream[sno].stream_putc(sno, ch);
@ -1712,25 +1777,24 @@ put_wchar(int sno, wchar_t ch)
return ch; return ch;
} }
case ENC_ISO_UTF8: case ENC_ISO_UTF8:
{ if (ch < 0x80) {
if (ch < 0x800) { return Stream[sno].stream_putc(sno, ch);
Stream[sno].stream_putc(sno, 0xC0 | ch>>6); } else if (ch < 0x800) {
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); Stream[sno].stream_putc(sno, 0xC0 | ch>>6);
} return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
else if (ch < 0x10000) { }
Stream[sno].stream_putc(sno, 0xE0 | ch>>12); else if (ch < 0x10000) {
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F)); Stream[sno].stream_putc(sno, 0xE0 | ch>>12);
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
} else if (ch < 0x200000) { return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
Stream[sno].stream_putc(sno, 0xF0 | ch>>18); } else if (ch < 0x200000) {
Stream[sno].stream_putc(sno, 0x80 | (ch>>12 & 0x3F)); Stream[sno].stream_putc(sno, 0xF0 | ch>>18);
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F)); Stream[sno].stream_putc(sno, 0x80 | (ch>>12 & 0x3F));
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F)); Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
} return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
else { } else {
/* should never happen */ /* should never happen */
return -1; return -1;
}
} }
break; break;
case ENC_UNICODE_BE: case ENC_UNICODE_BE:
@ -1939,6 +2003,103 @@ binary_file(char *file_name)
#endif #endif
} }
static int
write_bom(int sno, StreamDesc *st)
{
/* dump encoding */
switch (st->encoding) {
case ENC_ISO_UTF8:
if (st->stream_putc(sno,0xEF)<0)
return FALSE;
if (st->stream_putc(sno,0xBB)<0)
return FALSE;
if (st->stream_putc(sno,0xBF)<0)
return FALSE;
st->status |= HAS_BOM_f;
return TRUE;
case ENC_UNICODE_BE:
if (st->stream_putc(sno,0xFE)<0)
return FALSE;
if (st->stream_putc(sno,0xFF)<0)
return FALSE;
st->status |= HAS_BOM_f;
return TRUE;
case ENC_UNICODE_LE:
if (st->stream_putc(sno,0xFF)<0)
return FALSE;
if (st->stream_putc(sno,0xFE)<0)
return FALSE;
default:
return TRUE;
}
}
static int
check_bom(int sno, StreamDesc *st)
{
int ch;
ch = st->stream_getc(sno);
switch(ch) {
case 0xFE:
{
ch = st->stream_getc(sno);
if (ch != 0xFF) {
st->och = ch;
st->stream_getc = PlUnGetc376;
st->stream_wgetc = get_wchar;
return TRUE;
} else {
st->status |= HAS_BOM_f;
st->encoding = ENC_UNICODE_BE;
return TRUE;
}
}
case 0xFF:
{
ch = st->stream_getc(sno);
if (ch != 0xFE) {
st->och = ch;
st->stream_getc = PlUnGetc377;
st->stream_wgetc = get_wchar;
return TRUE;
} else {
st->status |= HAS_BOM_f;
st->encoding = ENC_UNICODE_LE;
return TRUE;
}
}
case 0xEF:
ch = st->stream_getc(sno);
if (ch != 0xBB) {
st->och = ch;
st->stream_getc = PlUnGetc357;
st->stream_wgetc = get_wchar;
return TRUE;
} else {
ch = st->stream_getc(sno);
if (ch != 0xBF) {
st->och = ch;
st->stream_getc = PlUnGetc357273;
st->stream_wgetc = get_wchar;
return TRUE;
} else {
st->status |= HAS_BOM_f;
st->encoding = ENC_ISO_UTF8;
return TRUE;
}
}
default:
st->och = ch;
st->stream_getc = PlUnGetc;
st->stream_wgetc = get_wchar;
return TRUE;
}
}
static Int static Int
p_open (void) p_open (void)
{ /* '$open'(+File,+Mode,?Stream,-ReturnCode) */ { /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
@ -1950,6 +2111,7 @@ p_open (void)
StreamDesc *st; StreamDesc *st;
Int opts; Int opts;
UInt encoding; UInt encoding;
int needs_bom = FALSE, avoid_bom = FALSE;
file_name = Deref(ARG1); file_name = Deref(ARG1);
/* we know file_name is bound */ /* we know file_name is bound */
@ -1987,14 +2149,14 @@ p_open (void)
st = &Stream[sno]; st = &Stream[sno];
/* can never happen */ /* can never happen */
topts = Deref(ARG4); topts = Deref(ARG4);
if (IsVarTerm(topts) || !IsIntTerm(topts)) if (IsVarTerm(topts) || !IsIntegerTerm(topts))
return(FALSE); return(FALSE);
opts = IntOfTerm(topts); opts = IntegerOfTerm(topts);
/* can never happen */ /* can never happen */
tenc = Deref(ARG5); tenc = Deref(ARG5);
if (IsVarTerm(tenc) || !IsIntTerm(tenc)) if (IsVarTerm(tenc) || !IsIntegerTerm(tenc))
return FALSE; return FALSE;
encoding = IntOfTerm(tenc); encoding = IntegerOfTerm(tenc);
#ifdef _WIN32 #ifdef _WIN32
if (st->status & Binary_Stream_f) { if (st->status & Binary_Stream_f) {
strncat(io_mode, "b", 8); strncat(io_mode, "b", 8);
@ -2096,6 +2258,12 @@ p_open (void)
st->status &= ~Eof_Error_Stream_f; st->status &= ~Eof_Error_Stream_f;
st->status |= Reset_Eof_Stream_f; st->status |= Reset_Eof_Stream_f;
} }
if (opts & 128) {
needs_bom = TRUE;
}
if (opts & 256) {
avoid_bom = TRUE;
}
} }
st->stream_wgetc = get_wchar; st->stream_wgetc = get_wchar;
if (CharConversionTable != NULL) if (CharConversionTable != NULL)
@ -2103,6 +2271,15 @@ p_open (void)
else else
st->stream_wgetc_for_read = st->stream_wgetc; st->stream_wgetc_for_read = st->stream_wgetc;
t = MkStream (sno); t = MkStream (sno);
if (open_mode == AtomWrite ) {
if (!avoid_bom && !write_bom(sno,st))
return FALSE;
} else if (open_mode == AtomRead &&
!avoid_bom &&
(needs_bom || (st->status & Seekable_Stream_f))) {
if (!check_bom(sno, st))
return FALSE;
}
st->status &= ~(Free_Stream_f); st->status &= ~(Free_Stream_f);
return (Yap_unify (ARG3, t)); return (Yap_unify (ARG3, t));
} }
@ -3128,6 +3305,15 @@ p_set_output (void)
return (TRUE); return (TRUE);
} }
static Int
p_has_bom (void)
{ /* '$set_output'(+Stream,-ErrorMessage) */
Int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "has?bom/1");
if (sno < 0)
return (FALSE);
return ((Stream[sno].status & HAS_BOM_f));
}
static Int static Int
p_current_input (void) p_current_input (void)
{ /* current_input(?Stream) */ { /* current_input(?Stream) */
@ -5642,6 +5828,7 @@ Yap_InitIOPreds(void)
Yap_InitCPred ("$past_eof", 1, p_past_eof, SafePredFlag|SyncPredFlag), Yap_InitCPred ("$past_eof", 1, p_past_eof, SafePredFlag|SyncPredFlag),
Yap_InitCPred ("$peek", 2, p_peek, SafePredFlag|SyncPredFlag), Yap_InitCPred ("$peek", 2, p_peek, SafePredFlag|SyncPredFlag),
Yap_InitCPred ("$peek_byte", 2, p_peek_byte, SafePredFlag|SyncPredFlag), Yap_InitCPred ("$peek_byte", 2, p_peek_byte, SafePredFlag|SyncPredFlag),
Yap_InitCPred ("$has_bom", 1, p_has_bom, SafePredFlag);
Yap_InitCPred ("current_input", 1, p_current_input, SafePredFlag|SyncPredFlag); Yap_InitCPred ("current_input", 1, p_current_input, SafePredFlag|SyncPredFlag);
Yap_InitCPred ("current_output", 1, p_current_output, SafePredFlag|SyncPredFlag); Yap_InitCPred ("current_output", 1, p_current_output, SafePredFlag|SyncPredFlag);
Yap_InitCPred ("prompt", 1, p_setprompt, SafePredFlag|SyncPredFlag); Yap_InitCPred ("prompt", 1, p_setprompt, SafePredFlag|SyncPredFlag);

View File

@ -78,7 +78,7 @@ typedef struct stream_desc
} u; } u;
Int charcount, linecount, linepos; Int charcount, linecount, linepos;
Int status; Int status;
wchar_t och; int och;
#if defined(YAPOR) || defined(THREADS) #if defined(YAPOR) || defined(THREADS)
lockvar streamlock; /* protect stream access */ lockvar streamlock; /* protect stream access */
#endif #endif
@ -91,7 +91,6 @@ typedef struct stream_desc
int (* stream_wgetc)(int); int (* stream_wgetc)(int);
int (* stream_wputc)(int,wchar_t); int (* stream_wputc)(int,wchar_t);
encoding_t encoding; encoding_t encoding;
int use_bom;
mbstate_t mbstate; mbstate_t mbstate;
} }
StreamDesc; StreamDesc;
@ -123,6 +122,7 @@ StreamDesc;
#define Pipe_Stream_f 0x040000 #define Pipe_Stream_f 0x040000
#define Popen_Stream_f 0x080000 #define Popen_Stream_f 0x080000
#define User_Stream_f 0x100000 #define User_Stream_f 0x100000
#define HAS_BOM_f 0x200000
#define StdInStream 0 #define StdInStream 0
#define StdOutStream 1 #define StdOutStream 1

View File

@ -16,6 +16,8 @@
<h2>Yap-5.1.2:</h2> <h2>Yap-5.1.2:</h2>
<ul> <ul>
<li> FIXED: UNICODE16 was broken.</li>
<li> NEW: support for BOM.</li>
<li> FIXED: debugging and clause/3 over tabled predicates would kill YAP.</li> <li> FIXED: debugging and clause/3 over tabled predicates would kill YAP.</li>
<li> FIXED: memory expansion in deallocate would lose cut pointer.</li> <li> FIXED: memory expansion in deallocate would lose cut pointer.</li>
<li> FIXED: make growtrail do more than grow chunks of 64KB.</li> <li> FIXED: make growtrail do more than grow chunks of 64KB.</li>

View File

@ -91,7 +91,11 @@ open(F,T,S,Opts) :-
'$process_open_opts'(L,N2,N, Aliases, Encoding). '$process_open_opts'(L,N2,N, Aliases, Encoding).
'$process_open_opts'([encoding(Enc)|L], N0, N, Aliases, EncCode) :- '$process_open_opts'([encoding(Enc)|L], N0, N, Aliases, EncCode) :-
'$valid_encoding'(Enc, EncCode), '$valid_encoding'(Enc, EncCode),
'$process_open_opts'(L, N0, N, Aliases, EncCode). '$process_open_opts'(L, N0, N, Aliases, _).
'$process_open_opts'([bom(BOM)|L], N0, N, Aliases, EncCode) :-
'$valid_bom'(BOM, Flag),
NI is N0 \/ Flag,
'$process_open_opts'(L, NI, N, Aliases, EncCode).
'$process_open_opts'([eof_action(T)|L], N0, N, Aliases, Encoding) :- '$process_open_opts'([eof_action(T)|L], N0, N, Aliases, Encoding) :-
'$value_open_opt'(T,eof_action,I1,I2), '$value_open_opt'(T,eof_action,I1,I2),
N1 is I1\/N0, N1 is I1\/N0,
@ -108,6 +112,11 @@ open(F,T,S,Opts) :-
'$value_open_opt'(error,_,16, X) :- X is 128-32-64. '$value_open_opt'(error,_,16, X) :- X is 128-32-64.
'$value_open_opt'(eof_code,_,32, X) :- X is 128-16-64. '$value_open_opt'(eof_code,_,32, X) :- X is 128-16-64.
'$value_open_opt'(reset,64, X) :- X is 128-32-16. '$value_open_opt'(reset,64, X) :- X is 128-32-16.
%128 -> use bom
%256 -> do not use bom
'$valid_bom'(true, 128).
'$valid_bom'(false, 256).
/* check whether a list of options is valid */ /* check whether a list of options is valid */
'$check_io_opts'(V,G) :- var(V), !, '$check_io_opts'(V,G) :- var(V), !,
@ -148,6 +157,8 @@ open(F,T,S,Opts) :-
'$check_open_eof_action_arg'(T, G). '$check_open_eof_action_arg'(T, G).
'$check_opt_open'(encoding(T), G) :- !, '$check_opt_open'(encoding(T), G) :- !,
'$check_open_encoding'(T, G). '$check_open_encoding'(T, G).
'$check_opt_open'(bom(T), G) :- !,
'$check_open_bom_arg'(T, G).
'$check_opt_open'(A, G) :- '$check_opt_open'(A, G) :-
'$do_error'(domain_error(stream_option,A),G). '$do_error'(domain_error(stream_option,A),G).
@ -171,6 +182,7 @@ open(F,T,S,Opts) :-
'$check_opt_sp'(eof_action(_), _) :- !. '$check_opt_sp'(eof_action(_), _) :- !.
'$check_opt_sp'(reposition(_), _) :- !. '$check_opt_sp'(reposition(_), _) :- !.
'$check_opt_sp'(type(_), _) :- !. '$check_opt_sp'(type(_), _) :- !.
'$check_opt_sp'(bom(_), _) :- !.
'$check_opt_sp'(A, G) :- '$check_opt_sp'(A, G) :-
'$do_error'(domain_error(stream_property,A),G). '$do_error'(domain_error(stream_property,A),G).
@ -211,6 +223,13 @@ open(F,T,S,Opts) :-
'$check_open_reposition_arg'(X,G) :- '$check_open_reposition_arg'(X,G) :-
'$do_error'(domain_error(io_mode,reposition(X)),G). '$do_error'(domain_error(io_mode,reposition(X)),G).
'$check_open_bom_arg'(X, G) :- var(X), !,
'$do_error'(instantiation_error,G).
'$check_open_bom_arg'(true,_) :- !.
'$check_open_bom_arg'(false,_) :- !.
'$check_open_bom_arg'(X,G) :-
'$do_error'(domain_error(io_mode,bom(X)),G).
'$check_open_alias_arg'(X, G) :- var(X), !, '$check_open_alias_arg'(X, G) :- var(X), !,
'$do_error'(instantiation_error,G). '$do_error'(instantiation_error,G).
'$check_open_alias_arg'(X,G) :- atom(X), !, '$check_open_alias_arg'(X,G) :- atom(X), !,
@ -816,6 +835,7 @@ stream_property(Stream, Props) :-
%'$generate_prop'(reposition(_R)). %'$generate_prop'(reposition(_R)).
'$generate_prop'(type(_T)). '$generate_prop'(type(_T)).
'$generate_prop'(alias(_A)). '$generate_prop'(alias(_A)).
'$generate_prop'(bom(_B)).
'$stream_property'(Stream, Props) :- '$stream_property'(Stream, Props) :-
var(Props), !, var(Props), !,
@ -843,7 +863,10 @@ stream_property(Stream, Props) :-
'$process_stream_properties'([output|Props], Stream, F, write) :- '$process_stream_properties'([output|Props], Stream, F, write) :-
'$process_stream_properties'(Props, Stream, F, write). '$process_stream_properties'(Props, Stream, F, write).
'$process_stream_properties'([position(P)|Props], Stream, F, Mode) :- '$process_stream_properties'([position(P)|Props], Stream, F, Mode) :-
'$show_stream_position'(Stream, P), '$show_stream_bom'(Stream, P),
'$process_stream_properties'(Props, Stream, F, Mode).
'$process_stream_properties'([bom(B)|Props], Stream, F, Mode) :-
'$show_stream_bom'(Stream, B),
'$process_stream_properties'(Props, Stream, F, Mode). '$process_stream_properties'(Props, Stream, F, Mode).
'$process_stream_properties'([end_of_stream(P)|Props], Stream, F, Mode) :- '$process_stream_properties'([end_of_stream(P)|Props], Stream, F, Mode) :-
'$show_stream_eof'(Stream, P), '$show_stream_eof'(Stream, P),
@ -880,6 +903,10 @@ stream_property(Stream, Props) :-
Fl /\ 0x2000 =:= 0x2000, !. Fl /\ 0x2000 =:= 0x2000, !.
'$show_stream_reposition'(_, false). '$show_stream_reposition'(_, false).
'$show_stream_bom'(Fl, true) :-
'$has_bom'(Fl), !.
'$show_stream_bom'(_, false).
'$show_stream_type'(Fl, binary) :- '$show_stream_type'(Fl, binary) :-
Fl /\ 0x0100 =:= 0x0100, !. Fl /\ 0x0100 =:= 0x0100, !.
'$show_stream_type'(_, text). '$show_stream_type'(_, text).