fix unicode16 and add BOM

git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@1862 b08c6af1-5177-4d33-ba66-4b1c6b8b522a
This commit is contained in:
vsc
2007-04-02 23:04:48 +00:00
parent f1411c368e
commit 917c777381
4 changed files with 271 additions and 55 deletions

View File

@@ -378,20 +378,8 @@ PlGetsFunc(void)
}
static void
InitStdStream (int sno, SMALLUNSGN flags, YP_File file)
InitFileIO(StreamDesc *s)
{
StreamDesc *s = &Stream[sno];
s->u.file.file = file;
s->status = flags;
s->linepos = 0;
s->linecount = 1;
s->charcount = 0;
s->encoding = DefaultEncoding();
INIT_LOCK(s->streamlock);
unix_upd_stream_info (s);
/* Getting streams to prompt is a mess because we need for cooperation
between readers and writers to the stream :-(
*/
s->stream_gets = PlGetsFunc();
#if USE_SOCKET
if (s->status & Socket_Stream_f) {
@@ -437,20 +425,39 @@ InitStdStream (int sno, SMALLUNSGN flags, YP_File file)
s->stream_getc = PlGetc;
s->stream_gets = PlGetsFunc();
}
switch(sno) {
case 0:
s->u.file.name=Yap_LookupAtom("user_input");
break;
case 1:
s->u.file.name=Yap_LookupAtom("user_output");
break;
default:
s->u.file.name=Yap_LookupAtom("user_error");
break;
}
s->u.file.user_name = MkAtomTerm (s->u.file.name);
}
s->stream_wgetc = get_wchar;
}
static void
InitStdStream (int sno, SMALLUNSGN flags, YP_File file)
{
StreamDesc *s = &Stream[sno];
s->u.file.file = file;
s->status = flags;
s->linepos = 0;
s->linecount = 1;
s->charcount = 0;
s->encoding = DefaultEncoding();
INIT_LOCK(s->streamlock);
unix_upd_stream_info (s);
/* Getting streams to prompt is a mess because we need for cooperation
between readers and writers to the stream :-(
*/
InitFileIO(s);
switch(sno) {
case 0:
s->u.file.name=Yap_LookupAtom("user_input");
break;
case 1:
s->u.file.name=Yap_LookupAtom("user_output");
break;
default:
s->u.file.name=Yap_LookupAtom("user_error");
break;
}
s->u.file.user_name = MkAtomTerm (s->u.file.name);
if (CharConversionTable != NULL)
s->stream_wgetc_for_read = ISOWGetc;
else
@@ -1560,6 +1567,66 @@ PlUnGetc (int sno)
return(post_process_read_char(ch, s));
}
/*
 * Give back 0376 (0xFE) followed by the saved look-ahead character.
 *
 * check_bom() installs this reader after consuming 0xFE and one further
 * byte that turned out not to be 0xFF; that further byte is kept in
 * s->och.  The bytes must reach the reader in their original order, so
 * this call returns 0xFE and leaves s->och untouched for PlUnGetc, which
 * is installed as the next reader.  (The previous version returned s->och
 * first and parked 0xFE in och, i.e. it delivered the two bytes swapped.)
 *
 * NOTE(review): relies on PlUnGetc, defined elsewhere in this file,
 * handing back s->och on the following read -- the original code made
 * the same assumption.
 *
 * sno: index of the stream in Stream[].
 * Returns 0xFE, or whatever the stream's current reader returns when
 * this function is no longer the installed reader.
 */
static int
PlUnGetc376 (int sno)
{
  StreamDesc *s = &Stream[sno];

  /* somebody replaced the reader behind our back: defer to it */
  if (s->stream_getc != PlUnGetc376)
    return (s->stream_getc(sno));
  s->stream_getc = PlUnGetc;
  return 0xFE;
}
/*
 * Give back 0377 (0xFF) followed by the saved look-ahead character.
 *
 * check_bom() installs this reader after consuming 0xFF and one further
 * byte that turned out not to be 0xFE; that further byte is kept in
 * s->och.  To preserve the original byte order this call returns 0xFF
 * and leaves s->och untouched for PlUnGetc, which becomes the next
 * reader.  (The previous version returned s->och first and stored 0xFF
 * in och, delivering the two bytes swapped.)
 *
 * NOTE(review): relies on PlUnGetc, defined elsewhere in this file,
 * handing back s->och on the following read -- the original code made
 * the same assumption.
 *
 * sno: index of the stream in Stream[].
 * Returns 0xFF, or whatever the stream's current reader returns when
 * this function is no longer the installed reader.
 */
static int
PlUnGetc377 (int sno)
{
  StreamDesc *s = &Stream[sno];

  /* somebody replaced the reader behind our back: defer to it */
  if (s->stream_getc != PlUnGetc377)
    return (s->stream_getc(sno));
  s->stream_getc = PlUnGetc;
  return 0xFF;
}
/*
 * Give back 0357 (0xEF) followed by the saved look-ahead character.
 *
 * check_bom() installs this reader after consuming 0xEF and one further
 * byte that turned out not to be 0xBB; that further byte is kept in
 * s->och.  To preserve the original byte order this call returns 0xEF
 * and leaves s->och untouched for PlUnGetc, which becomes the next
 * reader.  (The previous version returned s->och first and stored 0xEF
 * in och, delivering the two bytes swapped.)
 *
 * NOTE(review): relies on PlUnGetc, defined elsewhere in this file,
 * handing back s->och on the following read -- the original code made
 * the same assumption.
 *
 * sno: index of the stream in Stream[].
 * Returns 0xEF, or whatever the stream's current reader returns when
 * this function is no longer the installed reader.
 */
static int
PlUnGetc357 (int sno)
{
  StreamDesc *s = &Stream[sno];

  /* somebody replaced the reader behind our back: defer to it */
  if (s->stream_getc != PlUnGetc357)
    return (s->stream_getc(sno));
  s->stream_getc = PlUnGetc;
  return 0xEF;
}
/* give back 0357+0273+ch */
static int
PlUnGetc357273 (int sno)
{
register StreamDesc *s = &Stream[sno];
Int ch;
if (s->stream_getc != PlUnGetc357273)
return(s->stream_getc(sno));
s->stream_getc = PlUnGetc357;
ch = s->och;
s->och = 0xBB;
return ch;
}
static int
utf8_nof(char ch)
{
@@ -1662,7 +1729,7 @@ get_wchar(int sno)
return wch+(ch<<8);
}
how_many=1;
ch = ch;
wch = ch;
break;
}
}
@@ -1678,8 +1745,6 @@ put_wchar(int sno, wchar_t ch)
{
/* pass the bug if we can */
if (ch < 0x80)
return Stream[sno].stream_putc(sno, ch);
switch (Stream[sno].encoding) {
case ENC_OCTET:
return Stream[sno].stream_putc(sno, ch);
@@ -1712,25 +1777,24 @@ put_wchar(int sno, wchar_t ch)
return ch;
}
case ENC_ISO_UTF8:
{
if (ch < 0x800) {
Stream[sno].stream_putc(sno, 0xC0 | ch>>6);
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
}
else if (ch < 0x10000) {
Stream[sno].stream_putc(sno, 0xE0 | ch>>12);
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
} else if (ch < 0x200000) {
Stream[sno].stream_putc(sno, 0xF0 | ch>>18);
Stream[sno].stream_putc(sno, 0x80 | (ch>>12 & 0x3F));
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
}
else {
/* should never happen */
return -1;
}
if (ch < 0x80) {
return Stream[sno].stream_putc(sno, ch);
} else if (ch < 0x800) {
Stream[sno].stream_putc(sno, 0xC0 | ch>>6);
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
}
else if (ch < 0x10000) {
Stream[sno].stream_putc(sno, 0xE0 | ch>>12);
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
} else if (ch < 0x200000) {
Stream[sno].stream_putc(sno, 0xF0 | ch>>18);
Stream[sno].stream_putc(sno, 0x80 | (ch>>12 & 0x3F));
Stream[sno].stream_putc(sno, 0x80 | (ch>>6 & 0x3F));
return Stream[sno].stream_putc(sno, 0x80 | (ch & 0x3F));
} else {
/* should never happen */
return -1;
}
break;
case ENC_UNICODE_BE:
@@ -1939,6 +2003,103 @@ binary_file(char *file_name)
#endif
}
/*
 * Write the byte-order mark that corresponds to st->encoding on stream
 * sno, using the stream's own stream_putc.
 *
 *   ENC_ISO_UTF8    -> EF BB BF
 *   ENC_UNICODE_BE  -> FE FF
 *   ENC_UNICODE_LE  -> FF FE
 *   anything else   -> nothing is written
 *
 * On success for one of the three encodings above, HAS_BOM_f is set in
 * st->status.  The little-endian case used to fall through to the
 * default branch without recording the flag, so such streams never
 * reported the BOM they had just written; all three encodings now share
 * the same exit path.
 *
 * Returns FALSE as soon as stream_putc reports an error (negative
 * result), TRUE otherwise.
 */
static int
write_bom(int sno, StreamDesc *st)
{
  switch (st->encoding) {
  case ENC_ISO_UTF8:
    /* || short-circuits, so output stops at the first failing byte */
    if (st->stream_putc(sno, 0xEF) < 0 ||
	st->stream_putc(sno, 0xBB) < 0 ||
	st->stream_putc(sno, 0xBF) < 0)
      return FALSE;
    break;
  case ENC_UNICODE_BE:
    if (st->stream_putc(sno, 0xFE) < 0 ||
	st->stream_putc(sno, 0xFF) < 0)
      return FALSE;
    break;
  case ENC_UNICODE_LE:
    if (st->stream_putc(sno, 0xFF) < 0 ||
	st->stream_putc(sno, 0xFE) < 0)
      return FALSE;
    break;
  default:
    /* this encoding has no BOM: nothing written, flag left alone */
    return TRUE;
  }
  st->status |= HAS_BOM_f;
  return TRUE;
}
/*
 * Look for a byte-order mark at the current position of stream sno.
 *
 * Up to three bytes are read through st->stream_getc:
 *   FE FF     -> HAS_BOM_f set, encoding becomes ENC_UNICODE_BE
 *   FF FE     -> HAS_BOM_f set, encoding becomes ENC_UNICODE_LE
 *   EF BB BF  -> HAS_BOM_f set, encoding becomes ENC_ISO_UTF8
 * When the sequence does not complete, the last byte read is saved in
 * st->och, st->stream_getc is switched to the PlUnGetc* reader that
 * matches the prefix already consumed, and st->stream_wgetc is set to
 * get_wchar.
 *
 * Always returns TRUE.
 */
static int
check_bom(int sno, StreamDesc *st)
{
  int first = st->stream_getc(sno);
  int next;

  if (first == 0xFE) {
    next = st->stream_getc(sno);
    if (next == 0xFF) {
      /* FE FF: big-endian mark */
      st->status |= HAS_BOM_f;
      st->encoding = ENC_UNICODE_BE;
    } else {
      st->och = next;
      st->stream_getc = PlUnGetc376;
      st->stream_wgetc = get_wchar;
    }
  } else if (first == 0xFF) {
    next = st->stream_getc(sno);
    if (next == 0xFE) {
      /* FF FE: little-endian mark */
      st->status |= HAS_BOM_f;
      st->encoding = ENC_UNICODE_LE;
    } else {
      st->och = next;
      st->stream_getc = PlUnGetc377;
      st->stream_wgetc = get_wchar;
    }
  } else if (first == 0xEF) {
    next = st->stream_getc(sno);
    if (next != 0xBB) {
      st->och = next;
      st->stream_getc = PlUnGetc357;
      st->stream_wgetc = get_wchar;
    } else {
      next = st->stream_getc(sno);
      if (next == 0xBF) {
	/* EF BB BF: UTF-8 mark */
	st->status |= HAS_BOM_f;
	st->encoding = ENC_ISO_UTF8;
      } else {
	st->och = next;
	st->stream_getc = PlUnGetc357273;
	st->stream_wgetc = get_wchar;
      }
    }
  } else {
    /* not the start of any known mark: un-read the single byte */
    st->och = first;
    st->stream_getc = PlUnGetc;
    st->stream_wgetc = get_wchar;
  }
  return TRUE;
}
static Int
p_open (void)
{ /* '$open'(+File,+Mode,?Stream,-ReturnCode) */
@@ -1950,6 +2111,7 @@ p_open (void)
StreamDesc *st;
Int opts;
UInt encoding;
int needs_bom = FALSE, avoid_bom = FALSE;
file_name = Deref(ARG1);
/* we know file_name is bound */
@@ -1987,14 +2149,14 @@ p_open (void)
st = &Stream[sno];
/* can never happen */
topts = Deref(ARG4);
if (IsVarTerm(topts) || !IsIntTerm(topts))
if (IsVarTerm(topts) || !IsIntegerTerm(topts))
return(FALSE);
opts = IntOfTerm(topts);
opts = IntegerOfTerm(topts);
/* can never happen */
tenc = Deref(ARG5);
if (IsVarTerm(tenc) || !IsIntTerm(tenc))
if (IsVarTerm(tenc) || !IsIntegerTerm(tenc))
return FALSE;
encoding = IntOfTerm(tenc);
encoding = IntegerOfTerm(tenc);
#ifdef _WIN32
if (st->status & Binary_Stream_f) {
strncat(io_mode, "b", 8);
@@ -2096,6 +2258,12 @@ p_open (void)
st->status &= ~Eof_Error_Stream_f;
st->status |= Reset_Eof_Stream_f;
}
if (opts & 128) {
needs_bom = TRUE;
}
if (opts & 256) {
avoid_bom = TRUE;
}
}
st->stream_wgetc = get_wchar;
if (CharConversionTable != NULL)
@@ -2103,6 +2271,15 @@ p_open (void)
else
st->stream_wgetc_for_read = st->stream_wgetc;
t = MkStream (sno);
if (open_mode == AtomWrite ) {
if (!avoid_bom && !write_bom(sno,st))
return FALSE;
} else if (open_mode == AtomRead &&
!avoid_bom &&
(needs_bom || (st->status & Seekable_Stream_f))) {
if (!check_bom(sno, st))
return FALSE;
}
st->status &= ~(Free_Stream_f);
return (Yap_unify (ARG3, t));
}
@@ -3128,6 +3305,15 @@ p_set_output (void)
return (TRUE);
}
static Int
p_has_bom (void)
{ /* '$set_output'(+Stream,-ErrorMessage) */
Int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "has?bom/1");
if (sno < 0)
return (FALSE);
return ((Stream[sno].status & HAS_BOM_f));
}
static Int
p_current_input (void)
{ /* current_input(?Stream) */
@@ -5642,6 +5828,7 @@ Yap_InitIOPreds(void)
Yap_InitCPred ("$past_eof", 1, p_past_eof, SafePredFlag|SyncPredFlag),
Yap_InitCPred ("$peek", 2, p_peek, SafePredFlag|SyncPredFlag),
Yap_InitCPred ("$peek_byte", 2, p_peek_byte, SafePredFlag|SyncPredFlag),
Yap_InitCPred ("$has_bom", 1, p_has_bom, SafePredFlag);
Yap_InitCPred ("current_input", 1, p_current_input, SafePredFlag|SyncPredFlag);
Yap_InitCPred ("current_output", 1, p_current_output, SafePredFlag|SyncPredFlag);
Yap_InitCPred ("prompt", 1, p_setprompt, SafePredFlag|SyncPredFlag);