patches to provide initial support for UTF-32 encodings (from Paulo Moura)

This commit is contained in:
Vitor Santos Costa 2010-10-27 14:49:27 +01:00
parent 95acd408e7
commit 9396252588
9 changed files with 235 additions and 28 deletions

View File

@ -798,6 +798,21 @@ Yap_Error(yap_error_number type, Term where, char *format,...)
serious = TRUE;
}
break;
case DOMAIN_ERROR_STREAM_ENCODING:
{
int i;
Term ti[2];
i = strlen(tmpbuf);
ti[0] = MkAtomTerm(AtomEncoding);
ti[1] = where;
nt[0] = Yap_MkApplTerm(FunctorDomainError, 2, ti);
tp = tmpbuf+i;
psize -= i;
fun = FunctorError;
serious = TRUE;
}
break;
case DOMAIN_ERROR_STREAM_POSITION:
{
int i;

236
C/iopreds.c Executable file → Normal file
View File

@ -780,7 +780,7 @@ MemPutc(int sno, int ch)
if (Stream[sno].u.mem_string.error_handler) {
Yap_Error_Size = new_max_size*sizeof(char);
save_machine_regs();
_longjmp(*(jmp_buf *)Stream[sno].u.mem_string.error_handler,1);
longjmp(*(jmp_buf *)Stream[sno].u.mem_string.error_handler,1);
} else {
Yap_Error(OUT_OF_HEAP_ERROR, TermNil, "YAP could not grow heap for writing to string");
}
@ -1736,6 +1736,21 @@ PlUnGetc376 (int sno)
return ch;
}
/* give back 000+ch
 *
 * Replay getter for stream `sno`: returns the byte currently saved in
 * `och`, stores 0x00 in `och` and installs PlUnGetc as the stream's getter.
 * NOTE(review): the saved byte is returned before the constant left in
 * `och` -- confirm this is the intended replay order.
 */
static int
PlUnGetc00 (int sno)
{
  StreamDesc *st = &Stream[sno];
  Int saved;

  /* Not the installed getter for this stream: defer to whichever one is. */
  if (st->stream_getc != PlUnGetc00) {
    return st->stream_getc(sno);
  }

  saved = st->och;
  st->och = 0x00;
  st->stream_getc = PlUnGetc;
  return saved;
}
/* give back 0377+ch */
static int
PlUnGetc377 (int sno)
@ -1781,6 +1796,66 @@ PlUnGetc357273 (int sno)
return ch;
}
/* give back 000+000+ch
 *
 * Replay getter for stream `sno`: returns the byte currently saved in
 * `och`, stores 0x00 in `och` and hands the stream over to PlUnGetc00.
 * NOTE(review): the saved byte is returned before the constant left in
 * `och` -- confirm this is the intended replay order.
 */
static int
PlUnGetc0000 (int sno)
{
  StreamDesc *st = &Stream[sno];
  Int saved;

  /* Not the installed getter for this stream: defer to whichever one is. */
  if (st->stream_getc != PlUnGetc0000) {
    return st->stream_getc(sno);
  }

  saved = st->och;
  st->och = 0x00;
  st->stream_getc = PlUnGetc00;
  return saved;
}
/* give back 000+000+0376+ch
 *
 * Replay getter for stream `sno`: returns the byte currently saved in
 * `och`, stores 0xfe in `och` and hands the stream over to PlUnGetc0000.
 * NOTE(review): the saved byte is returned before the constant left in
 * `och` -- confirm this is the intended replay order.
 */
static int
PlUnGetc0000fe (int sno)
{
  StreamDesc *st = &Stream[sno];
  Int saved;

  /* Not the installed getter for this stream: defer to whichever one is. */
  if (st->stream_getc != PlUnGetc0000fe) {
    return st->stream_getc(sno);
  }

  saved = st->och;
  st->och = 0xfe;
  st->stream_getc = PlUnGetc0000;
  return saved;
}
/* give back 0377+0376+ch
 *
 * Replay getter for stream `sno`: returns the byte currently saved in
 * `och`, stores 0xFE in `och` and hands the stream over to PlUnGetc377.
 * NOTE(review): the saved byte is returned before the constant left in
 * `och` -- confirm this is the intended replay order.
 */
static int
PlUnGetc377376 (int sno)
{
  StreamDesc *st = &Stream[sno];
  Int saved;

  /* Not the installed getter for this stream: defer to whichever one is. */
  if (st->stream_getc != PlUnGetc377376) {
    return st->stream_getc(sno);
  }

  saved = st->och;
  st->och = 0xFE;
  st->stream_getc = PlUnGetc377;
  return saved;
}
/* give back 0377+0376+000+ch
 *
 * Replay getter for stream `sno`: returns the byte currently saved in
 * `och`, stores 0x00 in `och` and hands the stream over to PlUnGetc377376.
 * NOTE(review): the saved byte is returned before the constant left in
 * `och` -- confirm this is the intended replay order.
 */
static int
PlUnGetc37737600 (int sno)
{
  StreamDesc *st = &Stream[sno];
  Int saved;

  /* Not the installed getter for this stream: defer to whichever one is. */
  if (st->stream_getc != PlUnGetc37737600) {
    return st->stream_getc(sno);
  }

  saved = st->och;
  st->och = 0x00;
  st->stream_getc = PlUnGetc377376;
  return saved;
}
static int
utf8_nof(char ch)
{
@ -1886,6 +1961,26 @@ get_wchar(int sno)
how_many=1;
wch = ch;
break;
case ENC_ISO_UTF32_LE:
if (!how_many) {
how_many = 4;
wch = 0;
}
how_many--;
wch += ((unsigned char) (ch & 0xff)) << (how_many*8);
if (how_many == 0)
return wch;
break;
case ENC_ISO_UTF32_BE:
if (!how_many) {
how_many = 4;
wch = 0;
}
how_many--;
wch += ((unsigned char) (ch & 0xff)) << ((3-how_many)*8);
if (how_many == 0)
return wch;
break;
}
}
return EOF;
@ -1992,6 +2087,16 @@ put_wchar(int sno, wchar_t ch)
case ENC_UNICODE_LE:
Stream[sno].stream_putc(sno, (ch&0xff));
return Stream[sno].stream_putc(sno, (ch>>8));
case ENC_ISO_UTF32_BE:
Stream[sno].stream_putc(sno, (ch>>24) & 0xff);
Stream[sno].stream_putc(sno, (ch>>16) &0xff);
Stream[sno].stream_putc(sno, (ch>>8) & 0xff);
return Stream[sno].stream_putc(sno, ch&0xff);
case ENC_ISO_UTF32_LE:
Stream[sno].stream_putc(sno, ch&0xff);
Stream[sno].stream_putc(sno, (ch>>8) & 0xff);
Stream[sno].stream_putc(sno, (ch>>16) &0xff);
return Stream[sno].stream_putc(sno, (ch>>24) & 0xff);
}
}
return -1;
@ -2219,6 +2324,24 @@ write_bom(int sno, StreamDesc *st)
return FALSE;
if (st->stream_putc(sno,0xFE)<0)
return FALSE;
case ENC_ISO_UTF32_BE:
if (st->stream_putc(sno,0x00)<0)
return FALSE;
if (st->stream_putc(sno,0x00)<0)
return FALSE;
if (st->stream_putc(sno,0xFE)<0)
return FALSE;
if (st->stream_putc(sno,0xFF)<0)
return FALSE;
case ENC_ISO_UTF32_LE:
if (st->stream_putc(sno,0xFF)<0)
return FALSE;
if (st->stream_putc(sno,0xFE)<0)
return FALSE;
if (st->stream_putc(sno,0x00)<0)
return FALSE;
if (st->stream_putc(sno,0x00)<0)
return FALSE;
default:
return TRUE;
}
@ -2240,36 +2363,87 @@ check_bom(int sno, StreamDesc *st)
return TRUE;
}
switch(ch) {
case 0x00:
{
ch = st->stream_getc(sno);
if (ch == EOFCHAR || ch != 0x00) {
st->och = ch;
st->stream_getc = PlUnGetc00;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
} else {
ch = st->stream_getc(sno);
if (ch == EOFCHAR || ch != 0xFE) {
st->och = ch;
st->stream_getc = PlUnGetc0000;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
} else {
if (ch == EOFCHAR || ch != 0xFF) {
st->och = ch;
st->stream_getc = PlUnGetc0000fe;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
} else {
st->status |= HAS_BOM_f;
st->encoding = ENC_ISO_UTF32_BE;
return TRUE;
}
}
}
}
case 0xFE:
{
ch = st->stream_getc(sno);
if (ch != 0xFF) {
st->och = ch;
st->stream_getc = PlUnGetc376;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
st->och = ch;
st->stream_getc = PlUnGetc376;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
} else {
st->status |= HAS_BOM_f;
st->encoding = ENC_UNICODE_BE;
return TRUE;
st->status |= HAS_BOM_f;
st->encoding = ENC_UNICODE_BE;
return TRUE;
}
}
case 0xFF:
{
ch = st->stream_getc(sno);
if (ch != 0xFE) {
st->och = ch;
st->stream_getc = PlUnGetc377;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
st->och = ch;
st->stream_getc = PlUnGetc377;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
} else {
st->status |= HAS_BOM_f;
st->encoding = ENC_UNICODE_LE;
return TRUE;
ch = st->stream_getc(sno);
if (ch == EOFCHAR || ch != 0x00) {
st->och = ch;
st->stream_getc = PlUnGetc377376;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
} else {
ch = st->stream_getc(sno);
if (ch == EOFCHAR || ch != 0x00) {
st->och = ch;
st->stream_getc = PlUnGetc37737600;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
} else {
st->status |= HAS_BOM_f;
st->encoding = ENC_ISO_UTF32_LE;
return TRUE;
}
st->status |= HAS_BOM_f;
st->encoding = ENC_UNICODE_LE;
return TRUE;
}
}
}
case 0xEF:
ch = st->stream_getc(sno);
if (ch != 0xBB) {
@ -2281,15 +2455,15 @@ check_bom(int sno, StreamDesc *st)
} else {
ch = st->stream_getc(sno);
if (ch != 0xBF) {
st->och = ch;
st->stream_getc = PlUnGetc357273;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
st->och = ch;
st->stream_getc = PlUnGetc357273;
st->stream_wgetc = get_wchar;
st->stream_gets = DefaultGets;
return TRUE;
} else {
st->status |= HAS_BOM_f;
st->encoding = ENC_ISO_UTF8;
return TRUE;
st->status |= HAS_BOM_f;
st->encoding = ENC_ISO_UTF8;
return TRUE;
}
}
default:
@ -2628,6 +2802,14 @@ p_open (void)
(needs_bom || (st->status & Seekable_Stream_f))) {
if (!check_bom(sno, st))
return FALSE;
/*
if (st->encoding == ENC_ISO_UTF32_BE ||
st->encoding == ENC_ISO_UTF32_LE)
{
Yap_Error(DOMAIN_ERROR_STREAM_ENCODING, ARG1, "unsupported stream encoding");
return FALSE;
}
*/
}
st->status &= ~(Free_Stream_f);
return (Yap_unify (ARG3, t));
@ -4354,7 +4536,7 @@ static Int
while (TRUE) {
CELL *old_H = H;
if (_setjmp(Yap_IOBotch) == 0) {
if (setjmp(Yap_IOBotch) == 0) {
v = Yap_VarNames(Yap_VarTable, TermNil);
break;
} else {
@ -5145,7 +5327,7 @@ format(volatile Term otail, volatile Term oargs, int sno)
Stream[sno].u.mem_string.error_handler = (void *)&format_botch;
old_pos = Stream[sno].u.mem_string.pos;
/* set up an error handler */
if (_setjmp(format_botch)) {
if (setjmp(format_botch)) {
restore_machine_regs();
*H++ = oargs;
*H++ = otail;

View File

@ -476,6 +476,7 @@ typedef enum
DOMAIN_ERROR_SHIFT_COUNT_OVERFLOW,
DOMAIN_ERROR_SOURCE_SINK,
DOMAIN_ERROR_STREAM,
DOMAIN_ERROR_STREAM_ENCODING,
DOMAIN_ERROR_STREAM_OR_ALIAS,
DOMAIN_ERROR_STREAM_POSITION,
DOMAIN_ERROR_TIMEOUT_SPEC,

View File

@ -84,6 +84,7 @@
AtomEOFBeforeEOT = Yap_LookupAtom("end_of_file_found_before_end_of_term");
AtomEQ = Yap_LookupAtom("=");
AtomEmptyAtom = Yap_LookupAtom("");
AtomEncoding = Yap_LookupAtom("encoding");
AtomEndOfStream = Yap_LookupAtom("$end_of_stream");
AtomEof = Yap_LookupAtom("end_of_file");
AtomEq = Yap_LookupAtom("=");

View File

@ -84,6 +84,7 @@
AtomEOFBeforeEOT = AtomAdjust(AtomEOFBeforeEOT);
AtomEQ = AtomAdjust(AtomEQ);
AtomEmptyAtom = AtomAdjust(AtomEmptyAtom);
AtomEncoding = AtomAdjust(AtomEncoding);
AtomEndOfStream = AtomAdjust(AtomEndOfStream);
AtomEof = AtomAdjust(AtomEof);
AtomEq = AtomAdjust(AtomEq);

View File

@ -166,6 +166,8 @@
#define AtomEQ Yap_heap_regs->AtomEQ_
Atom AtomEmptyAtom_;
#define AtomEmptyAtom Yap_heap_regs->AtomEmptyAtom_
Atom AtomEncoding_;
#define AtomEncoding Yap_heap_regs->AtomEncoding_
Atom AtomEndOfStream_;
#define AtomEndOfStream Yap_heap_regs->AtomEndOfStream_
Atom AtomEof_;

View File

@ -256,7 +256,9 @@ typedef enum {
ENC_ISO_ANSI = 4,
ENC_ISO_UTF8 = 8,
ENC_UNICODE_BE = 16,
ENC_UNICODE_LE = 32
ENC_UNICODE_LE = 32,
ENC_ISO_UTF32_BE = 64,
ENC_ISO_UTF32_LE = 128
} encoding_t;
#endif

View File

@ -89,6 +89,7 @@ A E N "e"
A EOFBeforeEOT N "end_of_file_found_before_end_of_term"
A EQ N "="
A EmptyAtom N ""
A Encoding N "encoding"
A EndOfStream N "$end_of_stream"
A Eof N "end_of_file"
A Eq N "="

View File

@ -354,6 +354,8 @@ domain_error(stream, Opt) --> !,
[ '~w is not a stream' - [Opt] ].
domain_error(stream_or_alias, Opt) --> !,
[ '~w is not a stream (or alias)' - [Opt] ].
domain_error(stream_encoding, Opt) --> !,
[ '~w is not a supported stream encoding' - [Opt] ].
domain_error(stream_position, Opt) --> !,
[ '~w is not a stream position' - [Opt] ].
domain_error(stream_property, Opt) --> !,