Patches to provide initial support for UTF-32 encodings (from Paulo Moura)
This commit is contained in:
parent
95acd408e7
commit
9396252588
15
C/errors.c
15
C/errors.c
@ -798,6 +798,21 @@ Yap_Error(yap_error_number type, Term where, char *format,...)
|
||||
serious = TRUE;
|
||||
}
|
||||
break;
|
||||
case DOMAIN_ERROR_STREAM_ENCODING:
|
||||
{
|
||||
int i;
|
||||
Term ti[2];
|
||||
|
||||
i = strlen(tmpbuf);
|
||||
ti[0] = MkAtomTerm(AtomEncoding);
|
||||
ti[1] = where;
|
||||
nt[0] = Yap_MkApplTerm(FunctorDomainError, 2, ti);
|
||||
tp = tmpbuf+i;
|
||||
psize -= i;
|
||||
fun = FunctorError;
|
||||
serious = TRUE;
|
||||
}
|
||||
break;
|
||||
case DOMAIN_ERROR_STREAM_POSITION:
|
||||
{
|
||||
int i;
|
||||
|
236
C/iopreds.c
Executable file → Normal file
236
C/iopreds.c
Executable file → Normal file
@ -780,7 +780,7 @@ MemPutc(int sno, int ch)
|
||||
if (Stream[sno].u.mem_string.error_handler) {
|
||||
Yap_Error_Size = new_max_size*sizeof(char);
|
||||
save_machine_regs();
|
||||
_longjmp(*(jmp_buf *)Stream[sno].u.mem_string.error_handler,1);
|
||||
longjmp(*(jmp_buf *)Stream[sno].u.mem_string.error_handler,1);
|
||||
} else {
|
||||
Yap_Error(OUT_OF_HEAP_ERROR, TermNil, "YAP could not grow heap for writing to string");
|
||||
}
|
||||
@ -1736,6 +1736,21 @@ PlUnGetc376 (int sno)
|
||||
return ch;
|
||||
}
|
||||
|
||||
/*
 * give back 000+ch
 *
 * Getc handler for stream `sno`. If the stream's stream_getc is no longer
 * this function, the call is forwarded to whatever handler is installed.
 * Otherwise PlUnGetc becomes the next handler, 0x00 is stored in och, and
 * the character that was held in och is returned.
 * NOTE(review): the order in which the saved character and the 0x00 byte
 * reach the reader depends on PlUnGetc, which is not visible here -- confirm.
 */
|
||||
static int
|
||||
PlUnGetc00 (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc00) /* a different handler is installed: delegate */
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc; /* next read goes through PlUnGetc */
|
||||
ch = s->och; /* character saved by whoever installed this handler */
|
||||
s->och = 0x00; /* leave the 0x00 byte behind in och */
|
||||
return ch;
|
||||
|
||||
}
|
||||
|
||||
/* give back 0377+ch */
|
||||
static int
|
||||
PlUnGetc377 (int sno)
|
||||
@ -1781,6 +1796,66 @@ PlUnGetc357273 (int sno)
|
||||
return ch;
|
||||
}
|
||||
|
||||
/*
 * give back 000+000+ch
 *
 * Getc handler for stream `sno`. If the stream's stream_getc is no longer
 * this function, the call is forwarded to whatever handler is installed.
 * Otherwise PlUnGetc00 becomes the next handler, 0x00 is stored in och, and
 * the character that was held in och is returned.
 * NOTE(review): the resulting delivery order of the bytes depends on the
 * rest of the handler chain (PlUnGetc is not visible here) -- confirm.
 */
|
||||
static int
|
||||
PlUnGetc0000 (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc0000) /* a different handler is installed: delegate */
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc00; /* chain to the handler for the remaining 0x00 */
|
||||
ch = s->och; /* character saved by whoever installed this handler */
|
||||
s->och = 0x00; /* leave a 0x00 byte behind in och */
|
||||
return ch;
|
||||
|
||||
}
|
||||
|
||||
/*
 * give back 000+000+0376+ch
 *
 * Getc handler for stream `sno`. If the stream's stream_getc is no longer
 * this function, the call is forwarded to whatever handler is installed.
 * Otherwise PlUnGetc0000 becomes the next handler, 0xfe is stored in och,
 * and the character that was held in och is returned.
 * NOTE(review): the resulting delivery order of the bytes depends on the
 * rest of the handler chain (PlUnGetc is not visible here) -- confirm.
 */
|
||||
static int
|
||||
PlUnGetc0000fe (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc0000fe) /* a different handler is installed: delegate */
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc0000; /* chain to the handler for the two 0x00 bytes */
|
||||
ch = s->och; /* character saved by whoever installed this handler */
|
||||
s->och = 0xfe; /* leave the 0xfe byte behind in och */
|
||||
return ch;
|
||||
|
||||
}
|
||||
|
||||
/*
 * give back 0377+0376+ch
 *
 * Getc handler for stream `sno`. If the stream's stream_getc is no longer
 * this function, the call is forwarded to whatever handler is installed.
 * Otherwise PlUnGetc377 becomes the next handler, 0xFE is stored in och,
 * and the character that was held in och is returned.
 * NOTE(review): the resulting delivery order of the bytes depends on the
 * rest of the handler chain (PlUnGetc377 is only partly visible here) --
 * confirm.
 */
|
||||
static int
|
||||
PlUnGetc377376 (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc377376) /* a different handler is installed: delegate */
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc377; /* chain to the handler for the 0377 byte */
|
||||
ch = s->och; /* character saved by whoever installed this handler */
|
||||
s->och = 0xFE; /* leave the 0376 byte behind in och */
|
||||
return ch;
|
||||
|
||||
}
|
||||
|
||||
/*
 * give back 0377+0376+000+ch
 *
 * Getc handler for stream `sno`. If the stream's stream_getc is no longer
 * this function, the call is forwarded to whatever handler is installed.
 * Otherwise PlUnGetc377376 becomes the next handler, 0x00 is stored in och,
 * and the character that was held in och is returned.
 * NOTE(review): the resulting delivery order of the bytes depends on the
 * rest of the handler chain (PlUnGetc377 is only partly visible here) --
 * confirm.
 */
|
||||
static int
|
||||
PlUnGetc37737600 (int sno)
|
||||
{
|
||||
register StreamDesc *s = &Stream[sno];
|
||||
Int ch;
|
||||
|
||||
if (s->stream_getc != PlUnGetc37737600) /* a different handler is installed: delegate */
|
||||
return(s->stream_getc(sno));
|
||||
s->stream_getc = PlUnGetc377376; /* chain to the handler for 0377+0376 */
|
||||
ch = s->och; /* character saved by whoever installed this handler */
|
||||
s->och = 0x00; /* leave the 0x00 byte behind in och */
|
||||
return ch;
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_nof(char ch)
|
||||
{
|
||||
@ -1886,6 +1961,26 @@ get_wchar(int sno)
|
||||
how_many=1;
|
||||
wch = ch;
|
||||
break;
|
||||
case ENC_ISO_UTF32_LE:
|
||||
if (!how_many) {
|
||||
how_many = 4;
|
||||
wch = 0;
|
||||
}
|
||||
how_many--;
|
||||
wch += ((unsigned char) (ch & 0xff)) << (how_many*8);
|
||||
if (how_many == 0)
|
||||
return wch;
|
||||
break;
|
||||
case ENC_ISO_UTF32_BE:
|
||||
if (!how_many) {
|
||||
how_many = 4;
|
||||
wch = 0;
|
||||
}
|
||||
how_many--;
|
||||
wch += ((unsigned char) (ch & 0xff)) << ((3-how_many)*8);
|
||||
if (how_many == 0)
|
||||
return wch;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return EOF;
|
||||
@ -1992,6 +2087,16 @@ put_wchar(int sno, wchar_t ch)
|
||||
case ENC_UNICODE_LE:
|
||||
Stream[sno].stream_putc(sno, (ch&0xff));
|
||||
return Stream[sno].stream_putc(sno, (ch>>8));
|
||||
case ENC_ISO_UTF32_BE:
|
||||
Stream[sno].stream_putc(sno, (ch>>24) & 0xff);
|
||||
Stream[sno].stream_putc(sno, (ch>>16) &0xff);
|
||||
Stream[sno].stream_putc(sno, (ch>>8) & 0xff);
|
||||
return Stream[sno].stream_putc(sno, ch&0xff);
|
||||
case ENC_ISO_UTF32_LE:
|
||||
Stream[sno].stream_putc(sno, ch&0xff);
|
||||
Stream[sno].stream_putc(sno, (ch>>8) & 0xff);
|
||||
Stream[sno].stream_putc(sno, (ch>>16) &0xff);
|
||||
return Stream[sno].stream_putc(sno, (ch>>24) & 0xff);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
@ -2219,6 +2324,24 @@ write_bom(int sno, StreamDesc *st)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0xFE)<0)
|
||||
return FALSE;
|
||||
case ENC_ISO_UTF32_BE:
|
||||
if (st->stream_putc(sno,0x00)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0x00)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0xFE)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0xFF)<0)
|
||||
return FALSE;
|
||||
case ENC_ISO_UTF32_LE:
|
||||
if (st->stream_putc(sno,0xFF)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0xFE)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0x00)<0)
|
||||
return FALSE;
|
||||
if (st->stream_putc(sno,0x00)<0)
|
||||
return FALSE;
|
||||
default:
|
||||
return TRUE;
|
||||
}
|
||||
@ -2240,36 +2363,87 @@ check_bom(int sno, StreamDesc *st)
|
||||
return TRUE;
|
||||
}
|
||||
switch(ch) {
|
||||
case 0x00:
|
||||
{
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch == EOFCHAR || ch != 0x00) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc00;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
} else {
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch == EOFCHAR || ch != 0xFE) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc0000;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
} else {
|
||||
if (ch == EOFCHAR || ch != 0xFF) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc0000fe;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
} else {
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_ISO_UTF32_BE;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case 0xFE:
|
||||
{
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch != 0xFF) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc376;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc376;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
} else {
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_UNICODE_BE;
|
||||
return TRUE;
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_UNICODE_BE;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
case 0xFF:
|
||||
{
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch != 0xFE) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc377;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc377;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
} else {
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_UNICODE_LE;
|
||||
return TRUE;
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch == EOFCHAR || ch != 0x00) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc377376;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
} else {
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch == EOFCHAR || ch != 0x00) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc37737600;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
} else {
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_ISO_UTF32_LE;
|
||||
return TRUE;
|
||||
}
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_UNICODE_LE;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
case 0xEF:
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch != 0xBB) {
|
||||
@ -2281,15 +2455,15 @@ check_bom(int sno, StreamDesc *st)
|
||||
} else {
|
||||
ch = st->stream_getc(sno);
|
||||
if (ch != 0xBF) {
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc357273;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
st->och = ch;
|
||||
st->stream_getc = PlUnGetc357273;
|
||||
st->stream_wgetc = get_wchar;
|
||||
st->stream_gets = DefaultGets;
|
||||
return TRUE;
|
||||
} else {
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_ISO_UTF8;
|
||||
return TRUE;
|
||||
st->status |= HAS_BOM_f;
|
||||
st->encoding = ENC_ISO_UTF8;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
default:
|
||||
@ -2628,6 +2802,14 @@ p_open (void)
|
||||
(needs_bom || (st->status & Seekable_Stream_f))) {
|
||||
if (!check_bom(sno, st))
|
||||
return FALSE;
|
||||
/*
|
||||
if (st->encoding == ENC_ISO_UTF32_BE ||
|
||||
st->encoding == ENC_ISO_UTF32_LE)
|
||||
{
|
||||
Yap_Error(DOMAIN_ERROR_STREAM_ENCODING, ARG1, "unsupported stream encoding");
|
||||
return FALSE;
|
||||
}
|
||||
*/
|
||||
}
|
||||
st->status &= ~(Free_Stream_f);
|
||||
return (Yap_unify (ARG3, t));
|
||||
@ -4354,7 +4536,7 @@ static Int
|
||||
while (TRUE) {
|
||||
CELL *old_H = H;
|
||||
|
||||
if (_setjmp(Yap_IOBotch) == 0) {
|
||||
if (setjmp(Yap_IOBotch) == 0) {
|
||||
v = Yap_VarNames(Yap_VarTable, TermNil);
|
||||
break;
|
||||
} else {
|
||||
@ -5145,7 +5327,7 @@ format(volatile Term otail, volatile Term oargs, int sno)
|
||||
Stream[sno].u.mem_string.error_handler = (void *)&format_botch;
|
||||
old_pos = Stream[sno].u.mem_string.pos;
|
||||
/* set up an error handler */
|
||||
if (_setjmp(format_botch)) {
|
||||
if (setjmp(format_botch)) {
|
||||
restore_machine_regs();
|
||||
*H++ = oargs;
|
||||
*H++ = otail;
|
||||
|
1
H/Yap.h
1
H/Yap.h
@ -476,6 +476,7 @@ typedef enum
|
||||
DOMAIN_ERROR_SHIFT_COUNT_OVERFLOW,
|
||||
DOMAIN_ERROR_SOURCE_SINK,
|
||||
DOMAIN_ERROR_STREAM,
|
||||
DOMAIN_ERROR_STREAM_ENCODING,
|
||||
DOMAIN_ERROR_STREAM_OR_ALIAS,
|
||||
DOMAIN_ERROR_STREAM_POSITION,
|
||||
DOMAIN_ERROR_TIMEOUT_SPEC,
|
||||
|
@ -84,6 +84,7 @@
|
||||
AtomEOFBeforeEOT = Yap_LookupAtom("end_of_file_found_before_end_of_term");
|
||||
AtomEQ = Yap_LookupAtom("=");
|
||||
AtomEmptyAtom = Yap_LookupAtom("");
|
||||
AtomEncoding = Yap_LookupAtom("encoding");
|
||||
AtomEndOfStream = Yap_LookupAtom("$end_of_stream");
|
||||
AtomEof = Yap_LookupAtom("end_of_file");
|
||||
AtomEq = Yap_LookupAtom("=");
|
||||
|
@ -84,6 +84,7 @@
|
||||
AtomEOFBeforeEOT = AtomAdjust(AtomEOFBeforeEOT);
|
||||
AtomEQ = AtomAdjust(AtomEQ);
|
||||
AtomEmptyAtom = AtomAdjust(AtomEmptyAtom);
|
||||
AtomEncoding = AtomAdjust(AtomEncoding);
|
||||
AtomEndOfStream = AtomAdjust(AtomEndOfStream);
|
||||
AtomEof = AtomAdjust(AtomEof);
|
||||
AtomEq = AtomAdjust(AtomEq);
|
||||
|
@ -166,6 +166,8 @@
|
||||
#define AtomEQ Yap_heap_regs->AtomEQ_
|
||||
Atom AtomEmptyAtom_;
|
||||
#define AtomEmptyAtom Yap_heap_regs->AtomEmptyAtom_
|
||||
Atom AtomEncoding_;
|
||||
#define AtomEncoding Yap_heap_regs->AtomEncoding_
|
||||
Atom AtomEndOfStream_;
|
||||
#define AtomEndOfStream Yap_heap_regs->AtomEndOfStream_
|
||||
Atom AtomEof_;
|
||||
|
@ -256,7 +256,9 @@ typedef enum {
|
||||
ENC_ISO_ANSI = 4,
|
||||
ENC_ISO_UTF8 = 8,
|
||||
ENC_UNICODE_BE = 16,
|
||||
ENC_UNICODE_LE = 32
|
||||
ENC_UNICODE_LE = 32,
|
||||
ENC_ISO_UTF32_BE = 64,
|
||||
ENC_ISO_UTF32_LE = 128
|
||||
} encoding_t;
|
||||
#endif
|
||||
|
||||
|
@ -89,6 +89,7 @@ A E N "e"
|
||||
A EOFBeforeEOT N "end_of_file_found_before_end_of_term"
|
||||
A EQ N "="
|
||||
A EmptyAtom N ""
|
||||
A Encoding N "encoding"
|
||||
A EndOfStream N "$end_of_stream"
|
||||
A Eof N "end_of_file"
|
||||
A Eq N "="
|
||||
|
@ -354,6 +354,8 @@ domain_error(stream, Opt) --> !,
|
||||
[ '~w is not a stream' - [Opt] ].
|
||||
domain_error(stream_or_alias, Opt) --> !,
|
||||
[ '~w is not a stream (or alias)' - [Opt] ].
|
||||
domain_error(stream_encoding, Opt) --> !,
|
||||
[ '~w is not a supported stream encoding' - [Opt] ].
|
||||
domain_error(stream_position, Opt) --> !,
|
||||
[ '~w is not a stream position' - [Opt] ].
|
||||
domain_error(stream_property, Opt) --> !,
|
||||
|
Reference in New Issue
Block a user