From 9396252588e897413f2692edbd287b32366f1346 Mon Sep 17 00:00:00 2001 From: Vitor Santos Costa Date: Wed, 27 Oct 2010 14:49:27 +0100 Subject: [PATCH] patches to provide initial support UTF32 encodings(from Paulo Moura) --- C/errors.c | 15 +++ C/iopreds.c | 236 ++++++++++++++++++++++++++++++++++++++++++------ H/Yap.h | 1 + H/iatoms.h | 1 + H/ratoms.h | 1 + H/tatoms.h | 2 + H/yapio.h | 4 +- misc/ATOMS | 1 + pl/messages.yap | 2 + 9 files changed, 235 insertions(+), 28 deletions(-) mode change 100755 => 100644 C/iopreds.c diff --git a/C/errors.c b/C/errors.c index 97a1ceab6..f372b9828 100644 --- a/C/errors.c +++ b/C/errors.c @@ -798,6 +798,21 @@ Yap_Error(yap_error_number type, Term where, char *format,...) serious = TRUE; } break; + case DOMAIN_ERROR_STREAM_ENCODING: + { + int i; + Term ti[2]; + + i = strlen(tmpbuf); + ti[0] = MkAtomTerm(AtomEncoding); + ti[1] = where; + nt[0] = Yap_MkApplTerm(FunctorDomainError, 2, ti); + tp = tmpbuf+i; + psize -= i; + fun = FunctorError; + serious = TRUE; + } + break; case DOMAIN_ERROR_STREAM_POSITION: { int i; diff --git a/C/iopreds.c b/C/iopreds.c old mode 100755 new mode 100644 index b0951497d..e31e0f78a --- a/C/iopreds.c +++ b/C/iopreds.c @@ -780,7 +780,7 @@ MemPutc(int sno, int ch) if (Stream[sno].u.mem_string.error_handler) { Yap_Error_Size = new_max_size*sizeof(char); save_machine_regs(); - _longjmp(*(jmp_buf *)Stream[sno].u.mem_string.error_handler,1); + longjmp(*(jmp_buf *)Stream[sno].u.mem_string.error_handler,1); } else { Yap_Error(OUT_OF_HEAP_ERROR, TermNil, "YAP could not grow heap for writing to string"); } @@ -1736,6 +1736,21 @@ PlUnGetc376 (int sno) return ch; } +/* give back 000+ch */ +static int +PlUnGetc00 (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc00) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc; + ch = s->och; + s->och = 0x00; + return ch; +} + /* give back 0377+ch */ static int PlUnGetc377 (int sno) @@ -1781,6 +1796,66 @@ PlUnGetc357273 (int 
sno) return ch; } +/* give back 000+000+ch */ +static int +PlUnGetc0000 (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc0000) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc00; + ch = s->och; + s->och = 0x00; + return ch; +} + +/* give back 000+000+0376+ch */ +static int +PlUnGetc0000fe (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc0000fe) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc0000; + ch = s->och; + s->och = 0xfe; + return ch; +} + +/* give back 0377+0376+ch */ +static int +PlUnGetc377376 (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc377376) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc377; + ch = s->och; + s->och = 0xFE; + return ch; +} + +/* give back 0377+0376+000+ch */ +static int +PlUnGetc37737600 (int sno) +{ + register StreamDesc *s = &Stream[sno]; + Int ch; + + if (s->stream_getc != PlUnGetc37737600) + return(s->stream_getc(sno)); + s->stream_getc = PlUnGetc377376; + ch = s->och; + s->och = 0x00; + return ch; +} + static int utf8_nof(char ch) { @@ -1886,6 +1961,26 @@ get_wchar(int sno) how_many=1; wch = ch; break; + case ENC_ISO_UTF32_LE: + if (!how_many) { + how_many = 4; + wch = 0; + } + how_many--; + wch += ((unsigned char) (ch & 0xff)) << (how_many*8); + if (how_many == 0) + return wch; + break; + case ENC_ISO_UTF32_BE: + if (!how_many) { + how_many = 4; + wch = 0; + } + how_many--; + wch += ((unsigned char) (ch & 0xff)) << ((3-how_many)*8); + if (how_many == 0) + return wch; + break; } } return EOF; @@ -1992,6 +2087,16 @@ put_wchar(int sno, wchar_t ch) case ENC_UNICODE_LE: Stream[sno].stream_putc(sno, (ch&0xff)); return Stream[sno].stream_putc(sno, (ch>>8)); + case ENC_ISO_UTF32_BE: + Stream[sno].stream_putc(sno, (ch>>24) & 0xff); + Stream[sno].stream_putc(sno, (ch>>16) &0xff); + Stream[sno].stream_putc(sno, (ch>>8) & 0xff); + return Stream[sno].stream_putc(sno, 
ch&0xff); + case ENC_ISO_UTF32_LE: + Stream[sno].stream_putc(sno, ch&0xff); + Stream[sno].stream_putc(sno, (ch>>8) & 0xff); + Stream[sno].stream_putc(sno, (ch>>16) &0xff); + return Stream[sno].stream_putc(sno, (ch>>24) & 0xff); } } return -1; @@ -2219,6 +2324,24 @@ write_bom(int sno, StreamDesc *st) return FALSE; if (st->stream_putc(sno,0xFE)<0) return FALSE; + case ENC_ISO_UTF32_BE: + if (st->stream_putc(sno,0x00)<0) + return FALSE; + if (st->stream_putc(sno,0x00)<0) + return FALSE; + if (st->stream_putc(sno,0xFE)<0) + return FALSE; + if (st->stream_putc(sno,0xFF)<0) + return FALSE; + case ENC_ISO_UTF32_LE: + if (st->stream_putc(sno,0xFF)<0) + return FALSE; + if (st->stream_putc(sno,0xFE)<0) + return FALSE; + if (st->stream_putc(sno,0x00)<0) + return FALSE; + if (st->stream_putc(sno,0x00)<0) + return FALSE; default: return TRUE; } @@ -2240,36 +2363,87 @@ check_bom(int sno, StreamDesc *st) return TRUE; } switch(ch) { + case 0x00: + { + ch = st->stream_getc(sno); + if (ch == EOFCHAR || ch != 0x00) { + st->och = ch; + st->stream_getc = PlUnGetc00; + st->stream_wgetc = get_wchar; + st->stream_gets = DefaultGets; + return TRUE; + } else { + ch = st->stream_getc(sno); + if (ch == EOFCHAR || ch != 0xFE) { + st->och = ch; + st->stream_getc = PlUnGetc0000; + st->stream_wgetc = get_wchar; + st->stream_gets = DefaultGets; + return TRUE; + } else { + if (ch == EOFCHAR || ch != 0xFF) { + st->och = ch; + st->stream_getc = PlUnGetc0000fe; + st->stream_wgetc = get_wchar; + st->stream_gets = DefaultGets; + return TRUE; + } else { + st->status |= HAS_BOM_f; + st->encoding = ENC_ISO_UTF32_BE; + return TRUE; + } + } + } + } case 0xFE: { ch = st->stream_getc(sno); if (ch != 0xFF) { - st->och = ch; - st->stream_getc = PlUnGetc376; - st->stream_wgetc = get_wchar; - st->stream_gets = DefaultGets; - return TRUE; + st->och = ch; + st->stream_getc = PlUnGetc376; + st->stream_wgetc = get_wchar; + st->stream_gets = DefaultGets; + return TRUE; } else { - st->status |= HAS_BOM_f; - st->encoding 
= ENC_UNICODE_BE; - return TRUE; + st->status |= HAS_BOM_f; + st->encoding = ENC_UNICODE_BE; + return TRUE; } } case 0xFF: { ch = st->stream_getc(sno); if (ch != 0xFE) { - st->och = ch; - st->stream_getc = PlUnGetc377; - st->stream_wgetc = get_wchar; - st->stream_gets = DefaultGets; - return TRUE; + st->och = ch; + st->stream_getc = PlUnGetc377; + st->stream_wgetc = get_wchar; + st->stream_gets = DefaultGets; + return TRUE; } else { - st->status |= HAS_BOM_f; - st->encoding = ENC_UNICODE_LE; - return TRUE; + ch = st->stream_getc(sno); + if (ch == EOFCHAR || ch != 0x00) { + st->och = ch; + st->stream_getc = PlUnGetc377376; + st->stream_wgetc = get_wchar; + st->stream_gets = DefaultGets; + } else { + ch = st->stream_getc(sno); + if (ch == EOFCHAR || ch != 0x00) { + st->och = ch; + st->stream_getc = PlUnGetc37737600; + st->stream_wgetc = get_wchar; + st->stream_gets = DefaultGets; + } else { + st->status |= HAS_BOM_f; + st->encoding = ENC_ISO_UTF32_LE; + return TRUE; + } + st->status |= HAS_BOM_f; + st->encoding = ENC_UNICODE_LE; + return TRUE; } } + } case 0xEF: ch = st->stream_getc(sno); if (ch != 0xBB) { @@ -2281,15 +2455,15 @@ check_bom(int sno, StreamDesc *st) } else { ch = st->stream_getc(sno); if (ch != 0xBF) { - st->och = ch; - st->stream_getc = PlUnGetc357273; - st->stream_wgetc = get_wchar; - st->stream_gets = DefaultGets; - return TRUE; + st->och = ch; + st->stream_getc = PlUnGetc357273; + st->stream_wgetc = get_wchar; + st->stream_gets = DefaultGets; + return TRUE; } else { - st->status |= HAS_BOM_f; - st->encoding = ENC_ISO_UTF8; - return TRUE; + st->status |= HAS_BOM_f; + st->encoding = ENC_ISO_UTF8; + return TRUE; } } default: @@ -2628,6 +2802,14 @@ p_open (void) (needs_bom || (st->status & Seekable_Stream_f))) { if (!check_bom(sno, st)) return FALSE; + /* + if (st->encoding == ENC_ISO_UTF32_BE || + st->encoding == ENC_ISO_UTF32_LE) + { + Yap_Error(DOMAIN_ERROR_STREAM_ENCODING, ARG1, "unsupported stream encoding"); + return FALSE; + } + */ } st->status 
&= ~(Free_Stream_f); return (Yap_unify (ARG3, t)); @@ -4354,7 +4536,7 @@ static Int while (TRUE) { CELL *old_H = H; - if (_setjmp(Yap_IOBotch) == 0) { + if (setjmp(Yap_IOBotch) == 0) { v = Yap_VarNames(Yap_VarTable, TermNil); break; } else { @@ -5145,7 +5327,7 @@ format(volatile Term otail, volatile Term oargs, int sno) Stream[sno].u.mem_string.error_handler = (void *)&format_botch; old_pos = Stream[sno].u.mem_string.pos; /* set up an error handler */ - if (_setjmp(format_botch)) { + if (setjmp(format_botch)) { restore_machine_regs(); *H++ = oargs; *H++ = otail; diff --git a/H/Yap.h b/H/Yap.h index 44ec886fa..90dfbac5c 100755 --- a/H/Yap.h +++ b/H/Yap.h @@ -476,6 +476,7 @@ typedef enum DOMAIN_ERROR_SHIFT_COUNT_OVERFLOW, DOMAIN_ERROR_SOURCE_SINK, DOMAIN_ERROR_STREAM, + DOMAIN_ERROR_STREAM_ENCODING, DOMAIN_ERROR_STREAM_OR_ALIAS, DOMAIN_ERROR_STREAM_POSITION, DOMAIN_ERROR_TIMEOUT_SPEC, diff --git a/H/iatoms.h b/H/iatoms.h index 5b4c31bcf..2e628a768 100644 --- a/H/iatoms.h +++ b/H/iatoms.h @@ -84,6 +84,7 @@ AtomEOFBeforeEOT = Yap_LookupAtom("end_of_file_found_before_end_of_term"); AtomEQ = Yap_LookupAtom("="); AtomEmptyAtom = Yap_LookupAtom(""); + AtomEncoding = Yap_LookupAtom("encoding"); AtomEndOfStream = Yap_LookupAtom("$end_of_stream"); AtomEof = Yap_LookupAtom("end_of_file"); AtomEq = Yap_LookupAtom("="); diff --git a/H/ratoms.h b/H/ratoms.h index cc12ef29d..be8c91c3b 100644 --- a/H/ratoms.h +++ b/H/ratoms.h @@ -84,6 +84,7 @@ AtomEOFBeforeEOT = AtomAdjust(AtomEOFBeforeEOT); AtomEQ = AtomAdjust(AtomEQ); AtomEmptyAtom = AtomAdjust(AtomEmptyAtom); + AtomEncoding = AtomAdjust(AtomEncoding); AtomEndOfStream = AtomAdjust(AtomEndOfStream); AtomEof = AtomAdjust(AtomEof); AtomEq = AtomAdjust(AtomEq); diff --git a/H/tatoms.h b/H/tatoms.h index 1c2e21f15..8a272004a 100644 --- a/H/tatoms.h +++ b/H/tatoms.h @@ -166,6 +166,8 @@ #define AtomEQ Yap_heap_regs->AtomEQ_ Atom AtomEmptyAtom_; #define AtomEmptyAtom Yap_heap_regs->AtomEmptyAtom_ + Atom AtomEncoding_; +#define 
AtomEncoding Yap_heap_regs->AtomEncoding_ Atom AtomEndOfStream_; #define AtomEndOfStream Yap_heap_regs->AtomEndOfStream_ Atom AtomEof_; diff --git a/H/yapio.h b/H/yapio.h index 61dac11f4..6cb463f09 100644 --- a/H/yapio.h +++ b/H/yapio.h @@ -256,7 +256,9 @@ typedef enum { ENC_ISO_ANSI = 4, ENC_ISO_UTF8 = 8, ENC_UNICODE_BE = 16, - ENC_UNICODE_LE = 32 + ENC_UNICODE_LE = 32, + ENC_ISO_UTF32_BE = 64, + ENC_ISO_UTF32_LE = 128 } encoding_t; #endif diff --git a/misc/ATOMS b/misc/ATOMS index 081d712ae..d3c3676e7 100644 --- a/misc/ATOMS +++ b/misc/ATOMS @@ -89,6 +89,7 @@ A E N "e" A EOFBeforeEOT N "end_of_file_found_before_end_of_term" A EQ N "=" A EmptyAtom N "" +A Encoding N "encoding" A EndOfStream N "$end_of_stream" A Eof N "end_of_file" A Eq N "=" diff --git a/pl/messages.yap b/pl/messages.yap index 1cc62351d..a3f7dbbec 100644 --- a/pl/messages.yap +++ b/pl/messages.yap @@ -354,6 +354,8 @@ domain_error(stream, Opt) --> !, [ '~w is not a stream' - [Opt] ]. domain_error(stream_or_alias, Opt) --> !, [ '~w is not a stream (or alias)' - [Opt] ]. +domain_error(stream_encoding, Opt) --> !, + [ '~w is not a supported stream encoding' - [Opt] ]. domain_error(stream_position, Opt) --> !, [ '~w is not a stream position' - [Opt] ]. domain_error(stream_property, Opt) --> !,