look for errors
This commit is contained in:
parent
fccf3d9f6f
commit
cad55bcf71
77
os/getw.h
77
os/getw.h
@ -1,4 +1,19 @@
|
|||||||
|
|
||||||
|
/// True when the two bits of ch selected by mask 0xc0 equal 0x80,
/// i.e. ch has the bit pattern 10xxxxxx of a UTF-8 continuation byte.
#define utf_cont(ch) ((((ch) & 0xc0) ^ 0x80) == 0)
|
||||||
|
|
||||||
|
/// Report a decoding failure through the normal post-processing path.
/// The value 1 is forwarded in place of a decoded character, together
/// with v and st unchanged. The offending byte ch is accepted so call
/// sites read naturally, but it is not used by the expansion.
/// NOTE(review): v appears to be the number of bytes consumed so far,
/// judging from the other post_process_read_wchar calls - confirm.
#define encoding_error(ch, v, st) post_process_read_wchar(1, (v), (st))
|
||||||
|
|
||||||
|
/// Handle a -1 result from a read on a stream that has a stdio FILE.
///
/// If the FILE's error indicator is set, the indicator is cleared and 1
/// is returned; otherwise the result of post_process_weof(st) is
/// returned unchanged.
static int post_process_f_weof(StreamDesc *st)
{
  if (!ferror(st->file)) {
    // no error indicator set: use the regular end-of-file handling
    return post_process_weof(st);
  }
  // error indicator set: reset it and report 1
  clearerr(st->file);
  return 1;
}
|
||||||
|
|
||||||
/// compose a wide char from a sequence of getchars
|
/// compose a wide char from a sequence of getchars
|
||||||
/// this is a slow lane routine, called if no specialised code
|
/// this is a slow lane routine, called if no specialised code
|
||||||
/// is available.
|
/// is available.
|
||||||
@ -7,20 +22,20 @@ extern int get_wchar(int sno) {
|
|||||||
int ch = st->stream_getc(sno);
|
int ch = st->stream_getc(sno);
|
||||||
|
|
||||||
if (ch == -1)
|
if (ch == -1)
|
||||||
return post_process_weof(st);
|
return post_process_f_weof(st);
|
||||||
|
|
||||||
switch (st->encoding) {
|
switch (st->encoding) {
|
||||||
case ENC_OCTET:
|
case ENC_OCTET:
|
||||||
// no error detection, all characters are ok.
|
// no error detection, all characters are ok.
|
||||||
case ENC_ISO_LATIN1:
|
case ENC_ISO_LATIN1:
|
||||||
return post_process_read_wchar(ch, 1, st);
|
return post_process_read_wchar(ch, 1, st);
|
||||||
// 7 bits code, anything above is bad news
|
// 7 bits code, anything above is bad news
|
||||||
case ENC_ISO_ASCII:
|
case ENC_ISO_ASCII:
|
||||||
if (ch & 0x80) {
|
if (ch & 0x80) {
|
||||||
/* error */
|
/* error */
|
||||||
}
|
}
|
||||||
return post_process_read_wchar(ch, 1, st);
|
return post_process_read_wchar(ch, 1, st);
|
||||||
// default OS encoding, depends on locale.
|
// default OS encoding, depends on locale.
|
||||||
case ENC_ISO_ANSI: {
|
case ENC_ISO_ANSI: {
|
||||||
char buf[8];
|
char buf[8];
|
||||||
int out;
|
int out;
|
||||||
@ -38,7 +53,7 @@ extern int get_wchar(int sno) {
|
|||||||
}
|
}
|
||||||
return post_process_read_wchar(wch, n, st);
|
return post_process_read_wchar(wch, n, st);
|
||||||
}
|
}
|
||||||
// UTF-8 works on 8 bits.
|
// UTF-8 works on 8 bits.
|
||||||
case ENC_ISO_UTF8: {
|
case ENC_ISO_UTF8: {
|
||||||
int wch;
|
int wch;
|
||||||
unsigned char buf[8];
|
unsigned char buf[8];
|
||||||
@ -46,28 +61,36 @@ extern int get_wchar(int sno) {
|
|||||||
if (ch < 0x80) {
|
if (ch < 0x80) {
|
||||||
return post_process_read_wchar(ch, 1, st);
|
return post_process_read_wchar(ch, 1, st);
|
||||||
}
|
}
|
||||||
// if ((ch - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
|
if ((ch - 0xc2) > (0xf4-0xc2)) {
|
||||||
|
return encoding_error(ch, 1, st);
|
||||||
|
}
|
||||||
if (ch < 0xe0) { // 2-byte sequence
|
if (ch < 0xe0) { // 2-byte sequence
|
||||||
// Must have valid continuation character
|
// Must have valid continuation character
|
||||||
int c1 = buf[0] = st->stream_getc(sno);
|
int c1 = buf[0] = st->stream_getc(sno);
|
||||||
if (c1 == -1)
|
if (c1 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
// if (!utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
|
if (!utf_cont(c1)) {
|
||||||
|
return encoding_error(ch, 2, st);
|
||||||
|
}
|
||||||
wch = ((ch & 0x1f) << 6) | (c1 & 0x3f);
|
wch = ((ch & 0x1f) << 6) | (c1 & 0x3f);
|
||||||
return post_process_read_wchar(wch, 2, st);
|
return post_process_read_wchar(wch, 2, st);
|
||||||
}
|
}
|
||||||
if (ch < 0xf0) { // 3-byte sequence
|
if (ch < 0xf0) { // 3-byte sequence
|
||||||
// if ((str + 1 >= end) || !utf_cont(*str) || !utf_cont(str[1]))
|
|
||||||
// return UTF8PROC_ERROR_INVALIDUTF8;
|
|
||||||
// Check for surrogate chars
|
|
||||||
// if (ch == 0xed && *str > 0x9f)
|
|
||||||
// return UTF8PROC_ERROR_INVALIDUTF8;
|
|
||||||
int c1 = st->stream_getc(sno);
|
int c1 = st->stream_getc(sno);
|
||||||
if (c1 == -1)
|
if (c1 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
|
// return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
|
if (ch == 0xed && c1 > 0x9f) {
|
||||||
|
return encoding_error(ch, 1, st);
|
||||||
|
}
|
||||||
int c2 = st->stream_getc(sno);
|
int c2 = st->stream_getc(sno);
|
||||||
if (c2 == -1)
|
if (c2 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
|
if ( !utf_cont(c1) || !utf_cont(c2)) {
|
||||||
|
return encoding_error(ch, 2, st);
|
||||||
|
// Check for surrogate chars
|
||||||
|
|
||||||
|
}
|
||||||
wch = ((ch & 0xf) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f);
|
wch = ((ch & 0xf) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f);
|
||||||
return post_process_read_wchar(wch, 3, st);
|
return post_process_read_wchar(wch, 3, st);
|
||||||
} else {
|
} else {
|
||||||
@ -80,8 +103,11 @@ extern int get_wchar(int sno) {
|
|||||||
int c3 = st->stream_getc(sno);
|
int c3 = st->stream_getc(sno);
|
||||||
if (c3 == -1)
|
if (c3 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
|
if ( !utf_cont(c1) || !utf_cont(c2) || !utf_cont(c3)) {
|
||||||
|
return encoding_error(ch, 3, st);
|
||||||
|
}
|
||||||
wch = ((ch & 7) << 18) | ((c1 & 0x3f) << 12) | ((c2 & 0x3f) << 6) |
|
wch = ((ch & 7) << 18) | ((c1 & 0x3f) << 12) | ((c2 & 0x3f) << 6) |
|
||||||
(c3 & 0x3f);
|
(c3 & 0x3f);
|
||||||
return post_process_read_wchar(wch, 4, st);
|
return post_process_read_wchar(wch, 4, st);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -208,7 +234,6 @@ extern int get_wchar(int sno) {
|
|||||||
extern int get_wchar_UTF8(int sno) {
|
extern int get_wchar_UTF8(int sno) {
|
||||||
StreamDesc *st = GLOBAL_Stream + sno;
|
StreamDesc *st = GLOBAL_Stream + sno;
|
||||||
int ch = st->stream_getc(sno);
|
int ch = st->stream_getc(sno);
|
||||||
|
|
||||||
if (ch == -1)
|
if (ch == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
else {
|
else {
|
||||||
@ -224,7 +249,9 @@ extern int get_wchar_UTF8(int sno) {
|
|||||||
int c1 = buf[0] = st->stream_getc(sno);
|
int c1 = buf[0] = st->stream_getc(sno);
|
||||||
if (c1 == -1)
|
if (c1 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
// if (!utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
|
if (!utf_cont(c1)) {
|
||||||
|
return encoding_error(ch, 2, st);
|
||||||
|
}
|
||||||
wch = ((ch & 0x1f) << 6) | (c1 & 0x3f);
|
wch = ((ch & 0x1f) << 6) | (c1 & 0x3f);
|
||||||
return post_process_read_wchar(wch, 2, st);
|
return post_process_read_wchar(wch, 2, st);
|
||||||
}
|
}
|
||||||
@ -237,23 +264,31 @@ extern int get_wchar_UTF8(int sno) {
|
|||||||
int c1 = st->stream_getc(sno);
|
int c1 = st->stream_getc(sno);
|
||||||
if (c1 == -1)
|
if (c1 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
|
if (ch == 0xed && c1 > 0x9f)
|
||||||
|
return encoding_error(ch, 2, st);
|
||||||
int c2 = st->stream_getc(sno);
|
int c2 = st->stream_getc(sno);
|
||||||
if (c2 == -1)
|
if (c2 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
wch = ((ch & 0xf) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f);
|
wch = ((ch & 0xf)<<12) | ((c1 & 0x3f)<<6) | (c2 & 0x3f);
|
||||||
|
if (wch < 0x800)
|
||||||
|
return encoding_error(ch, 3, st);
|
||||||
return post_process_read_wchar(wch, 3, st);
|
return post_process_read_wchar(wch, 3, st);
|
||||||
} else {
|
} else {
|
||||||
int c1 = st->stream_getc(sno);
|
int c1 = st->stream_getc(sno);
|
||||||
if (c1 == -1)
|
if (c1 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
int c2 = st->stream_getc(sno);
|
int c2 = st->stream_getc(sno);
|
||||||
if (c2 == -1)
|
if (c2 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
int c3 = st->stream_getc(sno);
|
int c3 = st->stream_getc(sno);
|
||||||
if (c3 == -1)
|
if (c3 == -1)
|
||||||
return post_process_weof(st);
|
return post_process_weof(st);
|
||||||
wch = ((ch & 7) << 18) | ((c1 & 0x3f) << 12) | ((c2 & 0x3f) << 6) |
|
if (ch == 0xf0) {
|
||||||
(c3 & 0x3f);
|
if (c1 < 0x90) return encoding_error(ch, 4, st);
|
||||||
|
} else if (c1 == 0xf4) {
|
||||||
|
if (c2 > 0x8f) return encoding_error(ch, 4, st);
|
||||||
|
}
|
||||||
|
wch = ((ch & 7)<<18) | ((c1 & 0x3f)<<12) | ((c2 & 0x3f)<<6) | (c3 & 0x3f);
|
||||||
return post_process_read_wchar(wch, 4, st);
|
return post_process_read_wchar(wch, 4, st);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user