diff --git a/os/pl-utf8.c b/os/pl-utf8.c
index e5b405002..d8e4a67e8 100644
--- a/os/pl-utf8.c
+++ b/os/pl-utf8.c
@@ -148,6 +148,48 @@ _PL__utf8_put_char(char *out, int chr)
   return out;
 }
 
+/* _PL__utf8_skip_char() is the out-of-line half of the utf8_skip_char()
+   macro, which calls it when the byte at `in` has its high bit set.  It
+   returns a pointer to the byte following the UTF-8 sequence that starts
+   at `in`.  A lead byte announcing an N-byte sequence is accepted only
+   if CONT() holds for each of the N-1 bytes after it (CONT() is defined
+   earlier in this file; presumably it tests for a continuation byte).
+   Any other byte, e.g. a stray continuation byte or the start of a
+   truncated sequence, is skipped as a single byte, so the result is
+   always beyond `in`.
+*/
+
+char *
+_PL__utf8_skip_char(const char *in)
+{                                       /* 2-byte, 0x80-0x7ff */
+  if ( (in[0]&0xe0) == 0xc0 && CONT(1) )
+  {
+    return (char *)in+2;
+  }
+                                        /* 3-byte, 0x800-0xffff */
+  if ( (in[0]&0xf0) == 0xe0 && CONT(1) && CONT(2) )
+  {
+    return (char *)in+3;
+  }
+                                        /* 4-byte, 0x10000-0x1FFFFF */
+  if ( (in[0]&0xf8) == 0xf0 && CONT(1) && CONT(2) && CONT(3) )
+  {
+    return (char *)in+4;
+  }
+                                        /* 5-byte, 0x200000-0x3FFFFFF */
+  if ( (in[0]&0xfc) == 0xf8 && CONT(1) && CONT(2) && CONT(3) && CONT(4) )
+  {
+    return (char *)in+5;
+  }
+                                        /* 6-byte, 0x4000000-0x7FFFFFFF */
+  if ( (in[0]&0xfe) == 0xfc && CONT(1) && CONT(2) && CONT(3) && CONT(4) && CONT(5) )
+  {
+    return (char *)in+6;
+  }
+
+  return (char *)in+1;
+}
+
 
 size_t
 utf8_strlen(const char *s, size_t len)
@@ -169,11 +211,9 @@ utf8_strlen1(const char *s)
 {
   unsigned int l = 0;
 
-  while(1)
-  { int chr;
-
-    s = utf8_get_char(s, &chr);
-    if (!chr) break;
+  while( s[0] )
+  {
+    s = utf8_skip_char(s);
     l++;
   }
 
@@ -181,13 +221,12 @@ utf8_strlen1(const char *s)
 }
 
 const char *
-utf8_n(const char *s, int n)
+utf8_skip(const char *s, int n)
 {
   while(n--)
-  { int chr;
-
-    s = utf8_get_char(s, &chr);
-    if (!chr) return NULL;
+  {
+    if (!s[0]) return NULL;
+    s = utf8_skip_char(s);
   }
 
   return s;
diff --git a/os/pl-utf8.h b/os/pl-utf8.h
index be4f7da11..9ec6c6a3c 100644
--- a/os/pl-utf8.h
+++ b/os/pl-utf8.h
@@ -50,16 +50,20 @@
 #define utf8_get_char(in, chr) \
         (*(in) & 0x80 ? _PL__utf8_get_char(in, chr) \
                       : (*(chr) = *(in), (char *)(in)+1))
+#define utf8_skip_char(in) \
+        (*(in) & 0x80 ? _PL__utf8_skip_char(in) \
+                      : (char *)(in)+1)
 #define utf8_put_char(out, chr) \
         ((chr) < 0x80 ? out[0]=(char)(chr), out+1 \
                       : _PL__utf8_put_char(out, (chr)))
 
 extern char *_PL__utf8_get_char(const char *in, int *chr);
 extern char *_PL__utf8_put_char(char *out, int chr);
+extern char *_PL__utf8_skip_char(const char *in);
 
 extern size_t utf8_strlen(const char *s, size_t len);
 extern size_t utf8_strlen1(const char *s);
-extern const char * utf8_n(const char *s, int n);
+extern const char * utf8_skip(const char *s, int n);
 extern int utf8_strncmp(const char *s1, const char *s2, size_t n);
 extern int utf8_strprefix(const char *s1, const char *s2);
 