75 lines
2.5 KiB
C
75 lines
2.5 KiB
C
|
#include "tests.h"
|
||
|
|
||
|
int main(int argc, char **argv)
|
||
|
{
|
||
|
char *buf = NULL;
|
||
|
size_t bufsize = 0;
|
||
|
FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
|
||
|
utf8proc_uint8_t src[1024];
|
||
|
int len;
|
||
|
|
||
|
check(f != NULL, "error opening GraphemeBreakTest.txt");
|
||
|
while (getline(&buf, &bufsize, f) > 0) {
|
||
|
size_t bi = 0, si = 0;
|
||
|
lineno += 1;
|
||
|
|
||
|
if (lineno % 100 == 0)
|
||
|
printf("checking line %zd...\n", lineno);
|
||
|
|
||
|
if (buf[0] == '#') continue;
|
||
|
|
||
|
while (buf[bi]) {
|
||
|
bi = skipspaces(buf, bi);
|
||
|
if (buf[bi] == '/') { /* grapheme break */
|
||
|
src[si++] = '/';
|
||
|
bi++;
|
||
|
}
|
||
|
else if (buf[bi] == '+') { /* no break */
|
||
|
bi++;
|
||
|
}
|
||
|
else if (buf[bi] == '#') { /* start of comments */
|
||
|
break;
|
||
|
}
|
||
|
else { /* hex-encoded codepoint */
|
||
|
len = encode((char*) (src + si), buf + bi) - 1;
|
||
|
while (src[si]) ++si; /* advance to NUL termination */
|
||
|
bi += len;
|
||
|
}
|
||
|
}
|
||
|
if (si && src[si-1] == '/')
|
||
|
--si; /* no break after final grapheme */
|
||
|
src[si] = 0; /* NUL-terminate */
|
||
|
|
||
|
if (si) {
|
||
|
utf8proc_uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
|
||
|
size_t i = 0, j = 0;
|
||
|
utf8proc_ssize_t glen;
|
||
|
utf8proc_uint8_t *g; /* utf8proc_map grapheme results */
|
||
|
while (i < si) {
|
||
|
if (src[i] != '/')
|
||
|
utf8[j++] = src[i++];
|
||
|
else
|
||
|
i++;
|
||
|
}
|
||
|
glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
|
||
|
if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
|
||
|
/* the test file contains surrogate codepoints, which are only for UTF-16 */
|
||
|
printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
|
||
|
}
|
||
|
else {
|
||
|
check(glen >= 0, "utf8proc_map error = %s",
|
||
|
utf8proc_errmsg(glen));
|
||
|
for (i = 0; i <= glen; ++i)
|
||
|
if (g[i] == 0xff)
|
||
|
g[i] = '/'; /* easier-to-read output (/ is not in test strings) */
|
||
|
check(!strcmp((char*)g, (char*)src),
|
||
|
"grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
|
||
|
}
|
||
|
free(g);
|
||
|
}
|
||
|
}
|
||
|
fclose(f);
|
||
|
printf("Passed tests after %zd lines!\n", lineno);
|
||
|
return 0;
|
||
|
}
|