75 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			75 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
|   | #include "tests.h"
 | ||
|  | 
 | ||
|  | int main(int argc, char **argv) | ||
|  | { | ||
|  |     char *buf = NULL; | ||
|  |     size_t bufsize = 0; | ||
|  |     FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL; | ||
|  |     utf8proc_uint8_t src[1024]; | ||
|  |     int len; | ||
|  |      | ||
|  |     check(f != NULL, "error opening GraphemeBreakTest.txt"); | ||
|  |     while (getline(&buf, &bufsize, f) > 0) { | ||
|  |         size_t bi = 0, si = 0; | ||
|  |         lineno += 1; | ||
|  |          | ||
|  |         if (lineno % 100 == 0) | ||
|  |             printf("checking line %zd...\n", lineno); | ||
|  |          | ||
|  |         if (buf[0] == '#') continue; | ||
|  |          | ||
|  |         while (buf[bi]) { | ||
|  |             bi = skipspaces(buf, bi); | ||
|  |             if (buf[bi] == '/') { /* grapheme break */ | ||
|  |                 src[si++] = '/'; | ||
|  |                 bi++; | ||
|  |             } | ||
|  |             else if (buf[bi] == '+') { /* no break */ | ||
|  |                 bi++; | ||
|  |             } | ||
|  |             else if (buf[bi] == '#') { /* start of comments */ | ||
|  |                 break; | ||
|  |             } | ||
|  | 	    else { /* hex-encoded codepoint */ | ||
|  |                 len = encode((char*) (src + si), buf + bi) - 1; | ||
|  |                 while (src[si]) ++si; /* advance to NUL termination */ | ||
|  |                 bi += len; | ||
|  |             } | ||
|  |         } | ||
|  |         if (si && src[si-1] == '/') | ||
|  |             --si; /* no break after final grapheme */ | ||
|  |         src[si] = 0; /* NUL-terminate */ | ||
|  |          | ||
|  |         if (si) { | ||
|  |             utf8proc_uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */ | ||
|  |             size_t i = 0, j = 0; | ||
|  |             utf8proc_ssize_t glen; | ||
|  |             utf8proc_uint8_t *g; /* utf8proc_map grapheme results */ | ||
|  |             while (i < si) { | ||
|  |                 if (src[i] != '/') | ||
|  |                     utf8[j++] = src[i++]; | ||
|  |                 else | ||
|  |                     i++; | ||
|  |             } | ||
|  |             glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND); | ||
|  |             if (glen == UTF8PROC_ERROR_INVALIDUTF8) { | ||
|  |                  /* the test file contains surrogate codepoints, which are only for UTF-16 */ | ||
|  |                  printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno); | ||
|  |             } | ||
|  |             else { | ||
|  |                  check(glen >= 0, "utf8proc_map error = %s", | ||
|  |                        utf8proc_errmsg(glen)); | ||
|  |                  for (i = 0; i <= glen; ++i) | ||
|  |                       if (g[i] == 0xff) | ||
|  |                           g[i] = '/'; /* easier-to-read output (/ is not in test strings) */ | ||
|  |                  check(!strcmp((char*)g, (char*)src), | ||
|  |                        "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src); | ||
|  |             } | ||
|  |             free(g); | ||
|  |         } | ||
|  |     } | ||
|  |     fclose(f); | ||
|  |     printf("Passed tests after %zd lines!\n", lineno); | ||
|  |     return 0; | ||
|  | } |