75 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			75 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
								 | 
							
								#include "tests.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								int main(int argc, char **argv)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								    char *buf = NULL;
							 | 
						||
| 
								 | 
							
								    size_t bufsize = 0;
							 | 
						||
| 
								 | 
							
								    FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
							 | 
						||
| 
								 | 
							
								    utf8proc_uint8_t src[1024];
							 | 
						||
| 
								 | 
							
								    int len;
							 | 
						||
| 
								 | 
							
								    
							 | 
						||
| 
								 | 
							
								    check(f != NULL, "error opening GraphemeBreakTest.txt");
							 | 
						||
| 
								 | 
							
								    while (getline(&buf, &bufsize, f) > 0) {
							 | 
						||
| 
								 | 
							
								        size_t bi = 0, si = 0;
							 | 
						||
| 
								 | 
							
								        lineno += 1;
							 | 
						||
| 
								 | 
							
								        
							 | 
						||
| 
								 | 
							
								        if (lineno % 100 == 0)
							 | 
						||
| 
								 | 
							
								            printf("checking line %zd...\n", lineno);
							 | 
						||
| 
								 | 
							
								        
							 | 
						||
| 
								 | 
							
								        if (buf[0] == '#') continue;
							 | 
						||
| 
								 | 
							
								        
							 | 
						||
| 
								 | 
							
								        while (buf[bi]) {
							 | 
						||
| 
								 | 
							
								            bi = skipspaces(buf, bi);
							 | 
						||
| 
								 | 
							
								            if (buf[bi] == '/') { /* grapheme break */
							 | 
						||
| 
								 | 
							
								                src[si++] = '/';
							 | 
						||
| 
								 | 
							
								                bi++;
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								            else if (buf[bi] == '+') { /* no break */
							 | 
						||
| 
								 | 
							
								                bi++;
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								            else if (buf[bi] == '#') { /* start of comments */
							 | 
						||
| 
								 | 
							
								                break;
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
									    else { /* hex-encoded codepoint */
							 | 
						||
| 
								 | 
							
								                len = encode((char*) (src + si), buf + bi) - 1;
							 | 
						||
| 
								 | 
							
								                while (src[si]) ++si; /* advance to NUL termination */
							 | 
						||
| 
								 | 
							
								                bi += len;
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        if (si && src[si-1] == '/')
							 | 
						||
| 
								 | 
							
								            --si; /* no break after final grapheme */
							 | 
						||
| 
								 | 
							
								        src[si] = 0; /* NUL-terminate */
							 | 
						||
| 
								 | 
							
								        
							 | 
						||
| 
								 | 
							
								        if (si) {
							 | 
						||
| 
								 | 
							
								            utf8proc_uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
							 | 
						||
| 
								 | 
							
								            size_t i = 0, j = 0;
							 | 
						||
| 
								 | 
							
								            utf8proc_ssize_t glen;
							 | 
						||
| 
								 | 
							
								            utf8proc_uint8_t *g; /* utf8proc_map grapheme results */
							 | 
						||
| 
								 | 
							
								            while (i < si) {
							 | 
						||
| 
								 | 
							
								                if (src[i] != '/')
							 | 
						||
| 
								 | 
							
								                    utf8[j++] = src[i++];
							 | 
						||
| 
								 | 
							
								                else
							 | 
						||
| 
								 | 
							
								                    i++;
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								            glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
							 | 
						||
| 
								 | 
							
								            if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
							 | 
						||
| 
								 | 
							
								                 /* the test file contains surrogate codepoints, which are only for UTF-16 */
							 | 
						||
| 
								 | 
							
								                 printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								            else {
							 | 
						||
| 
								 | 
							
								                 check(glen >= 0, "utf8proc_map error = %s",
							 | 
						||
| 
								 | 
							
								                       utf8proc_errmsg(glen));
							 | 
						||
| 
								 | 
							
								                 for (i = 0; i <= glen; ++i)
							 | 
						||
| 
								 | 
							
								                      if (g[i] == 0xff)
							 | 
						||
| 
								 | 
							
								                          g[i] = '/'; /* easier-to-read output (/ is not in test strings) */
							 | 
						||
| 
								 | 
							
								                 check(!strcmp((char*)g, (char*)src),
							 | 
						||
| 
								 | 
							
								                       "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								            free(g);
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    fclose(f);
							 | 
						||
| 
								 | 
							
								    printf("Passed tests after %zd lines!\n", lineno);
							 | 
						||
| 
								 | 
							
								    return 0;
							 | 
						||
| 
								 | 
							
								}
							 |