456 lines
		
	
	
		
			9.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			456 lines
		
	
	
		
			9.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*  $Id$
 | |
| 
 | |
|     Part of SWI-Prolog
 | |
| 
 | |
|     Author:        Jan Wielemaker
 | |
|     E-mail:        wielemak@science.uva.nl
 | |
|     WWW:           http://www.swi-prolog.org
 | |
|     Copyright (C): 1985-2006, University of Amsterdam
 | |
| 
 | |
|     This library is free software; you can redistribute it and/or
 | |
|     modify it under the terms of the GNU Lesser General Public
 | |
|     License as published by the Free Software Foundation; either
 | |
|     version 2.1 of the License, or (at your option) any later version.
 | |
| 
 | |
|     This library is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|     Lesser General Public License for more details.
 | |
| 
 | |
|     You should have received a copy of the GNU Lesser General Public
 | |
|     License along with this library; if not, write to the Free Software
 | |
|     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| */
 | |
| 
 | |
| #define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
 | |
| #include <stdio.h>
 | |
| #include "dtd.h"
 | |
| #include "util.h"
 | |
| #include <stdlib.h>
 | |
| #include <string.h>
 | |
| #include <assert.h>
 | |
| #include <wctype.h>
 | |
| #include <wchar.h>
 | |
| #include <locale.h>
 | |
| 
 | |
| #define streq(s1, s2) (strcmp(s1, s2) == 0)
 | |
| 
 | |
| char *program;
 | |
| int nerrors = 0;
 | |
| int nwarnings = 0;
 | |
| int style_messages = FALSE;
 | |
| 
 | |
| static void
 | |
| usage(void)
 | |
| { fprintf(stderr,
 | |
| 	  "Usage: %s [-xml] [-s] [-nodefs] [file.dtd] [file]\n\n", program);
 | |
|   fprintf(stderr,
 | |
| 	  "\t-xml\tForce XML mode\n"
 | |
| 	  "\t-s\tSilent: only report errors and warnings\n"
 | |
| 	  "\t-style\tWarn about correct but dubious input\n"
 | |
| 	  "\t-nodefs\tDo not include defaulted attributes\n");
 | |
|   exit(EXIT_FAILURE);
 | |
| }
 | |
| 
 | |
| 
 | |
/* Write the wide character c to stream f as a multibyte sequence,
   using wctomb() and therefore the current LC_CTYPE locale.  If
   wctomb() reports a non-positive length nothing is written.  The
   return value is always c.
*/

static int
wputc(int c, FILE *f)
{ char mb[MB_CUR_MAX];
  int left = wctomb(mb, c);
  const char *q = mb;

  while ( left-- > 0 )
    putc(*q++, f);

  return c;
}
 | |
| 
 | |
| 
 | |
| 
 | |
| static void
 | |
| print_word(dtd_parser * p, char c,     /* preceding character */
 | |
| 	   ichar const *s,	       /* where to start */
 | |
| 	   ichar const *e)	       /* where to end (at NUL if e is NULL) */
 | |
| { FILE *f = stdout;
 | |
|   ichar x;
 | |
| 
 | |
|   wputc(c, f);
 | |
|   if (p->dtd->case_sensitive)
 | |
|   { if (e != 0)
 | |
|       while (s != e)
 | |
| 	wputc(*s++, f);
 | |
|     else
 | |
|       while ((x = *s++) != (ichar) 0)
 | |
| 	wputc(x, f);
 | |
|   } else
 | |
|   { if (e != 0)
 | |
|       while (s != e)
 | |
| 	wputc(towupper((wint_t)*s++), f);
 | |
|     else
 | |
|       while ((x = *s++) != (ichar) 0)
 | |
| 	wputc(towupper(x), f);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
/* Write the len wide characters starting at s to f, escaping them:

     - backslash is written twice
     - newline is written as the two characters \n
     - tab is written unchanged
     - any other character below ' ' is written as \ooo (3 octal digits)
     - everything else is written unchanged through wputc()
*/

static void
wprint_escaped(FILE *f, const wchar_t *s, int len)
{ int i;

  for (i = 0; i < len; i++)
  { wint_t ch = s[i];

    if ( ch == '\n' )
    { fprintf(f, "\\n");
      continue;
    }

    if ( ch < ' ' && ch != '\t' )	/* remaining control characters */
    { fprintf(f, "\\%03o", ch);
      continue;
    }

    if ( ch == '\\' )			/* \ --> \\ */
      wputc(ch, f);
    wputc(ch, f);
  }
}
 | |
| 
 | |
| 
 | |
| static void
 | |
| print_cdata(char c, sgml_attribute *a)
 | |
| { wputc(c, stdout);
 | |
|   wprint_escaped(stdout, a->value.textW, a->value.number);
 | |
|   wputc('\n', stdout);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| print_close(dtd_parser * p, dtd_element * e)
 | |
| { print_word(p, ')', e->name->name, 0);
 | |
|   putchar('\n');
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| typedef struct atdef
 | |
| { attrtype type;		/* AT_* */
 | |
|   char const *name;		/* name */
 | |
|   int islist;			/* list-type */
 | |
| } atdef;
 | |
| 
 | |
| static atdef attrs[] = {
 | |
|   {AT_CDATA,	"CDATA",    FALSE},
 | |
|   {AT_ENTITY,	"ENTITY",   FALSE},
 | |
|   {AT_ENTITIES,	"ENTITY",   TRUE},
 | |
|   {AT_ID,	"ID",	    FALSE},
 | |
|   {AT_IDREF,	"IDREF",    FALSE},
 | |
|   {AT_IDREFS,	"IDREF",    TRUE},
 | |
|   {AT_NAME,	"NAME",	    FALSE},
 | |
|   {AT_NAMES,	"NAME",	    TRUE},
 | |
|   {AT_NMTOKEN,	"NMTOKEN",  FALSE},
 | |
|   {AT_NMTOKENS,	"NMTOKEN",  TRUE},
 | |
|   {AT_NUMBER,	"NUMBER",   FALSE},
 | |
|   {AT_NUMBERS,	"NUMBER",   TRUE},
 | |
|   {AT_NUTOKEN,	"NUTOKEN",  FALSE},
 | |
|   {AT_NUTOKENS,	"NUTOKEN",  TRUE},
 | |
|   {AT_NOTATION,	"NOTATION", FALSE},
 | |
| 
 | |
|   {AT_CDATA,	(char *) 0, FALSE}
 | |
| };
 | |
| 
 | |
| 
 | |
| static atdef *
 | |
| find_attrdef(attrtype type)
 | |
| { atdef *ad;
 | |
| 
 | |
|   for (ad = attrs; ad->name != (char *) 0; ad++)
 | |
|   { if (ad->type == type)
 | |
|       return ad;
 | |
|   }
 | |
|   assert(0);
 | |
|   return (atdef *) 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static ichar *
 | |
| istrblank(ichar const *s)
 | |
| { for (; *s; s++)
 | |
|   { if (iswspace(*s))
 | |
|       return (ichar *) s;
 | |
|   }
 | |
|   return (ichar *) 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| print_open(dtd_parser * p, dtd_element * e, int argc, sgml_attribute *argv)
 | |
| { int i;
 | |
| 
 | |
|   for (i = 0; i < argc; i++)
 | |
|   { print_word(p, 'A', argv[i].definition->name->name, 0);
 | |
|     switch (argv[i].definition->type)
 | |
|     { case AT_CDATA:
 | |
| 	printf(" CDATA");
 | |
| 	print_cdata(' ', &argv[i]);
 | |
| 	continue;		/* so we don't get two line breaks */
 | |
|       case AT_NUMBER:
 | |
| 	printf(" NUMBER ");
 | |
| 	if (argv[i].value.textW)
 | |
| 	  print_word(p, ' ', argv[i].value.textW, 0);
 | |
| 	else
 | |
| 	  printf("%ld", argv[i].value.number);
 | |
| 	break;
 | |
|       case AT_NAMEOF:
 | |
| 	printf(" NAME");
 | |
| 	print_word(p, ' ', argv[i].value.textW, 0);
 | |
| 	break;
 | |
|       default:
 | |
|       { atdef *ad = find_attrdef(argv[i].definition->type);
 | |
| 	ichar const *val = argv[i].value.textW;
 | |
| 
 | |
| 	printf(" %s", ad->name);
 | |
| 	if (ad->islist)
 | |
| 	{ ichar const *n;
 | |
| 
 | |
| 	  while ((n = istrblank(val)) != 0)
 | |
| 	  { if (n != val)
 | |
| 	      print_word(p, ' ', val, n);
 | |
| 	    val = n + 1;
 | |
| 	  }
 | |
| 	}
 | |
| 	print_word(p, ' ', val, 0);
 | |
|       }
 | |
| 	break;
 | |
|     }
 | |
|     putchar('\n');
 | |
|   }
 | |
|   print_word(p, '(', e->name->name, 0);
 | |
|   putchar('\n');
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| print_data(dtd_parser * p, data_type type, int len, const wchar_t *data)
 | |
| { char c;
 | |
| 
 | |
|   switch (type)
 | |
|   { case EC_CDATA:
 | |
|       c = '-';
 | |
|       break;
 | |
|     case EC_NDATA:
 | |
|       c = 'N';
 | |
|       break;
 | |
|     case EC_SDATA:
 | |
|       c = 'S';
 | |
|       break;
 | |
|     default:
 | |
|       assert(0);
 | |
|   }
 | |
|   wputc(c, stdout);
 | |
|   wprint_escaped(stdout, data, len);
 | |
|   wputc('\n', stdout);
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_entity(dtd_parser *p, dtd_entity *e, int chr)
 | |
| { if (e == 0)
 | |
|     printf("&#%d;\n", chr);
 | |
|   else
 | |
|     wprintf(L"&%s;\n", e->name->name);
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_pi(dtd_parser *p, ichar const *pi)
 | |
| { wputc('?', stdout);
 | |
|   wprint_escaped(stdout, pi, wcslen(pi));
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_srcloc *
 | |
| file_location(dtd_srcloc *l)
 | |
| { while (l->parent && l->type != IN_FILE)
 | |
|     l = l->parent;
 | |
|   return l;
 | |
| }
 | |
| 
 | |
| static int
 | |
| on_error(dtd_parser * p, dtd_error * error)
 | |
| { char const *severity;
 | |
|   char const *dialect;
 | |
|   dtd_srcloc *l = file_location(error->location);
 | |
| 
 | |
|   switch (p->dtd->dialect)
 | |
|   { case DL_SGML:
 | |
|       dialect = "sgml";
 | |
|       break;
 | |
|     case DL_XML:
 | |
|       dialect = "xml";
 | |
|       break;
 | |
|     case DL_XMLNS:
 | |
|     default:			/* make compiler happy */
 | |
|       dialect = "xmlns";
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   switch (error->severity)
 | |
|   { case ERS_STYLE:
 | |
|       severity = "Style";
 | |
|       if ( !style_messages )
 | |
| 	return TRUE;
 | |
|       break;
 | |
|     case ERS_WARNING:
 | |
|       severity = "Warning";
 | |
|       nwarnings++;
 | |
|       break;
 | |
|     case ERS_ERROR:
 | |
|     default:			/* make compiler happy */
 | |
|       severity = "Error";
 | |
|       nerrors++;
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   if ( l->name.file )
 | |
|   { fwprintf(stderr, L"%s: (%s mode) %s: %ls:%d:%d %ls\n",
 | |
| 	     program, dialect, severity,
 | |
| 	     l->name.entity, l->line, l->linepos,
 | |
| 	     error->plain_message);
 | |
|   } else
 | |
|   { fwprintf(stderr, L"%s: (%s mode) %s: %d:%d %ls\n",
 | |
| 	       program, dialect, severity,
 | |
| 	       error->plain_message);
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| set_functions(dtd_parser * p, int output)
 | |
| { if (output)
 | |
|   { p->on_end_element = print_close;
 | |
|     p->on_begin_element = print_open;
 | |
|     p->on_data = print_data;
 | |
|     p->on_entity = on_entity;
 | |
|     p->on_pi = on_pi;
 | |
|   }
 | |
|   p->on_error = on_error;
 | |
| }
 | |
| 
 | |
| 
 | |
/* Convert the multibyte string s (current LC_CTYPE locale) to a newly
   allocated, NUL-terminated wide string.  The caller owns the result
   and releases it with free().

   An empty input yields an empty wide string.  If s holds an invalid
   multibyte sequence, or memory cannot be allocated, a diagnostic is
   printed with perror() and the process exits with status 1.
*/

static wchar_t *
mb2wc(const char *s)
{ size_t wl = mbstowcs(NULL, s, 0);	/* length without the NUL */
  wchar_t *ws;

  if ( wl == (size_t)-1 )		/* invalid multibyte sequence */
  { perror("mbstowcs");
    exit(1);
  }

  ws = malloc((wl+1) * sizeof(*ws));
  if ( ws == NULL )
  { perror("malloc");
    exit(1);
  }

  mbstowcs(ws, s, wl+1);
  ws[wl] = L'\0';

  return ws;
}
 | |
| 
 | |
| 
 | |
| #define shift (argc--, argv++)
 | |
| 
 | |
| #define strcaseeq(x, y) istrcaseeq((ichar const *)(x), (ichar const *)(y))
 | |
| 
 | |
| static ichar const *no_dtd = (ichar const *) NULL;
 | |
| 
 | |
| int
 | |
| main(int argc, char **argv)
 | |
| { dtd_parser *p = NULL;
 | |
|   char *s;
 | |
|   int xml = FALSE;
 | |
|   int output = TRUE;
 | |
|   int nodefs = FALSE;		/* include defaulted attributes */
 | |
| 
 | |
|   setlocale(LC_CTYPE, "");
 | |
| 
 | |
|   s = strchr(argv[0], '/');
 | |
|   program = s == NULL ? argv[0] : s + 1;
 | |
|   if (streq(program, "xml"))
 | |
|     xml = TRUE;
 | |
| 
 | |
|   shift;
 | |
| 
 | |
|   while (argc > 0 && argv[0][0] == '-')
 | |
|   { if (streq(argv[0], "-xml"))
 | |
|     { xml = TRUE;
 | |
|     } else if (streq(argv[0], "-s"))
 | |
|     { output = FALSE;
 | |
|     } else if (streq(argv[0], "-nodefs"))
 | |
|     { nodefs = TRUE;
 | |
|     } else if (streq(argv[0], "-style"))
 | |
|     { style_messages = TRUE;
 | |
|     } else
 | |
|     { usage();
 | |
|     }
 | |
|     shift;
 | |
|   }
 | |
| 
 | |
|   if (argc > 0)
 | |
|   { char *slash = strchr(argv[0], '/');
 | |
|     char *dot = strchr(argv[0], '.');
 | |
|     char *ext = dot == 0 || (slash != 0 && slash > dot) ? "." : dot;
 | |
| 
 | |
|     if (strcaseeq(ext, ".dtd"))
 | |
|     { char doctype[256];
 | |
| 
 | |
|       strncpy(doctype, argv[0], ext - argv[0]);
 | |
|       doctype[ext - argv[0]] = '\0';
 | |
| 
 | |
|       p = new_dtd_parser(new_dtd(mb2wc(doctype)));
 | |
|       load_dtd_from_file(p, mb2wc(argv[0]));
 | |
|       shift;
 | |
|     } else if (strcaseeq(ext, ".html") || strcaseeq(ext, ".htm"))
 | |
|     { p = new_dtd_parser(new_dtd((ichar const *) "html"));
 | |
|       load_dtd_from_file(p, L"html.dtd");
 | |
|     } else if (xml || strcaseeq(ext, ".xml"))
 | |
|     { dtd *dtd = new_dtd(no_dtd);
 | |
| 
 | |
|       set_dialect_dtd(dtd, DL_XML);
 | |
|       p = new_dtd_parser(dtd);
 | |
|     } else
 | |
|     { p = new_dtd_parser(new_dtd(no_dtd));
 | |
|     }
 | |
|   } else
 | |
|   { p = new_dtd_parser(new_dtd(no_dtd));
 | |
|   }
 | |
| 
 | |
|   if (nodefs)
 | |
|     p->flags |= SGML_PARSER_NODEFS;
 | |
| 
 | |
|   switch (argc)
 | |
|   { case 1:
 | |
|     { set_functions(p, output);
 | |
|       sgml_process_file(p, mb2wc(argv[0]), 0);
 | |
|       free_dtd_parser(p);
 | |
|       if (output && nerrors == 0)
 | |
| 	printf("C\n");
 | |
|       return 0;
 | |
|     }
 | |
|     case 0:
 | |
|     { set_functions(p, output);
 | |
|       set_file_dtd_parser(p, IN_FILE, L"stdin");
 | |
|       set_mode_dtd_parser(p, DM_DATA);
 | |
|       sgml_process_stream(p, stdin, 0);
 | |
|       free_dtd_parser(p);
 | |
|       if (output && nerrors == 0 && nwarnings == 0)
 | |
| 	printf("C\n");
 | |
|       return 0;
 | |
|     }
 | |
|     default:
 | |
|     { usage();
 | |
|       return EXIT_FAILURE;
 | |
|     }
 | |
|   }
 | |
| }
 |