| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | /*  $Id$
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Part of SWI-Prolog | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Author:        Jan Wielemaker | 
					
						
							|  |  |  |     E-mail:        wielemak@science.uva.nl | 
					
						
							|  |  |  |     WWW:           http://www.swi-prolog.org
 | 
					
						
							|  |  |  |     Copyright (C): 1985-2006, University of Amsterdam | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     This library is free software; you can redistribute it and/or | 
					
						
							|  |  |  |     modify it under the terms of the GNU Lesser General Public | 
					
						
							|  |  |  |     License as published by the Free Software Foundation; either | 
					
						
							|  |  |  |     version 2.1 of the License, or (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     This library is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
					
						
							|  |  |  |     Lesser General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     You should have received a copy of the GNU Lesser General Public | 
					
						
							|  |  |  |     License along with this library; if not, write to the Free Software | 
					
						
							|  |  |  |     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define DTD_IMPLEMENTATION 1
 | 
					
						
							|  |  |  | #include <stdio.h>
 | 
					
						
							|  |  |  | #include <wchar.h>
 | 
					
						
							|  |  |  | #include "dtd.h"
 | 
					
						
							|  |  |  | #include "model.h"
 | 
					
						
							|  |  |  | #include "util.h"
 | 
					
						
							|  |  |  | #include "catalog.h"
 | 
					
						
							|  |  |  | #include "parser.h"
 | 
					
						
							|  |  |  | #include <stdlib.h>
 | 
					
						
							|  |  |  | #include <assert.h>
 | 
					
						
							|  |  |  | #include <stdarg.h>
 | 
					
						
							|  |  |  | #include <ctype.h>
 | 
					
						
							|  |  |  | #include <string.h>
 | 
					
						
							|  |  |  | #include "utf8.h"
 | 
					
						
							|  |  |  | #include <errno.h>
 | 
					
						
							|  |  |  | #include <wctype.h>
 | 
					
						
							|  |  |  | #include "xml_unicode.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define DEBUG(g) ((void)0)
 | 
					
						
							|  |  |  | #define ZERO_TERM_LEN (-1)		/* terminated by nul */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __WINDOWS__
 | 
					
						
							|  |  |  | #define inline __inline
 | 
					
						
							|  |  |  | #define swprintf _snwprintf
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	    LOCAL TYPES		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct locbuf | 
					
						
							|  |  |  | { dtd_srcloc start;			/* p->startloc */ | 
					
						
							|  |  |  |   dtd_srcloc here;			/* p->location */ | 
					
						
							|  |  |  | } locbuf; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	      PROTOTYPES		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | static const ichar *	itake_name(dtd_parser *p, | 
					
						
							|  |  |  | 				   const ichar *in, dtd_symbol **id); | 
					
						
							|  |  |  | static const ichar *	itake_entity_name(dtd_parser *p, const ichar *in, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 					  dtd_symbol **id); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | static const ichar *	itake_namegroup(dtd_parser *p, const ichar *decl, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 					dtd_symbol **names, int *n); | 
					
						
							|  |  |  | static const ichar *	iskip_layout(dtd *dtd, const ichar *in); | 
					
						
							|  |  |  | static dtd_parser *	clone_dtd_parser(dtd_parser *p); | 
					
						
							|  |  |  | static void		free_model(dtd_model *m); | 
					
						
							|  |  |  | static int		process_entity_declaration(dtd_parser *p, | 
					
						
							|  |  |  | 						    const ichar *decl); | 
					
						
							|  |  |  | static void		free_notations(dtd_notation *n); | 
					
						
							|  |  |  | static void		free_shortrefs(dtd_shortref *sr); | 
					
						
							|  |  |  | static int		process_cdata(dtd_parser *p, int last); | 
					
						
							|  |  |  | static int		process_entity(dtd_parser *p, const ichar *name); | 
					
						
							|  |  |  | static int		emit_cdata(dtd_parser *p, int last); | 
					
						
							|  |  |  | static dtd_space_mode	istr_to_space_mode(const ichar *val); | 
					
						
							|  |  |  | static void		update_space_mode(dtd_parser *p, dtd_element *e, | 
					
						
							|  |  |  | 					  int natts, sgml_attribute *atts); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | static dtd_model *	make_model(dtd_parser *p, const ichar *decl, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 				   const ichar **end); | 
					
						
							|  |  |  | static void		for_elements_in_model(dtd_model *m, | 
					
						
							|  |  |  | 					      void (*f)(dtd_element *e, | 
					
						
							|  |  |  | 							void *closure), | 
					
						
							|  |  |  | 					      void *closure); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | int			putchar_dtd_parser(dtd_parser *p, int chr); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | void			free_dtd_parser(dtd_parser *p); | 
					
						
							|  |  |  | static const ichar *	isee_character_entity(dtd *dtd, const ichar *in, | 
					
						
							|  |  |  | 					      int *chr); | 
					
						
							|  |  |  | static int		add_default_attributes(dtd_parser *p, dtd_element *e, | 
					
						
							|  |  |  | 					       int natts, | 
					
						
							|  |  |  | 					       sgml_attribute *atts); | 
					
						
							|  |  |  | static int		prepare_cdata(dtd_parser *p); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	      MACROS		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							/* WITH_CLASS(p, c, g): run statement `g' while p->event_class is
							   temporarily set to `c'; the previous value is put back afterwards.

							   `p' and `c' are parenthesised so that any expression (e.g. `&parser')
							   expands correctly.  Note that `p' is evaluated three times, and that
							   `g' must not leave the block through return/break/goto, because the
							   restoring assignment would then be skipped. */
							#define WITH_CLASS(p, c, g) \
							        { sgml_event_class _oc = (p)->event_class; \
							          (p)->event_class = (c); \
							          g; \
							          (p)->event_class = _oc; \
							        }
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	     STATISTICS		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							#ifdef O_STATISTICS

							/* Bookkeeping counters that exist only in O_STATISTICS builds.
							   NOTE(review): presumably updated through STAT(...) by code outside
							   this section -- confirm against the users of STAT. */
							int edefs_created = 0;
							int edefs_freed = 0;
							int edefs_implicit = 0;
							int edefs_atts = 0;
							int edefs_decl = 0;
							int dtd_created = 0;
							int dtd_freed = 0;

							/* Print the current value of all counters to stderr. */
							void
							sgml_statistics(void)
							{ FILE *report = stderr;

							  fprintf(report, "EDEFS: created %d; freed %d\n",
							          edefs_created, edefs_freed);
							  fprintf(report, "EDEFS: implicit %d; atts %d; decl %d\n",
							          edefs_implicit, edefs_atts, edefs_decl);
							  fprintf(report, "DTDs: created: %d; freed: %d\n",
							          dtd_created, dtd_freed);
							}

							/* STAT(g) expands to `g' when statistics are compiled in ... */
							#define STAT(g) g

							#else

							/* ... and to a no-op expression otherwise. */
							#define STAT(g) ((void)0)

							#endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	   SRC LOCATION		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void				/* TBD: also handle startloc */ | 
					
						
							|  |  |  | push_location(dtd_parser *p, locbuf *save) | 
					
						
							|  |  |  | { save->here  = p->location; | 
					
						
							|  |  |  |   save->start = p->startloc; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   p->location.parent = &save->here; | 
					
						
							|  |  |  |   p->startloc.parent = &save->start; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | pop_location(dtd_parser *p, locbuf *saved) | 
					
						
							|  |  |  | { p->location = saved->here; | 
					
						
							|  |  |  |   p->startloc = saved->start; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void | 
					
						
							|  |  |  | _sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc) | 
					
						
							|  |  |  | { d->type    = loc->type; | 
					
						
							|  |  |  |   d->name.file = loc->name.file; | 
					
						
							|  |  |  |   d->line    = loc->line; | 
					
						
							|  |  |  |   d->linepos = loc->linepos; | 
					
						
							|  |  |  |   d->charpos = loc->charpos; | 
					
						
							|  |  |  | 					/* but not the parent! */ | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc) | 
					
						
							|  |  |  | { _sgml_cplocation(d, loc); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define sgml_cplocation(d,s) _sgml_cplocation(d, s)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | inc_location(dtd_srcloc *l, int chr) | 
					
						
							|  |  |  | { if ( chr == '\n' ) | 
					
						
							|  |  |  |   { l->linepos = 0; | 
					
						
							|  |  |  |     l->line++; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   l->linepos++; | 
					
						
							|  |  |  |   l->charpos++; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | dec_location(dtd_srcloc *l, int chr) | 
					
						
							|  |  |  | { if ( chr == '\n' ) | 
					
						
							|  |  |  |   { l->linepos = 2;			/* not good! */ | 
					
						
							|  |  |  |     l->line--; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   l->linepos--; | 
					
						
							|  |  |  |   l->charpos--; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *   CLASSIFICATION PRIMITIVES	* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int | 
					
						
							|  |  |  | HasClass(dtd *dtd, wint_t chr, int mask) | 
					
						
							|  |  |  | { if ( chr <= 0xff ) | 
					
						
							|  |  |  |     return (dtd->charclass->class[(chr)] & (mask)); | 
					
						
							|  |  |  |   else | 
					
						
							|  |  |  |   { switch(mask) | 
					
						
							|  |  |  |     { case CH_NAME: | 
					
						
							|  |  |  | 	return ( xml_basechar(chr) || | 
					
						
							|  |  |  | 		 xml_digit(chr) || | 
					
						
							|  |  |  | 		 xml_ideographic(chr) || | 
					
						
							|  |  |  | 		 xml_combining_char(chr) || | 
					
						
							|  |  |  | 		 xml_extender(chr) | 
					
						
							|  |  |  | 	       ); | 
					
						
							|  |  |  |       case CH_NMSTART: | 
					
						
							|  |  |  | 	return ( xml_basechar(chr) || | 
					
						
							|  |  |  | 		 xml_ideographic(chr) ); | 
					
						
							|  |  |  |       case CH_WHITE: | 
					
						
							|  |  |  | 	return FALSE;			/* only ' ' and '\t' */ | 
					
						
							|  |  |  |       case CH_BLANK: | 
					
						
							|  |  |  | 	return iswspace(chr); | 
					
						
							|  |  |  |       case CH_DIGIT: | 
					
						
							|  |  |  | 	return xml_digit(chr); | 
					
						
							|  |  |  |       case CH_RS: | 
					
						
							|  |  |  |       case CH_RE: | 
					
						
							|  |  |  | 	return FALSE; | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	assert(0); | 
					
						
							|  |  |  |         return FALSE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | isee_func(dtd *dtd, const ichar *in, charfunc func) | 
					
						
							|  |  |  | { if ( dtd->charfunc->func[func] == *in ) | 
					
						
							|  |  |  |     return ++in; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	      SYMBOLS		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_symbol_table * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | new_symbol_table() | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { dtd_symbol_table *t = sgml_calloc(1, sizeof(*t)); | 
					
						
							|  |  |  |   t->size    = SYMBOLHASHSIZE; | 
					
						
							|  |  |  |   t->entries = sgml_calloc(t->size, sizeof(dtd_symbol*)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return t; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_symbol_table(dtd_symbol_table *t) | 
					
						
							|  |  |  | { int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(i=0; i<t->size; i++) | 
					
						
							|  |  |  |   { dtd_symbol *s, *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(s=t->entries[i]; s; s=next) | 
					
						
							|  |  |  |     { next = s->next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       sgml_free((ichar*)s->name); | 
					
						
							|  |  |  |       sgml_free(s); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   sgml_free(t->entries); | 
					
						
							|  |  |  |   sgml_free(t); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | dtd_symbol * | 
					
						
							|  |  |  | dtd_find_symbol(dtd *dtd, const ichar *name) | 
					
						
							|  |  |  | { dtd_symbol_table *t = dtd->symbols; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( dtd->case_sensitive ) | 
					
						
							|  |  |  |   { int k = istrhash(name, t->size); | 
					
						
							|  |  |  |     dtd_symbol *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(s=t->entries[k]; s; s = s->next) | 
					
						
							|  |  |  |     { if ( istreq(s->name, name) ) | 
					
						
							|  |  |  | 	return s; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { int k = istrcasehash(name, t->size); | 
					
						
							|  |  |  |     dtd_symbol *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(s=t->entries[k]; s; s = s->next) | 
					
						
							|  |  |  |     { if ( istrcaseeq(s->name, name) ) | 
					
						
							|  |  |  | 	return s; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_symbol * | 
					
						
							|  |  |  | dtd_find_entity_symbol(dtd *dtd, const ichar *name) | 
					
						
							|  |  |  | { dtd_symbol_table *t = dtd->symbols; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( dtd->ent_case_sensitive ) | 
					
						
							|  |  |  |   { int k = istrhash(name, t->size); | 
					
						
							|  |  |  |     dtd_symbol *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(s=t->entries[k]; s; s = s->next) | 
					
						
							|  |  |  |     { if ( istreq(s->name, name) ) | 
					
						
							|  |  |  | 	return s; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { int k = istrcasehash(name, t->size); | 
					
						
							|  |  |  |     dtd_symbol *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(s=t->entries[k]; s; s = s->next) | 
					
						
							|  |  |  |     { if ( istrcaseeq(s->name, name) ) | 
					
						
							|  |  |  | 	return s; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | dtd_symbol * | 
					
						
							|  |  |  | dtd_add_symbol(dtd *dtd, const ichar *name) | 
					
						
							|  |  |  | { dtd_symbol_table *t = dtd->symbols; | 
					
						
							|  |  |  |   int k = istrhash(name, t->size); | 
					
						
							|  |  |  |   dtd_symbol *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(s=t->entries[k]; s; s = s->next) | 
					
						
							|  |  |  |   { if ( istreq(s->name, name) ) | 
					
						
							|  |  |  |       return s; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   s = sgml_calloc(1, sizeof(*s)); | 
					
						
							|  |  |  |   s->name = istrdup(name); | 
					
						
							|  |  |  |   s->next = t->entries[k]; | 
					
						
							|  |  |  |   t->entries[k] = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return s; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	    ENTITIES		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_entity_list(dtd_entity *e) | 
					
						
							|  |  |  | { dtd_entity *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; e; e=next) | 
					
						
							|  |  |  |   { next = e->next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( e->value )   sgml_free(e->value); | 
					
						
							|  |  |  |     if ( e->extid )   sgml_free(e->extid); | 
					
						
							|  |  |  |     if ( e->exturl )  sgml_free(e->exturl); | 
					
						
							|  |  |  |     if ( e->baseurl ) sgml_free(e->baseurl); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sgml_free(e); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_entity * | 
					
						
							|  |  |  | find_pentity(dtd *dtd, dtd_symbol *id) | 
					
						
							|  |  |  | { dtd_entity *e; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(e = dtd->pentities; e; e=e->next) | 
					
						
							|  |  |  |   { if ( e->name == id ) | 
					
						
							|  |  |  |       return e; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* returned path must be freed when done */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static ichar * | 
					
						
							|  |  |  | entity_file(dtd *dtd, dtd_entity *e) | 
					
						
							|  |  |  | { switch(e->type) | 
					
						
							|  |  |  |   { case ET_SYSTEM: | 
					
						
							|  |  |  |     case ET_PUBLIC: | 
					
						
							|  |  |  |     { const ichar *f; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       f = find_in_catalogue(e->catalog_location, | 
					
						
							|  |  |  | 			    e->name->name, | 
					
						
							|  |  |  | 			    e->extid, | 
					
						
							|  |  |  | 			    e->exturl, | 
					
						
							|  |  |  | 			    dtd->dialect != DL_SGML); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( f )				/* owned by catalog */ | 
					
						
							|  |  |  |       { ichar *file; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( is_absolute_path(f) || !e->baseurl ) | 
					
						
							|  |  |  | 	  file = istrdup(f); | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 	  file = localpath(e->baseurl, f); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return file; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | entity_value(dtd_parser *p, dtd_entity *e, int *len) | 
					
						
							|  |  |  | { ichar *file; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !e->value && (file=entity_file(p->dtd, e)) ) | 
					
						
							|  |  |  |   { int normalise = (e->content == EC_SGML || e->content == EC_CDATA); | 
					
						
							|  |  |  |     size_t l; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     e->value = load_sgml_file_to_charp(file, normalise, &l); | 
					
						
							|  |  |  |     e->length = (long)l; | 
					
						
							|  |  |  |     sgml_free(file); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( len ) | 
					
						
							|  |  |  |     *len = e->length; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return e->value; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | expand_pentities(dtd_parser *p, const ichar *in, int ilen, ichar *out, int len) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   int pero = dtd->charfunc->func[CF_PERO]; /* % */ | 
					
						
							|  |  |  |   int ero = dtd->charfunc->func[CF_ERO]; /* & */ | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  |   const ichar *end; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( ilen == ZERO_TERM_LEN ) | 
					
						
							|  |  |  |   { end = in + wcslen(in); | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { end = &in[ilen]; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   while(in < end) | 
					
						
							|  |  |  |   { if ( *in == pero ) | 
					
						
							|  |  |  |     { dtd_symbol *id; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       if ( (s = itake_entity_name(p, in+1, &id)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       { dtd_entity *e = find_pentity(dtd, id); | 
					
						
							|  |  |  | 	const ichar *eval; | 
					
						
							|  |  |  | 	int l; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	in = s; | 
					
						
							|  |  |  | 	if ( (s=isee_func(dtd, s, CF_ERC)) ) /* ; is not obligatory? */ | 
					
						
							|  |  |  | 	  in = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( !e ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  return gripe(p, ERC_EXISTENCE, L"parameter entity", id->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if ( !(eval = entity_value(p, e, NULL)) ) | 
					
						
							|  |  |  | 	  return FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( !expand_pentities(p, eval, ZERO_TERM_LEN, out, len) ) | 
					
						
							|  |  |  | 	  return FALSE; | 
					
						
							|  |  |  | 	l = (int)istrlen(out);		/* could be better */ | 
					
						
							|  |  |  | 	out += l; | 
					
						
							|  |  |  | 	len -= l; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( --len <= 0 ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { gripe(p, ERC_REPRESENTATION, L"Declaration too long"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       return FALSE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( *in == ero && in[1] == '#' )	/* &# */ | 
					
						
							|  |  |  |     { int chr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( (s=isee_character_entity(dtd, in, &chr)) ) | 
					
						
							|  |  |  |       { if ( chr == 0 ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", in); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} else | 
					
						
							|  |  |  | 	{ *out++ = chr; | 
					
						
							|  |  |  | 	  in = s; | 
					
						
							|  |  |  | 	  continue; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     *out++ = *in++; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *out = '\0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | char_entity_value(const ichar *decl) | 
					
						
							|  |  |  | { if ( *decl == '#' ) | 
					
						
							|  |  |  |   { const ichar *s = decl+1; | 
					
						
							|  |  |  |     ichar *end; | 
					
						
							|  |  |  |     long v; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* do octal too? */ | 
					
						
							|  |  |  |     if ( s[0] == 'x' || s[0] == 'X' ) | 
					
						
							|  |  |  |       v = wcstoul(s+1, &end, 16); | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       v = wcstoul(s, &end, 10); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( *end == '\0' ) | 
					
						
							|  |  |  |     { return (int)v; | 
					
						
							|  |  |  |     } else if ( istreq(s, L"RS") ) | 
					
						
							|  |  |  |     { return '\n'; | 
					
						
							|  |  |  |     } else if ( istreq(s, L"RE") ) | 
					
						
							|  |  |  |     { return '\r'; | 
					
						
							|  |  |  |     } else if ( istreq(s, L"TAB") ) | 
					
						
							|  |  |  |     { return '\t'; | 
					
						
							|  |  |  |     } else if ( istreq(s, L"SPACE") ) | 
					
						
							|  |  |  |     { return ' '; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | isee_character_entity(dtd *dtd, const ichar *in, int *chr) | 
					
						
							|  |  |  | { const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s=isee_func(dtd, in, CF_ERO)) && *s == '#' ) | 
					
						
							|  |  |  |   { ichar e[32]; | 
					
						
							|  |  |  |     ichar *o = e; | 
					
						
							|  |  |  |     int v; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     *o++ = *s++; | 
					
						
							|  |  |  |     while(o < e+sizeof(e)/sizeof(ichar)-1 && HasClass(dtd, *s, CH_NAME)) | 
					
						
							|  |  |  |       *o++ = *s++; | 
					
						
							|  |  |  |     if ( isee_func(dtd, s, CF_ERC))	/* skip ; */ | 
					
						
							|  |  |  |       s++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     *o = '\0'; | 
					
						
							|  |  |  |     if ( (v=char_entity_value(e)) >= 0 ) | 
					
						
							|  |  |  |     { *chr = v; | 
					
						
							|  |  |  |       return s; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Expand entities in a string.  Used to expand CDATA attribute values. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | expand_entities(dtd_parser *p, const ichar *in, int len, ocharbuf *out) | 
					
						
							|  |  |  | { const ichar *s; | 
					
						
							|  |  |  |   const ichar *end = &in[len]; | 
					
						
							|  |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   int ero = dtd->charfunc->func[CF_ERO]; /* & */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   while(in < end) | 
					
						
							|  |  |  |   { if ( *in == ero ) | 
					
						
							|  |  |  |     { const ichar *estart = in;		/* for recovery */ | 
					
						
							|  |  |  |       int chr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( (s=isee_character_entity(dtd, in, &chr)) ) | 
					
						
							|  |  |  |       { if ( chr == 0 ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", in); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	add_ocharbuf(out, chr); | 
					
						
							|  |  |  | 	in = s; | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( HasClass(dtd, in[1], CH_NMSTART) ) | 
					
						
							|  |  |  |       { dtd_symbol *id; | 
					
						
							|  |  |  | 	dtd_entity *e; | 
					
						
							|  |  |  | 	const ichar *eval; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if ( !(in = itake_name(p, in+1, &id)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	{ in = estart; | 
					
						
							|  |  |  | 	  goto recover; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if ( isee_func(dtd, in, CF_ERC) || *in == '\n' ) | 
					
						
							|  |  |  | 	  in++; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	if ( !(e = id->entity) && !(e=dtd->default_entity) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ gripe(p, ERC_EXISTENCE, L"entity", id->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  in = estart; | 
					
						
							|  |  |  | 	  goto recover; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	if ( !(eval = entity_value(p, e, NULL)) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ gripe(p, ERC_NO_VALUE, e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  in = estart; | 
					
						
							|  |  |  | 	  goto recover; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( e->content == EC_SGML ) | 
					
						
							|  |  |  | 	{ if ( !expand_entities(p, eval, (int)istrlen(eval), out) ) | 
					
						
							|  |  |  | 	    return FALSE; | 
					
						
							|  |  |  | 	} else | 
					
						
							|  |  |  | 	{ const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  for(s=eval; *s; s++) | 
					
						
							|  |  |  | 	    add_ocharbuf(out, *s); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( dtd->dialect != DL_SGML ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_ERROR, L"Illegal entity", estart); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   recover: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( *in == CR && in[1] == LF ) | 
					
						
							|  |  |  |       in++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( HasClass(dtd, *in, CH_BLANK) ) | 
					
						
							|  |  |  |     { add_ocharbuf(out, ' '); | 
					
						
							|  |  |  |       in++; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { add_ocharbuf(out, *in++); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   terminate_ocharbuf(out); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	      ELEMENTS		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_element * | 
					
						
							|  |  |  | find_element(dtd *dtd, dtd_symbol *id) | 
					
						
							|  |  |  | { dtd_element *e; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( id->element ) | 
					
						
							|  |  |  |     return id->element;			/* must check */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   e = sgml_calloc(1, sizeof(*e)); | 
					
						
							|  |  |  |   e->space_mode = SP_INHERIT; | 
					
						
							|  |  |  |   e->undefined = TRUE; | 
					
						
							|  |  |  |   e->name = id; | 
					
						
							|  |  |  |   id->element = e; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   e->next = dtd->elements; | 
					
						
							|  |  |  |   dtd->elements = e; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return e; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_edef * | 
					
						
							|  |  |  | new_element_definition(dtd *dtd) | 
					
						
							|  |  |  | { dtd_edef *def = sgml_calloc(1, sizeof(*def)); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   STAT(edefs_created++); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return def; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_element * | 
					
						
							|  |  |  | def_element(dtd *dtd, dtd_symbol *id) | 
					
						
							|  |  |  | { dtd_element *e = find_element(dtd, id); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !e->structure ) | 
					
						
							|  |  |  |   { e->structure = new_element_definition(dtd); | 
					
						
							|  |  |  |     e->structure->references = 1; | 
					
						
							|  |  |  |     e->structure->type = C_EMPTY; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return e; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_name_list(dtd_name_list *nl) | 
					
						
							|  |  |  | { dtd_name_list *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; nl; nl=next) | 
					
						
							|  |  |  |   { next = nl->next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sgml_free(nl); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define REFS_VIRGIN (-42)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_attribute(dtd_attr *a) | 
					
						
							|  |  |  | { if ( a->references == REFS_VIRGIN || --a->references == 0 ) | 
					
						
							|  |  |  |   { switch(a->type) | 
					
						
							|  |  |  |     { case AT_NAMEOF: | 
					
						
							|  |  |  |       case AT_NOTATION: | 
					
						
							|  |  |  | 	free_name_list(a->typeex.nameof); | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     switch(a->def) | 
					
						
							|  |  |  |     { case AT_DEFAULT: | 
					
						
							|  |  |  |       case AT_FIXED: | 
					
						
							|  |  |  |       { if ( a->islist ) | 
					
						
							|  |  |  | 	  sgml_free(a->att_def.list); | 
					
						
							|  |  |  | 	else if ( a->type == AT_CDATA && a->att_def.cdata ) | 
					
						
							|  |  |  | 	  sgml_free(a->att_def.cdata); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sgml_free(a); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_attribute_list(dtd_attr_list *l) | 
					
						
							|  |  |  | { dtd_attr_list *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(; l; l=next) | 
					
						
							|  |  |  |   { next = l->next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     free_attribute(l->attribute); | 
					
						
							|  |  |  |     sgml_free(l); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   } | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_element_list(dtd_element_list *l) | 
					
						
							|  |  |  | { dtd_element_list *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; l; l=next) | 
					
						
							|  |  |  |   { next = l->next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sgml_free(l); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_element_definition(dtd_edef *def) | 
					
						
							|  |  |  | { if ( --def->references == 0 ) | 
					
						
							|  |  |  |   { STAT(edefs_freed++); | 
					
						
							|  |  |  |     if ( def->content ) | 
					
						
							|  |  |  |       free_model(def->content); | 
					
						
							|  |  |  |     free_element_list(def->included); | 
					
						
							|  |  |  |     free_element_list(def->excluded); | 
					
						
							|  |  |  |     free_state_engine(def->initial_state); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sgml_free(def); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_elements(dtd_element *e) | 
					
						
							|  |  |  | { dtd_element *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; e; e=next) | 
					
						
							|  |  |  |   { next = e->next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( e->structure ) | 
					
						
							|  |  |  |       free_element_definition(e->structure); | 
					
						
							|  |  |  |     free_attribute_list(e->attributes); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sgml_free(e); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	    ATTRIBUTES		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_attr * | 
					
						
							|  |  |  | find_attribute(dtd_element *e, dtd_symbol *name) | 
					
						
							|  |  |  | { dtd_attr_list *a; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(a=e->attributes; a; a=a->next) | 
					
						
							|  |  |  |   { if ( a->attribute->name == name ) | 
					
						
							|  |  |  |       return a->attribute; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	  PARSE PRIMITIVES	* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | iskip_layout(dtd *dtd, const ichar *in) | 
					
						
							|  |  |  | { ichar cmt = dtd->charfunc->func[CF_CMT]; /* also skips comment */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; *in; in++ ) | 
					
						
							|  |  |  |   { if ( HasClass(dtd, *in, CH_BLANK) ) | 
					
						
							|  |  |  |       continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( in[0] == cmt && in[1] == cmt ) | 
					
						
							|  |  |  |     { in += 2; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for( ; *in; in++ ) | 
					
						
							|  |  |  |       { if ( in[0] == cmt && in[1] == cmt ) | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       in++; | 
					
						
							|  |  |  |       continue; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return in; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return in; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | See whether we are looking at identifier   "id". "id" must be lowercase! | 
					
						
							|  |  |  | This is only used for reserved words,  and parsed case-insensitive in both | 
					
						
							|  |  |  | XML and SGML modes. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | isee_identifier(dtd *dtd, const ichar *in, char *id) | 
					
						
							|  |  |  | { in = iskip_layout(dtd, in); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* match */ | 
					
						
							|  |  |  |   while (*id && (wint_t)*id == towlower(*in) ) | 
					
						
							|  |  |  |     id++, in++; | 
					
						
							|  |  |  |   if ( *id == 0 && !HasClass(dtd, *in, CH_NAME) ) | 
					
						
							|  |  |  |     return iskip_layout(dtd, in); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_name(dtd_parser *p, const ichar *in, dtd_symbol **id) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { ichar buf[MAXNMLEN]; | 
					
						
							|  |  |  |   ichar *o = buf; | 
					
						
							|  |  |  |   ichar *e = &buf[MAXNMLEN]-1; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   in = iskip_layout(dtd, in); | 
					
						
							|  |  |  |   if ( !HasClass(dtd, *in, CH_NMSTART) ) | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( dtd->case_sensitive ) | 
					
						
							|  |  |  |   { while( HasClass(dtd, *in, CH_NAME) && o < e ) | 
					
						
							|  |  |  |       *o++ = *in++; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { while( HasClass(dtd, *in, CH_NAME) && o < e ) | 
					
						
							|  |  |  |       *o++ = towlower(*in++); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( o == e ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_REPRESENTATION, L"NAME too long"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *o++ = '\0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *id = dtd_add_symbol(dtd, buf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return iskip_layout(dtd, in); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_entity_name(dtd_parser *p, const ichar *in, dtd_symbol **id) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { ichar buf[MAXNMLEN]; | 
					
						
							|  |  |  |   ichar *o = buf; | 
					
						
							|  |  |  |   ichar *e = &buf[MAXNMLEN]-1; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   in = iskip_layout(dtd, in); | 
					
						
							|  |  |  |   if ( !HasClass(dtd, *in, CH_NMSTART) ) | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( dtd->ent_case_sensitive ) | 
					
						
							|  |  |  |   { while( HasClass(dtd, *in, CH_NAME) && o < e ) | 
					
						
							|  |  |  |       *o++ = *in++; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { while( HasClass(dtd, *in, CH_NAME) && o < e ) | 
					
						
							|  |  |  |       *o++ = towlower(*in++); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( o == e ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_REPRESENTATION, L"Entity NAME too long"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *o++ = '\0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *id = dtd_add_symbol(dtd, buf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return in; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_nmtoken(dtd_parser *p, const ichar *in, dtd_symbol **id) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { ichar buf[MAXNMLEN]; | 
					
						
							|  |  |  |   ichar *o = buf; | 
					
						
							|  |  |  |   ichar *e = &buf[MAXNMLEN]-1; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   in = iskip_layout(dtd, in); | 
					
						
							|  |  |  |   if ( !HasClass(dtd, *in, CH_NAME) ) | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  |   if ( dtd->case_sensitive ) | 
					
						
							|  |  |  |   { while( HasClass(dtd, *in, CH_NAME) && o < e ) | 
					
						
							|  |  |  |       *o++ = *in++; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { while( HasClass(dtd, *in, CH_NAME) && o < e ) | 
					
						
							|  |  |  |       *o++ = towlower(*in++); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( o == e ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_REPRESENTATION, L"NMTOKEN too long"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *o = '\0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *id = dtd_add_symbol(dtd, buf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return iskip_layout(dtd, in); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_nutoken(dtd_parser *p, const ichar *in, dtd_symbol **id) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { ichar buf[MAXNMLEN]; | 
					
						
							|  |  |  |   ichar *o = buf; | 
					
						
							|  |  |  |   ichar *e = &buf[MAXNMLEN]-1; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   in = iskip_layout(dtd, in); | 
					
						
							|  |  |  |   if ( !HasClass(dtd, *in, CH_DIGIT) ) | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( dtd->case_sensitive ) | 
					
						
							|  |  |  |   { while( HasClass(dtd, *in, CH_NAME) && o < e ) | 
					
						
							|  |  |  |       *o++ = *in++; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { while( HasClass(dtd, *in, CH_NAME) && o < e ) | 
					
						
							|  |  |  |       *o++ = towlower(*in++); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( o == e ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_REPRESENTATION, L"NUTOKEN too long"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *o = '\0'; | 
					
						
							|  |  |  |   if ( o - buf > 8 ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     gripe(p, ERC_LIMIT, L"nutoken length"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   *id = dtd_add_symbol(dtd, buf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return iskip_layout(dtd, in); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_number(dtd_parser *p, const ichar *in, dtd_attr *at) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   in = iskip_layout(dtd, in); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   switch(dtd->number_mode) | 
					
						
							|  |  |  |   { case NU_TOKEN: | 
					
						
							|  |  |  |     { ichar buf[MAXNMLEN]; | 
					
						
							|  |  |  |       ichar *o = buf; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       while( HasClass(dtd, *in, CH_DIGIT) ) | 
					
						
							|  |  |  | 	*o++ = *in++; | 
					
						
							|  |  |  |       if ( o == buf ) | 
					
						
							|  |  |  | 	return NULL;			/* empty */ | 
					
						
							|  |  |  |       *o = '\0'; | 
					
						
							|  |  |  |       at->att_def.name = dtd_add_symbol(dtd, buf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return iskip_layout(dtd, (const ichar *)in); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case NU_INTEGER: | 
					
						
							|  |  |  |     { ichar *end; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       at->att_def.number = wcstol(in, &end, 10); | 
					
						
							|  |  |  |       if ( end > in && errno != ERANGE ) | 
					
						
							|  |  |  | 	return iskip_layout(dtd, end); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Get a quoted value. After successful return,  *start points to the start | 
					
						
							|  |  |  | of the string in the input and  *len   to  the length. The data is *not* | 
					
						
							|  |  |  | nul terminated. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | itake_string(dtd *dtd, const ichar *in, ichar **start, int *len) | 
					
						
							|  |  |  | { in = iskip_layout(dtd, in); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( isee_func(dtd, in, CF_LIT) || | 
					
						
							|  |  |  |        isee_func(dtd, in, CF_LITA) ) | 
					
						
							|  |  |  |   { ichar q = *in++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     *start = (ichar *)in; | 
					
						
							|  |  |  |     while( *in && *in != q ) | 
					
						
							|  |  |  |       in++; | 
					
						
							|  |  |  |     if ( *in ) | 
					
						
							|  |  |  |     { *len = (int)(in - (*start)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return iskip_layout(dtd, ++in); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | itake_dubbed_string(dtd *dtd, const ichar *in, ichar **out) | 
					
						
							|  |  |  | { ichar *start; | 
					
						
							|  |  |  |   int len; | 
					
						
							|  |  |  |   const ichar *end; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (end=itake_string(dtd, in, &start, &len)) ) | 
					
						
							|  |  |  |     *out = istrndup(start, len); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return end; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | itake_url() is used to get the argument of a SYSTEM or 2nd argument of a | 
					
						
							|  |  |  | PUBLIC reference. Once upon a  time  it   tried  to  tag the argument as | 
					
						
							|  |  |  | file:<path>, but this job cannot be before   lookup in the catalogue. It | 
					
						
							|  |  |  | is now the same as itake_dubbed_string(), so we simply call this one. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | itake_url(dtd *dtd, const ichar *in, ichar **out) | 
					
						
							|  |  |  | { return itake_dubbed_string(dtd, in, out); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_nmtoken_chars(dtd_parser *p, const ichar *in, ichar *out, int len) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   in = iskip_layout(dtd, in); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   if ( !HasClass(dtd, *in, CH_NAME) ) | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  |   while( HasClass(dtd, *in, CH_NAME) ) | 
					
						
							|  |  |  |   { if ( --len <= 0 ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_REPRESENTATION, L"Name token too long"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     *out++ = (dtd->case_sensitive ? *in++ : (ichar)towlower(*in++)); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   *out++ = '\0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return iskip_layout(dtd, in); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  There used to be a function
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     itake_nonblank_chars(dtd, in, out, len) -> new end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     which | 
					
						
							|  |  |  |     - skipped layout, | 
					
						
							|  |  |  |     - copied characters from in[] to out[] until layout or \0 was found, | 
					
						
							|  |  |  |     - added a terminating \0 to out[], | 
					
						
							|  |  |  |     - skipped any following layout, and | 
					
						
							|  |  |  |     - returned the new position. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     That function was only called by get_attribute_value(), which used | 
					
						
							|  |  |  |     it to parse an unquoted attribute value.  According to SGML, that's | 
					
						
							|  |  |  |     not right:  unquoted attribute values must look like NMTOKENs (but | 
					
						
							|  |  |  |     have a different length bound).  In particular, elements like | 
					
						
							|  |  |  | 	<foo a=bar>zoo</foo> | 
					
						
							|  |  |  | 	<foo a=ugh/zip/ | 
					
						
							|  |  |  |     are perfectly legal, so scanning an unquoted attribute value MUST | 
					
						
							|  |  |  |     stop at a '/' or '>'.  According to HTML practice, pretty much any | 
					
						
							|  |  |  |     old junk will be accepted, and some HTML parsers will allow bare | 
					
						
							|  |  |  |     slashes in such an attribute. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Typical HTML is *so* bad that it doesn't agree with *any* part of | 
					
						
							|  |  |  |     the HTML specifications (e.g., <FONT> is commonly wrapped around | 
					
						
							|  |  |  |     block-level elements, which has never been legal).  It's not clear | 
					
						
							|  |  |  |     that there is much point in trying to accommodate bad HTML; if you | 
					
						
							|  |  |  |     really need to do that, use the free program HTML Tidy (from the | 
					
						
							|  |  |  |     http://www.w3c.org/ site) to clean up, and parse its output instead.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     However, in order to break as little as possible, the new (sgml-1.0.14) | 
					
						
							|  |  |  |     function accepts anything except > / \0 and blanks. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | JW: I decided to accept / as part of an unquoted value in SGML-mode if | 
					
						
							|  |  |  |     shorttag is disabled as well as in XML mode if it is not the | 
					
						
							|  |  |  |     end of the begin-element | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static ichar const * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_unquoted(dtd_parser *p, ichar const *in, ichar *out, int len) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   ichar const end2 = dtd->charfunc->func[CF_ETAGO2];	/* / */ | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   ichar c; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /* skip leading layout.  Do NOT skip comments! --x-- is a value! */ | 
					
						
							|  |  |  |   while (c = *in, HasClass(dtd, c, CH_BLANK)) | 
					
						
							|  |  |  |     in++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /* copy the attribute to out[] */ | 
					
						
							|  |  |  |   while ( !HasClass(dtd, c, CH_BLANK) && | 
					
						
							|  |  |  | 	  c != '\0' ) | 
					
						
							|  |  |  |   { if ( c == end2 && (dtd->shorttag || | 
					
						
							|  |  |  | 		       (in[1] == '\0' && dtd->dialect != DL_SGML)) ) | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( --len > 0 ) | 
					
						
							|  |  |  |       *out++ = c; | 
					
						
							|  |  |  |     else if ( len == 0 ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_REPRESENTATION, L"Attribute too long"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     c = *++in; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   *out = '\0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /* skip trailing layout.  While it is kind to skip comments here,
 | 
					
						
							|  |  |  |      it is technically wrong to do so.  Tags may not contain comments. | 
					
						
							|  |  |  |    */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return iskip_layout(dtd, in); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *		DTD		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | dtd * | 
					
						
							|  |  |  | new_dtd(const ichar *doctype) | 
					
						
							|  |  |  | { dtd *dtd = sgml_calloc(1, sizeof(*dtd)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   STAT(dtd_created++); | 
					
						
							|  |  |  |   dtd->magic	 = SGML_DTD_MAGIC; | 
					
						
							|  |  |  |   dtd->implicit  = TRUE; | 
					
						
							|  |  |  |   dtd->dialect   = DL_SGML; | 
					
						
							|  |  |  |   if ( doctype ) | 
					
						
							|  |  |  |     dtd->doctype = istrdup(doctype); | 
					
						
							|  |  |  |   dtd->symbols	 = new_symbol_table(); | 
					
						
							|  |  |  |   dtd->charclass = new_charclass(); | 
					
						
							|  |  |  |   dtd->charfunc	 = new_charfunc(); | 
					
						
							|  |  |  |   dtd->space_mode = SP_SGML; | 
					
						
							|  |  |  |   dtd->ent_case_sensitive = TRUE;	/* case-sensitive entities */ | 
					
						
							|  |  |  |   dtd->shorttag    = TRUE;		/* allow for <tag/value/ */ | 
					
						
							|  |  |  |   dtd->number_mode = NU_TOKEN; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return dtd; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | free_dtd(dtd *dtd) | 
					
						
							|  |  |  | { if ( --dtd->references == 0 ) | 
					
						
							|  |  |  |   { STAT(dtd_freed++); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( dtd->doctype ) | 
					
						
							|  |  |  |       sgml_free(dtd->doctype); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     free_entity_list(dtd->entities); | 
					
						
							|  |  |  |     free_entity_list(dtd->pentities); | 
					
						
							|  |  |  |     free_notations(dtd->notations); | 
					
						
							|  |  |  |     free_shortrefs(dtd->shortrefs); | 
					
						
							|  |  |  |     free_elements(dtd->elements); | 
					
						
							|  |  |  |     free_symbol_table(dtd->symbols); | 
					
						
							|  |  |  |     sgml_free(dtd->charfunc); | 
					
						
							|  |  |  |     sgml_free(dtd->charclass); | 
					
						
							|  |  |  |     dtd->magic = 0; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     sgml_free(dtd); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const wchar_t *xml_entities[] = | 
					
						
							|  |  |  | { L"lt CDATA \"<\"",		/* < */ | 
					
						
							|  |  |  |   L"gt CDATA \">\"",		/* > */ | 
					
						
							|  |  |  |   L"amp CDATA \"&\"",		/* & */ | 
					
						
							|  |  |  |   L"apos CDATA \"'\"",		/* ' */ | 
					
						
							|  |  |  |   L"quot CDATA \""\"",		/* " */ | 
					
						
							|  |  |  |   NULL | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | set_dialect_dtd(dtd *dtd, dtd_dialect dialect) | 
					
						
							|  |  |  | { if ( dtd->dialect != dialect ) | 
					
						
							|  |  |  |   { dtd->dialect = dialect; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch(dialect) | 
					
						
							|  |  |  |     { case DL_SGML: | 
					
						
							|  |  |  |       { dtd->case_sensitive = FALSE; | 
					
						
							|  |  |  | 	dtd->space_mode = SP_SGML; | 
					
						
							|  |  |  | 	dtd->shorttag = TRUE; | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       case DL_XML: | 
					
						
							|  |  |  |       case DL_XMLNS: | 
					
						
							|  |  |  |       { const ichar **el; | 
					
						
							|  |  |  | 	dtd_parser p; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	dtd->case_sensitive = TRUE; | 
					
						
							|  |  |  | 	dtd->encoding = SGML_ENC_UTF8; | 
					
						
							|  |  |  | 	dtd->space_mode = SP_PRESERVE; | 
					
						
							|  |  |  | 	dtd->shorttag = FALSE; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	memset(&p, 0, sizeof(p)); | 
					
						
							|  |  |  | 	p.dtd = dtd; | 
					
						
							|  |  |  | 	for(el = xml_entities; *el; el++) | 
					
						
							|  |  |  | 	  process_entity_declaration(&p, *el); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | set_option_dtd(dtd *dtd, dtd_option option, int set) | 
					
						
							|  |  |  | { switch(option) | 
					
						
							|  |  |  |   { case OPT_SHORTTAG: | 
					
						
							|  |  |  |       dtd->shorttag = set; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | baseurl(dtd_parser *p) | 
					
						
							|  |  |  | { if ( p->location.type == IN_FILE && p->location.name.file ) | 
					
						
							|  |  |  |   { return p->location.name.file; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | process_entity_value_declaration(dtd_parser *p, | 
					
						
							|  |  |  | 				 const ichar *decl, dtd_entity *e) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( e->type == ET_SYSTEM ) | 
					
						
							|  |  |  |   { if ( (s=itake_url(dtd, decl, &e->exturl)) ) | 
					
						
							|  |  |  |     { e->baseurl = istrdup(baseurl(p)); | 
					
						
							|  |  |  |       return s; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     goto string_expected; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { ichar *start; int len; | 
					
						
							|  |  |  |     ichar val[MAXSTRINGLEN]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !(s = itake_string(dtd, decl, &start, &len)) ) | 
					
						
							|  |  |  |       goto string_expected; | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expand_pentities(p, start, len, val, sizeof(val)/sizeof(ichar)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch ( e->type ) | 
					
						
							|  |  |  |     { case ET_PUBLIC: | 
					
						
							|  |  |  |       { e->extid = istrdup(val); | 
					
						
							|  |  |  | 	if ( isee_func(dtd, decl, CF_LIT) || | 
					
						
							|  |  |  | 	     isee_func(dtd, decl, CF_LITA) ) | 
					
						
							|  |  |  | 	{ if ( (s=itake_url(dtd, decl, &e->exturl)) ) | 
					
						
							|  |  |  | 	  { e->baseurl = istrdup(baseurl(p)); | 
					
						
							|  |  |  | 	    decl = s; | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return decl; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       case ET_LITERAL: | 
					
						
							|  |  |  |       { e->value = istrdup(val); | 
					
						
							|  |  |  | 	e->length = (int)wcslen(e->value); | 
					
						
							|  |  |  | 	return decl; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	assert(0); | 
					
						
							|  |  |  | 	return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | string_expected: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   gripe(p, ERC_SYNTAX_ERROR, L"String expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | The sgml-standard tells us to accept the  first definition of an entity, | 
					
						
							|  |  |  | silently suppressing any further attempt to redefine the entity. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_entity_declaration(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  |   dtd_symbol *id; | 
					
						
							|  |  |  |   dtd_entity *e; | 
					
						
							|  |  |  |   int isparam; | 
					
						
							|  |  |  |   int isdef = FALSE; | 
					
						
							|  |  |  | 					/* parameter entity */ | 
					
						
							|  |  |  |   if ( (s=isee_func(dtd, decl, CF_PERO)) ) | 
					
						
							|  |  |  |   { isparam = TRUE; | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |     isparam = FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(s = itake_entity_name(p, decl, &id)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   { if ( !(s = isee_identifier(dtd, decl, "#default")) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     id = dtd_add_symbol(dtd, (ichar*)"#DEFAULT"); | 
					
						
							|  |  |  |     isdef = TRUE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( isparam && find_pentity(dtd, id) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_REDEFINED, L"parameter entity", id); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return TRUE;			/* already defined parameter entity */ | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( id->entity ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_REDEFINED, L"entity", id); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return TRUE;			/* already defined normal entity */ | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   decl = iskip_layout(dtd, s); | 
					
						
							|  |  |  |   e = sgml_calloc(1, sizeof(*e)); | 
					
						
							|  |  |  |   e->name = id; | 
					
						
							|  |  |  |   e->catalog_location = (isparam ? CAT_PENTITY : CAT_ENTITY); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s = isee_identifier(dtd, decl, "system")) ) | 
					
						
							|  |  |  |   { e->type = ET_SYSTEM; | 
					
						
							|  |  |  |     e->content = EC_SGML; | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } else if ( (s = isee_identifier(dtd, decl, "public")) ) | 
					
						
							|  |  |  |   { e->type = ET_PUBLIC; | 
					
						
							|  |  |  |     e->content = EC_SGML; | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { e->type = ET_LITERAL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !isparam ) | 
					
						
							|  |  |  |     { if ( (s=isee_identifier(dtd, decl, "cdata")) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	e->content = EC_CDATA; | 
					
						
							|  |  |  |       } else if ( (s=isee_identifier(dtd, decl, "sdata")) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	e->content = EC_SDATA; | 
					
						
							|  |  |  |       } else if ( (s=isee_identifier(dtd, decl, "pi")) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	e->content = EC_PI; | 
					
						
							|  |  |  |       } else if ( (s=isee_identifier(dtd, decl, "starttag")) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	e->content = EC_STARTTAG; | 
					
						
							|  |  |  |       } else if ( (s=isee_identifier(dtd, decl, "endtag")) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	e->content = EC_ENDTAG; | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  | 	e->content = EC_SGML; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (decl=process_entity_value_declaration(p, decl, e)) ) | 
					
						
							|  |  |  |   { if ( e->type == ET_LITERAL ) | 
					
						
							|  |  |  |     { switch(e->content) | 
					
						
							|  |  |  |       { case EC_STARTTAG: | 
					
						
							|  |  |  | 	{ ichar *buf = sgml_malloc((e->length + 3)*sizeof(ichar)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  buf[0] = dtd->charfunc->func[CF_STAGO]; | 
					
						
							|  |  |  | 	  istrcpy(&buf[1], e->value); | 
					
						
							|  |  |  | 	  buf[++e->length] = dtd->charfunc->func[CF_STAGC]; | 
					
						
							|  |  |  | 	  buf[++e->length] = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  sgml_free(e->value); | 
					
						
							|  |  |  | 	  e->value = buf; | 
					
						
							|  |  |  | 	  e->content = EC_SGML; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	case EC_ENDTAG: | 
					
						
							|  |  |  | 	{ ichar *buf = sgml_malloc((e->length + 4)*sizeof(ichar)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  buf[0] = dtd->charfunc->func[CF_ETAGO1]; | 
					
						
							|  |  |  | 	  buf[1] = dtd->charfunc->func[CF_ETAGO2]; | 
					
						
							|  |  |  | 	  istrcpy(&buf[2], e->value); | 
					
						
							|  |  |  | 	  e->length++; | 
					
						
							|  |  |  | 	  buf[++e->length] = dtd->charfunc->func[CF_STAGC]; | 
					
						
							|  |  |  | 	  buf[++e->length] = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  sgml_free(e->value); | 
					
						
							|  |  |  | 	  e->value = buf; | 
					
						
							|  |  |  | 	  e->content = EC_SGML; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { if ( *decl ) | 
					
						
							|  |  |  |       { dtd_symbol *nname; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( (s=isee_identifier(dtd, decl, "cdata")) ) | 
					
						
							|  |  |  | 	{ decl = s; | 
					
						
							|  |  |  | 	  e->content = EC_CDATA; | 
					
						
							|  |  |  | 	} else if ( (s=isee_identifier(dtd, decl, "sdata")) ) | 
					
						
							|  |  |  | 	{ decl = s; | 
					
						
							|  |  |  | 	  e->content = EC_SDATA; | 
					
						
							|  |  |  | 	} else if ( (s=isee_identifier(dtd, decl, "ndata")) ) | 
					
						
							|  |  |  | 	{ decl = s; | 
					
						
							|  |  |  | 	  e->content = EC_NDATA; | 
					
						
							|  |  |  | 	} else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  return gripe(p, ERC_SYNTAX_ERROR, L"Bad datatype declaration", decl); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( (s=itake_name(p, decl, &nname)) ) /* what is this? */ | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	{ decl = s; | 
					
						
							|  |  |  | 	} else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  return gripe(p, ERC_SYNTAX_ERROR, L"Bad notation declaration", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( *decl ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( isparam ) | 
					
						
							|  |  |  |   { e->next = dtd->pentities; | 
					
						
							|  |  |  |     dtd->pentities = e; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { e->name->entity = e; | 
					
						
							|  |  |  |     e->next = dtd->entities; | 
					
						
							|  |  |  |     dtd->entities = e; | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   if ( isdef ) | 
					
						
							|  |  |  |     dtd->default_entity = e; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	      NOTATIONS		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_notation * | 
					
						
							|  |  |  | find_notation(dtd *dtd, dtd_symbol *name) | 
					
						
							|  |  |  | { dtd_notation *n; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(n=dtd->notations; n; n = n->next) | 
					
						
							|  |  |  |   { if ( n->name == name ) | 
					
						
							|  |  |  |       return n; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | add_notation(dtd *dtd, dtd_notation *not) | 
					
						
							|  |  |  | { dtd_notation **n = &dtd->notations; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; *n; n = &(*n)->next) | 
					
						
							|  |  |  |     ; | 
					
						
							|  |  |  |   *n = not; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_notation_declaration(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   dtd_symbol *nname; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  |   ichar *system = NULL, *public = NULL; | 
					
						
							|  |  |  |   dtd_notation *not; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(s=itake_name(p, decl, &nname)) ) | 
					
						
							|  |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"Notation name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( find_notation(dtd, nname) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_REDEFINED, L"notation", nname); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return TRUE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s=isee_identifier(dtd, decl, "system")) ) | 
					
						
							|  |  |  |   { ; | 
					
						
							|  |  |  |   } else if ( (s=isee_identifier(dtd, decl, "public")) ) | 
					
						
							|  |  |  |   { decl = s; | 
					
						
							|  |  |  |     if ( !(s=itake_dubbed_string(dtd, decl, &public)) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, L"Public identifier expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"SYSTEM or PUBLIC expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   decl = s; | 
					
						
							|  |  |  |   if ( (s=itake_dubbed_string(dtd, decl, &system)) ) | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( *decl ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   not = sgml_calloc(1, sizeof(*not)); | 
					
						
							|  |  |  |   not->name = nname; | 
					
						
							|  |  |  |   not->system = system; | 
					
						
							|  |  |  |   not->public = public; | 
					
						
							|  |  |  |   not->next = NULL; | 
					
						
							|  |  |  |   add_notation(dtd, not); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_notations(dtd_notation *n) | 
					
						
							|  |  |  | { dtd_notation *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; n; n=next) | 
					
						
							|  |  |  |   { next = n->next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sgml_free(n->system); | 
					
						
							|  |  |  |     sgml_free(n->public); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sgml_free(n); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	       SHORTREF		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_maps(dtd_map *map) | 
					
						
							|  |  |  | { dtd_map *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; map; map=next) | 
					
						
							|  |  |  |   { next = map->next; | 
					
						
							|  |  |  |     if ( map->from ) | 
					
						
							|  |  |  |       sgml_free(map->from); | 
					
						
							|  |  |  |     sgml_free(map); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_shortrefs(dtd_shortref *sr) | 
					
						
							|  |  |  | { dtd_shortref *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; sr; sr=next) | 
					
						
							|  |  |  |   { next = sr->next; | 
					
						
							|  |  |  |     free_maps(sr->map); | 
					
						
							|  |  |  |     sgml_free(sr); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | shortref_add_map(dtd_parser *p, const ichar *decl, dtd_shortref *sr) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { ichar *start; int len; | 
					
						
							|  |  |  |   ichar from[MAXMAPLEN]; | 
					
						
							|  |  |  |   ichar *f = from; | 
					
						
							|  |  |  |   dtd_symbol *to; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  |   const ichar *end; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   dtd_map **prev; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   dtd_map *m; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !(s=itake_string(dtd, decl, &start, &len)) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_SYNTAX_ERROR, L"map-string expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   decl = s; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(s=itake_entity_name(p, decl, &to)) ) | 
					
						
							|  |  |  |   { gripe(p, ERC_SYNTAX_ERROR, L"map-to name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   end = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(decl=start; len > 0;) | 
					
						
							|  |  |  |   { if ( *decl == 'B' )		/* blank */ | 
					
						
							|  |  |  |     { if ( decl[1] == 'B' ) | 
					
						
							|  |  |  |       { *f++ = CHR_DBLANK; | 
					
						
							|  |  |  | 	decl += 2; | 
					
						
							|  |  |  | 	len -= 2; | 
					
						
							|  |  |  |         continue; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       *f++ = CHR_BLANK; | 
					
						
							|  |  |  |       decl++; | 
					
						
							|  |  |  |       len--; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { *f++ = *decl++;			/* any other character */ | 
					
						
							|  |  |  |       len--; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   *f = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   for(prev=&sr->map; *prev; prev = &(*prev)->next) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     ; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   m = sgml_calloc(1, sizeof(*m)); | 
					
						
							|  |  |  |   m->from = istrdup(from); | 
					
						
							|  |  |  |   m->len  = (int)istrlen(from); | 
					
						
							|  |  |  |   m->to   = to; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   *prev = m; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   return end; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_shortref * | 
					
						
							|  |  |  | def_shortref(dtd_parser *p, dtd_symbol *name) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   dtd_shortref *sr, **pr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(pr=&dtd->shortrefs; *pr; pr = &(*pr)->next) | 
					
						
							|  |  |  |   { dtd_shortref *r = *pr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( r->name == name ) | 
					
						
							|  |  |  |       return r; | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   sr = sgml_calloc(1, sizeof(*sr)); | 
					
						
							|  |  |  |   sr->name = name; | 
					
						
							|  |  |  |   *pr = sr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return sr; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Create an array with TRUE in any character   that can be the last of the | 
					
						
							|  |  |  | shortref map. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | compile_map(dtd *dtd, dtd_shortref *sr) | 
					
						
							|  |  |  | { dtd_map *map; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(map = sr->map; map; map = map->next) | 
					
						
							|  |  |  |   { ichar last = map->from[map->len-1]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch( last ) | 
					
						
							|  |  |  |     { case CHR_BLANK: | 
					
						
							|  |  |  |       case CHR_DBLANK: | 
					
						
							|  |  |  |       { wint_t i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for( i=0; i< SHORTMAP_SIZE; i++) | 
					
						
							|  |  |  | 	{ if ( HasClass(dtd, i, CH_BLANK) ) | 
					
						
							|  |  |  | 	    sr->ends[i] = TRUE; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	sr->ends[last] = TRUE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_shortref_declaration(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   ichar buf[MAXDECL]; | 
					
						
							|  |  |  |   dtd_shortref *sr; | 
					
						
							|  |  |  |   dtd_symbol *name; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) ) | 
					
						
							|  |  |  |     return FALSE; | 
					
						
							|  |  |  |   decl = buf; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(s=itake_name(p, decl, &name)) ) | 
					
						
							|  |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   sr = def_shortref(p, name); | 
					
						
							|  |  |  |   if ( sr->defined ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(p, ERC_REDEFINED, L"shortref", name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   sr->defined = TRUE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   while( *(decl = iskip_layout(dtd, decl)) != '\0' | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	 && (s=shortref_add_map(p, decl, sr)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     decl = s; | 
					
						
							|  |  |  |   compile_map(dtd, sr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( *decl ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"Map expected", decl); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Find named name.  The name NULL stands for the #empty map | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_shortref * | 
					
						
							|  |  |  | find_map(dtd *dtd, dtd_symbol *name) | 
					
						
							|  |  |  | { dtd_shortref *sr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !name ) | 
					
						
							|  |  |  |   { static dtd_shortref *empty; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !empty ) | 
					
						
							|  |  |  |     { empty = sgml_calloc(1, sizeof(*empty)); | 
					
						
							|  |  |  |       empty->name = dtd_add_symbol(dtd, (ichar*)"#EMPTY"); | 
					
						
							|  |  |  |       empty->defined = TRUE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return empty; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( sr = dtd->shortrefs; sr; sr = sr->next ) | 
					
						
							|  |  |  |   { if ( sr->name == name ) | 
					
						
							|  |  |  |     { if ( !sr->defined ) | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return sr; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | set_map_element(dtd_element *e, void *closure) | 
					
						
							|  |  |  | { e->map = closure; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_usemap_declaration(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   ichar buf[MAXDECL]; | 
					
						
							|  |  |  |   dtd_symbol *name; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  |   dtd_symbol *ename; | 
					
						
							|  |  |  |   dtd_element *e; | 
					
						
							|  |  |  |   dtd_shortref *map; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) ) | 
					
						
							|  |  |  |     return FALSE; | 
					
						
							|  |  |  |   decl = buf; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(s=itake_name(p, decl, &name)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   { if ( (s=isee_identifier(dtd, decl, "#empty")) ) | 
					
						
							|  |  |  |       name = NULL; | 
					
						
							|  |  |  |     else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, L"map-name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   decl = s; | 
					
						
							|  |  |  |   if ( !(map = find_map(dtd, name)) ) | 
					
						
							|  |  |  |     map = def_shortref(p, name);	/* make undefined map */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( isee_func(dtd, decl, CF_GRPO) )	/* ( */ | 
					
						
							|  |  |  |   { dtd_model *model; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     if ( (model = make_model(p, decl, &s)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     { for_elements_in_model(model, set_map_element, map); | 
					
						
							|  |  |  |       free_model(model); | 
					
						
							|  |  |  |       decl = s; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |       return FALSE; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   } else if ( (s=itake_name(p, decl, &ename)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   { e = find_element(dtd, ename); | 
					
						
							|  |  |  |     e->map = map; | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } else if ( p->environments ) | 
					
						
							|  |  |  |   { if ( !map->defined ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_EXISTENCE, L"map", name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     p->environments->map = map; | 
					
						
							|  |  |  |     p->map = p->environments->map; | 
					
						
							|  |  |  |   } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"element-name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   if ( *decl ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"Unparsed", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | match_map(dtd *dtd, dtd_map *map, ocharbuf *buf) | 
					
						
							|  |  |  | { wchar_t *data = buf->data.w; | 
					
						
							|  |  |  |   wchar_t *e    = data+buf->size-1; | 
					
						
							|  |  |  |   ichar *m      = map->from+map->len-1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   while( m >= map->from ) | 
					
						
							|  |  |  |   { if ( e < data ) | 
					
						
							|  |  |  |       return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( *m == *e ) | 
					
						
							|  |  |  |     { m--; | 
					
						
							|  |  |  |       e--; | 
					
						
							|  |  |  |       continue; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if ( *m == CHR_DBLANK ) | 
					
						
							|  |  |  |     { if ( e>data && HasClass(dtd, *e, CH_WHITE) ) | 
					
						
							|  |  |  | 	e--; | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  | 	return FALSE; | 
					
						
							|  |  |  |       goto wblank; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if ( *m == CHR_BLANK ) | 
					
						
							|  |  |  |     { wblank: | 
					
						
							|  |  |  |       while( e>data && HasClass(dtd, *e, CH_WHITE) ) | 
					
						
							|  |  |  | 	e--; | 
					
						
							|  |  |  |       m--; | 
					
						
							|  |  |  |       continue; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return (int)(data+buf->size-1-e); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | match_shortref(dtd_parser *p) | 
					
						
							|  |  |  | { dtd_map *map; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(map = p->map->map; map; map = map->next) | 
					
						
							|  |  |  |   { int len; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( (len=match_map(p->dtd, map, p->cdata)) ) | 
					
						
							|  |  |  |     { p->cdata->size -= len; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( p->cdata_must_be_empty ) | 
					
						
							|  |  |  |       { int blank = TRUE; | 
					
						
							|  |  |  | 	const wchar_t *s; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for(s = p->cdata->data.w, i=0; i++ < p->cdata->size; s++) | 
					
						
							|  |  |  | 	{ if ( !iswspace(*s) ) | 
					
						
							|  |  |  | 	  { blank = FALSE; | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	p->blank_cdata = blank; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       WITH_CLASS(p, EV_SHORTREF, | 
					
						
							|  |  |  | 		 { sgml_cplocation(&p->startloc, &p->location); | 
					
						
							|  |  |  | 		   p->startloc.charpos -= len; | 
					
						
							|  |  |  | 		   p->startloc.linepos -= len; | 
					
						
							|  |  |  | 		   if ( p->startloc.linepos < 0 ) | 
					
						
							|  |  |  | 		   { p->startloc.line--; | 
					
						
							|  |  |  | 		     p->startloc.linepos = 0; /* not correct! */ | 
					
						
							|  |  |  | 		   } | 
					
						
							|  |  |  | 		   DEBUG(printf("%d-%d: Matched map '%s' --> %s, len = %d\n", | 
					
						
							|  |  |  | 				p->startloc.charpos, | 
					
						
							|  |  |  | 				p->location.charpos, | 
					
						
							|  |  |  | 				map->from, map->to->name, len)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		   process_entity(p, map->to->name); | 
					
						
							|  |  |  | 		 })			/* TBD: optimise */ | 
					
						
							|  |  |  |       return TRUE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return FALSE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	       ELEMENTS		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | add_submodel(dtd_model *m, dtd_model *sub) | 
					
						
							|  |  |  | { dtd_model **d; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( d = &m->content.group; *d; d = &(*d)->next ) | 
					
						
							|  |  |  |     ; | 
					
						
							|  |  |  |   *d = sub; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* for_elements_in_model()
 | 
					
						
							|  |  |  |    Walk along the model, calling f(e, closure) for any element found | 
					
						
							|  |  |  |    in the model.  Used for <!SHORTREF name model> | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | for_elements_in_model(dtd_model *m, | 
					
						
							|  |  |  | 		      void (*f)(dtd_element *e, void *closure), | 
					
						
							|  |  |  | 		      void *closure) | 
					
						
							|  |  |  | { switch(m->type) | 
					
						
							|  |  |  |   { case MT_SEQ: | 
					
						
							|  |  |  |     case MT_AND: | 
					
						
							|  |  |  |     case MT_OR: | 
					
						
							|  |  |  |     { dtd_model *sub = m->content.group; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for(; sub; sub = sub->next) | 
					
						
							|  |  |  | 	for_elements_in_model(sub, f, closure); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case MT_ELEMENT: | 
					
						
							|  |  |  |       (*f)(m->content.element, closure); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       ; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_model(dtd_model *m) | 
					
						
							|  |  |  | { switch(m->type) | 
					
						
							|  |  |  |   { case MT_SEQ: | 
					
						
							|  |  |  |     case MT_AND: | 
					
						
							|  |  |  |     case MT_OR: | 
					
						
							|  |  |  |     { dtd_model *sub = m->content.group; | 
					
						
							|  |  |  |       dtd_model *next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for(; sub; sub = next) | 
					
						
							|  |  |  |       { next = sub->next; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	free_model(sub); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       ; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   sgml_free(m); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_model * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | make_model(dtd_parser *p, const ichar *decl, const ichar **end) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { const ichar *s; | 
					
						
							|  |  |  |   dtd_model *m = sgml_calloc(1, sizeof(*m)); | 
					
						
							|  |  |  |   dtd_symbol *id; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   decl = iskip_layout(dtd, decl); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s=isee_identifier(dtd, decl, "#pcdata")) ) | 
					
						
							|  |  |  |   { m->type = MT_PCDATA; | 
					
						
							|  |  |  |     m->cardinality = MC_ONE;		/* actually don't care */ | 
					
						
							|  |  |  |     *end = s; | 
					
						
							|  |  |  |     return m; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( (s=itake_name(p, decl, &id)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   { m->type = MT_ELEMENT; | 
					
						
							|  |  |  |     m->content.element = find_element(dtd, id); | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { if ( !(s=isee_func(dtd, decl, CF_GRPO)) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { gripe(p, ERC_SYNTAX_ERROR, L"Name group expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       free_model(m); | 
					
						
							|  |  |  |       return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(;;) | 
					
						
							|  |  |  |     { dtd_model *sub; | 
					
						
							|  |  |  |       modeltype mt; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       if ( !(sub = make_model(p, decl, &s)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       { free_model(sub); | 
					
						
							|  |  |  | 	return NULL; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       decl = s; | 
					
						
							|  |  |  |       add_submodel(m, sub); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       if ( (s = isee_func(dtd, decl, CF_OR)) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	mt = MT_OR; | 
					
						
							|  |  |  |       } else if ( (s = isee_func(dtd, decl, CF_SEQ)) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	mt = MT_SEQ; | 
					
						
							|  |  |  |       } else if ( (s = isee_func(dtd, decl, CF_AND)) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	mt = MT_AND; | 
					
						
							|  |  |  |       } else if ( (s = isee_func(dtd, decl, CF_GRPC)) ) | 
					
						
							|  |  |  |       { decl = s; | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       { gripe(p, ERC_SYNTAX_ERROR, L"Connector ('|', ',' or '&') expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	free_model(m); | 
					
						
							|  |  |  | 	return NULL; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       decl = iskip_layout(dtd, decl); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( m->type != mt ) | 
					
						
							|  |  |  |       { if ( !m->type ) | 
					
						
							|  |  |  | 	  m->type = mt; | 
					
						
							|  |  |  | 	else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ gripe(p, ERC_SYNTAX_ERROR, L"Different connector types in model", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  free_model(m); | 
					
						
							|  |  |  | 	  return NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s = isee_func(dtd, decl, CF_OPT)) ) | 
					
						
							|  |  |  |   { decl = s; | 
					
						
							|  |  |  |     m->cardinality = MC_OPT; | 
					
						
							|  |  |  |   } else if ( (s=isee_func(dtd, decl, CF_REP)) ) | 
					
						
							|  |  |  |   { decl = s; | 
					
						
							|  |  |  |     m->cardinality = MC_REP; | 
					
						
							|  |  |  |   } else if ( (s=isee_func(dtd, decl, CF_PLUS)) ) | 
					
						
							|  |  |  |   {					/* ROK: watch out for (x) +(y) */ | 
					
						
							|  |  |  |     if ( isee_func(dtd, iskip_layout(dtd, s), CF_GRPO) == NULL ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       m->cardinality = MC_PLUS; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |     m->cardinality = MC_ONE; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   if ( m->type == MT_UNDEF )		/* simplify (e+), etc. */ | 
					
						
							|  |  |  |   { dtd_model *sub = m->content.group; | 
					
						
							|  |  |  |     modelcard card; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert(!sub->next); | 
					
						
							|  |  |  |     if ( sub->cardinality == MC_ONE ) | 
					
						
							|  |  |  |       card = m->cardinality; | 
					
						
							|  |  |  |     else if ( m->cardinality == MC_ONE ) | 
					
						
							|  |  |  |       card = sub->cardinality; | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |     { m->type = MT_OR; | 
					
						
							|  |  |  |       goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     *m = *sub; | 
					
						
							|  |  |  |     m->cardinality = card; | 
					
						
							|  |  |  |     sgml_free(sub); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | out: | 
					
						
							|  |  |  |   *end = iskip_layout(dtd, decl); | 
					
						
							|  |  |  |   return m; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | process_model(dtd_parser *p, dtd_edef *e, const ichar *decl) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { const ichar *s; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   decl = iskip_layout(dtd, decl); | 
					
						
							|  |  |  |   if ( (s = isee_identifier(dtd, decl, "empty")) ) | 
					
						
							|  |  |  |   { e->type = C_EMPTY; | 
					
						
							|  |  |  |     return s; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( (s = isee_identifier(dtd, decl, "cdata")) ) | 
					
						
							|  |  |  |   { e->type = C_CDATA; | 
					
						
							|  |  |  |     return s; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( (s = isee_identifier(dtd, decl, "rcdata")) ) | 
					
						
							|  |  |  |   { e->type = C_RCDATA; | 
					
						
							|  |  |  |     return s; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( (s = isee_identifier(dtd, decl, "any")) ) | 
					
						
							|  |  |  |   { e->type = C_ANY; | 
					
						
							|  |  |  |     return s; | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   e->type = C_PCDATA; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(e->content = make_model(p, decl, &decl)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return decl; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | See a name-group separator.  As long as we haven't decided, this can be | 
					
						
							|  |  |  | CF_NG.  If we have decided they must all be the same. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep) | 
					
						
							|  |  |  | { const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s=isee_func(dtd, decl, *sep)) ) | 
					
						
							|  |  |  |     return iskip_layout(dtd, s); | 
					
						
							|  |  |  |   if ( *sep == CF_NG )			/* undecided */ | 
					
						
							|  |  |  |   { static const charfunc ng[] = { CF_SEQ, CF_OR, CF_AND }; | 
					
						
							|  |  |  |     int n; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(n=0; n<3; n++) | 
					
						
							|  |  |  |     { if ( (s=isee_func(dtd, decl, ng[n])) ) | 
					
						
							|  |  |  |       { *sep = ng[n]; | 
					
						
							|  |  |  |         return iskip_layout(dtd, s); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_namegroup(dtd_parser *p, const ichar *decl, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		dtd_symbol **names, int *n) | 
					
						
							|  |  |  | { const ichar *s; | 
					
						
							|  |  |  |   int en = 0; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   if ( (s=isee_func(dtd, decl, CF_GRPO)) ) | 
					
						
							|  |  |  |   { charfunc ngs = CF_NG; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(;;) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { if ( !(decl=itake_name(p, s, &names[en++])) ) | 
					
						
							|  |  |  |       { gripe(p, ERC_SYNTAX_ERROR, L"Name expected", s); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	return NULL; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       if ( (s=isee_ngsep(dtd, decl, &ngs)) ) | 
					
						
							|  |  |  |       { decl = iskip_layout(dtd, s); | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       if ( (s=isee_func(dtd, decl, CF_GRPC)) ) | 
					
						
							|  |  |  |       { *n = en; | 
					
						
							|  |  |  |         decl = s; | 
					
						
							|  |  |  | 	return iskip_layout(dtd, decl); | 
					
						
							|  |  |  |       } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_SYNTAX_ERROR, L"Bad name-group", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
/* Accumulator handed as closure to add_list_element() */
typedef struct
{ dtd_symbol **list;			/* array receiving the names */
  int size;				/* number of names stored so far */
} namelist;
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | add_list_element(dtd_element *e, void *closure) | 
					
						
							|  |  |  | { namelist *nl = closure; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   nl->list[nl->size++] = e->name; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | itake_el_or_model_element_list(dtd_parser *p, | 
					
						
							|  |  |  | 			       const ichar *decl, dtd_symbol **names, int *n) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { const ichar *s; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   dtd *dtd = p->dtd; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   if ( isee_func(dtd, decl, CF_GRPO) ) | 
					
						
							|  |  |  |   { dtd_model *model; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     if ( (model = make_model(p, decl, &s)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     { namelist nl; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       nl.list = names; | 
					
						
							|  |  |  |       nl.size = 0; | 
					
						
							|  |  |  |       for_elements_in_model(model, add_list_element, &nl); | 
					
						
							|  |  |  |       free_model(model); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       *n = nl.size; | 
					
						
							|  |  |  |       return s; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |       return NULL; | 
					
						
							|  |  |  |   } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { if ( !(s = itake_name(p, decl, &names[0])) ) | 
					
						
							|  |  |  |     { gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     *n = 1; | 
					
						
							|  |  |  |     return s; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | add_element_list(dtd_element_list **l, dtd_element *e) | 
					
						
							|  |  |  | { dtd_element_list *n = sgml_calloc(1, sizeof(*n)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   n->value = e; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; *l; l = &(*l)->next ) | 
					
						
							|  |  |  |     ; | 
					
						
							|  |  |  |   *l = n; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_element_declaraction(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   ichar buf[MAXDECL]; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  |   dtd_symbol *eid[MAXATTELEM]; | 
					
						
							|  |  |  |   dtd_edef *def; | 
					
						
							|  |  |  |   int en; | 
					
						
							|  |  |  |   int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* expand parameter entities */ | 
					
						
							|  |  |  |   if ( !expand_pentities(p, decl, ZERO_TERM_LEN, | 
					
						
							|  |  |  | 			 buf, sizeof(buf)/sizeof(ichar)) ) | 
					
						
							|  |  |  |     return FALSE; | 
					
						
							|  |  |  |   decl = buf; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(s=itake_el_or_model_element_list(p, decl, eid, &en)) ) | 
					
						
							|  |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"Name or name-group expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   decl = s; | 
					
						
							|  |  |  |   if ( en == 0 ) | 
					
						
							|  |  |  |     return TRUE;			/* 0 elements */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   STAT(edefs_decl++); | 
					
						
							|  |  |  |   def = new_element_definition(dtd); | 
					
						
							|  |  |  |   for(i=0; i<en; i++) | 
					
						
							|  |  |  |   { find_element(dtd, eid[i]); | 
					
						
							|  |  |  |     assert(eid[i]->element->structure == NULL); | 
					
						
							|  |  |  |     eid[i]->element->structure = def; | 
					
						
							|  |  |  |     eid[i]->element->undefined = FALSE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   def->references = en;			/* for GC */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* omitted tag declarations (opt) */ | 
					
						
							|  |  |  |   if ( (s = isee_identifier(dtd, decl, "-")) ) | 
					
						
							|  |  |  |   { def->omit_close = FALSE; | 
					
						
							|  |  |  |     goto seeclose; | 
					
						
							|  |  |  |   } else if ( (s = isee_identifier(dtd, decl, "o")) ) | 
					
						
							|  |  |  |   { def->omit_open = TRUE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   seeclose: | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |     if ( (s = isee_identifier(dtd, decl, "-")) ) | 
					
						
							|  |  |  |     { def->omit_close = FALSE; | 
					
						
							|  |  |  |     } else if ( (s = isee_identifier(dtd, decl, "o")) ) | 
					
						
							|  |  |  |     { for(i=0; i<en; i++) | 
					
						
							|  |  |  | 	def->omit_close = TRUE; | 
					
						
							|  |  |  |     } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, L"Bad omit-tag declaration", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 					/* content model */ | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(decl=process_model(p, def, decl)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* in/excluded elements */ | 
					
						
							|  |  |  |   if ( decl[0] == '-' || decl[0] == '+' ) | 
					
						
							|  |  |  |   { dtd_symbol *ng[MAXNAMEGROUP]; | 
					
						
							|  |  |  |     int ns; | 
					
						
							|  |  |  |     dtd_element_list **l; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     if ( decl[0] == '-' ) | 
					
						
							|  |  |  |       l = &def->excluded; | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       l = &def->included; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     decl++; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     if ( (s=itake_namegroup(p, decl, ng, &ns)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     { int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for(i=0; i<ns; i++) | 
					
						
							|  |  |  | 	add_element_list(l, find_element(dtd, ng[i])); | 
					
						
							|  |  |  |     } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { return gripe(p, ERC_SYNTAX_ERROR, L"Name group expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (*decl) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaration", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | add_name_list(dtd_name_list **nl, dtd_symbol *s) | 
					
						
							|  |  |  | { dtd_name_list *n = sgml_calloc(1, sizeof(*n)); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   n->value = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; *nl; nl = &(*nl)->next ) | 
					
						
							|  |  |  |     ; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *nl = n; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | set_element_properties(dtd_element *e, dtd_attr *a) | 
					
						
							|  |  |  | { if ( istreq(a->name->name, L"xml:space") ) | 
					
						
							|  |  |  |   { switch(a->def) | 
					
						
							|  |  |  |     { case AT_FIXED: | 
					
						
							|  |  |  |       case AT_DEFAULT: | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch (a->type ) | 
					
						
							|  |  |  |     { case AT_NAMEOF: | 
					
						
							|  |  |  |       case AT_NAME: | 
					
						
							|  |  |  |       case AT_NMTOKEN: | 
					
						
							|  |  |  | 	e->space_mode = istr_to_space_mode(a->att_def.name->name); | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       case AT_CDATA: | 
					
						
							|  |  |  | 	e->space_mode = istr_to_space_mode((ichar *)a->att_def.cdata); | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | add_attribute(dtd_parser *p, dtd_element *e, dtd_attr *a) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { dtd_attr_list **l; | 
					
						
							|  |  |  |   dtd_attr_list *n; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(l = &e->attributes; *l; l = &(*l)->next) | 
					
						
							|  |  |  |   { if ( (*l)->attribute->name == a->name ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { gripe(p, ERC_REDEFINED, L"attribute", a->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       a->references++;			/* attempt to redefine attribute: */ | 
					
						
							|  |  |  |       free_attribute(a);		/* first wins according to standard */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   n = sgml_calloc(1, sizeof(*n)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   n->attribute = a; | 
					
						
							|  |  |  |   a->references++; | 
					
						
							|  |  |  |   *l = n; | 
					
						
							|  |  |  |   set_element_properties(e, a); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_attlist_declaraction(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   dtd_symbol *eid[MAXATTELEM]; | 
					
						
							|  |  |  |   int i, en; | 
					
						
							|  |  |  |   ichar buf[MAXDECL]; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* expand parameter entities */ | 
					
						
							|  |  |  |   if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) ) | 
					
						
							|  |  |  |     return FALSE; | 
					
						
							|  |  |  |   decl = iskip_layout(dtd, buf); | 
					
						
							|  |  |  |   DEBUG(printf("Expanded to %s\n", decl)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(decl=itake_el_or_model_element_list(p, decl, eid, &en)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* fetch attributes */ | 
					
						
							|  |  |  |   while(*decl) | 
					
						
							|  |  |  |   { dtd_attr *at = sgml_calloc(1, sizeof(*at)); | 
					
						
							|  |  |  |     at->references = REFS_VIRGIN; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* name of attribute */ | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     if ( !(s = itake_name(p, decl, &at->name)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     { free_attribute(at); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* (name1|name2|...) type */ | 
					
						
							|  |  |  |     if ( (s=isee_func(dtd, decl, CF_GRPO)) ) | 
					
						
							|  |  |  |     { charfunc ngs = CF_NG; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       at->type = AT_NAMEOF; | 
					
						
							|  |  |  |       decl=s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for(;;) | 
					
						
							|  |  |  |       { dtd_symbol *nm; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	if ( !(s = itake_nmtoken(p, decl, &nm)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	{ free_attribute(at); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	decl = s; | 
					
						
							|  |  |  | 	add_name_list(&at->typeex.nameof, nm); | 
					
						
							|  |  |  | 	if ( (s=isee_ngsep(dtd, decl, &ngs)) ) | 
					
						
							|  |  |  | 	{ decl = s; | 
					
						
							|  |  |  | 	  continue; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if ( (s = isee_func(dtd, decl, CF_GRPC)) ) | 
					
						
							|  |  |  | 	{ decl=s; | 
					
						
							|  |  |  | 	  decl = iskip_layout(dtd, decl); | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	free_attribute(at); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return gripe(p, ERC_SYNTAX_ERROR, L"Illegal name-group", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "cdata")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_CDATA; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "entity")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_ENTITY; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "entities")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_ENTITIES; | 
					
						
							|  |  |  |       at->islist = TRUE; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "id")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_ID; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "idref")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_IDREF; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "idrefs")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_IDREFS; | 
					
						
							|  |  |  |       at->islist = TRUE; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "name")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_NAME; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "names")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_NAMES; | 
					
						
							|  |  |  |       at->islist = TRUE; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "nmtoken")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_NMTOKEN; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "nmtokens")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_NMTOKENS; | 
					
						
							|  |  |  |       at->islist = TRUE; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "number")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_NUMBER; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "numbers")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_NUMBERS; | 
					
						
							|  |  |  |       at->islist = TRUE; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "nutoken")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_NUTOKEN; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "nutokens")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->type = AT_NUTOKENS; | 
					
						
							|  |  |  |       at->islist = TRUE; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "notation")) ) | 
					
						
							|  |  |  |     { dtd_symbol *ng[MAXNAMEGROUP]; | 
					
						
							|  |  |  |       int ns; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       at->type = AT_NOTATION; | 
					
						
							|  |  |  |       decl=s; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       if ( (s=itake_namegroup(p, decl, ng, &ns)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       { decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for(i=0; i<ns; i++) | 
					
						
							|  |  |  | 	  add_name_list(&at->typeex.nameof, ng[i]); | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { free_attribute(at); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return gripe(p, ERC_SYNTAX_ERROR, L"name-group expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { free_attribute(at); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, L"Attribute-type expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* Attribute Defaults */ | 
					
						
							|  |  |  |     if ( (s=isee_identifier(dtd, decl, "#fixed")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->def = AT_FIXED; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "#required")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->def = AT_REQUIRED; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "#current")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->def = AT_CURRENT; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "#conref")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->def = AT_CONREF; | 
					
						
							|  |  |  |     } else if ( (s=isee_identifier(dtd, decl, "#implied")) ) | 
					
						
							|  |  |  |     { decl = s; | 
					
						
							|  |  |  |       at->def = AT_IMPLIED; | 
					
						
							|  |  |  |     } else				/* real default */ | 
					
						
							|  |  |  |       at->def = AT_DEFAULT; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( at->def == AT_DEFAULT || at->def == AT_FIXED ) | 
					
						
							|  |  |  |     { ichar buf[MAXSTRINGLEN]; | 
					
						
							|  |  |  |       ichar *start; int len; | 
					
						
							|  |  |  |       const ichar *end; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       if ( !(end=itake_string(dtd, decl, &start, &len)) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       { end=itake_nmtoken_chars(p, decl, buf, sizeof(buf)/sizeof(ichar)); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	start = buf; | 
					
						
							|  |  |  | 	len = (int)istrlen(buf); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       if ( !end ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute default", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Note: itake_name(), etc. work on nul-terminated   strings. The result of | 
					
						
							|  |  |  | itake_string() is a  pointer  in  a   nul-terminated  string  and  these | 
					
						
							|  |  |  | functions will stop scanning at the  quote   anyway,  so  we can use the | 
					
						
							|  |  |  | length of the parsed data to verify we parsed all of it. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       switch(at->type) | 
					
						
							|  |  |  |       { case AT_CDATA: | 
					
						
							|  |  |  | 	{ at->att_def.cdata = istrndup(start, len); | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	case AT_ENTITY: | 
					
						
							|  |  |  | 	case AT_NOTATION: | 
					
						
							|  |  |  | 	case AT_NAME: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ if ( !(s=itake_name(p, start, &at->att_def.name)) || | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	       (s-start) != len ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	    return gripe(p, ERC_DOMAIN, L"name", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	case AT_NMTOKEN: | 
					
						
							|  |  |  | 	case AT_NAMEOF: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ if ( !(s=itake_nmtoken(p, start, &at->att_def.name)) || | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	       (s-start) != len ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	    return gripe(p, ERC_DOMAIN, L"nmtoken", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	case AT_NUTOKEN: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ if ( !(s=itake_nutoken(p, start, &at->att_def.name)) || | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	       (s-start) != len ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	    return gripe(p, ERC_DOMAIN, L"nutoken", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	case AT_NUMBER: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ if ( !(s=itake_number(p, start, at)) || | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	       (s-start) != len ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	     return gripe(p, ERC_DOMAIN, L"number", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	case AT_NAMES: | 
					
						
							|  |  |  | 	case AT_ENTITIES: | 
					
						
							|  |  |  | 	case AT_IDREFS: | 
					
						
							|  |  |  | 	case AT_NMTOKENS: | 
					
						
							|  |  |  | 	case AT_NUMBERS: | 
					
						
							|  |  |  | 	case AT_NUTOKENS: | 
					
						
							|  |  |  | 	{ at->att_def.list = istrndup(buf, len); | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 	{ free_attribute(at); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  return gripe(p, ERC_REPRESENTATION, L"No default for type"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       decl = end; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* add to list */ | 
					
						
							|  |  |  |     at->references = 0; | 
					
						
							|  |  |  |     for(i=0; i<en; i++) | 
					
						
							|  |  |  |     { dtd_element *e = def_element(dtd, eid[i]); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       add_attribute(p, e, at); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *    GENERIC TAG PROCESSING	* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef enum | 
					
						
							|  |  |  | { IE_NORMAL, | 
					
						
							|  |  |  |   IE_INCLUDED,				/* is included */ | 
					
						
							|  |  |  |   IE_EXCLUDED				/* is excluded */ | 
					
						
							|  |  |  | } includetype; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static includetype | 
					
						
							|  |  |  | in_or_excluded(sgml_environment *env, dtd_element *e) | 
					
						
							|  |  |  | { for(; env; env=env->parent) | 
					
						
							|  |  |  |   { if ( env->element->structure ) | 
					
						
							|  |  |  |     { dtd_edef *def = env->element->structure; | 
					
						
							|  |  |  |       dtd_element_list *el; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for(el=def->excluded; el; el=el->next) | 
					
						
							|  |  |  |       { if ( el->value == e ) | 
					
						
							|  |  |  | 	  return IE_EXCLUDED; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       for(el=def->included; el; el=el->next) | 
					
						
							|  |  |  |       { if ( el->value == e ) | 
					
						
							|  |  |  | 	  return IE_INCLUDED; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return IE_NORMAL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | complete(sgml_environment *env) | 
					
						
							|  |  |  | { if ( env->element->structure && | 
					
						
							|  |  |  |        !env->element->undefined && | 
					
						
							|  |  |  |        env->element->structure->type != C_ANY ) | 
					
						
							|  |  |  |   { dtd_edef *def = env->element->structure; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !same_state(def->final_state, env->state) ) | 
					
						
							|  |  |  |       return FALSE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | validate_completeness(dtd_parser *p, sgml_environment *env) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { if ( !complete(env) ) | 
					
						
							|  |  |  |   { wchar_t buf[MAXNMLEN+50]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     swprintf(buf, MAXNMLEN+50, L"Incomplete element: <%s>", | 
					
						
							|  |  |  | 	     env->element->name->name); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     gripe(p, ERC_VALIDATE, buf);		/* TBD: expected */ | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static sgml_environment * | 
					
						
							|  |  |  | push_element(dtd_parser *p, dtd_element *e, int callback) | 
					
						
							|  |  |  | { if ( e != CDATA_ELEMENT ) | 
					
						
							|  |  |  |   { sgml_environment *env = sgml_calloc(1, sizeof(*env)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     emit_cdata(p, FALSE); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     env->element = e; | 
					
						
							|  |  |  |     env->state = make_state_engine(e); | 
					
						
							|  |  |  |     env->space_mode = (p->environments ? p->environments->space_mode | 
					
						
							|  |  |  | 				       : p->dtd->space_mode); | 
					
						
							|  |  |  |     env->parent = p->environments; | 
					
						
							|  |  |  |     p->environments = env; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->dtd->shorttag ) | 
					
						
							|  |  |  |     { env->saved_waiting_for_net = p->waiting_for_net; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( p->event_class == EV_SHORTTAG ) | 
					
						
							|  |  |  |       { p->waiting_for_net = TRUE; | 
					
						
							|  |  |  | 	env->wants_net = TRUE; | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { env->wants_net = FALSE; | 
					
						
							|  |  |  | 	if ( e->structure && e->structure->omit_close == FALSE ) | 
					
						
							|  |  |  | 	  p->waiting_for_net = FALSE; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( e->map ) | 
					
						
							|  |  |  |       p->map = env->map = e->map; | 
					
						
							|  |  |  |     else if ( env->parent ) | 
					
						
							|  |  |  |       p->map = env->map = env->parent->map; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     p->first = TRUE; | 
					
						
							|  |  |  |     if ( callback && p->on_begin_element ) | 
					
						
							|  |  |  |     { sgml_attribute atts[MAXATTRIBUTES]; | 
					
						
							|  |  |  |       int natts = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( !(p->flags & SGML_PARSER_NODEFS) ) | 
					
						
							|  |  |  | 	natts = add_default_attributes(p, e, natts, atts); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       (*p->on_begin_element)(p, e, natts, atts); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( e->structure ) | 
					
						
							|  |  |  |     { if ( e->structure->type == C_CDATA || | 
					
						
							|  |  |  | 	   e->structure->type == C_RCDATA ) | 
					
						
							|  |  |  |       { p->state = (e->structure->type == C_CDATA ? S_CDATA : S_RCDATA); | 
					
						
							|  |  |  | 	p->cdata_state = p->state; | 
					
						
							|  |  |  | 	p->etag = e->name->name; | 
					
						
							|  |  |  | 	p->etaglen = (int)istrlen(p->etag); | 
					
						
							|  |  |  | 	sgml_cplocation(&p->startcdata, &p->location); | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  | 	p->cdata_state = S_PCDATA; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return p->environments; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_environment(sgml_environment *env) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #ifdef XMLNS
 | 
					
						
							|  |  |  |   if ( env->xmlns ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     xmlns_free(env->xmlns); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   sgml_free(env); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Pop the stack,  closing  all  environment   uptil  `to'.  The  close was | 
					
						
							|  |  |  | initiated by pushing the element `e'. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0) | 
					
						
							|  |  |  | { sgml_environment *env, *parent; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   for(env = p->environments; env != to; env=parent) | 
					
						
							|  |  |  |   { dtd_element *e = env->element; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     validate_completeness(p, env); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     parent = env->parent; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     if ( e->structure && !e->structure->omit_close ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_OMITTED_CLOSE, e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if ( e0 != CDATA_ELEMENT ) | 
					
						
							|  |  |  |       emit_cdata(p, TRUE); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     p->first = FALSE; | 
					
						
							|  |  |  |     p->environments = env; | 
					
						
							|  |  |  |     if ( p->dtd->shorttag ) | 
					
						
							|  |  |  |       p->waiting_for_net = env->saved_waiting_for_net; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     WITH_CLASS(p, EV_OMITTED, | 
					
						
							|  |  |  | 	       if ( p->on_end_element ) | 
					
						
							|  |  |  | 	         (*p->on_end_element)(p, e)); | 
					
						
							|  |  |  |     free_environment(env); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   p->environments = to; | 
					
						
							|  |  |  |   p->map = to->map; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | allow_for(dtd_element *in, dtd_element *e) | 
					
						
							|  |  |  | { dtd_edef *def = in->structure; | 
					
						
							|  |  |  |   dtd_model *g; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( def->type == C_EMPTY ) | 
					
						
							|  |  |  |   { def->type = C_PCDATA; | 
					
						
							|  |  |  |     def->content = sgml_calloc(1, sizeof(*def->content)); | 
					
						
							|  |  |  |     def->content->type = MT_OR; | 
					
						
							|  |  |  |     def->content->cardinality = MC_REP; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   assert(def->content->type == MT_OR); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   g = def->content->content.group; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( e == CDATA_ELEMENT ) | 
					
						
							|  |  |  |   { dtd_model *m; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(; g; g = g->next) | 
					
						
							|  |  |  |     { if ( g->type == MT_PCDATA ) | 
					
						
							|  |  |  | 	return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     m = sgml_calloc(1, sizeof(*m)); | 
					
						
							|  |  |  |     m->type	   = MT_PCDATA; | 
					
						
							|  |  |  |     m->cardinality = MC_ONE;		/* ignored */ | 
					
						
							|  |  |  |     add_submodel(def->content, m); | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { dtd_model *m; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(; g; g = g->next) | 
					
						
							|  |  |  |     { if ( g->type == MT_ELEMENT && g->content.element == e ) | 
					
						
							|  |  |  | 	return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     m = sgml_calloc(1, sizeof(*m)); | 
					
						
							|  |  |  |     m->type	   = MT_ELEMENT; | 
					
						
							|  |  |  |     m->cardinality = MC_ONE;		/* ignored */ | 
					
						
							|  |  |  |     m->content.element = e; | 
					
						
							|  |  |  |     add_submodel(def->content, m); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | open_element(dtd_parser *p, dtd_element *e, int warn) | 
					
						
							|  |  |  | { if ( !p->environments && p->enforce_outer_element ) | 
					
						
							|  |  |  |   { dtd_element *f = p->enforce_outer_element->element; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( f && f != e ) | 
					
						
							|  |  |  |     { if ( !f->structure || | 
					
						
							|  |  |  | 	   !f->structure->omit_open ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_OMITTED_OPEN, f->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       WITH_CLASS(p, EV_OMITTED, | 
					
						
							|  |  |  | 		 { open_element(p, f, TRUE); | 
					
						
							|  |  |  | 		   if ( p->on_begin_element ) | 
					
						
							|  |  |  | 		   { sgml_attribute atts[MAXATTRIBUTES]; | 
					
						
							|  |  |  | 		     int natts = 0; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		     if ( !(p->flags & SGML_PARSER_NODEFS) ) | 
					
						
							|  |  |  | 		       natts = add_default_attributes(p, f, natts, atts); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		     (*p->on_begin_element)(p, f, natts, atts); | 
					
						
							|  |  |  | 		   } | 
					
						
							|  |  |  | 		 }); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* no DTD available yet */ | 
					
						
							|  |  |  |   if ( !p->environments && !p->dtd->doctype && e != CDATA_ELEMENT ) | 
					
						
							|  |  |  |   { const ichar *file; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     file = find_in_catalogue(CAT_DOCTYPE, e->name->name, NULL, NULL, | 
					
						
							|  |  |  | 			     p->dtd->dialect != DL_SGML); | 
					
						
							|  |  |  |     if ( file ) | 
					
						
							|  |  |  |     { dtd_parser *clone = clone_dtd_parser(p); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_NO_DOCTYPE, e->name->name, file); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       if ( load_dtd_from_file(clone, file) ) | 
					
						
							|  |  |  | 	p->dtd->doctype = istrdup(e->name->name); | 
					
						
							|  |  |  |       else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_EXISTENCE, L"file", file); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       free_dtd_parser(clone); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( p->environments ) | 
					
						
							|  |  |  |   { sgml_environment *env = p->environments; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( env->element->undefined ) | 
					
						
							|  |  |  |     { allow_for(env->element, e);	/* <!ELEMENT x - - (model) +(y)> */ | 
					
						
							|  |  |  |       push_element(p, e, FALSE); | 
					
						
							|  |  |  |       return TRUE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( env->element->structure && | 
					
						
							|  |  |  | 	 env->element->structure->type == C_ANY ) | 
					
						
							|  |  |  |     { if ( e != CDATA_ELEMENT && e->undefined ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_EXISTENCE, L"Element", e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       push_element(p, e, FALSE); | 
					
						
							|  |  |  |       return TRUE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch(in_or_excluded(env, e)) | 
					
						
							|  |  |  |     { case IE_INCLUDED: | 
					
						
							|  |  |  |         push_element(p, e, FALSE); | 
					
						
							|  |  |  | 	return TRUE; | 
					
						
							|  |  |  |       case IE_EXCLUDED: | 
					
						
							|  |  |  | 	if ( warn ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  gripe(p, ERC_NOT_ALLOWED, e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	/*FALLTHROUGH*/ | 
					
						
							|  |  |  |       case IE_NORMAL: | 
					
						
							|  |  |  | 	for(; env; env=env->parent) | 
					
						
							|  |  |  | 	{ dtd_state *new; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  if ( (new = make_dtd_transition(env->state, e)) ) | 
					
						
							|  |  |  | 	  { env->state = new; | 
					
						
							|  |  |  | 	    pop_to(p, env, e); | 
					
						
							|  |  |  | 	    push_element(p, e, FALSE); | 
					
						
							|  |  |  | 	    return TRUE; | 
					
						
							|  |  |  | 	  } else | 
					
						
							|  |  |  | 	  { dtd_element *oe[MAXOMITTED]; /* omitted open */ | 
					
						
							|  |  |  | 	    int olen; | 
					
						
							|  |  |  | 	    int i; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	    if ( (olen=find_omitted_path(env->state, e, oe)) > 0 ) | 
					
						
							|  |  |  | 	    { pop_to(p, env, e); | 
					
						
							|  |  |  | 	      WITH_CLASS(p, EV_OMITTED, | 
					
						
							|  |  |  | 	      for(i=0; i<olen; i++) | 
					
						
							|  |  |  | 	      { env->state = make_dtd_transition(env->state, oe[i]); | 
					
						
							|  |  |  | 		env = push_element(p, oe[i], TRUE); | 
					
						
							|  |  |  | 	      }) | 
					
						
							|  |  |  | 	      env->state = make_dtd_transition(env->state, e); | 
					
						
							|  |  |  | 	      push_element(p, e, FALSE); | 
					
						
							|  |  |  | 	      return TRUE; | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( !env->element->structure || | 
					
						
							|  |  |  | 	       !env->element->structure->omit_close ) | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( warn ) | 
					
						
							|  |  |  |     { if ( e == CDATA_ELEMENT ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_VALIDATE, L"#PCDATA not allowed here"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       else if ( e->undefined ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_EXISTENCE, L"Element", e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_NOT_ALLOWED, e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( warn ) | 
					
						
							|  |  |  |   { push_element(p, e, FALSE); | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |     return FALSE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | close_element(dtd_parser *p, dtd_element *e, int conref) | 
					
						
							|  |  |  | { sgml_environment *env; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(env = p->environments; env; env=env->parent) | 
					
						
							|  |  |  |   { if ( env->element == e )		/* element is open */ | 
					
						
							|  |  |  |     { sgml_environment *parent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for(env = p->environments; ; env=parent) | 
					
						
							|  |  |  |       {	dtd_element *ce	= env->element; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( !(conref && env == p->environments) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  validate_completeness(p, env); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	parent = env->parent; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	p->first = FALSE; | 
					
						
							|  |  |  | 	if ( p->on_end_element ) | 
					
						
							|  |  |  | 	  (*p->on_end_element)(p, env->element); | 
					
						
							|  |  |  | 	free_environment(env); | 
					
						
							|  |  |  | 	p->environments = parent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( ce == e )			/* closing current element */ | 
					
						
							|  |  |  | 	{ p->map = (parent ? parent->map : NULL); | 
					
						
							|  |  |  | 	  return TRUE; | 
					
						
							|  |  |  | 	} else				/* omited close */ | 
					
						
							|  |  |  | 	{ if ( ce->structure && !ce->structure->omit_close ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	    gripe(p, ERC_OMITTED_CLOSE, ce->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   return gripe(p, ERC_NOT_OPEN, e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | close_current_element(dtd_parser *p) | 
					
						
							|  |  |  | { if ( p->environments ) | 
					
						
							|  |  |  |   { dtd_element *e = p->environments->element; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     emit_cdata(p, TRUE); | 
					
						
							|  |  |  |     return close_element(p, e, FALSE); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   return gripe(p, ERC_SYNTAX_ERROR, L"No element to close", ""); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | get_attribute_value() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Get the value for an attribute.  Once   I  thought  this was simple, but | 
					
						
							|  |  |  | Richard O'Keefe pointed to the complex   handling of white-space in SGML | 
					
						
							|  |  |  | attributes. Basically, if the attribute is quoted, we need: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	* If CDATA, map all blank to space characters, then expand | 
					
						
							|  |  |  | 	  entities | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	* If !CDATA expand all entities, canonise white space by | 
					
						
							|  |  |  | 	  deleting leading and trailing space and squishing multiple | 
					
						
							|  |  |  | 	  space characters to a single (lower for us) case. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This almost, but not completely matches the XML definition. This however | 
					
						
							|  |  |  | is so complex we will ignore it for now. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | [Rewritten by Richard O'Keefe with these additional comments] | 
					
						
							|  |  |  | Reads a value, the  attribute  name   and  value  indicator  having been | 
					
						
							|  |  |  | processed already. It calls itake_string() to   read  quoted values, and | 
					
						
							|  |  |  | itake_unquoted() to read unquoted values. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | itake_string(dtd, in, buf, size) | 
					
						
							|  |  |  | 	- skips layout INCLUDING comments, | 
					
						
							|  |  |  | 	- returns NULL if the next character is not ' or ", | 
					
						
							|  |  |  | 	- copies characters from in to buf until a matching ' or " is found, | 
					
						
							|  |  |  | 	- adds a terminating \0, | 
					
						
							|  |  |  | 	- skips more layout INCLUDING comments, and | 
					
						
							|  |  |  | 	- returns the new input position. | 
					
						
							|  |  |  | It is quite wrong to skip leading comments here.  In the tag | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     <foo bar = --ugh-- zoo> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | the characters "--ugh--" *are the value*.  They are not a comment. | 
					
						
							|  |  |  | Comments are not in fact allowed inside tags, unfortunately. | 
					
						
							|  |  |  | This tag is equivalent to | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     <foo bar="--ugh--" something="zoo"> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | where something is an attribute that has zoo as one of its enumerals. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Because itake_string() is called in many other places, this bug has | 
					
						
							|  |  |  | not yet been fixed. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static ichar const * | 
					
						
							|  |  |  | get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att) | 
					
						
							|  |  |  | { ichar tmp[MAXSTRINGLEN]; | 
					
						
							|  |  |  |   ichar *buf = tmp; | 
					
						
							|  |  |  |   ichar const *s; | 
					
						
							|  |  |  |   ichar c; | 
					
						
							|  |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   ichar const *end; | 
					
						
							|  |  |  |   ichar *start; int len; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   enum | 
					
						
							|  |  |  |   { DIG_FIRST = 8,		/* any token start with digit? */ | 
					
						
							|  |  |  |     NAM_FIRST = 4,		/* any token start with non-digit name char? */ | 
					
						
							|  |  |  |     NAM_LATER = 2,		/* any token have non-digit name char later? */ | 
					
						
							|  |  |  |     ANY_OTHER = 1,		/* any token have illegal character? */ | 
					
						
							|  |  |  |     YET_EMPTY = 0 | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   token = YET_EMPTY; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   att->value.textW = NULL;		/* UCS text */ | 
					
						
							|  |  |  |   att->value.number = 0; | 
					
						
							|  |  |  |   att->flags = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   end = itake_string(dtd, decl, &start, &len); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( end != NULL ) | 
					
						
							|  |  |  |   { ocharbuf out; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     init_ocharbuf(&out); | 
					
						
							|  |  |  |     expand_entities(p, start, len, &out); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( att->definition->type == AT_CDATA ) | 
					
						
							|  |  |  |     { malloc_ocharbuf(&out); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       att->value.number = out.size; | 
					
						
							|  |  |  |       att->value.textW  = out.data.w; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return end; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { ichar *d; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       buf = out.data.w; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* canonicalise blanks */ | 
					
						
							|  |  |  |       s = buf; | 
					
						
							|  |  |  |       while ((c = *s++) != '\0' && HasClass(dtd, c, CH_BLANK)) | 
					
						
							|  |  |  | 	; | 
					
						
							|  |  |  |       d = buf; | 
					
						
							|  |  |  |       while ( c != '\0' ) | 
					
						
							|  |  |  |       { token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST | 
					
						
							|  |  |  | 	  : HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER; | 
					
						
							|  |  |  | 	if ( d != buf ) | 
					
						
							|  |  |  | 	  *d++ = ' '; | 
					
						
							|  |  |  | 	if ( dtd->case_sensitive ) | 
					
						
							|  |  |  | 	{ *d++ = c; | 
					
						
							|  |  |  | 	  while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK)) | 
					
						
							|  |  |  | 	  { token |= HasClass(dtd, c, CH_DIGIT) ? 0 | 
					
						
							|  |  |  | 	      : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER; | 
					
						
							|  |  |  | 	    *d++ = c; | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} else | 
					
						
							|  |  |  | 	{ *d++ = towlower(c); | 
					
						
							|  |  |  | 	  while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK)) | 
					
						
							|  |  |  | 	  { token |= HasClass(dtd, c, CH_DIGIT) ? 0 | 
					
						
							|  |  |  | 	      : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER; | 
					
						
							|  |  |  | 	    *d++ = towlower(c); | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	while (c != '\0' && HasClass(dtd, c, CH_BLANK)) | 
					
						
							|  |  |  | 	  c = *s++; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       *d = '\0'; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { end = itake_unquoted(p, decl, tmp, sizeof(tmp)/sizeof(ichar)); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     if (end == NULL) | 
					
						
							|  |  |  |       return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     s = buf; | 
					
						
							|  |  |  |     c = *s++; | 
					
						
							|  |  |  |     if (c != '\0') | 
					
						
							|  |  |  |     { token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST | 
					
						
							|  |  |  | 	: HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER; | 
					
						
							|  |  |  |       while ((c = *s++) != 0) | 
					
						
							|  |  |  |       { token |= HasClass(dtd, c, CH_DIGIT) ? 0 | 
					
						
							|  |  |  | 	  : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if ( token == YET_EMPTY || (token & ANY_OTHER) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_SYNTAX_WARNING, L"Attribute value requires quotes", buf); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (!dtd->case_sensitive && att->definition->type != AT_CDATA) | 
					
						
							|  |  |  |       istrlower(buf); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   switch (att->definition->type) | 
					
						
							|  |  |  |   { case AT_NUMBER:		/* number */ | 
					
						
							|  |  |  |       if (token != DIG_FIRST) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       { gripe(p, ERC_SYNTAX_WARNING, L"NUMBER expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } else if (dtd->number_mode == NU_INTEGER) | 
					
						
							|  |  |  |       { (void) istrtol(buf, &att->value.number); | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { att->value.textW  = istrdup(buf); | 
					
						
							|  |  |  | 	att->value.number = (long)istrlen(buf); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       return end; | 
					
						
							|  |  |  |     case AT_CDATA:		/* CDATA attribute */ | 
					
						
							|  |  |  |       att->value.textW  = istrdup(buf); | 
					
						
							|  |  |  |       att->value.number = (long)istrlen(buf); | 
					
						
							|  |  |  |       return end; | 
					
						
							|  |  |  |     case AT_ID:		/* identifier */ | 
					
						
							|  |  |  |     case AT_IDREF:		/* identifier reference */ | 
					
						
							|  |  |  |     case AT_NAME:		/* name token */ | 
					
						
							|  |  |  |     case AT_NOTATION:		/* notation-name */ | 
					
						
							|  |  |  |       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"NAME expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     case AT_NAMEOF:		/* one of these names */ | 
					
						
							|  |  |  |     case AT_NMTOKEN:		/* name-token */ | 
					
						
							|  |  |  |       if (token == YET_EMPTY || (token & ANY_OTHER) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"NMTOKEN expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       if ( att->definition->type == AT_NAMEOF ) | 
					
						
							|  |  |  |       { dtd_name_list *nl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for(nl=att->definition->typeex.nameof; nl; nl = nl->next) | 
					
						
							|  |  |  | 	{ if ( istreq(nl->value->name, buf) ) | 
					
						
							|  |  |  | 	    goto passed; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"unexpected value", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case AT_NUTOKEN:		/* number token */ | 
					
						
							|  |  |  |       if ((token & (NAM_FIRST | ANY_OTHER)) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"NUTOKEN expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     case AT_ENTITY:		/* entity-name */ | 
					
						
							|  |  |  |       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"entity NAME expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     case AT_NAMES:		/* list of names */ | 
					
						
							|  |  |  |     case AT_IDREFS:		/* list of identifier references */ | 
					
						
							|  |  |  |       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"NAMES expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     case AT_ENTITIES:		/* entity-name list */ | 
					
						
							|  |  |  |       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"entity NAMES expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     case AT_NMTOKENS:		/* name-token list */ | 
					
						
							|  |  |  |       if (token == YET_EMPTY || (token & ANY_OTHER) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"NMTOKENS expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     case AT_NUMBERS:		/* number list */ | 
					
						
							|  |  |  |       if (token != DIG_FIRST) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"NUMBERS expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     case AT_NUTOKENS: | 
					
						
							|  |  |  |       if ((token & (NAM_FIRST | ANY_OTHER)) != 0) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_WARNING, L"NUTOKENS expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       assert(0); | 
					
						
							|  |  |  |       return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | passed: | 
					
						
							|  |  |  |   att->value.textW  = istrdup(buf);	/* TBD: more validation */ | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   att->value.number = (long)istrlen(buf); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   return end; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const ichar * | 
					
						
							|  |  |  | process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl, | 
					
						
							|  |  |  | 		   sgml_attribute *atts, int *argc) | 
					
						
							|  |  |  | { int attn = 0; | 
					
						
							|  |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   decl = iskip_layout(dtd, decl); | 
					
						
							|  |  |  |   while(decl && *decl) | 
					
						
							|  |  |  |   { dtd_symbol *nm; | 
					
						
							|  |  |  |     const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     if ( (s=itake_nmtoken(p, decl, &nm)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     { decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( (s=isee_func(dtd, decl, CF_VI)) ) /* name= */ | 
					
						
							|  |  |  |       { dtd_attr *a; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( !HasClass(dtd, nm->name[0], CH_NMSTART) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  gripe(p, ERC_SYNTAX_WARNING, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		"Illegal start of attribute-name", decl); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	decl = s; | 
					
						
							|  |  |  | 	if ( !(a=find_attribute(e, nm)) ) | 
					
						
							|  |  |  | 	{ a = sgml_calloc(1, sizeof(*a)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  a->name = nm; | 
					
						
							|  |  |  | 	  a->type = AT_CDATA; | 
					
						
							|  |  |  | 	  a->def  = AT_IMPLIED; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  add_attribute(p, e, a); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	  if ( !e->undefined && | 
					
						
							|  |  |  | 	       !(dtd->dialect != DL_SGML && | 
					
						
							|  |  |  | 		 (istreq(L"xmlns", nm->name) || | 
					
						
							|  |  |  | 		  istrprefix(L"xmlns:", nm->name))) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	    gripe(p, ERC_NO_ATTRIBUTE, e->name->name, nm->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	atts[attn].definition = a; | 
					
						
							|  |  |  | 	if ( (decl=get_attribute_value(p, decl, atts+attn)) ) | 
					
						
							|  |  |  | 	{ attn++; | 
					
						
							|  |  |  | 	  continue; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } else if ( e->structure ) | 
					
						
							|  |  |  |       { dtd_attr_list *al;		/* value shorthand */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for(al=e->attributes; al; al=al->next) | 
					
						
							|  |  |  | 	{ dtd_attr *a = al->attribute; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( a->type == AT_NAMEOF || a->type == AT_NOTATION ) | 
					
						
							|  |  |  | 	  { dtd_name_list *nl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    for(nl=a->typeex.nameof; nl; nl = nl->next) | 
					
						
							|  |  |  | 	    { if ( nl->value == nm ) | 
					
						
							|  |  |  | 	      { if ( dtd->dialect != DL_SGML ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 		  gripe(p, ERC_SYNTAX_WARNING, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 			"Value short-hand in XML mode", decl); | 
					
						
							|  |  |  | 		atts[attn].flags	= 0; | 
					
						
							|  |  |  | 		atts[attn].definition   = a; | 
					
						
							|  |  |  | 		atts[attn].value.textW  = istrdup(nm->name); | 
					
						
							|  |  |  | 		atts[attn].value.number = (long)istrlen(nm->name); | 
					
						
							|  |  |  | 		attn++; | 
					
						
							|  |  |  | 		goto next; | 
					
						
							|  |  |  | 	      } | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_NO_ATTRIBUTE_VALUE, e->name->name, nm->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	decl = s; | 
					
						
							|  |  |  |       } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       { gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	decl = s; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { *argc = attn; | 
					
						
							|  |  |  |       return decl; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   next: | 
					
						
							|  |  |  |     ; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *argc = attn; | 
					
						
							|  |  |  |   return decl; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | add_default_attributes() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This function adds attributes for omitted  default and fixed attributes. | 
					
						
							|  |  |  | These attributes are added to  the  end   of  the  attribute  list. This | 
					
						
							|  |  |  | function returns the new  number  of   attributes.  The  `atts' array is | 
					
						
							|  |  |  | assumed   to   be   MAXATTRIBUTES    long,     normally    passed   from | 
					
						
							|  |  |  | process_begin_element. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | add_default_attributes(dtd_parser *p, dtd_element *e, | 
					
						
							|  |  |  | 		       int natts, sgml_attribute *atts) | 
					
						
							|  |  |  | { dtd_attr_list *al; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( e == CDATA_ELEMENT ) | 
					
						
							|  |  |  |     return natts; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(al=e->attributes; al; al=al->next) | 
					
						
							|  |  |  |   { dtd_attr *a = al->attribute; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch(a->def) | 
					
						
							|  |  |  |     { case AT_REQUIRED:			/* TBD: check if present */ | 
					
						
							|  |  |  |       case AT_CURRENT:			/* TBD: register in DTD and reuse */ | 
					
						
							|  |  |  |       case AT_CONREF: | 
					
						
							|  |  |  |       case AT_IMPLIED: | 
					
						
							|  |  |  | 	goto next; | 
					
						
							|  |  |  |       case AT_FIXED: | 
					
						
							|  |  |  |       case AT_DEFAULT: | 
					
						
							|  |  |  |       { int i; | 
					
						
							|  |  |  | 	sgml_attribute *ap; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for(i=0, ap=atts; i<natts; i++, ap++) | 
					
						
							|  |  |  | 	{ if ( ap->definition == a ) | 
					
						
							|  |  |  | 	    goto next; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ap->definition   = a; | 
					
						
							|  |  |  | 	ap->value.textW  = NULL; | 
					
						
							|  |  |  | 	ap->value.number = 0; | 
					
						
							|  |  |  | 	ap->flags        = SGML_AT_DEFAULT; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	switch(a->type) | 
					
						
							|  |  |  | 	{ case AT_CDATA: | 
					
						
							|  |  |  | 	    ap->value.textW = a->att_def.cdata; | 
					
						
							|  |  |  | 	    ap->value.number = (long)istrlen(ap->value.textW); | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	  case AT_NUMBER: | 
					
						
							|  |  |  | 	    if ( p->dtd->number_mode == NU_TOKEN ) | 
					
						
							|  |  |  | 	    { ap->value.textW  = (ichar*)a->att_def.name->name; | 
					
						
							|  |  |  | 	      ap->value.number = (long)istrlen(ap->value.textW); | 
					
						
							|  |  |  | 	    } else | 
					
						
							|  |  |  | 	    { ap->value.number = a->att_def.number; | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	  default: | 
					
						
							|  |  |  | 	    if ( a->islist ) | 
					
						
							|  |  |  | 	    { ap->value.textW = a->att_def.list; | 
					
						
							|  |  |  | 	    } else | 
					
						
							|  |  |  | 	    { ap->value.textW = (ichar*)a->att_def.name->name; | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	    ap->value.number = (long)istrlen(ap->value.textW); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	natts++; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   next:; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return natts; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_attribute_values(int argc, sgml_attribute *argv) | 
					
						
							|  |  |  | { int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(i=0; i<argc; i++, argv++) | 
					
						
							|  |  |  |   { if ( (argv->flags & SGML_AT_DEFAULT) ) | 
					
						
							|  |  |  |       continue;				/* shared with the DTD */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( argv->value.textW ) | 
					
						
							|  |  |  |       sgml_free(argv->value.textW); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_begin_element(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   dtd_symbol *id; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( (s=itake_name(p, decl, &id)) ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   { sgml_attribute atts[MAXATTRIBUTES]; | 
					
						
							|  |  |  |     int natts; | 
					
						
							|  |  |  |     dtd_element *e = find_element(dtd, id); | 
					
						
							|  |  |  |     int empty = FALSE; | 
					
						
							|  |  |  |     int conref = FALSE; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     int rc = TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if ( !e->structure ) | 
					
						
							|  |  |  |     { dtd_edef *def; | 
					
						
							|  |  |  |       e->undefined = TRUE; | 
					
						
							|  |  |  |       STAT(edefs_implicit++); | 
					
						
							|  |  |  |       def_element(dtd, id); | 
					
						
							|  |  |  |       def = e->structure; | 
					
						
							|  |  |  |       def->type = C_EMPTY; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     open_element(p, e, TRUE); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     decl=s; | 
					
						
							|  |  |  |     if ( (s=process_attributes(p, e, decl, atts, &natts)) ) | 
					
						
							|  |  |  |       decl=s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( dtd->dialect != DL_SGML ) | 
					
						
							|  |  |  |     { if ( (s=isee_func(dtd, decl, CF_ETAGO2)) ) | 
					
						
							|  |  |  |       { empty = TRUE;			/* XML <tag/> */ | 
					
						
							|  |  |  | 	decl = s; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | #ifdef XMLNS
 | 
					
						
							|  |  |  |       if ( dtd->dialect == DL_XMLNS ) | 
					
						
							|  |  |  | 	update_xmlns(p, e, natts, atts); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |       if ( dtd->dialect != DL_SGML ) | 
					
						
							|  |  |  | 	update_space_mode(p, e, natts, atts); | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for(i=0; i<natts; i++) | 
					
						
							|  |  |  |       { if ( atts[i].definition->def == AT_CONREF ) | 
					
						
							|  |  |  | 	{ empty = TRUE; | 
					
						
							|  |  |  | 	  conref = TRUE; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if ( *decl ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute list", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if ( !(p->flags & SGML_PARSER_NODEFS) ) | 
					
						
							|  |  |  |       natts = add_default_attributes(p, e, natts, atts); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( empty || | 
					
						
							|  |  |  | 	 (dtd->dialect == DL_SGML && | 
					
						
							|  |  |  | 	  e->structure && | 
					
						
							|  |  |  | 	  e->structure->type == C_EMPTY && | 
					
						
							|  |  |  | 	  !e->undefined) ) | 
					
						
							|  |  |  |       p->empty_element = e; | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       p->empty_element = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->on_begin_element ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       rc = (*p->on_begin_element)(p, e, natts, atts); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     free_attribute_values(natts, atts); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->empty_element ) | 
					
						
							|  |  |  |     { p->empty_element = NULL; | 
					
						
							|  |  |  |       close_element(p, e, conref); | 
					
						
							|  |  |  |       if ( conref )	/* might be S_CDATA due to declared content */ | 
					
						
							|  |  |  | 	p->cdata_state = p->state = S_PCDATA; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     return rc; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   return gripe(p, ERC_SYNTAX_ERROR, L"Bad open-element tag", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_end_element(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   dtd_symbol *id; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   emit_cdata(p, TRUE); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( (s=itake_name(p, decl, &id)) && *s == '\0' ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return close_element(p, find_element(dtd, id), FALSE); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( p->dtd->shorttag && *decl == '\0' ) /* </>: close current element */ | 
					
						
							|  |  |  |     return close_current_element(p); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   return gripe(p, ERC_SYNTAX_ERROR, L"Bad close-element tag", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | process_net(dtd_parser *p) | 
					
						
							|  |  |  |     We've seen a / of a shorttag element.  Close this one. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_net(dtd_parser *p) | 
					
						
							|  |  |  | { sgml_environment *env; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   prepare_cdata(p); | 
					
						
							|  |  |  |   for(env = p->environments; env; env=env->parent) | 
					
						
							|  |  |  |   { if ( env->wants_net ) | 
					
						
							|  |  |  |     { sgml_environment *parent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       pop_to(p, env, NULL);		/* close parents */ | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       validate_completeness(p, env); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       parent = env->parent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       emit_cdata(p, TRUE); | 
					
						
							|  |  |  |       p->first = FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( p->on_end_element ) | 
					
						
							|  |  |  |       { WITH_CLASS(p, EV_SHORTTAG, | 
					
						
							|  |  |  | 		   (*p->on_end_element)(p, env->element)); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       free_environment(env); | 
					
						
							|  |  |  |       p->environments = parent; | 
					
						
							|  |  |  |       p->map = (parent ? parent->map : NULL); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return TRUE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return FALSE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int				/* <!DOCTYPE ...> */ | 
					
						
							|  |  |  | process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   dtd_symbol *id; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  |   dtd_entity *et = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( !(s=itake_name(p, decl, &id)) ) | 
					
						
							|  |  |  |     return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s=isee_identifier(dtd, decl, "system")) ) | 
					
						
							|  |  |  |   { et = sgml_calloc(1, sizeof(*et)); | 
					
						
							|  |  |  |     et->type = ET_SYSTEM; | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } else if ( (s=isee_identifier(dtd, decl, "public")) ) | 
					
						
							|  |  |  |   { et = sgml_calloc(1, sizeof(*et)); | 
					
						
							|  |  |  |     et->type = ET_PUBLIC; | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } else if ( isee_func(dtd, decl, CF_DSO) ) | 
					
						
							|  |  |  |     goto local; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( et ) | 
					
						
							|  |  |  |   { et->name = id; | 
					
						
							|  |  |  |     et->catalog_location = CAT_DOCTYPE; | 
					
						
							|  |  |  |     if ( !(s=process_entity_value_declaration(p, decl, et)) ) | 
					
						
							|  |  |  |       return FALSE; | 
					
						
							|  |  |  |     decl = s; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !dtd->doctype )			/* i.e. anonymous DTD */ | 
					
						
							|  |  |  |   { ichar *file; | 
					
						
							|  |  |  |     dtd_parser *clone; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dtd->doctype = istrdup(id->name);	/* Fill it */ | 
					
						
							|  |  |  |     if ( et ) | 
					
						
							|  |  |  |       file = entity_file(dtd, et); | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       file = istrdup(find_in_catalogue(CAT_DOCTYPE, | 
					
						
							|  |  |  | 				       dtd->doctype, NULL, NULL, | 
					
						
							|  |  |  | 				       dtd->dialect != DL_SGML)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !file ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { gripe(p, ERC_EXISTENCE, L"DTD", dtd->doctype); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } else | 
					
						
							|  |  |  |     { clone = clone_dtd_parser(p); | 
					
						
							|  |  |  |       if ( !load_dtd_from_file(clone, file) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_EXISTENCE, L"file", file); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       free_dtd_parser(clone); | 
					
						
							|  |  |  |       sgml_free(file); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( et ) | 
					
						
							|  |  |  |     free_entity_list(et); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | local: | 
					
						
							|  |  |  |   if ( (s=isee_func(dtd, decl, CF_DSO)) ) /* [...] */ | 
					
						
							|  |  |  |   { int grouplevel = 1; | 
					
						
							|  |  |  |     data_mode oldmode  = p->dmode; | 
					
						
							|  |  |  |     dtdstate  oldstate = p->state; | 
					
						
							|  |  |  |     locbuf oldloc; | 
					
						
							|  |  |  |     const ichar *q; | 
					
						
							|  |  |  |     icharbuf *saved_ibuf = p->buffer; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     push_location(p, &oldloc); | 
					
						
							|  |  |  | 					/* try to find start-location. */ | 
					
						
							|  |  |  | 					/* fails if there is comment before */ | 
					
						
							|  |  |  | 					/* the []! */ | 
					
						
							|  |  |  |     sgml_cplocation(&p->location, &p->startloc); | 
					
						
							|  |  |  |     inc_location(&p->location, '<'); | 
					
						
							|  |  |  |     for(q=decl0; q < s; q++) | 
					
						
							|  |  |  |       inc_location(&p->location, *q); | 
					
						
							|  |  |  |     p->dmode = DM_DTD; | 
					
						
							|  |  |  |     p->state = S_PCDATA; | 
					
						
							|  |  |  |     p->buffer = new_icharbuf(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for( ; *s; s++ ) | 
					
						
							|  |  |  |     { if ( isee_func(dtd, s, CF_LIT) ||	/* skip quoted strings */ | 
					
						
							|  |  |  | 	   isee_func(dtd, s, CF_LITA) ) | 
					
						
							|  |  |  |       { ichar q = *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	putchar_dtd_parser(p, *s++);	/* pass open quote */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for( ; *s && *s != q; s++ ) | 
					
						
							|  |  |  | 	  putchar_dtd_parser(p, *s); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( *s == q )			/* pass closing quote */ | 
					
						
							|  |  |  | 	  putchar_dtd_parser(p, *s); | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( isee_func(dtd, s, CF_DSO) ) | 
					
						
							|  |  |  | 	grouplevel++; | 
					
						
							|  |  |  |       else if ( isee_func(dtd, s, CF_DSC) && --grouplevel == 0 ) | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       putchar_dtd_parser(p, *s); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     p->dtd->implicit = FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     p->state    = oldstate; | 
					
						
							|  |  |  |     p->dmode    = oldmode; | 
					
						
							|  |  |  |     free_icharbuf(p->buffer); | 
					
						
							|  |  |  |     p->buffer = saved_ibuf; | 
					
						
							|  |  |  |     pop_location(p, &oldloc); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   p->enforce_outer_element = id;	/* make this the outer element */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | init_decoding(dtd_parser *p) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #ifdef UTF8
 | 
					
						
							|  |  |  |   int decode; | 
					
						
							|  |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( dtd->encoding == SGML_ENC_UTF8 && | 
					
						
							|  |  |  |        p->encoded    == TRUE ) | 
					
						
							|  |  |  |     decode = TRUE; | 
					
						
							|  |  |  |   else | 
					
						
							|  |  |  |     decode = FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( p->utf8_decode != decode ) | 
					
						
							|  |  |  |   { DEBUG(fprintf(stderr, "%s UTF-8 decoding on %p\n", | 
					
						
							|  |  |  | 		  decode ? "Enable" : "Disable", | 
					
						
							|  |  |  | 		  p)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     p->utf8_decode = decode; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | xml_set_encoding() is the public interface to   set the encoding for the | 
					
						
							|  |  |  | parser. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int				/* strcasecmp() with C locale */ | 
					
						
							|  |  |  | posix_strcasecmp(const char *s1, const char *s2) | 
					
						
							|  |  |  | { for(; *s1 && *s2; s1++, s2++) | 
					
						
							|  |  |  |   { int c1 = *s1&0xff; | 
					
						
							|  |  |  |     int c2 = *s2&0xff; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( c1 >= 'A' && c1 <= 'Z' ) c1 += 'a'-'A'; | 
					
						
							|  |  |  |     if ( c2 >= 'A' && c2 <= 'Z' ) c2 += 'a'-'A'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( c1 != c2 ) | 
					
						
							|  |  |  |       return c1-c2; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return *s1 - *s2; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | xml_set_encoding(dtd_parser *p, const char *enc) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( posix_strcasecmp(enc, "iso-8859-1") == 0 ) | 
					
						
							|  |  |  |   { dtd->encoding = SGML_ENC_ISO_LATIN1; | 
					
						
							|  |  |  |   } else if ( posix_strcasecmp(enc, "us-ascii") == 0 ) | 
					
						
							|  |  |  |   { dtd->encoding = SGML_ENC_ISO_LATIN1; 	/* doesn't make a difference */ | 
					
						
							|  |  |  |   } else if ( posix_strcasecmp(enc, "utf-8") == 0 ) | 
					
						
							|  |  |  |   { dtd->encoding = SGML_ENC_UTF8; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |     return FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   init_decoding(p); | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | set_encoding() sets the encoding from the encoding="..." field of the | 
					
						
							|  |  |  | XML header. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | set_encoding(dtd_parser *p, const ichar *enc) | 
					
						
							|  |  |  | { char buf[32]; | 
					
						
							|  |  |  |   char *e = buf+sizeof(buf)-1; | 
					
						
							|  |  |  |   char *o; | 
					
						
							|  |  |  |   const ichar *i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(i=enc, o=buf; *i; ) | 
					
						
							|  |  |  |   { if ( *i < 128 && o < e ) | 
					
						
							|  |  |  |     { *o++ = (char)*i++; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { goto error; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } | 
					
						
							|  |  |  |   *o = '\0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !xml_set_encoding(p, buf) ) | 
					
						
							|  |  |  |   { error: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     gripe(p, ERC_EXISTENCE, L"character encoding", enc); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Process <? ... ?> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Should deal with character encoding for XML documents. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_pi(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { const ichar *s; | 
					
						
							|  |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s=isee_identifier(dtd, decl, "xml")) ) /* <?xml version="1.0"?> */ | 
					
						
							|  |  |  |   { decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch(dtd->dialect) | 
					
						
							|  |  |  |     { case DL_SGML: | 
					
						
							|  |  |  | 	set_dialect_dtd(dtd, DL_XML); | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |       case DL_XML: | 
					
						
							|  |  |  |       case DL_XMLNS: | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while(*decl) | 
					
						
							|  |  |  |     { dtd_symbol *nm; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       if ( (s=itake_name(p, decl, &nm)) && | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	   (s=isee_func(dtd, s, CF_VI)) ) 		/* = */ | 
					
						
							|  |  |  |       { ichar *start; | 
					
						
							|  |  |  | 	int len; | 
					
						
							|  |  |  | 	ichar buf[MAXSTRINGLEN]; | 
					
						
							|  |  |  | 	const ichar *end; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( !(end=itake_string(dtd, s, &start, &len)) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ end=itake_nmtoken_chars(p, s, buf, sizeof(buf)/sizeof(ichar)); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  start = buf; | 
					
						
							|  |  |  | 	  len = (int)istrlen(buf); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( end ) | 
					
						
							|  |  |  | 	{ decl = end; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( istrcaseeq(nm->name, L"encoding") ) | 
					
						
							|  |  |  | 	  { ichar tmp[32]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    if ( len < (int)(sizeof(tmp)/sizeof(ichar)-1) ) | 
					
						
							|  |  |  | 	    { istrncpy(tmp, start, len); | 
					
						
							|  |  |  | 	      tmp[len] = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	      set_encoding(p, tmp); | 
					
						
							|  |  |  | 	    } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	    { gripe(p, ERC_SYNTAX_ERROR, L"Unterminated encoding?", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	    } | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  /* fprintf(stderr, "XML %s = %s\n", nm->name, buf); */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  continue; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_SYNTAX_ERROR, L"Illegal XML parameter", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( p->on_pi ) | 
					
						
							|  |  |  |     (*p->on_pi)(p, decl); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return FALSE;				/* Warn? */ | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_sgml_declaration(dtd_parser *p, const ichar *decl) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | { return gripe(p, ERC_SYNTAX_WARNING, L"Ignored <!SGML ...> declaration", NULL); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_declaration(dtd_parser *p, const ichar *decl) | 
					
						
							|  |  |  | { const ichar *s; | 
					
						
							|  |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( p->dmode != DM_DTD ) | 
					
						
							|  |  |  |   { if ( (s=isee_func(dtd, decl, CF_ETAGO2)) ) /* </ ... > */ | 
					
						
							|  |  |  |     { return process_end_element(p, s); | 
					
						
							|  |  |  |     } else if ( HasClass(dtd, *decl, CH_NAME) ) /* <letter */ | 
					
						
							|  |  |  |     { return process_begin_element(p, decl); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (s=isee_func(dtd, decl, CF_MDO2)) ) /* <! ... >*/ | 
					
						
							|  |  |  |   { decl = s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->on_decl ) | 
					
						
							|  |  |  |       (*p->on_decl)(p, decl); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( (s = isee_identifier(dtd, decl, "entity")) ) | 
					
						
							|  |  |  |       process_entity_declaration(p, s); | 
					
						
							|  |  |  |     else if ( (s = isee_identifier(dtd, decl, "element")) ) | 
					
						
							|  |  |  |       process_element_declaraction(p, s); | 
					
						
							|  |  |  |     else if ( (s = isee_identifier(dtd, decl, "attlist")) ) | 
					
						
							|  |  |  |       process_attlist_declaraction(p, s); | 
					
						
							|  |  |  |     else if ( (s = isee_identifier(dtd, decl, "notation")) ) | 
					
						
							|  |  |  |       process_notation_declaration(p, s); | 
					
						
							|  |  |  |     else if ( (s = isee_identifier(dtd, decl, "shortref")) ) | 
					
						
							|  |  |  |       process_shortref_declaration(p, s); | 
					
						
							|  |  |  |     else if ( (s = isee_identifier(dtd, decl, "usemap")) ) | 
					
						
							|  |  |  |       process_usemap_declaration(p, s); | 
					
						
							|  |  |  |     else if ( (s = isee_identifier(dtd, decl, "sgml")) ) | 
					
						
							|  |  |  |       process_sgml_declaration(p, s); | 
					
						
							|  |  |  |     else if ( (s = isee_identifier(dtd, decl, "doctype")) ) | 
					
						
							|  |  |  |     { if ( p->dmode != DM_DTD ) | 
					
						
							|  |  |  | 	process_doctype(p, s, decl-1); | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { s = iskip_layout(dtd, decl); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       if ( *s ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_SYNTAX_ERROR, L"Invalid declaration", s); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   return gripe(p, ERC_SYNTAX_ERROR, L"Invalid declaration", decl); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	  STREAM BINDING	* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | set_file_dtd_parser(dtd_parser *p, input_type type, const ichar *name) | 
					
						
							|  |  |  | { p->location.type      = type; | 
					
						
							|  |  |  |   p->location.name.file = name; | 
					
						
							|  |  |  |   p->location.line      = 1; | 
					
						
							|  |  |  |   p->location.linepos   = 0; | 
					
						
							|  |  |  |   p->location.charpos   = 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | set_src_dtd_parser(dtd_parser *p, input_type type, const ichar *name) | 
					
						
							|  |  |  | { p->location.type        = type; | 
					
						
							|  |  |  |   p->location.name.entity = name; | 
					
						
							|  |  |  |   p->location.line        = 1; | 
					
						
							|  |  |  |   p->location.linepos     = 0; | 
					
						
							|  |  |  |   p->location.charpos     = 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | set_mode_dtd_parser(dtd_parser *p, data_mode m) | 
					
						
							|  |  |  | { p->dmode = m;				/* DM_DTD or DM_DATA */ | 
					
						
							|  |  |  |   p->state = S_PCDATA; | 
					
						
							|  |  |  |   p->blank_cdata = TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | dtd_parser * | 
					
						
							|  |  |  | new_dtd_parser(dtd *dtd) | 
					
						
							|  |  |  | { dtd_parser *p = sgml_calloc(1, sizeof(*p)); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   if ( !dtd ) | 
					
						
							|  |  |  |     dtd = new_dtd(NULL); | 
					
						
							|  |  |  |   dtd->references++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   p->magic       = SGML_PARSER_MAGIC; | 
					
						
							|  |  |  |   p->dtd	 = dtd; | 
					
						
							|  |  |  |   p->state	 = S_PCDATA; | 
					
						
							|  |  |  |   p->mark_state	 = MS_INCLUDE; | 
					
						
							|  |  |  |   p->dmode       = DM_DTD; | 
					
						
							|  |  |  |   p->encoded	 = TRUE;		/* encoded octet stream */ | 
					
						
							|  |  |  |   p->buffer	 = new_icharbuf(); | 
					
						
							|  |  |  |   p->cdata	 = new_ocharbuf(); | 
					
						
							|  |  |  |   p->event_class = EV_EXPLICIT; | 
					
						
							|  |  |  |   set_src_dtd_parser(p, IN_NONE, NULL); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return p; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_parser * | 
					
						
							|  |  |  | clone_dtd_parser(dtd_parser *p) | 
					
						
							|  |  |  | { dtd_parser *clone = sgml_calloc(1, sizeof(*p)); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   *clone = *p; | 
					
						
							|  |  |  |   clone->dtd->references++; | 
					
						
							|  |  |  |   clone->environments =	NULL; | 
					
						
							|  |  |  |   clone->marked	      =	NULL; | 
					
						
							|  |  |  |   clone->etag	      =	NULL; | 
					
						
							|  |  |  |   clone->grouplevel   =	0; | 
					
						
							|  |  |  |   clone->state	      =	S_PCDATA; | 
					
						
							|  |  |  |   clone->mark_state   =	MS_INCLUDE; | 
					
						
							|  |  |  |   clone->dmode	      =	DM_DTD; | 
					
						
							|  |  |  |   clone->buffer	      =	new_icharbuf(); | 
					
						
							|  |  |  |   clone->cdata	      =	new_ocharbuf(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return clone; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | free_dtd_parser(dtd_parser *p) | 
					
						
							|  |  |  | { free_icharbuf(p->buffer); | 
					
						
							|  |  |  |   free_ocharbuf(p->cdata); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | #ifdef XMLNS
 | 
					
						
							|  |  |  |   xmlns_free(p->xmlns); | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   free_dtd(p->dtd); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   sgml_free(p); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s) | 
					
						
							|  |  |  | { locbuf old; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   push_location(p, &old); | 
					
						
							|  |  |  |   set_src_dtd_parser(p, in, name); | 
					
						
							|  |  |  |   empty_icharbuf(p->buffer);		/* dubious */ | 
					
						
							|  |  |  |   for(; *s; s++) | 
					
						
							|  |  |  |     putchar_dtd_parser(p, *s); | 
					
						
							|  |  |  |   pop_location(p, &old); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_include(dtd_parser *p, const ichar *entity_name) | 
					
						
							|  |  |  | { dtd_symbol *id; | 
					
						
							|  |  |  |   dtd_entity *pe; | 
					
						
							|  |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (id=dtd_find_entity_symbol(dtd, entity_name)) && | 
					
						
							|  |  |  |        (pe=find_pentity(p->dtd, id)) ) | 
					
						
							|  |  |  |   { ichar *file; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( (file = entity_file(dtd, pe)) ) | 
					
						
							|  |  |  |     { int rc = sgml_process_file(p, file, SGML_SUB_DOCUMENT); | 
					
						
							|  |  |  |       sgml_free(file); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return rc; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { const ichar *text = entity_value(p, pe, NULL); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( !text ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return gripe(p, ERC_NO_VALUE, pe->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       return process_chars(p, IN_ENTITY, entity_name, text); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |   return gripe(p, ERC_EXISTENCE, L"parameter entity", entity_name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Process <![ KEYWORD [ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Switches ->mark_state according to KEYWORD. Processes the rest in normal | 
					
						
							|  |  |  | S_PCDATA style, which pops the mark-stack on seeing ]]> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | For the purpose of <!DOCTYPE spec [additions]> we switch to S_GROUP if the declaration starts with <! but is not directly followed by [ | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | process_marked_section(dtd_parser *p) | 
					
						
							|  |  |  | { ichar buf[MAXDECL]; | 
					
						
							|  |  |  |   dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   const ichar *decl = p->buffer->data; | 
					
						
							|  |  |  |   const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */ | 
					
						
							|  |  |  |        (decl=isee_func(dtd, decl, CF_DSO)) && /* [ */ | 
					
						
							|  |  |  |        expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) ) | 
					
						
							|  |  |  |   { dtd_symbol *kwd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     decl = buf; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     if ( (s=itake_name(p, decl, &kwd)) && | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	 isee_func(dtd, s, CF_DSO) )	/* [ */ | 
					
						
							|  |  |  |     { dtd_marked *m = sgml_calloc(1, sizeof(*m)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       m->keyword = kwd;			/* push on the stack */ | 
					
						
							|  |  |  |       m->parent = p->marked; | 
					
						
							|  |  |  |       p->marked = m; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( istrcaseeq(kwd->name, L"IGNORE") ) | 
					
						
							|  |  |  | 	m->type = MS_IGNORE; | 
					
						
							|  |  |  |       else if ( istrcaseeq(kwd->name, L"INCLUDE") ) | 
					
						
							|  |  |  | 	m->type = MS_INCLUDE; | 
					
						
							|  |  |  |       else if ( istrcaseeq(kwd->name, L"TEMP") ) | 
					
						
							|  |  |  | 	m->type = MS_INCLUDE; | 
					
						
							|  |  |  |       else if ( istrcaseeq(kwd->name, L"CDATA") ) | 
					
						
							|  |  |  | 	m->type = MS_CDATA; | 
					
						
							|  |  |  |       else if ( istrcaseeq(kwd->name, L"RCDATA") ) | 
					
						
							|  |  |  | 	m->type = MS_RCDATA; | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  | 	m->type = MS_INCLUDE;		/* default */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       empty_icharbuf(p->buffer); | 
					
						
							|  |  |  |       if ( m->type == MS_CDATA ) | 
					
						
							|  |  |  | 	p->state = S_MSCDATA; | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							|  |  |  |       if ( p->mark_state != MS_IGNORE ) | 
					
						
							|  |  |  | 	p->mark_state = m->type; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { decl = p->buffer->data; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */ | 
					
						
							|  |  |  | 	 !isee_func(dtd, decl, CF_DSO) ) /* [ */ | 
					
						
							|  |  |  |     { p->state = S_GROUP; | 
					
						
							|  |  |  |       p->grouplevel = 1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | pop_marked_section(dtd_parser *p) | 
					
						
							|  |  |  | { dtd_marked *m = p->marked; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( m ) | 
					
						
							|  |  |  |   { p->marked = m->parent; | 
					
						
							|  |  |  |     sgml_free(m); | 
					
						
							|  |  |  |     p->mark_state = (p->marked ? p->marked->type : MS_INCLUDE); | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Update the space-mode for the current element.  The space mode defines | 
					
						
							|  |  |  | how spaces are handled in the CDATA output. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static dtd_space_mode | 
					
						
							|  |  |  | istr_to_space_mode(const ichar *val) | 
					
						
							|  |  |  | { if ( istreq(val, L"default") ) | 
					
						
							|  |  |  |     return SP_DEFAULT; | 
					
						
							|  |  |  |   if ( istreq(val, L"preserve") ) | 
					
						
							|  |  |  |     return SP_PRESERVE; | 
					
						
							|  |  |  |   if ( istreq(val, L"sgml") ) | 
					
						
							|  |  |  |     return SP_SGML; | 
					
						
							|  |  |  |   if ( istreq(val, L"remove") ) | 
					
						
							|  |  |  |     return SP_REMOVE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return SP_INHERIT;			/* interpret as error */ | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | update_space_mode(dtd_parser *p, dtd_element *e, | 
					
						
							|  |  |  | 		  int natts, sgml_attribute *atts) | 
					
						
							|  |  |  | { for( ; natts-- > 0; atts++ ) | 
					
						
							|  |  |  |   { const ichar *name = atts->definition->name->name; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( istreq(name, L"xml:space") && | 
					
						
							|  |  |  | 	 atts->definition->type == AT_CDATA && | 
					
						
							|  |  |  | 	 atts->value.textW ) | 
					
						
							|  |  |  |     { dtd_space_mode m = istr_to_space_mode(atts->value.textW); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( m != SP_INHERIT ) | 
					
						
							|  |  |  | 	p->environments->space_mode = m; | 
					
						
							|  |  |  |       else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_EXISTENCE, L"xml:space-mode", atts->value.textW); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( e->space_mode != SP_INHERIT ) | 
					
						
							|  |  |  |     p->environments->space_mode = e->space_mode; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | empty_cdata(dtd_parser *p) | 
					
						
							|  |  |  | { if ( p->dmode == DM_DATA ) | 
					
						
							|  |  |  |   { empty_ocharbuf(p->cdata); | 
					
						
							|  |  |  |     p->blank_cdata = TRUE; | 
					
						
							|  |  |  |     p->cdata_must_be_empty = FALSE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | cb_cdata(dtd_parser *p, ocharbuf *buf, int offset, int size) | 
					
						
							|  |  |  | { if ( p->on_data ) | 
					
						
							|  |  |  |     (*p->on_data)(p, EC_CDATA, size, buf->data.w+offset); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | emit_cdata(dtd_parser *p, int last) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   locbuf locsafe; | 
					
						
							|  |  |  |   ocharbuf *cdata = p->cdata; | 
					
						
							|  |  |  |   int offset = 0; | 
					
						
							|  |  |  |   int size = cdata->size; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   if ( size == 0 ) | 
					
						
							|  |  |  |     return TRUE;			/* empty or done */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   push_location(p, &locsafe); | 
					
						
							|  |  |  |   sgml_cplocation(&p->location, &p->startloc);   /* start of markup */ | 
					
						
							|  |  |  |   sgml_cplocation(&p->startloc, &p->startcdata); /* real start of CDATA */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( p->environments ) | 
					
						
							|  |  |  |   { switch(p->environments->space_mode) | 
					
						
							|  |  |  |     { case SP_SGML: | 
					
						
							|  |  |  |       case SP_DEFAULT: | 
					
						
							|  |  |  | 	if ( p->first ) | 
					
						
							|  |  |  | 	{ wint_t c = fetch_ocharbuf(cdata, offset); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( HasClass(dtd, c, CH_RE) ) | 
					
						
							|  |  |  | 	  { inc_location(&p->startloc, c); | 
					
						
							|  |  |  | 	    offset++; | 
					
						
							|  |  |  | 	    size--; | 
					
						
							|  |  |  | 	    c = fetch_ocharbuf(cdata, offset); | 
					
						
							|  |  |  | 	  } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  if ( HasClass(dtd, c, CH_RS) ) | 
					
						
							|  |  |  | 	  { inc_location(&p->startloc, c); | 
					
						
							|  |  |  | 	    offset++; | 
					
						
							|  |  |  | 	    size--; | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if ( last && size > 0 ) | 
					
						
							|  |  |  | 	{ wint_t c = fetch_ocharbuf(cdata, offset+size-1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( HasClass(dtd, c, CH_RS) ) | 
					
						
							|  |  |  | 	  { dec_location(&p->location, c); | 
					
						
							|  |  |  | 	    size--; | 
					
						
							|  |  |  | 	    poke_ocharbuf(cdata, offset+size, '\0'); | 
					
						
							|  |  |  | 	    if ( size > 0 ) | 
					
						
							|  |  |  | 	      c = fetch_ocharbuf(cdata, offset+size-1); | 
					
						
							|  |  |  | 	    else | 
					
						
							|  |  |  | 	      c = 0;			/* HasClass(CH_RE) must fail */ | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	  if ( HasClass(dtd, c, CH_RE) ) | 
					
						
							|  |  |  | 	  { dec_location(&p->location, c); | 
					
						
							|  |  |  | 	    size--; | 
					
						
							|  |  |  | 	    poke_ocharbuf(cdata, offset+size, '\0'); | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if ( p->environments->space_mode == SP_DEFAULT ) | 
					
						
							|  |  |  | 	{ int o = 0; | 
					
						
							|  |  |  | 	  int i; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  for(i=0; i<size; i++) | 
					
						
							|  |  |  | 	  { wint_t c = fetch_ocharbuf(cdata, offset+i); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    if ( HasClass(dtd, c, CH_BLANK) ) | 
					
						
							|  |  |  | 	    { for(i++; i<size; i++) | 
					
						
							|  |  |  | 	      { wint_t c = fetch_ocharbuf(cdata, offset+i); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if ( !HasClass(dtd, c, CH_BLANK) ) | 
					
						
							|  |  |  | 		  break; | 
					
						
							|  |  |  | 	      } | 
					
						
							|  |  |  | 	      i--; | 
					
						
							|  |  |  | 	      poke_ocharbuf(cdata, o++, ' '); | 
					
						
							|  |  |  | 	      continue; | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	    poke_ocharbuf(cdata, o++, c); | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	  poke_ocharbuf(cdata, o, '\0'); | 
					
						
							|  |  |  | 	  offset = 0;			/* wrote new output from offset=0 */ | 
					
						
							|  |  |  | 	  size = o; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       case SP_REMOVE: | 
					
						
							|  |  |  |       { int o = 0; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 	int end = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for(i=0; i<size; i++) | 
					
						
							|  |  |  | 	{ wint_t c = fetch_ocharbuf(cdata, offset+i); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( HasClass(dtd, c, CH_BLANK) ) | 
					
						
							|  |  |  | 	    inc_location(&p->startloc, c); | 
					
						
							|  |  |  | 	  else | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( i<size ) | 
					
						
							|  |  |  | 	{ for(; i<size; i++) | 
					
						
							|  |  |  | 	  { wint_t c = fetch_ocharbuf(cdata, offset+i); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    if ( HasClass(dtd, c, CH_BLANK) ) | 
					
						
							|  |  |  | 	    { i++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	      while(i<size && HasClass(dtd, | 
					
						
							|  |  |  | 				       (wint_t)fetch_ocharbuf(cdata, offset+i), | 
					
						
							|  |  |  | 				       CH_BLANK)) | 
					
						
							|  |  |  | 		i++; | 
					
						
							|  |  |  | 	      i--; | 
					
						
							|  |  |  | 	      poke_ocharbuf(cdata, o++, ' '); | 
					
						
							|  |  |  | 	      continue; | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	    poke_ocharbuf(cdata, o++, c); | 
					
						
							|  |  |  | 	    end = o; | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 					/* TBD: adjust end */ | 
					
						
							|  |  |  | 	poke_ocharbuf(cdata, end, '\0'); | 
					
						
							|  |  |  | 	size = end; | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       case SP_PRESERVE: | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       case SP_INHERIT: | 
					
						
							|  |  |  | 	assert(0); | 
					
						
							|  |  |  | 	return FALSE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( size == 0 ) | 
					
						
							|  |  |  |   { pop_location(p, &locsafe); | 
					
						
							|  |  |  |     empty_cdata(p); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   assert(size > 0); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !p->blank_cdata ) | 
					
						
							|  |  |  |   { if ( p->cdata_must_be_empty ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { gripe(p, ERC_NOT_ALLOWED_PCDATA, p->cdata); /* TBD: now passes buffer! */ | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     cb_cdata(p, cdata, offset, size); | 
					
						
							|  |  |  |   } else if ( p->environments ) | 
					
						
							|  |  |  |   { sgml_environment *env = p->environments; | 
					
						
							|  |  |  |     dtd_state *new; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 				/* If an element is not in the DTD we must */ | 
					
						
							|  |  |  | 				/* assume mixed content and emit spaces */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( (new=make_dtd_transition(env->state, CDATA_ELEMENT)) ) | 
					
						
							|  |  |  |     { env->state = new; | 
					
						
							|  |  |  |       cb_cdata(p, cdata, offset, size); | 
					
						
							|  |  |  |     } else if ( env->element->undefined && | 
					
						
							|  |  |  | 		p->environments->space_mode == SP_PRESERVE ) | 
					
						
							|  |  |  |     { cb_cdata(p, cdata, offset, size); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   pop_location(p, &locsafe); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   empty_cdata(p); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | prepare_cdata(dtd_parser *p) | 
					
						
							|  |  |  | { if ( p->cdata->size == 0 ) | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   terminate_ocharbuf(p->cdata); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( p->mark_state == MS_INCLUDE ) | 
					
						
							|  |  |  |   { dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->environments )		/* needed for <img> <img> */ | 
					
						
							|  |  |  |     { dtd_element *e = p->environments->element; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( e->structure && e->structure->type == C_EMPTY && !e->undefined ) | 
					
						
							|  |  |  | 	close_element(p, e, FALSE); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->blank_cdata == TRUE ) | 
					
						
							|  |  |  |     { int blank = TRUE; | 
					
						
							|  |  |  |       int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       for(i=0; i<p->cdata->size; i++) | 
					
						
							|  |  |  |       { wint_t c = fetch_ocharbuf(p->cdata, i); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if ( !HasClass(dtd, c, CH_BLANK) ) | 
					
						
							|  |  |  | 	{ blank = FALSE; | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       p->blank_cdata = blank; | 
					
						
							|  |  |  |       if ( !blank ) | 
					
						
							|  |  |  |       { if ( p->dmode == DM_DTD ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  gripe(p, ERC_SYNTAX_ERROR, L"CDATA in DTD", p->cdata->data); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	else | 
					
						
							|  |  |  | 	  open_element(p, CDATA_ELEMENT, TRUE); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_cdata(dtd_parser *p, int last) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | { prepare_cdata(p); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   return emit_cdata(p, last); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | process_entity(dtd_parser *p, const ichar *name) | 
					
						
							|  |  |  | { if ( name[0] == '#' )			/* #charcode: character entity */ | 
					
						
							|  |  |  |   { int v = char_entity_value(name); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( v <= 0 ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, L"Bad character entity", name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     add_ocharbuf(p->cdata, v); | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { dtd_symbol *id; | 
					
						
							|  |  |  |     dtd_entity *e; | 
					
						
							|  |  |  |     dtd *dtd = p->dtd; | 
					
						
							|  |  |  |     int len; | 
					
						
							|  |  |  |     const ichar *text; | 
					
						
							|  |  |  |     const ichar *s; | 
					
						
							|  |  |  |     int   chr; | 
					
						
							|  |  |  |     ichar *file; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !(id=dtd_find_entity_symbol(dtd, name)) || | 
					
						
							|  |  |  | 	 !(e=id->entity) ) | 
					
						
							|  |  |  |     { if ( dtd->default_entity ) | 
					
						
							|  |  |  | 	e = dtd->default_entity; | 
					
						
							|  |  |  |       else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return gripe(p, ERC_EXISTENCE, L"entity", name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !e->value && | 
					
						
							|  |  |  | 	 e->content == EC_SGML && | 
					
						
							|  |  |  | 	 (file=entity_file(p->dtd, e)) ) | 
					
						
							|  |  |  |     { int rc; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       empty_icharbuf(p->buffer);		/* dubious */ | 
					
						
							|  |  |  |       rc = sgml_process_file(p, file, SGML_SUB_DOCUMENT); | 
					
						
							|  |  |  |       sgml_free(file); | 
					
						
							|  |  |  |       return rc; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !(text = entity_value(p, e, &len)) ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_NO_VALUE, e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     switch ( e->content ) | 
					
						
							|  |  |  |     { case EC_SGML: | 
					
						
							|  |  |  |       case EC_CDATA: | 
					
						
							|  |  |  | 	if ( (s=isee_character_entity(dtd, text, &chr)) && *s == '\0' ) | 
					
						
							|  |  |  | 	{ if ( chr == 0 ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	    return gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", text); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	  if ( p->blank_cdata == TRUE && | 
					
						
							|  |  |  | 	       !HasClass(dtd, (wint_t)chr, CH_BLANK) ) | 
					
						
							|  |  |  | 	  { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE); | 
					
						
							|  |  |  | 	    p->blank_cdata = FALSE; | 
					
						
							|  |  |  | 	  } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  add_ocharbuf(p->cdata, chr); | 
					
						
							|  |  |  | 	  return TRUE; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if ( e->content == EC_SGML ) | 
					
						
							|  |  |  | 	{ locbuf oldloc; | 
					
						
							|  |  |  | 	  int decode = p->utf8_decode; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  push_location(p, &oldloc); | 
					
						
							|  |  |  | 	  p->utf8_decode = FALSE; | 
					
						
							|  |  |  | 	  set_src_dtd_parser(p, IN_ENTITY, e->name->name); | 
					
						
							|  |  |  | 	  empty_icharbuf(p->buffer);		/* dubious */ | 
					
						
							|  |  |  | 	  for(s=text; *s; s++) | 
					
						
							|  |  |  | 	    putchar_dtd_parser(p, *s); | 
					
						
							|  |  |  | 	  p->utf8_decode = decode; | 
					
						
							|  |  |  | 	  pop_location(p, &oldloc); | 
					
						
							|  |  |  | 	} else if ( *text ) | 
					
						
							|  |  |  | 	{ const ichar *o; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( p->blank_cdata == TRUE ) | 
					
						
							|  |  |  | 	  { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE); | 
					
						
							|  |  |  | 	    p->blank_cdata = FALSE; | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  for(o=text; *o; o++) | 
					
						
							|  |  |  | 	    add_ocharbuf(p->cdata, *o); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       case EC_SDATA: | 
					
						
							|  |  |  |       case EC_NDATA: | 
					
						
							|  |  |  | 	process_cdata(p, FALSE); | 
					
						
							|  |  |  | 	if ( p->on_data ) | 
					
						
							|  |  |  | 	  (*p->on_data)(p, e->content, len, text); | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       case EC_PI: | 
					
						
							|  |  |  | 	process_cdata(p, FALSE); | 
					
						
							|  |  |  | 	if ( p->on_pi ) | 
					
						
							|  |  |  | 	  (*p->on_pi)(p, text); | 
					
						
							|  |  |  |       case EC_STARTTAG: | 
					
						
							|  |  |  | #if 0
 | 
					
						
							|  |  |  | 	prepare_cdata(p); | 
					
						
							|  |  |  | 	process_begin_element(p, text); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       case EC_ENDTAG: | 
					
						
							|  |  |  | #if 0
 | 
					
						
							|  |  |  | 	prepare_cdata(p); | 
					
						
							|  |  |  | 	process_end_element(p, text); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Deal with end of input.  We should give a proper error message depending | 
					
						
							|  |  |  | on the state and the start-location of the error. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | int | 
					
						
							|  |  |  | end_document_dtd_parser(dtd_parser *p) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { int rval; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   switch(p->state) | 
					
						
							|  |  |  |   { case S_RCDATA: | 
					
						
							|  |  |  |     case S_CDATA: | 
					
						
							|  |  |  |     case S_PCDATA: | 
					
						
							|  |  |  |       rval = TRUE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case S_CMT: | 
					
						
							|  |  |  |     case S_CMT1: | 
					
						
							|  |  |  |     case S_CMTE0: | 
					
						
							|  |  |  |     case S_CMTE1: | 
					
						
							|  |  |  |     case S_DECLCMT0: | 
					
						
							|  |  |  |     case S_DECLCMT: | 
					
						
							|  |  |  |     case S_DECLCMTE0: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       rval = gripe(p, ERC_SYNTAX_ERROR, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		   L"Unexpected end-of-file in comment", L""); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case S_ECDATA1: | 
					
						
							|  |  |  |     case S_ECDATA2: | 
					
						
							|  |  |  |     case S_EMSC1: | 
					
						
							|  |  |  |     case S_EMSC2: | 
					
						
							|  |  |  |     case S_DECL0: | 
					
						
							|  |  |  |     case S_DECL: | 
					
						
							|  |  |  |     case S_MDECL0: | 
					
						
							|  |  |  |     case S_STRING: | 
					
						
							|  |  |  |     case S_CMTO: | 
					
						
							|  |  |  |     case S_GROUP: | 
					
						
							|  |  |  |     case S_PENT: | 
					
						
							|  |  |  |     case S_ENT: | 
					
						
							|  |  |  |     case S_ENT0: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       rval = gripe(p, ERC_SYNTAX_ERROR, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		   L"Unexpected end-of-file", L""); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  | #ifdef UTF8
 | 
					
						
							|  |  |  |     case S_UTF8: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       rval = gripe(p, ERC_SYNTAX_ERROR, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		   L"Unexpected end-of-file in UTF-8 sequence", L""); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     case S_MSCDATA: | 
					
						
							|  |  |  |     case S_EMSCDATA1: | 
					
						
							|  |  |  |     case S_EMSCDATA2: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       rval = gripe(p, ERC_SYNTAX_ERROR, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		   L"Unexpected end-of-file in CDATA marked section", L""); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case S_PI: | 
					
						
							|  |  |  |     case S_PI2: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       rval = gripe(p, ERC_SYNTAX_ERROR, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		   L"Unexpected end-of-file in processing instruction", L""); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     default: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       rval = gripe(p, ERC_SYNTAX_ERROR, | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 		   L"Unexpected end-of-file in ???"); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( p->dmode == DM_DATA ) | 
					
						
							|  |  |  |   { sgml_environment *env; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->cdata->size > 0 && | 
					
						
							|  |  |  | 	 fetch_ocharbuf(p->cdata, p->cdata->size-1) == CR ) | 
					
						
							|  |  |  |       del_ocharbuf(p->cdata); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     process_cdata(p, TRUE); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( (env=p->environments) ) | 
					
						
							|  |  |  |     { dtd_element *e; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       while(env->parent) | 
					
						
							|  |  |  | 	env = env->parent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       pop_to(p, env, CDATA_ELEMENT); | 
					
						
							|  |  |  |       e = env->element; | 
					
						
							|  |  |  |       if ( e->structure && !e->structure->omit_close ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	gripe(p, ERC_OMITTED_CLOSE, e->name->name); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       close_element(p, e, FALSE); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return rval; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | begin_document_dtd_parser(dtd_parser *p) | 
					
						
							|  |  |  | { init_decoding(p); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | reset_document_dtd_parser(dtd_parser *p) | 
					
						
							|  |  |  | { if ( p->environments ) | 
					
						
							|  |  |  |   { sgml_environment *env, *parent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(env = p->environments; env; env=parent) | 
					
						
							|  |  |  |     { parent = env->parent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       free_environment(env); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     p->environments = NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   while(p->marked) | 
					
						
							|  |  |  |     pop_marked_section(p); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   empty_icharbuf(p->buffer); | 
					
						
							|  |  |  |   empty_ocharbuf(p->cdata); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   p->mark_state	   = MS_INCLUDE; | 
					
						
							|  |  |  |   p->state	   = S_PCDATA; | 
					
						
							|  |  |  |   p->grouplevel	   = 0; | 
					
						
							|  |  |  |   p->blank_cdata   = TRUE; | 
					
						
							|  |  |  |   p->event_class   = EV_EXPLICIT; | 
					
						
							|  |  |  |   p->dmode	   = DM_DATA; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   begin_document_dtd_parser(p); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Set the UTF-8 state | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef UTF8
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | process_utf8(dtd_parser *p, int chr) | 
					
						
							|  |  |  | { int bytes; | 
					
						
							|  |  |  |   int mask; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( bytes=1, mask=0x20; chr&mask; bytes++, mask >>= 1 ) | 
					
						
							|  |  |  |     ; | 
					
						
							|  |  |  |   mask--;				/* 0x20 --> 0x1f */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   p->utf8_saved_state = p->state;		/* state to return to */ | 
					
						
							|  |  |  |   p->state = S_UTF8; | 
					
						
							|  |  |  |   p->utf8_char = chr & mask; | 
					
						
							|  |  |  |   p->utf8_left = bytes; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | add_cdata() adds a character to the output  data. It also maps \r\n onto | 
					
						
							|  |  |  | a single \n for Windows newline conventions. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | There is a problem here in shortref  handling. We open the CDATA_ELEMENT | 
					
						
							|  |  |  | as soon as we find a character as   this may open other elements through | 
					
						
							|  |  |  | omitted tags and thus install a new shortref map. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If, at a later stage, all CDATA read so far turns out to be a shortref we | 
					
						
							|  |  |  | have  incorrectly  opened   the   CDATA_ELEMENT.    As   `undoing'   the | 
					
						
							|  |  |  | open_element() is not an option (it may  already have caused `events' on | 
					
						
							|  |  |  | omitted tags) we are in trouble. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | add_cdata(dtd_parser *p, int chr) | 
					
						
							|  |  |  | { if ( p->mark_state == MS_INCLUDE ) | 
					
						
							|  |  |  |   { ocharbuf *buf = p->cdata; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->blank_cdata == TRUE && | 
					
						
							|  |  |  | 	 !HasClass(p->dtd, (wint_t)chr, CH_BLANK) ) | 
					
						
							|  |  |  |     { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE); | 
					
						
							|  |  |  |       p->blank_cdata = FALSE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( chr == '\n' )			/* insert missing CR */ | 
					
						
							|  |  |  |     { int sz; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( (sz=buf->size) == 0 || | 
					
						
							|  |  |  | 	   fetch_ocharbuf(buf, sz-1) != CR ) | 
					
						
							|  |  |  | 	add_cdata(p, CR); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     add_ocharbuf(buf, chr); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     if ( p->map && | 
					
						
							|  |  |  | 	 chr <= 0xff && p->map->ends[chr] && | 
					
						
							|  |  |  | 	 match_shortref(p) ) | 
					
						
							|  |  |  |       return; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( chr == '\n' )			/* dubious.  Whould we do that */ | 
					
						
							|  |  |  |     { int sz;				/* here or in space-handling? */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( (sz=buf->size) > 1 && | 
					
						
							|  |  |  | 	   fetch_ocharbuf(buf, sz-1) == LF && | 
					
						
							|  |  |  | 	   fetch_ocharbuf(buf, sz-2) == CR ) | 
					
						
							|  |  |  |       { poke_ocharbuf(buf, sz-2, LF); | 
					
						
							|  |  |  | 	buf->size--; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | add_verbatim_cdata(dtd_parser *p, int chr) | 
					
						
							|  |  |  | { if ( p->mark_state != MS_IGNORE ) | 
					
						
							|  |  |  |   { ocharbuf *buf = p->cdata; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( p->blank_cdata == TRUE && | 
					
						
							|  |  |  | 	 !HasClass(p->dtd, (wint_t)chr, CH_BLANK) ) | 
					
						
							|  |  |  |     { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE); | 
					
						
							|  |  |  |       p->blank_cdata = FALSE; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( chr == '\n' && buf->size > 0 && | 
					
						
							|  |  |  | 	 fetch_ocharbuf(buf, buf->size-1) == '\r' ) | 
					
						
							|  |  |  |       buf->size--; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     add_ocharbuf(buf, chr); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* We discovered illegal markup and now process it as normal CDATA
 | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | recover_parser(dtd_parser *p) | 
					
						
							|  |  |  | { const ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  |   add_cdata(p, p->saved); | 
					
						
							|  |  |  |   for(s=p->buffer->data; *s; s++) | 
					
						
							|  |  |  |     add_cdata(p, *s); | 
					
						
							|  |  |  |   p->state = S_PCDATA; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void | 
					
						
							|  |  |  | setlocation(dtd_srcloc *d, dtd_srcloc *loc, int line, int lpos) | 
					
						
							|  |  |  | { d->line    = line; | 
					
						
							|  |  |  |   d->linepos = lpos; | 
					
						
							|  |  |  |   d->charpos = loc->charpos - 1; | 
					
						
							|  |  |  |   d->type    = loc->type; | 
					
						
							|  |  |  |   d->name    = loc->name; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | int | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | putchar_dtd_parser(dtd_parser *p, int chr) | 
					
						
							|  |  |  | { dtd *dtd = p->dtd; | 
					
						
							|  |  |  |   const ichar *f = dtd->charfunc->func; | 
					
						
							|  |  |  |   int line = p->location.line; | 
					
						
							|  |  |  |   int lpos = p->location.linepos; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   p->location.charpos++;		/* TBD: actually `bytepos' */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef UTF8
 | 
					
						
							|  |  |  |   if ( p->state == S_UTF8 ) | 
					
						
							|  |  |  |   { if ( (chr & 0xc0) != 0x80 )	/* TBD: recover */ | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       gripe(p, ERC_SYNTAX_ERROR, L"Bad UTF-8 sequence", L""); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     p->utf8_char <<= 6; | 
					
						
							|  |  |  |     p->utf8_char |= (chr & ~0xc0); | 
					
						
							|  |  |  |     if ( --p->utf8_left == 0 ) | 
					
						
							|  |  |  |     { chr = p->utf8_char; | 
					
						
							|  |  |  |       p->state = p->utf8_saved_state; | 
					
						
							|  |  |  |     } else | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |   } else if ( ISUTF8_MB(chr) && p->utf8_decode ) | 
					
						
							|  |  |  |   { process_utf8(p, chr); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( f[CF_RS] == chr ) | 
					
						
							|  |  |  |   { p->location.line++; | 
					
						
							|  |  |  |     p->location.linepos = 0; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { if ( f[CF_RE] == chr ) | 
					
						
							|  |  |  |       p->location.linepos = 0; | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       p->location.linepos++; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | reprocess: | 
					
						
							|  |  |  |   switch(p->state) | 
					
						
							|  |  |  |   { case S_PCDATA: | 
					
						
							|  |  |  |     { if ( f[CF_MDO1] == chr )		/* < */ | 
					
						
							|  |  |  |       { setlocation(&p->startloc, &p->location, line, lpos); | 
					
						
							|  |  |  | 	p->state = S_DECL0; | 
					
						
							|  |  |  | 	empty_icharbuf(p->buffer); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       if ( p->dmode == DM_DTD ) | 
					
						
							|  |  |  |       { if ( f[CF_PERO] == chr )	/* % */ | 
					
						
							|  |  |  | 	{ setlocation(&p->startloc, &p->location, line, lpos); | 
					
						
							|  |  |  | 	  p->state = S_PENT; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { if ( f[CF_ERO] == chr )		/* & */ | 
					
						
							|  |  |  | 	{ setlocation(&p->startloc, &p->location, line, lpos); | 
					
						
							|  |  |  | 	  p->state = S_ENT0; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       if ( p->marked && f[CF_DSC] == chr ) /* ] in marked section */ | 
					
						
							|  |  |  |       { empty_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	p->state = S_EMSC1; | 
					
						
							|  |  |  | 	p->saved = chr;			/* for recovery */ | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( p->waiting_for_net && f[CF_ETAGO2] == chr ) /* shorttag */ | 
					
						
							|  |  |  |       { setlocation(&p->startloc, &p->location, line, lpos); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	process_net(p); | 
					
						
							|  |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* Real character data */ | 
					
						
							|  |  |  |       if ( p->cdata->size == 0 ) | 
					
						
							|  |  |  |         setlocation(&p->startcdata, &p->location, line, lpos); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       add_cdata(p, chr); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_ECDATA2:			/* Seen </ in CDATA/RCDATA */ | 
					
						
							|  |  |  |     { if ( f[CF_MDC] == chr && | 
					
						
							|  |  |  | 	   p->etaglen == p->buffer->size && | 
					
						
							|  |  |  | 	   istrncaseeq(p->buffer->data, p->etag, p->etaglen) ) | 
					
						
							|  |  |  |       { p->cdata->size -= p->etaglen+2;	/* 2 for </ */ | 
					
						
							|  |  |  | 	terminate_ocharbuf(p->cdata); | 
					
						
							|  |  |  | 	terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	if ( p->mark_state == MS_INCLUDE ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ process_cdata(p, TRUE); | 
					
						
							|  |  |  | 	  process_end_element(p, p->buffer->data); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  empty_cdata(p); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	empty_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	p->cdata_state = p->state = S_PCDATA; | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { add_verbatim_cdata(p, chr); | 
					
						
							|  |  |  | 	if ( p->etaglen < p->buffer->size || | 
					
						
							|  |  |  | 	     !HasClass(dtd, (wint_t)chr, CH_NAME)) | 
					
						
							|  |  |  | 	{ empty_icharbuf(p->buffer);	/* mismatch */ | 
					
						
							|  |  |  | 	  p->state = p->cdata_state; | 
					
						
							|  |  |  | 	} else | 
					
						
							|  |  |  | 	  add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  |       } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_ECDATA1:			/* seen < in CDATA */ | 
					
						
							|  |  |  |     { add_verbatim_cdata(p, chr); | 
					
						
							|  |  |  |       if ( f[CF_ETAGO2] == chr )	/* / */ | 
					
						
							|  |  |  |       { empty_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	p->state = S_ECDATA2; | 
					
						
							|  |  |  |       } else if ( f[CF_ETAGO1] != chr )	/* <: do not change state */ | 
					
						
							|  |  |  | 	p->state = p->cdata_state; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_RCDATA: | 
					
						
							|  |  |  |     { if ( f[CF_ERO] == chr ) /* & */ | 
					
						
							|  |  |  |       { setlocation(&p->startloc, &p->location, line, lpos); | 
					
						
							|  |  |  | 	p->state = S_ENT0; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       /*FALLTHROUGH*/ | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case S_CDATA: | 
					
						
							|  |  |  |     { add_verbatim_cdata(p, chr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( f[CF_MDO1] == chr )		/* < */ | 
					
						
							|  |  |  |       { setlocation(&p->startloc, &p->location, line, lpos); | 
					
						
							|  |  |  | 	p->state = S_ECDATA1; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* / in CDATA shorttag element */ | 
					
						
							|  |  |  |       if ( p->waiting_for_net && f[CF_ETAGO2] == chr ) | 
					
						
							|  |  |  |       { setlocation(&p->startloc, &p->location, line, lpos); | 
					
						
							|  |  |  | 	p->cdata->size--; | 
					
						
							|  |  |  | 	terminate_ocharbuf(p->cdata); | 
					
						
							|  |  |  | 	terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	if ( p->mark_state == MS_INCLUDE ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ process_cdata(p, TRUE); | 
					
						
							|  |  |  | 	  process_net(p); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	  empty_cdata(p); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	empty_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	p->cdata_state = p->state = S_PCDATA; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_MSCDATA: | 
					
						
							|  |  |  |     { add_verbatim_cdata(p, chr); | 
					
						
							|  |  |  |       if ( f[CF_DSC] == chr )		/* ] */ | 
					
						
							|  |  |  |         p->state = S_EMSCDATA1; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_EMSCDATA1: | 
					
						
							|  |  |  |     { add_verbatim_cdata(p, chr); | 
					
						
							|  |  |  |       if ( f[CF_DSC] == chr )		/* ]] */ | 
					
						
							|  |  |  |         p->state = S_EMSCDATA2; | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  |         p->state = S_MSCDATA; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_EMSCDATA2: | 
					
						
							|  |  |  |     { add_verbatim_cdata(p, chr); | 
					
						
							|  |  |  |       if ( f[CF_MDC] == chr )		/* ]]> */ | 
					
						
							|  |  |  |       { p->cdata->size -= 3;		/* Delete chars for ]] */ | 
					
						
							|  |  |  | 	pop_marked_section(p); | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							|  |  |  |       } else if ( f[CF_DSC] != chr )	/* if ]]], stay in this state */ | 
					
						
							|  |  |  |         p->state = S_MSCDATA; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_EMSC1: | 
					
						
							|  |  |  |     { if ( f[CF_DSC] == chr )		/* ]] in marked section */ | 
					
						
							|  |  |  |       { p->state = S_EMSC2; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } else | 
					
						
							|  |  |  |       { add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  | 	recover_parser(p); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case S_EMSC2: | 
					
						
							|  |  |  |     { if ( f[CF_MDC] == chr )		/* ]]> in marked section */ | 
					
						
							|  |  |  |       { pop_marked_section(p); | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } else | 
					
						
							|  |  |  |       { add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  | 	recover_parser(p); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case S_PENT:			/* %parameter entity; */ | 
					
						
							|  |  |  |     { if ( f[CF_ERC] == chr ) | 
					
						
							|  |  |  |       { p->state = S_PCDATA; | 
					
						
							|  |  |  | 	terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	if ( p->mark_state == MS_INCLUDE ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ process_include(p, p->buffer->data); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	empty_icharbuf(p->buffer); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       if ( HasClass(dtd, (wint_t)chr, CH_NAME) ) | 
					
						
							|  |  |  |       { add_icharbuf(p->buffer, chr); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       terminate_icharbuf(p->buffer); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return gripe(p, ERC_SYNTAX_ERROR, | 
					
						
							|  |  |  | 		   L"Illegal parameter entity", p->buffer->data); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_ENT0:			/* Seen & */ | 
					
						
							|  |  |  |     { if ( chr == '#' || HasClass(dtd, (wint_t)chr, CH_NAME) ) | 
					
						
							|  |  |  |       { empty_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  | 	p->state = S_ENT; | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       {	if ( dtd->dialect != DL_SGML ) | 
					
						
							|  |  |  | 	{ wchar_t buf[3]; | 
					
						
							|  |  |  | 	  buf[0] = '&'; | 
					
						
							|  |  |  | 	  buf[1] = chr; | 
					
						
							|  |  |  | 	  buf[2] = '\0'; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal entity", buf); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	add_cdata(p, f[CF_ERO]); | 
					
						
							|  |  |  | 	p->state = p->cdata_state; | 
					
						
							|  |  |  | 	goto reprocess; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     case S_ENT:				/* &entity; */ | 
					
						
							|  |  |  |     { if ( HasClass(dtd, (wint_t)chr, CH_NAME) ) | 
					
						
							|  |  |  |       { add_icharbuf(p->buffer, chr); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  |       p->state = p->cdata_state; | 
					
						
							|  |  |  |       if ( p->mark_state == MS_INCLUDE ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       { process_entity(p, p->buffer->data); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       empty_icharbuf(p->buffer); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       if ( chr == CR ) | 
					
						
							|  |  |  | 	p->state = S_ENTCR; | 
					
						
							|  |  |  |       else if ( f[CF_ERC] != chr && chr != '\n' ) | 
					
						
							|  |  |  | 	goto reprocess; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_ENTCR:			/* seen &entCR, eat the LF */ | 
					
						
							|  |  |  |     { p->state = p->cdata_state; | 
					
						
							|  |  |  |       if ( chr != LF ) | 
					
						
							|  |  |  | 	goto reprocess; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_DECL0:			/* Seen < */ | 
					
						
							|  |  |  |     { if ( f[CF_ETAGO2] == chr )	/* </ */ | 
					
						
							|  |  |  |       { add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  | 	p->state = S_DECL; | 
					
						
							|  |  |  |       } else if ( HasClass(dtd, (wint_t)chr, CH_NAME) ) /* <letter */ | 
					
						
							|  |  |  |       { add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  | 	p->state = S_DECL; | 
					
						
							|  |  |  |       } else if ( f[CF_MDO2] == chr )	/* <! */ | 
					
						
							|  |  |  |       { p->state = S_MDECL0; | 
					
						
							|  |  |  |       } else if ( f[CF_PRO2] == chr )	/* <? */ | 
					
						
							|  |  |  |       { p->state = S_PI; | 
					
						
							|  |  |  |       } else				/* recover */ | 
					
						
							|  |  |  |       { add_cdata(p, f[CF_MDO1]); | 
					
						
							|  |  |  | 	add_cdata(p, chr); | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_MDECL0:			/* Seen <! */ | 
					
						
							|  |  |  |     { if ( f[CF_CMT] == chr )		/* <!- */ | 
					
						
							|  |  |  |       { p->state = S_CMTO; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       add_icharbuf(p->buffer, f[CF_MDO2]); | 
					
						
							|  |  |  |       add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  |       p->state = S_DECL; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_DECL:			/* <...> */ | 
					
						
							|  |  |  |     { if ( f[CF_MDC] == chr )		/* > */ | 
					
						
							|  |  |  |       { prepare_cdata(p); | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							|  |  |  | 	terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	if ( p->mark_state == MS_INCLUDE ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ process_declaration(p, p->buffer->data); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	empty_icharbuf(p->buffer); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       if ( dtd->shorttag && f[CF_ETAGO2] == chr && p->buffer->size > 0 ) | 
					
						
							|  |  |  |       { prepare_cdata(p); | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							|  |  |  | 	terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	if ( p->mark_state == MS_INCLUDE ) | 
					
						
							|  |  |  | 	{ WITH_CLASS(p, EV_SHORTTAG, | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 		     process_declaration(p, p->buffer->data)); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	empty_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	p->waiting_for_net = TRUE; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( f[CF_LIT] == chr )		/* " */ | 
					
						
							|  |  |  |       { p->state = S_STRING; | 
					
						
							|  |  |  | 	p->saved = chr; | 
					
						
							|  |  |  | 	p->lit_saved_state = S_DECL; | 
					
						
							|  |  |  |       } else if ( f[CF_LITA] == chr )	/* ' */ | 
					
						
							|  |  |  |       { p->state = S_STRING; | 
					
						
							|  |  |  | 	p->saved = chr; | 
					
						
							|  |  |  | 	p->lit_saved_state = S_DECL; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } else if ( f[CF_CMT] == chr &&	/* - */ | 
					
						
							|  |  |  | 		  p->buffer->data[0] == f[CF_MDO2] ) /* Started <! */ | 
					
						
							|  |  |  |       { p->state = S_DECLCMT0; | 
					
						
							|  |  |  |       } else if ( f[CF_DSO] == chr )	/* [: marked section */ | 
					
						
							|  |  |  |       { terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	process_marked_section(p); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_DECLCMT0:			/* <...- */ | 
					
						
							|  |  |  |     { if ( f[CF_CMT] == chr ) | 
					
						
							|  |  |  |       { p->buffer->size--; | 
					
						
							|  |  |  | 	p->state = S_DECLCMT; | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  | 	p->state = S_DECL; | 
					
						
							|  |  |  |       } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_DECLCMT:			/* <...--.. */ | 
					
						
							|  |  |  |     { if ( f[CF_CMT] == chr ) | 
					
						
							|  |  |  | 	p->state = S_DECLCMTE0; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_DECLCMTE0:			/* <...--..- */ | 
					
						
							|  |  |  |     { if ( f[CF_CMT] == chr ) | 
					
						
							|  |  |  | 	p->state = S_DECL; | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  | 	p->state = S_DECLCMT; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_PI: | 
					
						
							|  |  |  |     { add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  |       if ( f[CF_PRO2] == chr )		/* <? ... ? */ | 
					
						
							|  |  |  | 	p->state = S_PI2; | 
					
						
							|  |  |  |       if ( f[CF_PRC] == chr )		/* no ? is ok too (XML/SGML) */ | 
					
						
							|  |  |  | 	goto pi; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_PI2: | 
					
						
							|  |  |  |     { if ( f[CF_PRC] == chr ) | 
					
						
							|  |  |  |       { pi: | 
					
						
							|  |  |  | 	process_cdata(p, FALSE); | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							|  |  |  | 	p->buffer->size--; | 
					
						
							|  |  |  | 	terminate_icharbuf(p->buffer); | 
					
						
							|  |  |  | 	if ( p->mark_state == MS_INCLUDE ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	{ process_pi(p, p->buffer->data); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	empty_icharbuf(p->buffer); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  |       p->state = S_PI; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_STRING: | 
					
						
							|  |  |  |     { add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  |       if ( chr == p->saved ) | 
					
						
							|  |  |  | 	p->state = p->lit_saved_state; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_CMTO:			/* Seen <!- */ | 
					
						
							|  |  |  |     { if ( f[CF_CMT] == chr )		/* - */ | 
					
						
							|  |  |  |       { p->state = S_CMT1; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } else | 
					
						
							|  |  |  |       { add_cdata(p, f[CF_MDO1]); | 
					
						
							|  |  |  | 	add_cdata(p, f[CF_MDO2]); | 
					
						
							|  |  |  | 	add_cdata(p, f[CF_CMT]); | 
					
						
							|  |  |  | 	add_cdata(p, chr); | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case S_CMT1:			/* <!-- */ | 
					
						
							|  |  |  |     { if ( f[CF_CMT] == chr )		/* <!--- */ | 
					
						
							|  |  |  |       { if ( dtd->dialect != DL_SGML ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal comment", L"<!---"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |       p->state = S_CMT; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_CMT: | 
					
						
							|  |  |  |     { if ( f[CF_CMT] == chr ) | 
					
						
							|  |  |  | 	p->state = S_CMTE0;		/* <!--...- */ | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_CMTE0:			/* <!--... -- */ | 
					
						
							|  |  |  |     { if ( f[CF_CMT] == chr ) | 
					
						
							|  |  |  | 	p->state = S_CMTE1; | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  | 	p->state = S_CMT; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_CMTE1:			/* <!--...-- seen */ | 
					
						
							|  |  |  |     { if ( f[CF_MDC] == chr )		/* > */ | 
					
						
							|  |  |  |       { if ( p->on_decl ) | 
					
						
							|  |  |  | 	  (*p->on_decl)(p, (ichar*)""); | 
					
						
							|  |  |  | 	p->state = S_PCDATA; | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { if ( dtd->dialect != DL_SGML ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal comment", L""); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	if ( f[CF_CMT] != chr ) | 
					
						
							|  |  |  | 	  p->state = S_CMT; | 
					
						
							|  |  |  |       } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case S_GROUP:			/* [...] in declaration */ | 
					
						
							|  |  |  |     { add_icharbuf(p->buffer, chr); | 
					
						
							|  |  |  |       if ( f[CF_DSO] == chr ) | 
					
						
							|  |  |  |       { p->grouplevel++; | 
					
						
							|  |  |  |       } else if ( f[CF_DSC] == chr ) | 
					
						
							|  |  |  |       { if ( --p->grouplevel == 0 ) | 
					
						
							|  |  |  | 	  p->state = S_DECL; | 
					
						
							|  |  |  |       } else if ( f[CF_LIT] == chr )	/* " */ | 
					
						
							|  |  |  |       { p->state = S_STRING; | 
					
						
							|  |  |  | 	p->saved = chr; | 
					
						
							|  |  |  | 	p->lit_saved_state = S_GROUP; | 
					
						
							|  |  |  |       } else if ( f[CF_LITA] == chr )	/* ' */ | 
					
						
							|  |  |  |       { p->state = S_STRING; | 
					
						
							|  |  |  | 	p->saved = chr; | 
					
						
							|  |  |  | 	p->lit_saved_state = S_GROUP; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 	return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       return TRUE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  | #ifdef UTF8
 | 
					
						
							|  |  |  |     case S_UTF8: | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     default: | 
					
						
							|  |  |  |       assert(0); | 
					
						
							|  |  |  |       return FALSE; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	     TOPLEVEL		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | load_dtd_from_file(dtd_parser *p, const ichar *file) | 
					
						
							|  |  |  | { FILE *fd; | 
					
						
							|  |  |  |   int rval; | 
					
						
							|  |  |  |   data_mode   oldmode  = p->dmode; | 
					
						
							|  |  |  |   dtdstate    oldstate = p->state; | 
					
						
							|  |  |  |   locbuf      oldloc; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   push_location(p, &oldloc); | 
					
						
							|  |  |  |   p->dmode = DM_DTD; | 
					
						
							|  |  |  |   p->state = S_PCDATA; | 
					
						
							|  |  |  |   empty_icharbuf(p->buffer);		/* dubious */ | 
					
						
							|  |  |  |   set_file_dtd_parser(p, IN_FILE, file); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (fd = wfopen(file, "rb")) ) | 
					
						
							|  |  |  |   { int chr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while( (chr = getc(fd)) != EOF ) | 
					
						
							|  |  |  |       putchar_dtd_parser(p, chr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     fclose(fd); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     p->dtd->implicit = FALSE; | 
					
						
							|  |  |  |     rval = TRUE; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |     rval = FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   pop_location(p, &oldloc); | 
					
						
							|  |  |  |   p->dmode = oldmode; | 
					
						
							|  |  |  |   p->state = oldstate; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return rval; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | dtd * | 
					
						
							|  |  |  | file_to_dtd(const ichar *file, const ichar *doctype, dtd_dialect dialect) | 
					
						
							|  |  |  | { dtd_parser *p = new_dtd_parser(new_dtd(doctype)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   set_dialect_dtd(p->dtd, dialect); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( load_dtd_from_file(p, file) ) | 
					
						
							|  |  |  |   { dtd *dtd = p->dtd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dtd->references++;			/* avoid deletion */ | 
					
						
							|  |  |  |     free_dtd_parser(p); | 
					
						
							|  |  |  |     return dtd; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { free_dtd_parser(p); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | SGML sees a file as | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | [<LF>]Line 1<CR> | 
					
						
							|  |  |  |  <LF> Line 2<CR> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | I.e. the newline  appearing  just  before   the  end-of-file  should  be | 
					
						
							|  |  |  | ignored. In addition, Unix-style files are   mapped  to CR-LF. Thanks to | 
					
						
							|  |  |  | Richard O'Keefe. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | sgml_process_stream(dtd_parser *p, FILE *fd, unsigned flags) | 
					
						
							|  |  |  | { int p0, p1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (p0 = getc(fd)) == EOF ) | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  |   if ( (p1 = getc(fd)) == EOF ) | 
					
						
							|  |  |  |   { putchar_dtd_parser(p, p0); | 
					
						
							|  |  |  |     return end_document_dtd_parser(p); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(;;) | 
					
						
							|  |  |  |   { int p2 = getc(fd); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     if ( p2 == EOF ) | 
					
						
							|  |  |  |     { putchar_dtd_parser(p, p0); | 
					
						
							|  |  |  |       if ( p1 != LF ) | 
					
						
							|  |  |  | 	putchar_dtd_parser(p, p1); | 
					
						
							|  |  |  |       else if ( p0 != CR ) | 
					
						
							|  |  |  | 	putchar_dtd_parser(p, CR); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( flags & SGML_SUB_DOCUMENT ) | 
					
						
							|  |  |  | 	return TRUE; | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  | 	return end_document_dtd_parser(p); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     putchar_dtd_parser(p, p0); | 
					
						
							|  |  |  |     p0 = p1; | 
					
						
							|  |  |  |     p1 = p2; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   } | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | sgml_process_file(dtd_parser *p, const ichar *file, unsigned flags) | 
					
						
							|  |  |  | { FILE *fd; | 
					
						
							|  |  |  |   int rval; | 
					
						
							|  |  |  |   locbuf oldloc; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   push_location(p, &oldloc); | 
					
						
							|  |  |  |   set_file_dtd_parser(p, IN_FILE, file); | 
					
						
							|  |  |  |   if ( !(flags & SGML_SUB_DOCUMENT) ) | 
					
						
							|  |  |  |     set_mode_dtd_parser(p, DM_DATA); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (fd = wfopen(file, "rb")) ) | 
					
						
							|  |  |  |   { rval = sgml_process_stream(p, fd, flags); | 
					
						
							|  |  |  |     fclose(fd); | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |     rval = FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   pop_location(p, &oldloc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return rval; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	       ERRORS		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static wchar_t * | 
					
						
							|  |  |  | format_location(wchar_t *s, size_t len, dtd_srcloc *l) | 
					
						
							|  |  |  | { int first = TRUE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !l || l->type == IN_NONE ) | 
					
						
							|  |  |  |     return s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for( ; l && l->type != IN_NONE; | 
					
						
							|  |  |  |          l = l->parent, first = FALSE ) | 
					
						
							|  |  |  |   { if ( !first ) | 
					
						
							|  |  |  |     { swprintf(s, len, L" (from "); | 
					
						
							|  |  |  |       s += wcslen(s); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch(l->type) | 
					
						
							|  |  |  |     { case IN_NONE: | 
					
						
							|  |  |  | 	assert(0); | 
					
						
							|  |  |  |       case IN_FILE: | 
					
						
							|  |  |  | 	swprintf(s, len, L"%ls:%d:%d", l->name.file, l->line, l->linepos); | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |       case IN_ENTITY: | 
					
						
							|  |  |  |         swprintf(s, len, L"&%ls;%d:%d", l->name.entity, l->line, l->linepos); | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     s += wcslen(s); | 
					
						
							|  |  |  |     if ( !first ) | 
					
						
							|  |  |  |     { *s++ = L')'; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   *s++ = L':'; | 
					
						
							|  |  |  |   *s++ = L' '; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return s; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | format_message(dtd_error *e) | 
					
						
							|  |  |  | { wchar_t buf[1024]; | 
					
						
							|  |  |  |   wchar_t *s; | 
					
						
							|  |  |  |   int prefix_len; | 
					
						
							|  |  |  |   int left; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   switch(e->severity) | 
					
						
							|  |  |  |   { case ERS_ERROR: | 
					
						
							|  |  |  |       wcscpy(buf, L"Error: "); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERS_WARNING: | 
					
						
							|  |  |  |       wcscpy(buf, L"Warning: "); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       buf[0] = '\0'; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   s = buf+wcslen(buf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   s = format_location(s, 1024-(s-buf), e->location); | 
					
						
							|  |  |  |   prefix_len = (int)(s-buf); | 
					
						
							|  |  |  |   left = 1024-prefix_len; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   switch(e->id) | 
					
						
							|  |  |  |   { case ERC_REPRESENTATION: | 
					
						
							|  |  |  |       swprintf(s, left, L"Cannot represent due to %ls", e->argv[0]); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERC_RESOURCE: | 
					
						
							|  |  |  |       swprintf(s, left, L"Insufficient %ls resources", e->argv[0]); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERC_LIMIT: | 
					
						
							|  |  |  |       swprintf(s, left, L"%ls limit exceeded", e->argv[0]); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERC_VALIDATE: | 
					
						
							|  |  |  |       swprintf(s, left, L"%ls", e->argv[0]); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERC_SYNTAX_ERROR: | 
					
						
							|  |  |  |       swprintf(s, left, L"Syntax error: %ls", e->argv[0]); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERC_EXISTENCE: | 
					
						
							|  |  |  |       swprintf(s, left, L"%ls \"%ls\" does not exist", e->argv[0], e->argv[1]); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERC_REDEFINED: | 
					
						
							|  |  |  |       swprintf(s, left, L"Redefined %ls \"%ls\"", e->argv[0], e->argv[1]); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       ; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   e->message = str2ring(buf); | 
					
						
							|  |  |  |   e->plain_message = e->message + prefix_len; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | gripe(dtd_parser *p, dtd_error_id e, ...) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { va_list args; | 
					
						
							|  |  |  |   wchar_t buf[1024]; | 
					
						
							|  |  |  |   dtd_error error; | 
					
						
							|  |  |  |   int dtdmode = FALSE; | 
					
						
							|  |  |  |   void *freeme = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   va_start(args, e); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   memset(&error, 0, sizeof(error)); | 
					
						
							|  |  |  |   error.minor = e;			/* detailed error code */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( p ) | 
					
						
							|  |  |  |   { error.location = &p->location; | 
					
						
							|  |  |  |     if ( p->dmode == DM_DTD ) | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       dtdmode = TRUE; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { error.location = NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   switch(e) | 
					
						
							|  |  |  |   { case ERC_REPRESENTATION: | 
					
						
							|  |  |  |     case ERC_RESOURCE: | 
					
						
							|  |  |  |       error.severity = ERS_ERROR; | 
					
						
							|  |  |  |       error.argv[0]  = va_arg(args, wchar_t *); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERC_LIMIT: | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  |       error.argv[0]  = va_arg(args, wchar_t *); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case ERC_SYNTAX_ERROR: | 
					
						
							|  |  |  |     case ERC_SYNTAX_WARNING: | 
					
						
							|  |  |  |     { wchar_t *m       = va_arg(args, wchar_t *); | 
					
						
							|  |  |  |       const wchar_t *s = va_arg(args, const wchar_t *); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( s && *s ) | 
					
						
							|  |  |  |       { swprintf(buf, 1024, L"%ls, found \"%ls\"", m, str_summary(s, 25)); | 
					
						
							|  |  |  | 	error.argv[0] = buf; | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  | 	error.argv[0] = m; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       error.severity = (e == ERC_SYNTAX_WARNING ? ERS_WARNING : ERS_ERROR); | 
					
						
							|  |  |  |       e = ERC_SYNTAX_ERROR; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_DOMAIN: | 
					
						
							|  |  |  |     { const wchar_t *expected = va_arg(args, const wchar_t *); | 
					
						
							|  |  |  |       const wchar_t *found    = str_summary(va_arg(args, const wchar_t *), 25); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       swprintf(buf, 1024, L"Expected type %ls, found \"%ls\"", expected, found); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_ERROR; | 
					
						
							|  |  |  |       e = (dtdmode ? ERC_SYNTAX_ERROR : ERC_VALIDATE); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_REDEFINED: | 
					
						
							|  |  |  |     { dtd_symbol *name; | 
					
						
							|  |  |  |       error.argv[0] = va_arg(args, wchar_t *); /* type */ | 
					
						
							|  |  |  |       name = va_arg(args, dtd_symbol *); /* name */ | 
					
						
							|  |  |  |       error.argv[1]  = (ichar*)name->name; | 
					
						
							|  |  |  |       error.severity = ERS_STYLE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_EXISTENCE: | 
					
						
							|  |  |  |     { error.argv[0] = va_arg(args, wchar_t *); /* type */ | 
					
						
							|  |  |  |       error.argv[1] = va_arg(args, wchar_t *); /* name */ | 
					
						
							|  |  |  |       error.severity = ERS_ERROR; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_VALIDATE: | 
					
						
							|  |  |  |     { error.argv[0] = va_arg(args, wchar_t *); /* message */ | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_OMITTED_CLOSE: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { const wchar_t *element = va_arg(args, const wchar_t *); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       swprintf(buf, 1024, L"Inserted omitted end-tag for \"%ls\"", element); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  |       e = ERC_VALIDATE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_OMITTED_OPEN: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { const wchar_t *element = va_arg(args, const wchar_t *); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       swprintf(buf, 1024, L"Inserted omitted start-tag for \"%ls\"", element); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  |       e = ERC_VALIDATE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_NOT_OPEN: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { const wchar_t *element = va_arg(args, const wchar_t *); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       swprintf(buf, 1024, L"Ignored end-tag for \"%ls\" which is not open", | 
					
						
							|  |  |  | 	       element); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  |       e = ERC_VALIDATE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_NOT_ALLOWED: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { const wchar_t *element = va_arg(args, const wchar_t *); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       swprintf(buf, 1024, L"Element \"%ls\" not allowed here", element); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  |       e = ERC_VALIDATE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_NOT_ALLOWED_PCDATA: | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     { const ocharbuf *cdata = va_arg(args, const ocharbuf *); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       swprintf(buf, 1024, L"#PCDATA (\"%ls\") not allowed here", | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 	       str_summary(cdata->data.w, 25)); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  |       e = ERC_VALIDATE; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |       break; | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     } | 
					
						
							|  |  |  |     case ERC_NO_ATTRIBUTE: | 
					
						
							|  |  |  |     { const wchar_t *elem = va_arg(args, wchar_t *); /* element */ | 
					
						
							|  |  |  |       const wchar_t *attr = va_arg(args, wchar_t *); /* attribute */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       swprintf(buf, 1024, L"Element \"%ls\" has no attribute \"%ls\"", | 
					
						
							|  |  |  | 	       elem, attr); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       e = ERC_VALIDATE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_NO_ATTRIBUTE_VALUE: | 
					
						
							|  |  |  |     { const wchar_t *elem  = va_arg(args, wchar_t *); /* element */ | 
					
						
							|  |  |  |       const wchar_t *value = va_arg(args, wchar_t *); /* attribute value */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       swprintf(buf, 1024, L"Element \"%ls\" has no attribute with value \"%ls\"", | 
					
						
							|  |  |  | 	       elem, value); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       e = ERC_VALIDATE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_NO_VALUE: | 
					
						
							|  |  |  |     { error.argv[0] = L"entity value"; | 
					
						
							|  |  |  |       error.argv[1] = va_arg(args, wchar_t *); /* entity */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       error.severity = ERS_ERROR; | 
					
						
							|  |  |  |       e = ERC_EXISTENCE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_NO_DOCTYPE: | 
					
						
							|  |  |  |     { const wchar_t *doctype = va_arg(args, wchar_t *); /* element */ | 
					
						
							|  |  |  |       const wchar_t *file    = va_arg(args, wchar_t *); /* DTD file */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       swprintf(buf, 1024, L"No <!DOCTYPE ...>, assuming \"%ls\" from DTD file \"%s\"", | 
					
						
							|  |  |  | 	      doctype, file); | 
					
						
							|  |  |  |       error.argv[0] = buf; | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |       e = ERC_VALIDATE; | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case ERC_NO_CATALOGUE: | 
					
						
							|  |  |  |     { char *file = va_arg(args, char *); /* catalogue file */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       error.argv[0] = L"catalogue file"; | 
					
						
							|  |  |  |       freeme = error.argv[1] = utf8towcs(file); | 
					
						
							|  |  |  |       error.severity = ERS_WARNING; | 
					
						
							|  |  |  |       e = ERC_EXISTENCE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   } | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   error.id      = e; | 
					
						
							|  |  |  |   format_message(&error); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   if ( p && p->on_error ) | 
					
						
							|  |  |  |     (*p->on_error)(p, &error); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   else | 
					
						
							|  |  |  |     fwprintf(stderr, L"SGML: %ls\n", error.message); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( freeme ) | 
					
						
							|  |  |  |     sgml_free(freeme); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   va_end(args); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return FALSE; | 
					
						
							|  |  |  | } |