5599 lines
		
	
	
		
			127 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			5599 lines
		
	
	
		
			127 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*  $Id$
 | |
| 
 | |
|     Part of SWI-Prolog
 | |
| 
 | |
|     Author:        Jan Wielemaker
 | |
|     E-mail:        wielemak@science.uva.nl
 | |
|     WWW:           http://www.swi-prolog.org
 | |
|     Copyright (C): 1985-2006, University of Amsterdam
 | |
| 
 | |
|     This library is free software; you can redistribute it and/or
 | |
|     modify it under the terms of the GNU Lesser General Public
 | |
|     License as published by the Free Software Foundation; either
 | |
|     version 2.1 of the License, or (at your option) any later version.
 | |
| 
 | |
|     This library is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|     Lesser General Public License for more details.
 | |
| 
 | |
|     You should have received a copy of the GNU Lesser General Public
 | |
|     License along with this library; if not, write to the Free Software
 | |
|     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| */
 | |
| 
 | |
| #define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
 | |
| 
 | |
| #define DTD_IMPLEMENTATION 1
 | |
| #include <stdio.h>
 | |
| #include <wchar.h>
 | |
| #include "dtd.h"
 | |
| #include "model.h"
 | |
| #include "util.h"
 | |
| #include "catalog.h"
 | |
| #include "parser.h"
 | |
| #include <stdlib.h>
 | |
| #include <assert.h>
 | |
| #include <stdarg.h>
 | |
| #include <ctype.h>
 | |
| #include <string.h>
 | |
| #include "utf8.h"
 | |
| #include <errno.h>
 | |
| #include <wctype.h>
 | |
| #include "xml_unicode.h"
 | |
| 
 | |
| #define DEBUG(g) ((void)0)
 | |
| #define ZERO_TERM_LEN (-1)		/* terminated by nul */
 | |
| 
 | |
| #ifdef __WINDOWS__
 | |
| #define inline __inline
 | |
| #define swprintf _snwprintf
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	    LOCAL TYPES		*
 | |
| 		 *******************************/
 | |
| 
 | |
| typedef struct locbuf
 | |
| { dtd_srcloc start;			/* p->startloc */
 | |
|   dtd_srcloc here;			/* p->location */
 | |
| } locbuf;
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	     PROTOTYPES		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static const ichar *	itake_name(dtd_parser *p,
 | |
| 				   const ichar *in, dtd_symbol **id);
 | |
| static const ichar *	itake_entity_name(dtd_parser *p, const ichar *in,
 | |
| 					  dtd_symbol **id);
 | |
| static const ichar *	itake_namegroup(dtd_parser *p, const ichar *decl,
 | |
| 					dtd_symbol **names, int *n);
 | |
| static const ichar *	iskip_layout(dtd *dtd, const ichar *in);
 | |
| static dtd_parser *	clone_dtd_parser(dtd_parser *p);
 | |
| static void		free_model(dtd_model *m);
 | |
| static int		process_entity_declaration(dtd_parser *p,
 | |
| 						    const ichar *decl);
 | |
| static void		free_notations(dtd_notation *n);
 | |
| static void		free_shortrefs(dtd_shortref *sr);
 | |
| static int		process_cdata(dtd_parser *p, int last);
 | |
| static int		process_entity(dtd_parser *p, const ichar *name);
 | |
| static int		emit_cdata(dtd_parser *p, int last);
 | |
| static dtd_space_mode	istr_to_space_mode(const ichar *val);
 | |
| static void		update_space_mode(dtd_parser *p, dtd_element *e,
 | |
| 					  int natts, sgml_attribute *atts);
 | |
| static dtd_model *	make_model(dtd_parser *p, const ichar *decl,
 | |
| 				   const ichar **end);
 | |
| static void		for_elements_in_model(dtd_model *m,
 | |
| 					      void (*f)(dtd_element *e,
 | |
| 							void *closure),
 | |
| 					      void *closure);
 | |
| int			putchar_dtd_parser(dtd_parser *p, int chr);
 | |
| void			free_dtd_parser(dtd_parser *p);
 | |
| static const ichar *	isee_character_entity(dtd *dtd, const ichar *in,
 | |
| 					      int *chr);
 | |
| static int		add_default_attributes(dtd_parser *p, dtd_element *e,
 | |
| 					       int natts,
 | |
| 					       sgml_attribute *atts);
 | |
| static int		prepare_cdata(dtd_parser *p);
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      MACROS		*
 | |
| 		 *******************************/
 | |
| 
 | |
/* Run g with p->event_class temporarily set to c, then put the previous
   event class back.

   p and c are parenthesised so that any expression can be passed for
   them; note that p is evaluated more than once.  The expansion is kept
   as a plain brace block so that existing uses keep compiling unchanged.
   If g leaves the block early (return, goto, break) the previous class
   is NOT restored. */
#define WITH_CLASS(p, c, g) \
	{ sgml_event_class _oc = (p)->event_class; \
	  (p)->event_class = (c); \
	  g; \
	  (p)->event_class = _oc; \
	}
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	     STATISTICS		*
 | |
| 		 *******************************/
 | |
| 
 | |
#ifndef O_STATISTICS

/* Statistics disabled: STAT(g) discards g entirely. */
#define STAT(g) ((void)0)

#else /* O_STATISTICS */

/* Counters that the rest of the file updates through STAT(). */
int edefs_created = 0;
int edefs_freed = 0;
int edefs_implicit = 0;
int edefs_atts = 0;
int edefs_decl = 0;
int dtd_created = 0;
int dtd_freed = 0;

/* Print the current counter values to stderr. */
void
sgml_statistics(void)
{
    fprintf(stderr, "EDEFS: created %d; freed %d\n",
	    edefs_created, edefs_freed);
    fprintf(stderr, "EDEFS: implicit %d; atts %d; decl %d\n",
	    edefs_implicit, edefs_atts, edefs_decl);
    fprintf(stderr, "DTDs: created: %d; freed: %d\n",
	    dtd_created, dtd_freed);
}

/* Statistics enabled: STAT(g) expands to g. */
#define STAT(g) g

#endif /* O_STATISTICS */
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	   SRC LOCATION		*
 | |
| 		 *******************************/
 | |
| 
 | |
| 
 | |
| static void				/* TBD: also handle startloc */
 | |
| push_location(dtd_parser *p, locbuf *save)
 | |
| { save->here  = p->location;
 | |
|   save->start = p->startloc;
 | |
| 
 | |
|   p->location.parent = &save->here;
 | |
|   p->startloc.parent = &save->start;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| pop_location(dtd_parser *p, locbuf *saved)
 | |
| { p->location = saved->here;
 | |
|   p->startloc = saved->start;
 | |
| }
 | |
| 
 | |
| 
 | |
| static inline void
 | |
| _sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc)
 | |
| { d->type    = loc->type;
 | |
|   d->name.file = loc->name.file;
 | |
|   d->line    = loc->line;
 | |
|   d->linepos = loc->linepos;
 | |
|   d->charpos = loc->charpos;
 | |
| 					/* but not the parent! */
 | |
| }
 | |
| 
 | |
| void
 | |
| sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc)
 | |
| { _sgml_cplocation(d, loc);
 | |
| }
 | |
| 
 | |
| #define sgml_cplocation(d,s) _sgml_cplocation(d, s)
 | |
| 
 | |
| static void
 | |
| inc_location(dtd_srcloc *l, int chr)
 | |
| { if ( chr == '\n' )
 | |
|   { l->linepos = 0;
 | |
|     l->line++;
 | |
|   }
 | |
| 
 | |
|   l->linepos++;
 | |
|   l->charpos++;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| dec_location(dtd_srcloc *l, int chr)
 | |
| { if ( chr == '\n' )
 | |
|   { l->linepos = 2;			/* not good! */
 | |
|     l->line--;
 | |
|   }
 | |
|   l->linepos--;
 | |
|   l->charpos--;
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *   CLASSIFICATION PRIMITIVES	*
 | |
| 		 *******************************/
 | |
| 
 | |
| static inline int
 | |
| HasClass(dtd *dtd, wint_t chr, int mask)
 | |
| { if ( chr <= 0xff )
 | |
|     return (dtd->charclass->class[(chr)] & (mask));
 | |
|   else
 | |
|   { switch(mask)
 | |
|     { case CH_NAME:
 | |
| 	return ( xml_basechar(chr) ||
 | |
| 		 xml_digit(chr) ||
 | |
| 		 xml_ideographic(chr) ||
 | |
| 		 xml_combining_char(chr) ||
 | |
| 		 xml_extender(chr)
 | |
| 	       );
 | |
|       case CH_NMSTART:
 | |
| 	return ( xml_basechar(chr) ||
 | |
| 		 xml_ideographic(chr) );
 | |
|       case CH_WHITE:
 | |
| 	return FALSE;			/* only ' ' and '\t' */
 | |
|       case CH_BLANK:
 | |
| 	return iswspace(chr);
 | |
|       case CH_DIGIT:
 | |
| 	return xml_digit(chr);
 | |
|       case CH_RS:
 | |
|       case CH_RE:
 | |
| 	return FALSE;
 | |
|       default:
 | |
| 	assert(0);
 | |
|         return FALSE;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| isee_func(dtd *dtd, const ichar *in, charfunc func)
 | |
| { if ( dtd->charfunc->func[func] == *in )
 | |
|     return ++in;
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      SYMBOLS		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static dtd_symbol_table *
 | |
| new_symbol_table()
 | |
| { dtd_symbol_table *t = sgml_calloc(1, sizeof(*t));
 | |
|   t->size    = SYMBOLHASHSIZE;
 | |
|   t->entries = sgml_calloc(t->size, sizeof(dtd_symbol*));
 | |
| 
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_symbol_table(dtd_symbol_table *t)
 | |
| { int i;
 | |
| 
 | |
|   for(i=0; i<t->size; i++)
 | |
|   { dtd_symbol *s, *next;
 | |
| 
 | |
|     for(s=t->entries[i]; s; s=next)
 | |
|     { next = s->next;
 | |
| 
 | |
|       sgml_free((ichar*)s->name);
 | |
|       sgml_free(s);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   sgml_free(t->entries);
 | |
|   sgml_free(t);
 | |
| }
 | |
| 
 | |
| 
 | |
| dtd_symbol *
 | |
| dtd_find_symbol(dtd *dtd, const ichar *name)
 | |
| { dtd_symbol_table *t = dtd->symbols;
 | |
| 
 | |
|   if ( dtd->case_sensitive )
 | |
|   { int k = istrhash(name, t->size);
 | |
|     dtd_symbol *s;
 | |
| 
 | |
|     for(s=t->entries[k]; s; s = s->next)
 | |
|     { if ( istreq(s->name, name) )
 | |
| 	return s;
 | |
|     }
 | |
|   } else
 | |
|   { int k = istrcasehash(name, t->size);
 | |
|     dtd_symbol *s;
 | |
| 
 | |
|     for(s=t->entries[k]; s; s = s->next)
 | |
|     { if ( istrcaseeq(s->name, name) )
 | |
| 	return s;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_symbol *
 | |
| dtd_find_entity_symbol(dtd *dtd, const ichar *name)
 | |
| { dtd_symbol_table *t = dtd->symbols;
 | |
| 
 | |
|   if ( dtd->ent_case_sensitive )
 | |
|   { int k = istrhash(name, t->size);
 | |
|     dtd_symbol *s;
 | |
| 
 | |
|     for(s=t->entries[k]; s; s = s->next)
 | |
|     { if ( istreq(s->name, name) )
 | |
| 	return s;
 | |
|     }
 | |
|   } else
 | |
|   { int k = istrcasehash(name, t->size);
 | |
|     dtd_symbol *s;
 | |
| 
 | |
|     for(s=t->entries[k]; s; s = s->next)
 | |
|     { if ( istrcaseeq(s->name, name) )
 | |
| 	return s;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| dtd_symbol *
 | |
| dtd_add_symbol(dtd *dtd, const ichar *name)
 | |
| { dtd_symbol_table *t = dtd->symbols;
 | |
|   int k = istrhash(name, t->size);
 | |
|   dtd_symbol *s;
 | |
| 
 | |
|   for(s=t->entries[k]; s; s = s->next)
 | |
|   { if ( istreq(s->name, name) )
 | |
|       return s;
 | |
|   }
 | |
| 
 | |
|   s = sgml_calloc(1, sizeof(*s));
 | |
|   s->name = istrdup(name);
 | |
|   s->next = t->entries[k];
 | |
|   t->entries[k] = s;
 | |
| 
 | |
|   return s;
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	    ENTITIES		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static void
 | |
| free_entity_list(dtd_entity *e)
 | |
| { dtd_entity *next;
 | |
| 
 | |
|   for( ; e; e=next)
 | |
|   { next = e->next;
 | |
| 
 | |
|     if ( e->value )   sgml_free(e->value);
 | |
|     if ( e->extid )   sgml_free(e->extid);
 | |
|     if ( e->exturl )  sgml_free(e->exturl);
 | |
|     if ( e->baseurl ) sgml_free(e->baseurl);
 | |
| 
 | |
|     sgml_free(e);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_entity *
 | |
| find_pentity(dtd *dtd, dtd_symbol *id)
 | |
| { dtd_entity *e;
 | |
| 
 | |
|   for(e = dtd->pentities; e; e=e->next)
 | |
|   { if ( e->name == id )
 | |
|       return e;
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* returned path must be freed when done */
 | |
| 
 | |
| static ichar *
 | |
| entity_file(dtd *dtd, dtd_entity *e)
 | |
| { switch(e->type)
 | |
|   { case ET_SYSTEM:
 | |
|     case ET_PUBLIC:
 | |
|     { const ichar *f;
 | |
| 
 | |
|       f = find_in_catalogue(e->catalog_location,
 | |
| 			    e->name->name,
 | |
| 			    e->extid,
 | |
| 			    e->exturl,
 | |
| 			    dtd->dialect != DL_SGML);
 | |
| 
 | |
|       if ( f )				/* owned by catalog */
 | |
|       { ichar *file;
 | |
| 
 | |
| 	if ( is_absolute_path(f) || !e->baseurl )
 | |
| 	  file = istrdup(f);
 | |
| 	else
 | |
| 	  file = localpath(e->baseurl, f);
 | |
| 
 | |
| 	return file;
 | |
|       }
 | |
|     }
 | |
|     default:
 | |
|       return NULL;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| entity_value(dtd_parser *p, dtd_entity *e, int *len)
 | |
| { ichar *file;
 | |
| 
 | |
|   if ( !e->value && (file=entity_file(p->dtd, e)) )
 | |
|   { int normalise = (e->content == EC_SGML || e->content == EC_CDATA);
 | |
|     size_t l;
 | |
| 
 | |
|     e->value = load_sgml_file_to_charp(file, normalise, &l);
 | |
|     e->length = (long)l;
 | |
|     sgml_free(file);
 | |
|   }
 | |
| 
 | |
|   if ( len )
 | |
|     *len = e->length;
 | |
| 
 | |
|   return e->value;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| expand_pentities(dtd_parser *p, const ichar *in, int ilen, ichar *out, int len)
 | |
| { dtd *dtd = p->dtd;
 | |
|   int pero = dtd->charfunc->func[CF_PERO]; /* % */
 | |
|   int ero = dtd->charfunc->func[CF_ERO]; /* & */
 | |
|   const ichar *s;
 | |
|   const ichar *end;
 | |
| 
 | |
|   if ( ilen == ZERO_TERM_LEN )
 | |
|   { end = in + wcslen(in);
 | |
|   } else
 | |
|   { end = &in[ilen];
 | |
|   }
 | |
| 
 | |
|   while(in < end)
 | |
|   { if ( *in == pero )
 | |
|     { dtd_symbol *id;
 | |
| 
 | |
|       if ( (s = itake_entity_name(p, in+1, &id)) )
 | |
|       { dtd_entity *e = find_pentity(dtd, id);
 | |
| 	const ichar *eval;
 | |
| 	int l;
 | |
| 
 | |
| 	in = s;
 | |
| 	if ( (s=isee_func(dtd, s, CF_ERC)) ) /* ; is not obligatory? */
 | |
| 	  in = s;
 | |
| 
 | |
| 	if ( !e )
 | |
| 	  return gripe(p, ERC_EXISTENCE, L"parameter entity", id->name);
 | |
| 
 | |
| 	if ( !(eval = entity_value(p, e, NULL)) )
 | |
| 	  return FALSE;
 | |
| 
 | |
| 	if ( !expand_pentities(p, eval, ZERO_TERM_LEN, out, len) )
 | |
| 	  return FALSE;
 | |
| 	l = (int)istrlen(out);		/* could be better */
 | |
| 	out += l;
 | |
| 	len -= l;
 | |
| 
 | |
| 	continue;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     if ( --len <= 0 )
 | |
|     { gripe(p, ERC_REPRESENTATION, L"Declaration too long");
 | |
|       return FALSE;
 | |
|     }
 | |
| 
 | |
|     if ( *in == ero && in[1] == '#' )	/* &# */
 | |
|     { int chr;
 | |
| 
 | |
|       if ( (s=isee_character_entity(dtd, in, &chr)) )
 | |
|       { if ( chr == 0 )
 | |
| 	{ gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", in);
 | |
| 	} else
 | |
| 	{ *out++ = chr;
 | |
| 	  in = s;
 | |
| 	  continue;
 | |
| 	}
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     *out++ = *in++;
 | |
|   }
 | |
| 
 | |
|   *out = '\0';
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| char_entity_value(const ichar *decl)
 | |
| { if ( *decl == '#' )
 | |
|   { const ichar *s = decl+1;
 | |
|     ichar *end;
 | |
|     long v;
 | |
| 
 | |
| 					/* do octal too? */
 | |
|     if ( s[0] == 'x' || s[0] == 'X' )
 | |
|       v = wcstoul(s+1, &end, 16);
 | |
|     else
 | |
|       v = wcstoul(s, &end, 10);
 | |
| 
 | |
|     if ( *end == '\0' )
 | |
|     { return (int)v;
 | |
|     } else if ( istreq(s, L"RS") )
 | |
|     { return '\n';
 | |
|     } else if ( istreq(s, L"RE") )
 | |
|     { return '\r';
 | |
|     } else if ( istreq(s, L"TAB") )
 | |
|     { return '\t';
 | |
|     } else if ( istreq(s, L"SPACE") )
 | |
|     { return ' ';
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return -1;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| isee_character_entity(dtd *dtd, const ichar *in, int *chr)
 | |
| { const ichar *s;
 | |
| 
 | |
|   if ( (s=isee_func(dtd, in, CF_ERO)) && *s == '#' )
 | |
|   { ichar e[32];
 | |
|     ichar *o = e;
 | |
|     int v;
 | |
| 
 | |
|     *o++ = *s++;
 | |
|     while(o < e+sizeof(e)/sizeof(ichar)-1 && HasClass(dtd, *s, CH_NAME))
 | |
|       *o++ = *s++;
 | |
|     if ( isee_func(dtd, s, CF_ERC))	/* skip ; */
 | |
|       s++;
 | |
| 
 | |
|     *o = '\0';
 | |
|     if ( (v=char_entity_value(e)) >= 0 )
 | |
|     { *chr = v;
 | |
|       return s;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Expand entities in a string.  Used to expand CDATA attribute values.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static int
 | |
| expand_entities(dtd_parser *p, const ichar *in, int len, ocharbuf *out)
 | |
| { const ichar *s;
 | |
|   const ichar *end = &in[len];
 | |
|   dtd *dtd = p->dtd;
 | |
|   int ero = dtd->charfunc->func[CF_ERO]; /* & */
 | |
| 
 | |
|   while(in < end)
 | |
|   { if ( *in == ero )
 | |
|     { const ichar *estart = in;		/* for recovery */
 | |
|       int chr;
 | |
| 
 | |
|       if ( (s=isee_character_entity(dtd, in, &chr)) )
 | |
|       { if ( chr == 0 )
 | |
| 	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", in);
 | |
| 
 | |
| 	add_ocharbuf(out, chr);
 | |
| 	in = s;
 | |
| 	continue;
 | |
|       }
 | |
| 
 | |
|       if ( HasClass(dtd, in[1], CH_NMSTART) )
 | |
|       { dtd_symbol *id;
 | |
| 	dtd_entity *e;
 | |
| 	const ichar *eval;
 | |
| 
 | |
| 	if ( !(in = itake_name(p, in+1, &id)) )
 | |
| 	{ in = estart;
 | |
| 	  goto recover;
 | |
| 	}
 | |
| 	if ( isee_func(dtd, in, CF_ERC) || *in == '\n' )
 | |
| 	  in++;
 | |
| 
 | |
| 	if ( !(e = id->entity) && !(e=dtd->default_entity) )
 | |
| 	{ gripe(p, ERC_EXISTENCE, L"entity", id->name);
 | |
| 	  in = estart;
 | |
| 	  goto recover;
 | |
| 	}
 | |
| 
 | |
| 	if ( !(eval = entity_value(p, e, NULL)) )
 | |
| 	{ gripe(p, ERC_NO_VALUE, e->name->name);
 | |
| 	  in = estart;
 | |
| 	  goto recover;
 | |
| 	}
 | |
| 
 | |
| 	if ( e->content == EC_SGML )
 | |
| 	{ if ( !expand_entities(p, eval, (int)istrlen(eval), out) )
 | |
| 	    return FALSE;
 | |
| 	} else
 | |
| 	{ const ichar *s;
 | |
| 
 | |
| 	  for(s=eval; *s; s++)
 | |
| 	    add_ocharbuf(out, *s);
 | |
| 	}
 | |
| 
 | |
| 	continue;
 | |
|       }
 | |
| 
 | |
|       if ( dtd->dialect != DL_SGML )
 | |
| 	gripe(p, ERC_SYNTAX_ERROR, L"Illegal entity", estart);
 | |
|     }
 | |
| 
 | |
|   recover:
 | |
| 
 | |
|     if ( *in == CR && in[1] == LF )
 | |
|       in++;
 | |
| 
 | |
|     if ( HasClass(dtd, *in, CH_BLANK) )
 | |
|     { add_ocharbuf(out, ' ');
 | |
|       in++;
 | |
|     } else
 | |
|     { add_ocharbuf(out, *in++);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   terminate_ocharbuf(out);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      ELEMENTS		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static dtd_element *
 | |
| find_element(dtd *dtd, dtd_symbol *id)
 | |
| { dtd_element *e;
 | |
| 
 | |
|   if ( id->element )
 | |
|     return id->element;			/* must check */
 | |
| 
 | |
|   e = sgml_calloc(1, sizeof(*e));
 | |
|   e->space_mode = SP_INHERIT;
 | |
|   e->undefined = TRUE;
 | |
|   e->name = id;
 | |
|   id->element = e;
 | |
| 
 | |
|   e->next = dtd->elements;
 | |
|   dtd->elements = e;
 | |
| 
 | |
|   return e;
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_edef *
 | |
| new_element_definition(dtd *dtd)
 | |
| { dtd_edef *def = sgml_calloc(1, sizeof(*def));
 | |
| 
 | |
|   STAT(edefs_created++);
 | |
| 
 | |
|   return def;
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_element *
 | |
| def_element(dtd *dtd, dtd_symbol *id)
 | |
| { dtd_element *e = find_element(dtd, id);
 | |
| 
 | |
|   if ( !e->structure )
 | |
|   { e->structure = new_element_definition(dtd);
 | |
|     e->structure->references = 1;
 | |
|     e->structure->type = C_EMPTY;
 | |
|   }
 | |
| 
 | |
|   return e;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_name_list(dtd_name_list *nl)
 | |
| { dtd_name_list *next;
 | |
| 
 | |
|   for( ; nl; nl=next)
 | |
|   { next = nl->next;
 | |
| 
 | |
|     sgml_free(nl);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| #define REFS_VIRGIN (-42)
 | |
| 
 | |
| static void
 | |
| free_attribute(dtd_attr *a)
 | |
| { if ( a->references == REFS_VIRGIN || --a->references == 0 )
 | |
|   { switch(a->type)
 | |
|     { case AT_NAMEOF:
 | |
|       case AT_NOTATION:
 | |
| 	free_name_list(a->typeex.nameof);
 | |
|       default:
 | |
| 	;
 | |
|     }
 | |
|     switch(a->def)
 | |
|     { case AT_DEFAULT:
 | |
|       case AT_FIXED:
 | |
|       { if ( a->islist )
 | |
| 	  sgml_free(a->att_def.list);
 | |
| 	else if ( a->type == AT_CDATA && a->att_def.cdata )
 | |
| 	  sgml_free(a->att_def.cdata);
 | |
|       }
 | |
|       default:
 | |
| 	;
 | |
|     }
 | |
| 
 | |
|     sgml_free(a);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_attribute_list(dtd_attr_list *l)
 | |
| { dtd_attr_list *next;
 | |
| 
 | |
|   for(; l; l=next)
 | |
|   { next = l->next;
 | |
| 
 | |
|     free_attribute(l->attribute);
 | |
|     sgml_free(l);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_element_list(dtd_element_list *l)
 | |
| { dtd_element_list *next;
 | |
| 
 | |
|   for( ; l; l=next)
 | |
|   { next = l->next;
 | |
| 
 | |
|     sgml_free(l);
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void
 | |
| free_element_definition(dtd_edef *def)
 | |
| { if ( --def->references == 0 )
 | |
|   { STAT(edefs_freed++);
 | |
|     if ( def->content )
 | |
|       free_model(def->content);
 | |
|     free_element_list(def->included);
 | |
|     free_element_list(def->excluded);
 | |
|     free_state_engine(def->initial_state);
 | |
| 
 | |
|     sgml_free(def);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_elements(dtd_element *e)
 | |
| { dtd_element *next;
 | |
| 
 | |
|   for( ; e; e=next)
 | |
|   { next = e->next;
 | |
| 
 | |
|     if ( e->structure )
 | |
|       free_element_definition(e->structure);
 | |
|     free_attribute_list(e->attributes);
 | |
| 
 | |
|     sgml_free(e);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	    ATTRIBUTES		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static dtd_attr *
 | |
| find_attribute(dtd_element *e, dtd_symbol *name)
 | |
| { dtd_attr_list *a;
 | |
| 
 | |
|   for(a=e->attributes; a; a=a->next)
 | |
|   { if ( a->attribute->name == name )
 | |
|       return a->attribute;
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	  PARSE PRIMITIVES	*
 | |
| 		 *******************************/
 | |
| 
 | |
| static const ichar *
 | |
| iskip_layout(dtd *dtd, const ichar *in)
 | |
| { ichar cmt = dtd->charfunc->func[CF_CMT]; /* also skips comment */
 | |
| 
 | |
|   for( ; *in; in++ )
 | |
|   { if ( HasClass(dtd, *in, CH_BLANK) )
 | |
|       continue;
 | |
| 
 | |
|     if ( in[0] == cmt && in[1] == cmt )
 | |
|     { in += 2;
 | |
| 
 | |
|       for( ; *in; in++ )
 | |
|       { if ( in[0] == cmt && in[1] == cmt )
 | |
| 	  break;
 | |
|       }
 | |
|       in++;
 | |
|       continue;
 | |
|     }
 | |
| 
 | |
|     return in;
 | |
|   }
 | |
| 
 | |
|   return in;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| See whether we are looking at identifier   "id". "id" must be lowercase!
 | |
| This is only used for reserved words, and parsed case-insensitive in both
 | |
| XML and SGML modes.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static const ichar *
 | |
| isee_identifier(dtd *dtd, const ichar *in, char *id)
 | |
| { in = iskip_layout(dtd, in);
 | |
| 
 | |
| 					/* match */
 | |
|   while (*id && (wint_t)*id == towlower(*in) )
 | |
|     id++, in++;
 | |
|   if ( *id == 0 && !HasClass(dtd, *in, CH_NAME) )
 | |
|     return iskip_layout(dtd, in);
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_name(dtd_parser *p, const ichar *in, dtd_symbol **id)
 | |
| { ichar buf[MAXNMLEN];
 | |
|   ichar *o = buf;
 | |
|   ichar *e = &buf[MAXNMLEN]-1;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   in = iskip_layout(dtd, in);
 | |
|   if ( !HasClass(dtd, *in, CH_NMSTART) )
 | |
|     return NULL;
 | |
| 
 | |
|   if ( dtd->case_sensitive )
 | |
|   { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | |
|       *o++ = *in++;
 | |
|   } else
 | |
|   { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | |
|       *o++ = towlower(*in++);
 | |
|   }
 | |
| 
 | |
|   if ( o == e )
 | |
|   { gripe(p, ERC_REPRESENTATION, L"NAME too long");
 | |
|     return NULL;
 | |
|   }
 | |
| 
 | |
|   *o++ = '\0';
 | |
| 
 | |
|   *id = dtd_add_symbol(dtd, buf);
 | |
| 
 | |
|   return iskip_layout(dtd, in);
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_entity_name(dtd_parser *p, const ichar *in, dtd_symbol **id)
 | |
| { ichar buf[MAXNMLEN];
 | |
|   ichar *o = buf;
 | |
|   ichar *e = &buf[MAXNMLEN]-1;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   in = iskip_layout(dtd, in);
 | |
|   if ( !HasClass(dtd, *in, CH_NMSTART) )
 | |
|     return NULL;
 | |
| 
 | |
|   if ( dtd->ent_case_sensitive )
 | |
|   { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | |
|       *o++ = *in++;
 | |
|   } else
 | |
|   { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | |
|       *o++ = towlower(*in++);
 | |
|   }
 | |
|   if ( o == e )
 | |
|   { gripe(p, ERC_REPRESENTATION, L"Entity NAME too long");
 | |
|     return NULL;
 | |
|   }
 | |
| 
 | |
|   *o++ = '\0';
 | |
| 
 | |
|   *id = dtd_add_symbol(dtd, buf);
 | |
| 
 | |
|   return in;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_nmtoken(dtd_parser *p, const ichar *in, dtd_symbol **id)
 | |
| { ichar buf[MAXNMLEN];
 | |
|   ichar *o = buf;
 | |
|   ichar *e = &buf[MAXNMLEN]-1;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   in = iskip_layout(dtd, in);
 | |
|   if ( !HasClass(dtd, *in, CH_NAME) )
 | |
|     return NULL;
 | |
|   if ( dtd->case_sensitive )
 | |
|   { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | |
|       *o++ = *in++;
 | |
|   } else
 | |
|   { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | |
|       *o++ = towlower(*in++);
 | |
|   }
 | |
|   if ( o == e )
 | |
|   { gripe(p, ERC_REPRESENTATION, L"NMTOKEN too long");
 | |
|     return NULL;
 | |
|   }
 | |
| 
 | |
|   *o = '\0';
 | |
| 
 | |
|   *id = dtd_add_symbol(dtd, buf);
 | |
| 
 | |
|   return iskip_layout(dtd, in);
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_nutoken(dtd_parser *p, const ichar *in, dtd_symbol **id)
 | |
| { ichar buf[MAXNMLEN];
 | |
|   ichar *o = buf;
 | |
|   ichar *e = &buf[MAXNMLEN]-1;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   in = iskip_layout(dtd, in);
 | |
|   if ( !HasClass(dtd, *in, CH_DIGIT) )
 | |
|     return NULL;
 | |
| 
 | |
|   if ( dtd->case_sensitive )
 | |
|   { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | |
|       *o++ = *in++;
 | |
|   } else
 | |
|   { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | |
|       *o++ = towlower(*in++);
 | |
|   }
 | |
| 
 | |
|   if ( o == e )
 | |
|   { gripe(p, ERC_REPRESENTATION, L"NUTOKEN too long");
 | |
|     return NULL;
 | |
|   }
 | |
| 
 | |
|   *o = '\0';
 | |
|   if ( o - buf > 8 )
 | |
|     gripe(p, ERC_LIMIT, L"nutoken length");
 | |
| 
 | |
|   *id = dtd_add_symbol(dtd, buf);
 | |
| 
 | |
|   return iskip_layout(dtd, in);
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_number(dtd_parser *p, const ichar *in, dtd_attr *at)
 | |
| { dtd *dtd = p->dtd;
 | |
| 
 | |
|   in = iskip_layout(dtd, in);
 | |
| 
 | |
|   switch(dtd->number_mode)
 | |
|   { case NU_TOKEN:
 | |
|     { ichar buf[MAXNMLEN];
 | |
|       ichar *o = buf;
 | |
| 
 | |
|       while( HasClass(dtd, *in, CH_DIGIT) )
 | |
| 	*o++ = *in++;
 | |
|       if ( o == buf )
 | |
| 	return NULL;			/* empty */
 | |
|       *o = '\0';
 | |
|       at->att_def.name = dtd_add_symbol(dtd, buf);
 | |
| 
 | |
|       return iskip_layout(dtd, (const ichar *)in);
 | |
|     }
 | |
|     case NU_INTEGER:
 | |
|     { ichar *end;
 | |
| 
 | |
|       at->att_def.number = wcstol(in, &end, 10);
 | |
|       if ( end > in && errno != ERANGE )
 | |
| 	return iskip_layout(dtd, end);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Get a quoted value. After successful return,  *start points to the start
 | |
| of the string in the input and  *len   to  the length. The data is *not*
 | |
| nul terminated.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static const ichar *
 | |
| itake_string(dtd *dtd, const ichar *in, ichar **start, int *len)
 | |
| { in = iskip_layout(dtd, in);
 | |
| 
 | |
|   if ( isee_func(dtd, in, CF_LIT) ||
 | |
|        isee_func(dtd, in, CF_LITA) )
 | |
|   { ichar q = *in++;
 | |
| 
 | |
|     *start = (ichar *)in;
 | |
|     while( *in && *in != q )
 | |
|       in++;
 | |
|     if ( *in )
 | |
|     { *len = (int)(in - (*start));
 | |
| 
 | |
|       return iskip_layout(dtd, ++in);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_dubbed_string(dtd *dtd, const ichar *in, ichar **out)
 | |
| { ichar *start;
 | |
|   int len;
 | |
|   const ichar *end;
 | |
| 
 | |
|   if ( (end=itake_string(dtd, in, &start, &len)) )
 | |
|     *out = istrndup(start, len);
 | |
| 
 | |
|   return end;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| itake_url() is used to get the argument of a SYSTEM or 2nd argument of a
 | |
| PUBLIC reference. Once upon a  time  it   tried  to  tag the argument as
 | |
| file:<path>, but this job cannot be before   lookup in the catalogue. It
 | |
| is now the same as itake_dubbed_string(), so we simply call this one.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static const ichar *
 | |
| itake_url(dtd *dtd, const ichar *in, ichar **out)
 | |
| { return itake_dubbed_string(dtd, in, out);
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_nmtoken_chars(dtd_parser *p, const ichar *in, ichar *out, int len)
 | |
| { dtd *dtd = p->dtd;
 | |
| 
 | |
|   in = iskip_layout(dtd, in);
 | |
|   if ( !HasClass(dtd, *in, CH_NAME) )
 | |
|     return NULL;
 | |
|   while( HasClass(dtd, *in, CH_NAME) )
 | |
|   { if ( --len <= 0 )
 | |
|       gripe(p, ERC_REPRESENTATION, L"Name token too long");
 | |
|     *out++ = (dtd->case_sensitive ? *in++ : (ichar)towlower(*in++));
 | |
|   }
 | |
|   *out++ = '\0';
 | |
| 
 | |
|   return iskip_layout(dtd, in);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*  There used to be a function
 | |
| 
 | |
|     itake_nonblank_chars(dtd, in, out, len) -> new end
 | |
| 
 | |
|     which
 | |
|     - skipped layout,
 | |
|     - copied characters from in[] to out[] until layout or \0 was found,
 | |
|     - added a terminating \0 to out[],
 | |
|     - skipped any following layout, and
 | |
|     - returned the new position.
 | |
| 
 | |
|     That function was only called by get_attribute_value(), which used
 | |
|     it to parse an unquoted attribute value.  According to SGML, that's
 | |
|     not right:  unquoted attribute values must look like NMTOKENs (but
 | |
|     have a different length bound).  In particular, elements like
 | |
| 	<foo a=bar>zoo</foo>
 | |
| 	<foo a=ugh/zip/
 | |
|     are perfectly legal, so scanning an unquoted attribute value MUST
 | |
|     stop at a '/' or '>'.  According to HTML practice, pretty much any
 | |
|     old junk will be accepted, and some HTML parsers will allow bare
 | |
|     slashes in such an attribute.
 | |
| 
 | |
|     Typical HTML is *so* bad that it doesn't agree with *any* part of
 | |
|     the HTML specifications (e.g., <FONT> is commonly wrapped around
 | |
|     block-level elements, which has never been legal).  It's not clear
 | |
|     that there is much point in trying to accommodate bad HTML; if you
 | |
|     really need to do that, use the free program HTML Tidy (from the
 | |
|     http://www.w3c.org/ site) to clean up, and parse its output instead.
 | |
| 
 | |
|     However, in order to break as little as possible, the new (sgml-1.0.14)
 | |
|     function accepts anything except > / \0 and blanks.
 | |
| 
 | |
| JW: I decided to accept / as part of an unquoted value in SGML-mode if
 | |
|     shorttag is disabled as well as in XML mode if it is not the
 | |
|     end of the begin-element
 | |
| */
 | |
| 
 | |
| static ichar const *
 | |
| itake_unquoted(dtd_parser *p, ichar const *in, ichar *out, int len)
 | |
| { dtd *dtd = p->dtd;
 | |
|   ichar const end2 = dtd->charfunc->func[CF_ETAGO2];	/* / */
 | |
|   ichar c;
 | |
| 
 | |
|   /* skip leading layout.  Do NOT skip comments! --x-- is a value! */
 | |
|   while (c = *in, HasClass(dtd, c, CH_BLANK))
 | |
|     in++;
 | |
| 
 | |
|   /* copy the attribute to out[] */
 | |
|   while ( !HasClass(dtd, c, CH_BLANK) &&
 | |
| 	  c != '\0' )
 | |
|   { if ( c == end2 && (dtd->shorttag ||
 | |
| 		       (in[1] == '\0' && dtd->dialect != DL_SGML)) )
 | |
|       break;
 | |
| 
 | |
|     if ( --len > 0 )
 | |
|       *out++ = c;
 | |
|     else if ( len == 0 )
 | |
|       gripe(p, ERC_REPRESENTATION, L"Attribute too long");
 | |
|     c = *++in;
 | |
|   }
 | |
|   *out = '\0';
 | |
| 
 | |
|   /* skip trailing layout.  While it is kind to skip comments here,
 | |
|      it is technically wrong to do so.  Tags may not contain comments.
 | |
|    */
 | |
| 
 | |
|   return iskip_layout(dtd, in);
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *		DTD		*
 | |
| 		 *******************************/
 | |
| 
 | |
| dtd *
 | |
| new_dtd(const ichar *doctype)
 | |
| { dtd *dtd = sgml_calloc(1, sizeof(*dtd));
 | |
| 
 | |
|   STAT(dtd_created++);
 | |
|   dtd->magic	 = SGML_DTD_MAGIC;
 | |
|   dtd->implicit  = TRUE;
 | |
|   dtd->dialect   = DL_SGML;
 | |
|   if ( doctype )
 | |
|     dtd->doctype = istrdup(doctype);
 | |
|   dtd->symbols	 = new_symbol_table();
 | |
|   dtd->charclass = new_charclass();
 | |
|   dtd->charfunc	 = new_charfunc();
 | |
|   dtd->space_mode = SP_SGML;
 | |
|   dtd->ent_case_sensitive = TRUE;	/* case-sensitive entities */
 | |
|   dtd->shorttag    = TRUE;		/* allow for <tag/value/ */
 | |
|   dtd->number_mode = NU_TOKEN;
 | |
| 
 | |
|   return dtd;
 | |
| }
 | |
| 
 | |
| 
 | |
| void
 | |
| free_dtd(dtd *dtd)
 | |
| { if ( --dtd->references == 0 )
 | |
|   { STAT(dtd_freed++);
 | |
| 
 | |
|     if ( dtd->doctype )
 | |
|       sgml_free(dtd->doctype);
 | |
| 
 | |
|     free_entity_list(dtd->entities);
 | |
|     free_entity_list(dtd->pentities);
 | |
|     free_notations(dtd->notations);
 | |
|     free_shortrefs(dtd->shortrefs);
 | |
|     free_elements(dtd->elements);
 | |
|     free_symbol_table(dtd->symbols);
 | |
|     sgml_free(dtd->charfunc);
 | |
|     sgml_free(dtd->charclass);
 | |
|     dtd->magic = 0;
 | |
| 
 | |
|     sgml_free(dtd);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
/* Entity declarations that set_dialect_dtd() feeds to
   process_entity_declaration() when a DTD is switched to XML or XMLNS.
   Each string is the text of one entity declaration; the list is
   NULL-terminated.

   The value of `quot` is a double quote, so it cannot be written inside
   a double-quoted literal; it is delimited with single quotes instead,
   mirroring how `apos` holds a single quote inside double quotes.
   NOTE(review): this assumes the literal scanner accepts both quote
   styles -- confirm against itake_string().
*/

static const wchar_t *xml_entities[] =
{ L"lt CDATA \"<\"",		/* < */
  L"gt CDATA \">\"",		/* > */
  L"amp CDATA \"&\"",		/* & */
  L"apos CDATA \"'\"",		/* ' */
  L"quot CDATA '\"'",		/* " */
  NULL
};
 | |
| 
 | |
| 
 | |
| int
 | |
| set_dialect_dtd(dtd *dtd, dtd_dialect dialect)
 | |
| { if ( dtd->dialect != dialect )
 | |
|   { dtd->dialect = dialect;
 | |
| 
 | |
|     switch(dialect)
 | |
|     { case DL_SGML:
 | |
|       { dtd->case_sensitive = FALSE;
 | |
| 	dtd->space_mode = SP_SGML;
 | |
| 	dtd->shorttag = TRUE;
 | |
| 	break;
 | |
|       }
 | |
|       case DL_XML:
 | |
|       case DL_XMLNS:
 | |
|       { const ichar **el;
 | |
| 	dtd_parser p;
 | |
| 
 | |
| 	dtd->case_sensitive = TRUE;
 | |
| 	dtd->encoding = SGML_ENC_UTF8;
 | |
| 	dtd->space_mode = SP_PRESERVE;
 | |
| 	dtd->shorttag = FALSE;
 | |
| 
 | |
| 	memset(&p, 0, sizeof(p));
 | |
| 	p.dtd = dtd;
 | |
| 	for(el = xml_entities; *el; el++)
 | |
| 	  process_entity_declaration(&p, *el);
 | |
| 
 | |
| 	break;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| set_option_dtd(dtd *dtd, dtd_option option, int set)
 | |
| { switch(option)
 | |
|   { case OPT_SHORTTAG:
 | |
|       dtd->shorttag = set;
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| baseurl(dtd_parser *p)
 | |
| { if ( p->location.type == IN_FILE && p->location.name.file )
 | |
|   { return p->location.name.file;
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| process_entity_value_declaration(dtd_parser *p,
 | |
| 				 const ichar *decl, dtd_entity *e)
 | |
| { dtd *dtd = p->dtd;
 | |
|   const ichar *s;
 | |
| 
 | |
|   if ( e->type == ET_SYSTEM )
 | |
|   { if ( (s=itake_url(dtd, decl, &e->exturl)) )
 | |
|     { e->baseurl = istrdup(baseurl(p));
 | |
|       return s;
 | |
|     }
 | |
| 
 | |
|     goto string_expected;
 | |
|   } else
 | |
|   { ichar *start; int len;
 | |
|     ichar val[MAXSTRINGLEN];
 | |
| 
 | |
|     if ( !(s = itake_string(dtd, decl, &start, &len)) )
 | |
|       goto string_expected;
 | |
|     decl = s;
 | |
| 
 | |
|     expand_pentities(p, start, len, val, sizeof(val)/sizeof(ichar));
 | |
| 
 | |
|     switch ( e->type )
 | |
|     { case ET_PUBLIC:
 | |
|       { e->extid = istrdup(val);
 | |
| 	if ( isee_func(dtd, decl, CF_LIT) ||
 | |
| 	     isee_func(dtd, decl, CF_LITA) )
 | |
| 	{ if ( (s=itake_url(dtd, decl, &e->exturl)) )
 | |
| 	  { e->baseurl = istrdup(baseurl(p));
 | |
| 	    decl = s;
 | |
| 	  }
 | |
| 	}
 | |
| 	return decl;
 | |
|       }
 | |
|       case ET_LITERAL:
 | |
|       { e->value = istrdup(val);
 | |
| 	e->length = (int)wcslen(e->value);
 | |
| 	return decl;
 | |
|       }
 | |
|       default:
 | |
| 	assert(0);
 | |
| 	return NULL;
 | |
|     }
 | |
|   }
 | |
| 
 | |
| string_expected:
 | |
|   gripe(p, ERC_SYNTAX_ERROR, L"String expected", decl);
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| The sgml-standard tells us to accept the  first definition of an entity,
 | |
| silently suppressing any further attempt to redefine the entity.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static int
 | |
| process_entity_declaration(dtd_parser *p, const ichar *decl)
 | |
| { dtd *dtd = p->dtd;
 | |
|   const ichar *s;
 | |
|   dtd_symbol *id;
 | |
|   dtd_entity *e;
 | |
|   int isparam;
 | |
|   int isdef = FALSE;
 | |
| 					/* parameter entity */
 | |
|   if ( (s=isee_func(dtd, decl, CF_PERO)) )
 | |
|   { isparam = TRUE;
 | |
|     decl = s;
 | |
|   } else
 | |
|     isparam = FALSE;
 | |
| 
 | |
|   if ( !(s = itake_entity_name(p, decl, &id)) )
 | |
|   { if ( !(s = isee_identifier(dtd, decl, "#default")) )
 | |
|       return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | |
|     id = dtd_add_symbol(dtd, (ichar*)"#DEFAULT");
 | |
|     isdef = TRUE;
 | |
|   }
 | |
| 
 | |
|   if ( isparam && find_pentity(dtd, id) )
 | |
|   { gripe(p, ERC_REDEFINED, L"parameter entity", id);
 | |
|     return TRUE;			/* already defined parameter entity */
 | |
|   }
 | |
|   if ( id->entity )
 | |
|   { gripe(p, ERC_REDEFINED, L"entity", id);
 | |
|     return TRUE;			/* already defined normal entity */
 | |
|   }
 | |
| 
 | |
|   decl = iskip_layout(dtd, s);
 | |
|   e = sgml_calloc(1, sizeof(*e));
 | |
|   e->name = id;
 | |
|   e->catalog_location = (isparam ? CAT_PENTITY : CAT_ENTITY);
 | |
| 
 | |
|   if ( (s = isee_identifier(dtd, decl, "system")) )
 | |
|   { e->type = ET_SYSTEM;
 | |
|     e->content = EC_SGML;
 | |
|     decl = s;
 | |
|   } else if ( (s = isee_identifier(dtd, decl, "public")) )
 | |
|   { e->type = ET_PUBLIC;
 | |
|     e->content = EC_SGML;
 | |
|     decl = s;
 | |
|   } else
 | |
|   { e->type = ET_LITERAL;
 | |
| 
 | |
|     if ( !isparam )
 | |
|     { if ( (s=isee_identifier(dtd, decl, "cdata")) )
 | |
|       { decl = s;
 | |
| 	e->content = EC_CDATA;
 | |
|       } else if ( (s=isee_identifier(dtd, decl, "sdata")) )
 | |
|       { decl = s;
 | |
| 	e->content = EC_SDATA;
 | |
|       } else if ( (s=isee_identifier(dtd, decl, "pi")) )
 | |
|       { decl = s;
 | |
| 	e->content = EC_PI;
 | |
|       } else if ( (s=isee_identifier(dtd, decl, "starttag")) )
 | |
|       { decl = s;
 | |
| 	e->content = EC_STARTTAG;
 | |
|       } else if ( (s=isee_identifier(dtd, decl, "endtag")) )
 | |
|       { decl = s;
 | |
| 	e->content = EC_ENDTAG;
 | |
|       } else
 | |
| 	e->content = EC_SGML;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( (decl=process_entity_value_declaration(p, decl, e)) )
 | |
|   { if ( e->type == ET_LITERAL )
 | |
|     { switch(e->content)
 | |
|       { case EC_STARTTAG:
 | |
| 	{ ichar *buf = sgml_malloc((e->length + 3)*sizeof(ichar));
 | |
| 
 | |
| 	  buf[0] = dtd->charfunc->func[CF_STAGO];
 | |
| 	  istrcpy(&buf[1], e->value);
 | |
| 	  buf[++e->length] = dtd->charfunc->func[CF_STAGC];
 | |
| 	  buf[++e->length] = 0;
 | |
| 
 | |
| 	  sgml_free(e->value);
 | |
| 	  e->value = buf;
 | |
| 	  e->content = EC_SGML;
 | |
| 
 | |
| 	  break;
 | |
| 	}
 | |
| 	case EC_ENDTAG:
 | |
| 	{ ichar *buf = sgml_malloc((e->length + 4)*sizeof(ichar));
 | |
| 
 | |
| 	  buf[0] = dtd->charfunc->func[CF_ETAGO1];
 | |
| 	  buf[1] = dtd->charfunc->func[CF_ETAGO2];
 | |
| 	  istrcpy(&buf[2], e->value);
 | |
| 	  e->length++;
 | |
| 	  buf[++e->length] = dtd->charfunc->func[CF_STAGC];
 | |
| 	  buf[++e->length] = 0;
 | |
| 
 | |
| 	  sgml_free(e->value);
 | |
| 	  e->value = buf;
 | |
| 	  e->content = EC_SGML;
 | |
| 
 | |
| 	  break;
 | |
| 	}
 | |
| 	default:
 | |
| 	  break;
 | |
|       }
 | |
|     } else
 | |
|     { if ( *decl )
 | |
|       { dtd_symbol *nname;
 | |
| 
 | |
| 	if ( (s=isee_identifier(dtd, decl, "cdata")) )
 | |
| 	{ decl = s;
 | |
| 	  e->content = EC_CDATA;
 | |
| 	} else if ( (s=isee_identifier(dtd, decl, "sdata")) )
 | |
| 	{ decl = s;
 | |
| 	  e->content = EC_SDATA;
 | |
| 	} else if ( (s=isee_identifier(dtd, decl, "ndata")) )
 | |
| 	{ decl = s;
 | |
| 	  e->content = EC_NDATA;
 | |
| 	} else
 | |
| 	  return gripe(p, ERC_SYNTAX_ERROR, L"Bad datatype declaration", decl);
 | |
| 
 | |
| 	if ( (s=itake_name(p, decl, &nname)) ) /* what is this? */
 | |
| 	{ decl = s;
 | |
| 	} else
 | |
| 	  return gripe(p, ERC_SYNTAX_ERROR, L"Bad notation declaration", decl);
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     if ( *decl )
 | |
|       return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl);
 | |
|   }
 | |
| 
 | |
|   if ( isparam )
 | |
|   { e->next = dtd->pentities;
 | |
|     dtd->pentities = e;
 | |
|   } else
 | |
|   { e->name->entity = e;
 | |
|     e->next = dtd->entities;
 | |
|     dtd->entities = e;
 | |
|   }
 | |
| 
 | |
|   if ( isdef )
 | |
|     dtd->default_entity = e;
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      NOTATIONS		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static dtd_notation *
 | |
| find_notation(dtd *dtd, dtd_symbol *name)
 | |
| { dtd_notation *n;
 | |
| 
 | |
|   for(n=dtd->notations; n; n = n->next)
 | |
|   { if ( n->name == name )
 | |
|       return n;
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| add_notation(dtd *dtd, dtd_notation *not)
 | |
| { dtd_notation **n = &dtd->notations;
 | |
| 
 | |
|   for( ; *n; n = &(*n)->next)
 | |
|     ;
 | |
|   *n = not;
 | |
| }
 | |
| 
 | |
| static int
 | |
| process_notation_declaration(dtd_parser *p, const ichar *decl)
 | |
| { dtd *dtd = p->dtd;
 | |
|   dtd_symbol *nname;
 | |
|   const ichar *s;
 | |
|   ichar *system = NULL, *public = NULL;
 | |
|   dtd_notation *not;
 | |
| 
 | |
|   if ( !(s=itake_name(p, decl, &nname)) )
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"Notation name expected", decl);
 | |
|   decl = s;
 | |
| 
 | |
|   if ( find_notation(dtd, nname) )
 | |
|   { gripe(p, ERC_REDEFINED, L"notation", nname);
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   if ( (s=isee_identifier(dtd, decl, "system")) )
 | |
|   { ;
 | |
|   } else if ( (s=isee_identifier(dtd, decl, "public")) )
 | |
|   { decl = s;
 | |
|     if ( !(s=itake_dubbed_string(dtd, decl, &public)) )
 | |
|       return gripe(p, ERC_SYNTAX_ERROR, L"Public identifier expected", decl);
 | |
|   } else
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"SYSTEM or PUBLIC expected", decl);
 | |
| 
 | |
|   decl = s;
 | |
|   if ( (s=itake_dubbed_string(dtd, decl, &system)) )
 | |
|     decl = s;
 | |
| 
 | |
|   if ( *decl )
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl);
 | |
| 
 | |
|   not = sgml_calloc(1, sizeof(*not));
 | |
|   not->name = nname;
 | |
|   not->system = system;
 | |
|   not->public = public;
 | |
|   not->next = NULL;
 | |
|   add_notation(dtd, not);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_notations(dtd_notation *n)
 | |
| { dtd_notation *next;
 | |
| 
 | |
|   for( ; n; n=next)
 | |
|   { next = n->next;
 | |
| 
 | |
|     sgml_free(n->system);
 | |
|     sgml_free(n->public);
 | |
| 
 | |
|     sgml_free(n);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	       SHORTREF		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static void
 | |
| free_maps(dtd_map *map)
 | |
| { dtd_map *next;
 | |
| 
 | |
|   for( ; map; map=next)
 | |
|   { next = map->next;
 | |
|     if ( map->from )
 | |
|       sgml_free(map->from);
 | |
|     sgml_free(map);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_shortrefs(dtd_shortref *sr)
 | |
| { dtd_shortref *next;
 | |
| 
 | |
|   for( ; sr; sr=next)
 | |
|   { next = sr->next;
 | |
|     free_maps(sr->map);
 | |
|     sgml_free(sr);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| shortref_add_map(dtd_parser *p, const ichar *decl, dtd_shortref *sr)
 | |
| { ichar *start; int len;
 | |
|   ichar from[MAXMAPLEN];
 | |
|   ichar *f = from;
 | |
|   dtd_symbol *to;
 | |
|   const ichar *s;
 | |
|   const ichar *end;
 | |
|   dtd *dtd = p->dtd;
 | |
|   dtd_map **prev;
 | |
|   dtd_map *m;
 | |
| 
 | |
|   if ( !(s=itake_string(dtd, decl, &start, &len)) )
 | |
|   { gripe(p, ERC_SYNTAX_ERROR, L"map-string expected", decl);
 | |
|     return NULL;
 | |
|   }
 | |
|   decl = s;
 | |
|   if ( !(s=itake_entity_name(p, decl, &to)) )
 | |
|   { gripe(p, ERC_SYNTAX_ERROR, L"map-to name expected", decl);
 | |
|     return NULL;
 | |
|   }
 | |
|   end = s;
 | |
| 
 | |
|   for(decl=start; len > 0;)
 | |
|   { if ( *decl == 'B' )		/* blank */
 | |
|     { if ( decl[1] == 'B' )
 | |
|       { *f++ = CHR_DBLANK;
 | |
| 	decl += 2;
 | |
| 	len -= 2;
 | |
|         continue;
 | |
|       }
 | |
|       *f++ = CHR_BLANK;
 | |
|       decl++;
 | |
|       len--;
 | |
|     } else
 | |
|     { *f++ = *decl++;			/* any other character */
 | |
|       len--;
 | |
|     }
 | |
|   }
 | |
|   *f = 0;
 | |
| 
 | |
|   for(prev=&sr->map; *prev; prev = &(*prev)->next)
 | |
|     ;
 | |
| 
 | |
|   m = sgml_calloc(1, sizeof(*m));
 | |
|   m->from = istrdup(from);
 | |
|   m->len  = (int)istrlen(from);
 | |
|   m->to   = to;
 | |
| 
 | |
|   *prev = m;
 | |
| 
 | |
|   return end;
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_shortref *
 | |
| def_shortref(dtd_parser *p, dtd_symbol *name)
 | |
| { dtd *dtd = p->dtd;
 | |
|   dtd_shortref *sr, **pr;
 | |
| 
 | |
|   for(pr=&dtd->shortrefs; *pr; pr = &(*pr)->next)
 | |
|   { dtd_shortref *r = *pr;
 | |
| 
 | |
|     if ( r->name == name )
 | |
|       return r;
 | |
|   }
 | |
| 
 | |
|   sr = sgml_calloc(1, sizeof(*sr));
 | |
|   sr->name = name;
 | |
|   *pr = sr;
 | |
| 
 | |
|   return sr;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Create an array with TRUE in any character   that can be the last of the
 | |
| shortref map.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static void
 | |
| compile_map(dtd *dtd, dtd_shortref *sr)
 | |
| { dtd_map *map;
 | |
| 
 | |
|   for(map = sr->map; map; map = map->next)
 | |
|   { ichar last = map->from[map->len-1];
 | |
| 
 | |
|     switch( last )
 | |
|     { case CHR_BLANK:
 | |
|       case CHR_DBLANK:
 | |
|       { wint_t i;
 | |
| 
 | |
| 	for( i=0; i< SHORTMAP_SIZE; i++)
 | |
| 	{ if ( HasClass(dtd, i, CH_BLANK) )
 | |
| 	    sr->ends[i] = TRUE;
 | |
| 	}
 | |
|       }
 | |
| 
 | |
|       default:
 | |
| 	sr->ends[last] = TRUE;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_shortref_declaration(dtd_parser *p, const ichar *decl)
 | |
| { dtd *dtd = p->dtd;
 | |
|   ichar buf[MAXDECL];
 | |
|   dtd_shortref *sr;
 | |
|   dtd_symbol *name;
 | |
|   const ichar *s;
 | |
| 
 | |
|   if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
 | |
|     return FALSE;
 | |
|   decl = buf;
 | |
| 
 | |
|   if ( !(s=itake_name(p, decl, &name)) )
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | |
|   decl = s;
 | |
| 
 | |
|   sr = def_shortref(p, name);
 | |
|   if ( sr->defined )
 | |
|   { gripe(p, ERC_REDEFINED, L"shortref", name);
 | |
| 
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   sr->defined = TRUE;
 | |
| 
 | |
|   while( *(decl = iskip_layout(dtd, decl)) != '\0'
 | |
| 	 && (s=shortref_add_map(p, decl, sr)) )
 | |
|     decl = s;
 | |
|   compile_map(dtd, sr);
 | |
| 
 | |
|   if ( *decl )
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"Map expected", decl);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Find named name.  The name NULL stands for the #empty map
 | |
| 
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static dtd_shortref *
 | |
| find_map(dtd *dtd, dtd_symbol *name)
 | |
| { dtd_shortref *sr;
 | |
| 
 | |
|   if ( !name )
 | |
|   { static dtd_shortref *empty;
 | |
| 
 | |
|     if ( !empty )
 | |
|     { empty = sgml_calloc(1, sizeof(*empty));
 | |
|       empty->name = dtd_add_symbol(dtd, (ichar*)"#EMPTY");
 | |
|       empty->defined = TRUE;
 | |
|     }
 | |
| 
 | |
|     return empty;
 | |
|   }
 | |
| 
 | |
|   for( sr = dtd->shortrefs; sr; sr = sr->next )
 | |
|   { if ( sr->name == name )
 | |
|     { if ( !sr->defined )
 | |
| 	break;
 | |
| 
 | |
|       return sr;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| set_map_element(dtd_element *e, void *closure)
 | |
| { e->map = closure;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_usemap_declaration(dtd_parser *p, const ichar *decl)
 | |
| { dtd *dtd = p->dtd;
 | |
|   ichar buf[MAXDECL];
 | |
|   dtd_symbol *name;
 | |
|   const ichar *s;
 | |
|   dtd_symbol *ename;
 | |
|   dtd_element *e;
 | |
|   dtd_shortref *map;
 | |
| 
 | |
|   if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
 | |
|     return FALSE;
 | |
|   decl = buf;
 | |
| 
 | |
|   if ( !(s=itake_name(p, decl, &name)) )
 | |
|   { if ( (s=isee_identifier(dtd, decl, "#empty")) )
 | |
|       name = NULL;
 | |
|     else
 | |
|       return gripe(p, ERC_SYNTAX_ERROR, L"map-name expected", decl);
 | |
|   }
 | |
| 
 | |
|   decl = s;
 | |
|   if ( !(map = find_map(dtd, name)) )
 | |
|     map = def_shortref(p, name);	/* make undefined map */
 | |
| 
 | |
|   if ( isee_func(dtd, decl, CF_GRPO) )	/* ( */
 | |
|   { dtd_model *model;
 | |
| 
 | |
|     if ( (model = make_model(p, decl, &s)) )
 | |
|     { for_elements_in_model(model, set_map_element, map);
 | |
|       free_model(model);
 | |
|       decl = s;
 | |
|     } else
 | |
|       return FALSE;
 | |
|   } else if ( (s=itake_name(p, decl, &ename)) )
 | |
|   { e = find_element(dtd, ename);
 | |
|     e->map = map;
 | |
|     decl = s;
 | |
|   } else if ( p->environments )
 | |
|   { if ( !map->defined )
 | |
|       gripe(p, ERC_EXISTENCE, L"map", name->name);
 | |
| 
 | |
|     p->environments->map = map;
 | |
|     p->map = p->environments->map;
 | |
|   } else
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"element-name expected", decl);
 | |
| 
 | |
|   if ( *decl )
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"Unparsed", decl);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| match_map(dtd *dtd, dtd_map *map, ocharbuf *buf)
 | |
| { wchar_t *data = buf->data.w;
 | |
|   wchar_t *e    = data+buf->size-1;
 | |
|   ichar *m      = map->from+map->len-1;
 | |
| 
 | |
|   while( m >= map->from )
 | |
|   { if ( e < data )
 | |
|       return 0;
 | |
| 
 | |
|     if ( *m == *e )
 | |
|     { m--;
 | |
|       e--;
 | |
|       continue;
 | |
|     }
 | |
|     if ( *m == CHR_DBLANK )
 | |
|     { if ( e>data && HasClass(dtd, *e, CH_WHITE) )
 | |
| 	e--;
 | |
|       else
 | |
| 	return FALSE;
 | |
|       goto wblank;
 | |
|     }
 | |
|     if ( *m == CHR_BLANK )
 | |
|     { wblank:
 | |
|       while( e>data && HasClass(dtd, *e, CH_WHITE) )
 | |
| 	e--;
 | |
|       m--;
 | |
|       continue;
 | |
|     }
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   return (int)(data+buf->size-1-e);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| match_shortref(dtd_parser *p)
 | |
| { dtd_map *map;
 | |
| 
 | |
|   for(map = p->map->map; map; map = map->next)
 | |
|   { int len;
 | |
| 
 | |
|     if ( (len=match_map(p->dtd, map, p->cdata)) )
 | |
|     { p->cdata->size -= len;
 | |
| 
 | |
|       if ( p->cdata_must_be_empty )
 | |
|       { int blank = TRUE;
 | |
| 	const wchar_t *s;
 | |
| 	int i;
 | |
| 
 | |
| 	for(s = p->cdata->data.w, i=0; i++ < p->cdata->size; s++)
 | |
| 	{ if ( !iswspace(*s) )
 | |
| 	  { blank = FALSE;
 | |
| 	    break;
 | |
| 	  }
 | |
| 	}
 | |
| 
 | |
| 	p->blank_cdata = blank;
 | |
|       }
 | |
| 
 | |
|       WITH_CLASS(p, EV_SHORTREF,
 | |
| 		 { sgml_cplocation(&p->startloc, &p->location);
 | |
| 		   p->startloc.charpos -= len;
 | |
| 		   p->startloc.linepos -= len;
 | |
| 		   if ( p->startloc.linepos < 0 )
 | |
| 		   { p->startloc.line--;
 | |
| 		     p->startloc.linepos = 0; /* not correct! */
 | |
| 		   }
 | |
| 		   DEBUG(printf("%d-%d: Matched map '%s' --> %s, len = %d\n",
 | |
| 				p->startloc.charpos,
 | |
| 				p->location.charpos,
 | |
| 				map->from, map->to->name, len));
 | |
| 
 | |
| 		   process_entity(p, map->to->name);
 | |
| 		 })			/* TBD: optimise */
 | |
|       return TRUE;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	       ELEMENTS		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static void
 | |
| add_submodel(dtd_model *m, dtd_model *sub)
 | |
| { dtd_model **d;
 | |
| 
 | |
|   for( d = &m->content.group; *d; d = &(*d)->next )
 | |
|     ;
 | |
|   *d = sub;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* for_elements_in_model()
 | |
|    Walk along the model, calling f(e, closure) for any element found
 | |
|    in the model.  Used for <!SHORTREF name model>
 | |
| */
 | |
| 
 | |
| static void
 | |
| for_elements_in_model(dtd_model *m,
 | |
| 		      void (*f)(dtd_element *e, void *closure),
 | |
| 		      void *closure)
 | |
| { switch(m->type)
 | |
|   { case MT_SEQ:
 | |
|     case MT_AND:
 | |
|     case MT_OR:
 | |
|     { dtd_model *sub = m->content.group;
 | |
| 
 | |
|       for(; sub; sub = sub->next)
 | |
| 	for_elements_in_model(sub, f, closure);
 | |
|       break;
 | |
|     }
 | |
|     case MT_ELEMENT:
 | |
|       (*f)(m->content.element, closure);
 | |
|       break;
 | |
|     default:
 | |
|       ;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_model(dtd_model *m)
 | |
| { switch(m->type)
 | |
|   { case MT_SEQ:
 | |
|     case MT_AND:
 | |
|     case MT_OR:
 | |
|     { dtd_model *sub = m->content.group;
 | |
|       dtd_model *next;
 | |
| 
 | |
|       for(; sub; sub = next)
 | |
|       { next = sub->next;
 | |
| 
 | |
| 	free_model(sub);
 | |
|       }
 | |
|     }
 | |
|     default:
 | |
|       ;
 | |
|   }
 | |
| 
 | |
|   sgml_free(m);
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_model *
 | |
| make_model(dtd_parser *p, const ichar *decl, const ichar **end)
 | |
| { const ichar *s;
 | |
|   dtd_model *m = sgml_calloc(1, sizeof(*m));
 | |
|   dtd_symbol *id;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   decl = iskip_layout(dtd, decl);
 | |
| 
 | |
|   if ( (s=isee_identifier(dtd, decl, "#pcdata")) )
 | |
|   { m->type = MT_PCDATA;
 | |
|     m->cardinality = MC_ONE;		/* actually don't care */
 | |
|     *end = s;
 | |
|     return m;
 | |
|   }
 | |
| 
 | |
|   if ( (s=itake_name(p, decl, &id)) )
 | |
|   { m->type = MT_ELEMENT;
 | |
|     m->content.element = find_element(dtd, id);
 | |
|     decl = s;
 | |
|   } else
 | |
|   { if ( !(s=isee_func(dtd, decl, CF_GRPO)) )
 | |
|     { gripe(p, ERC_SYNTAX_ERROR, L"Name group expected", decl);
 | |
|       free_model(m);
 | |
|       return NULL;
 | |
|     }
 | |
|     decl = s;
 | |
| 
 | |
|     for(;;)
 | |
|     { dtd_model *sub;
 | |
|       modeltype mt;
 | |
| 
 | |
|       if ( !(sub = make_model(p, decl, &s)) )
 | |
|       { free_model(sub);
 | |
| 	return NULL;
 | |
|       }
 | |
|       decl = s;
 | |
|       add_submodel(m, sub);
 | |
| 
 | |
|       if ( (s = isee_func(dtd, decl, CF_OR)) )
 | |
|       { decl = s;
 | |
| 	mt = MT_OR;
 | |
|       } else if ( (s = isee_func(dtd, decl, CF_SEQ)) )
 | |
|       { decl = s;
 | |
| 	mt = MT_SEQ;
 | |
|       } else if ( (s = isee_func(dtd, decl, CF_AND)) )
 | |
|       { decl = s;
 | |
| 	mt = MT_AND;
 | |
|       } else if ( (s = isee_func(dtd, decl, CF_GRPC)) )
 | |
|       { decl = s;
 | |
| 	break;
 | |
|       } else
 | |
|       { gripe(p, ERC_SYNTAX_ERROR, L"Connector ('|', ',' or '&') expected", decl);
 | |
| 	free_model(m);
 | |
| 	return NULL;
 | |
|       }
 | |
|       decl = iskip_layout(dtd, decl);
 | |
| 
 | |
|       if ( m->type != mt )
 | |
|       { if ( !m->type )
 | |
| 	  m->type = mt;
 | |
| 	else
 | |
| 	{ gripe(p, ERC_SYNTAX_ERROR, L"Different connector types in model", decl);
 | |
| 	  free_model(m);
 | |
| 	  return NULL;
 | |
| 	}
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( (s = isee_func(dtd, decl, CF_OPT)) )
 | |
|   { decl = s;
 | |
|     m->cardinality = MC_OPT;
 | |
|   } else if ( (s=isee_func(dtd, decl, CF_REP)) )
 | |
|   { decl = s;
 | |
|     m->cardinality = MC_REP;
 | |
|   } else if ( (s=isee_func(dtd, decl, CF_PLUS)) )
 | |
|   {					/* ROK: watch out for (x) +(y) */
 | |
|     if ( isee_func(dtd, iskip_layout(dtd, s), CF_GRPO) == NULL )
 | |
|     { decl = s;
 | |
|       m->cardinality = MC_PLUS;
 | |
|     }
 | |
|   } else
 | |
|     m->cardinality = MC_ONE;
 | |
| 
 | |
|   if ( m->type == MT_UNDEF )		/* simplify (e+), etc. */
 | |
|   { dtd_model *sub = m->content.group;
 | |
|     modelcard card;
 | |
| 
 | |
|     assert(!sub->next);
 | |
|     if ( sub->cardinality == MC_ONE )
 | |
|       card = m->cardinality;
 | |
|     else if ( m->cardinality == MC_ONE )
 | |
|       card = sub->cardinality;
 | |
|     else
 | |
|     { m->type = MT_OR;
 | |
|       goto out;
 | |
|     }
 | |
| 
 | |
|     *m = *sub;
 | |
|     m->cardinality = card;
 | |
|     sgml_free(sub);
 | |
|   }
 | |
| 
 | |
| out:
 | |
|   *end = iskip_layout(dtd, decl);
 | |
|   return m;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| process_model(dtd_parser *p, dtd_edef *e, const ichar *decl)
 | |
| { const ichar *s;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   decl = iskip_layout(dtd, decl);
 | |
|   if ( (s = isee_identifier(dtd, decl, "empty")) )
 | |
|   { e->type = C_EMPTY;
 | |
|     return s;
 | |
|   }
 | |
|   if ( (s = isee_identifier(dtd, decl, "cdata")) )
 | |
|   { e->type = C_CDATA;
 | |
|     return s;
 | |
|   }
 | |
|   if ( (s = isee_identifier(dtd, decl, "rcdata")) )
 | |
|   { e->type = C_RCDATA;
 | |
|     return s;
 | |
|   }
 | |
|   if ( (s = isee_identifier(dtd, decl, "any")) )
 | |
|   { e->type = C_ANY;
 | |
|     return s;
 | |
|   }
 | |
| 
 | |
|   e->type = C_PCDATA;
 | |
|   if ( !(e->content = make_model(p, decl, &decl)) )
 | |
|     return FALSE;
 | |
| 
 | |
|   return decl;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| See a name-group separator.  As long as we haven't decided, this can be
 | |
| CF_NG.  If we have decided they must all be the same.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static const ichar *
 | |
| isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep)
 | |
| { const ichar *s;
 | |
| 
 | |
|   if ( (s=isee_func(dtd, decl, *sep)) )
 | |
|     return iskip_layout(dtd, s);
 | |
|   if ( *sep == CF_NG )			/* undecided */
 | |
|   { static const charfunc ng[] = { CF_SEQ, CF_OR, CF_AND };
 | |
|     int n;
 | |
| 
 | |
|     for(n=0; n<3; n++)
 | |
|     { if ( (s=isee_func(dtd, decl, ng[n])) )
 | |
|       { *sep = ng[n];
 | |
|         return iskip_layout(dtd, s);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_namegroup(dtd_parser *p, const ichar *decl,
 | |
| 		dtd_symbol **names, int *n)
 | |
| { const ichar *s;
 | |
|   int en = 0;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   if ( (s=isee_func(dtd, decl, CF_GRPO)) )
 | |
|   { charfunc ngs = CF_NG;
 | |
| 
 | |
|     for(;;)
 | |
|     { if ( !(decl=itake_name(p, s, &names[en++])) )
 | |
|       { gripe(p, ERC_SYNTAX_ERROR, L"Name expected", s);
 | |
| 	return NULL;
 | |
|       }
 | |
|       if ( (s=isee_ngsep(dtd, decl, &ngs)) )
 | |
|       { decl = iskip_layout(dtd, s);
 | |
| 	continue;
 | |
|       }
 | |
|       if ( (s=isee_func(dtd, decl, CF_GRPC)) )
 | |
|       { *n = en;
 | |
|         decl = s;
 | |
| 	return iskip_layout(dtd, decl);
 | |
|       }
 | |
|       gripe(p, ERC_SYNTAX_ERROR, L"Bad name-group", decl);
 | |
|       return NULL;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| typedef struct
 | |
| { dtd_symbol **list;
 | |
|   int size;
 | |
| } namelist;
 | |
| 
 | |
| 
 | |
| static void
 | |
| add_list_element(dtd_element *e, void *closure)
 | |
| { namelist *nl = closure;
 | |
| 
 | |
|   nl->list[nl->size++] = e->name;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| itake_el_or_model_element_list(dtd_parser *p,
 | |
| 			       const ichar *decl, dtd_symbol **names, int *n)
 | |
| { const ichar *s;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   if ( isee_func(dtd, decl, CF_GRPO) )
 | |
|   { dtd_model *model;
 | |
| 
 | |
|     if ( (model = make_model(p, decl, &s)) )
 | |
|     { namelist nl;
 | |
| 
 | |
|       nl.list = names;
 | |
|       nl.size = 0;
 | |
|       for_elements_in_model(model, add_list_element, &nl);
 | |
|       free_model(model);
 | |
| 
 | |
|       *n = nl.size;
 | |
|       return s;
 | |
|     } else
 | |
|       return NULL;
 | |
|   } else
 | |
|   { if ( !(s = itake_name(p, decl, &names[0])) )
 | |
|     { gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | |
|       return NULL;
 | |
|     }
 | |
|     *n = 1;
 | |
|     return s;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| add_element_list(dtd_element_list **l, dtd_element *e)
 | |
| { dtd_element_list *n = sgml_calloc(1, sizeof(*n));
 | |
| 
 | |
|   n->value = e;
 | |
| 
 | |
|   for( ; *l; l = &(*l)->next )
 | |
|     ;
 | |
|   *l = n;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_element_declaraction(dtd_parser *p, const ichar *decl)
 | |
| { dtd *dtd = p->dtd;
 | |
|   ichar buf[MAXDECL];
 | |
|   const ichar *s;
 | |
|   dtd_symbol *eid[MAXATTELEM];
 | |
|   dtd_edef *def;
 | |
|   int en;
 | |
|   int i;
 | |
| 
 | |
| 					/* expand parameter entities */
 | |
|   if ( !expand_pentities(p, decl, ZERO_TERM_LEN,
 | |
| 			 buf, sizeof(buf)/sizeof(ichar)) )
 | |
|     return FALSE;
 | |
|   decl = buf;
 | |
| 
 | |
|   if ( !(s=itake_el_or_model_element_list(p, decl, eid, &en)) )
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"Name or name-group expected", decl);
 | |
|   decl = s;
 | |
|   if ( en == 0 )
 | |
|     return TRUE;			/* 0 elements */
 | |
| 
 | |
|   STAT(edefs_decl++);
 | |
|   def = new_element_definition(dtd);
 | |
|   for(i=0; i<en; i++)
 | |
|   { find_element(dtd, eid[i]);
 | |
|     assert(eid[i]->element->structure == NULL);
 | |
|     eid[i]->element->structure = def;
 | |
|     eid[i]->element->undefined = FALSE;
 | |
|   }
 | |
|   def->references = en;			/* for GC */
 | |
| 
 | |
| 					/* omitted tag declarations (opt) */
 | |
|   if ( (s = isee_identifier(dtd, decl, "-")) )
 | |
|   { def->omit_close = FALSE;
 | |
|     goto seeclose;
 | |
|   } else if ( (s = isee_identifier(dtd, decl, "o")) )
 | |
|   { def->omit_open = TRUE;
 | |
| 
 | |
|   seeclose:
 | |
|     decl = s;
 | |
|     if ( (s = isee_identifier(dtd, decl, "-")) )
 | |
|     { def->omit_close = FALSE;
 | |
|     } else if ( (s = isee_identifier(dtd, decl, "o")) )
 | |
|     { for(i=0; i<en; i++)
 | |
| 	def->omit_close = TRUE;
 | |
|     } else
 | |
|       return gripe(p, ERC_SYNTAX_ERROR, L"Bad omit-tag declaration", decl);
 | |
| 
 | |
|     decl = s;
 | |
|   }
 | |
| 
 | |
| 					/* content model */
 | |
|   if ( !(decl=process_model(p, def, decl)) )
 | |
|     return FALSE;
 | |
| 
 | |
| 					/* in/excluded elements */
 | |
|   if ( decl[0] == '-' || decl[0] == '+' )
 | |
|   { dtd_symbol *ng[MAXNAMEGROUP];
 | |
|     int ns;
 | |
|     dtd_element_list **l;
 | |
| 
 | |
|     if ( decl[0] == '-' )
 | |
|       l = &def->excluded;
 | |
|     else
 | |
|       l = &def->included;
 | |
| 
 | |
|     decl++;
 | |
|     if ( (s=itake_namegroup(p, decl, ng, &ns)) )
 | |
|     { int i;
 | |
| 
 | |
|       decl = s;
 | |
| 
 | |
|       for(i=0; i<ns; i++)
 | |
| 	add_element_list(l, find_element(dtd, ng[i]));
 | |
|     } else
 | |
|     { return gripe(p, ERC_SYNTAX_ERROR, L"Name group expected", decl);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (*decl)
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaration", decl);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| add_name_list(dtd_name_list **nl, dtd_symbol *s)
 | |
| { dtd_name_list *n = sgml_calloc(1, sizeof(*n));
 | |
| 
 | |
|   n->value = s;
 | |
| 
 | |
|   for( ; *nl; nl = &(*nl)->next )
 | |
|     ;
 | |
| 
 | |
|   *nl = n;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| set_element_properties(dtd_element *e, dtd_attr *a)
 | |
| { if ( istreq(a->name->name, L"xml:space") )
 | |
|   { switch(a->def)
 | |
|     { case AT_FIXED:
 | |
|       case AT_DEFAULT:
 | |
| 	break;
 | |
|       default:
 | |
| 	return;
 | |
|     }
 | |
| 
 | |
|     switch (a->type )
 | |
|     { case AT_NAMEOF:
 | |
|       case AT_NAME:
 | |
|       case AT_NMTOKEN:
 | |
| 	e->space_mode = istr_to_space_mode(a->att_def.name->name);
 | |
| 	break;
 | |
|       case AT_CDATA:
 | |
| 	e->space_mode = istr_to_space_mode((ichar *)a->att_def.cdata);
 | |
| 	break;
 | |
|       default:
 | |
| 	break;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| add_attribute(dtd_parser *p, dtd_element *e, dtd_attr *a)
 | |
| { dtd_attr_list **l;
 | |
|   dtd_attr_list *n;
 | |
| 
 | |
|   for(l = &e->attributes; *l; l = &(*l)->next)
 | |
|   { if ( (*l)->attribute->name == a->name )
 | |
|     { gripe(p, ERC_REDEFINED, L"attribute", a->name);
 | |
|       a->references++;			/* attempt to redefine attribute: */
 | |
|       free_attribute(a);		/* first wins according to standard */
 | |
| 
 | |
|       return;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   n = sgml_calloc(1, sizeof(*n));
 | |
| 
 | |
|   n->attribute = a;
 | |
|   a->references++;
 | |
|   *l = n;
 | |
|   set_element_properties(e, a);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_attlist_declaraction(dtd_parser *p, const ichar *decl)
 | |
| { dtd *dtd = p->dtd;
 | |
|   dtd_symbol *eid[MAXATTELEM];
 | |
|   int i, en;
 | |
|   ichar buf[MAXDECL];
 | |
|   const ichar *s;
 | |
| 
 | |
| 					/* expand parameter entities */
 | |
|   if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
 | |
|     return FALSE;
 | |
|   decl = iskip_layout(dtd, buf);
 | |
|   DEBUG(printf("Expanded to %s\n", decl));
 | |
| 
 | |
|   if ( !(decl=itake_el_or_model_element_list(p, decl, eid, &en)) )
 | |
|     return FALSE;
 | |
| 
 | |
| 					/* fetch attributes */
 | |
|   while(*decl)
 | |
|   { dtd_attr *at = sgml_calloc(1, sizeof(*at));
 | |
|     at->references = REFS_VIRGIN;
 | |
| 
 | |
| 					/* name of attribute */
 | |
|     if ( !(s = itake_name(p, decl, &at->name)) )
 | |
|     { free_attribute(at);
 | |
|       return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | |
|     }
 | |
|     decl = s;
 | |
| 
 | |
| 					/* (name1|name2|...) type */
 | |
|     if ( (s=isee_func(dtd, decl, CF_GRPO)) )
 | |
|     { charfunc ngs = CF_NG;
 | |
| 
 | |
|       at->type = AT_NAMEOF;
 | |
|       decl=s;
 | |
| 
 | |
|       for(;;)
 | |
|       { dtd_symbol *nm;
 | |
| 
 | |
| 	if ( !(s = itake_nmtoken(p, decl, &nm)) )
 | |
| 	{ free_attribute(at);
 | |
| 	  return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | |
| 	}
 | |
| 	decl = s;
 | |
| 	add_name_list(&at->typeex.nameof, nm);
 | |
| 	if ( (s=isee_ngsep(dtd, decl, &ngs)) )
 | |
| 	{ decl = s;
 | |
| 	  continue;
 | |
| 	}
 | |
| 	if ( (s = isee_func(dtd, decl, CF_GRPC)) )
 | |
| 	{ decl=s;
 | |
| 	  decl = iskip_layout(dtd, decl);
 | |
| 	  break;
 | |
| 	}
 | |
| 	free_attribute(at);
 | |
| 	return gripe(p, ERC_SYNTAX_ERROR, L"Illegal name-group", decl);
 | |
|       }
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "cdata")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_CDATA;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "entity")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_ENTITY;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "entities")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_ENTITIES;
 | |
|       at->islist = TRUE;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "id")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_ID;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "idref")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_IDREF;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "idrefs")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_IDREFS;
 | |
|       at->islist = TRUE;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "name")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_NAME;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "names")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_NAMES;
 | |
|       at->islist = TRUE;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "nmtoken")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_NMTOKEN;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "nmtokens")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_NMTOKENS;
 | |
|       at->islist = TRUE;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "number")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_NUMBER;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "numbers")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_NUMBERS;
 | |
|       at->islist = TRUE;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "nutoken")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_NUTOKEN;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "nutokens")) )
 | |
|     { decl = s;
 | |
|       at->type = AT_NUTOKENS;
 | |
|       at->islist = TRUE;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "notation")) )
 | |
|     { dtd_symbol *ng[MAXNAMEGROUP];
 | |
|       int ns;
 | |
| 
 | |
|       at->type = AT_NOTATION;
 | |
|       decl=s;
 | |
|       if ( (s=itake_namegroup(p, decl, ng, &ns)) )
 | |
|       { decl = s;
 | |
| 
 | |
| 	for(i=0; i<ns; i++)
 | |
| 	  add_name_list(&at->typeex.nameof, ng[i]);
 | |
|       } else
 | |
|       { free_attribute(at);
 | |
| 	return gripe(p, ERC_SYNTAX_ERROR, L"name-group expected", decl);
 | |
|       }
 | |
|     } else
 | |
|     { free_attribute(at);
 | |
|       return gripe(p, ERC_SYNTAX_ERROR, L"Attribute-type expected", decl);
 | |
|     }
 | |
| 
 | |
| 					/* Attribute Defaults */
 | |
|     if ( (s=isee_identifier(dtd, decl, "#fixed")) )
 | |
|     { decl = s;
 | |
|       at->def = AT_FIXED;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "#required")) )
 | |
|     { decl = s;
 | |
|       at->def = AT_REQUIRED;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "#current")) )
 | |
|     { decl = s;
 | |
|       at->def = AT_CURRENT;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "#conref")) )
 | |
|     { decl = s;
 | |
|       at->def = AT_CONREF;
 | |
|     } else if ( (s=isee_identifier(dtd, decl, "#implied")) )
 | |
|     { decl = s;
 | |
|       at->def = AT_IMPLIED;
 | |
|     } else				/* real default */
 | |
|       at->def = AT_DEFAULT;
 | |
| 
 | |
|     if ( at->def == AT_DEFAULT || at->def == AT_FIXED )
 | |
|     { ichar buf[MAXSTRINGLEN];
 | |
|       ichar *start; int len;
 | |
|       const ichar *end;
 | |
| 
 | |
|       if ( !(end=itake_string(dtd, decl, &start, &len)) )
 | |
|       { end=itake_nmtoken_chars(p, decl, buf, sizeof(buf)/sizeof(ichar));
 | |
| 	start = buf;
 | |
| 	len = (int)istrlen(buf);
 | |
|       }
 | |
|       if ( !end )
 | |
| 	return gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute default", decl);
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Note: itake_name(), etc. work on nul-terminated   strings. The result of
 | |
| itake_string() is a  pointer  in  a   nul-terminated  string  and  these
 | |
| functions will stop scanning at the  quote   anyway,  so  we can use the
 | |
| length of the parsed data to verify we parsed all of it.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
|       switch(at->type)
 | |
|       { case AT_CDATA:
 | |
| 	{ at->att_def.cdata = istrndup(start, len);
 | |
| 	  break;
 | |
| 	}
 | |
| 	case AT_ENTITY:
 | |
| 	case AT_NOTATION:
 | |
| 	case AT_NAME:
 | |
| 	{ if ( !(s=itake_name(p, start, &at->att_def.name)) ||
 | |
| 	       (s-start) != len )
 | |
| 	    return gripe(p, ERC_DOMAIN, L"name", decl);
 | |
| 	  break;
 | |
| 	}
 | |
| 	case AT_NMTOKEN:
 | |
| 	case AT_NAMEOF:
 | |
| 	{ if ( !(s=itake_nmtoken(p, start, &at->att_def.name)) ||
 | |
| 	       (s-start) != len )
 | |
| 	    return gripe(p, ERC_DOMAIN, L"nmtoken", decl);
 | |
| 	  break;
 | |
| 	}
 | |
| 	case AT_NUTOKEN:
 | |
| 	{ if ( !(s=itake_nutoken(p, start, &at->att_def.name)) ||
 | |
| 	       (s-start) != len )
 | |
| 	    return gripe(p, ERC_DOMAIN, L"nutoken", decl);
 | |
| 	  break;
 | |
| 	}
 | |
| 	case AT_NUMBER:
 | |
| 	{ if ( !(s=itake_number(p, start, at)) ||
 | |
| 	       (s-start) != len )
 | |
| 	     return gripe(p, ERC_DOMAIN, L"number", decl);
 | |
| 	  break;
 | |
| 	}
 | |
| 	case AT_NAMES:
 | |
| 	case AT_ENTITIES:
 | |
| 	case AT_IDREFS:
 | |
| 	case AT_NMTOKENS:
 | |
| 	case AT_NUMBERS:
 | |
| 	case AT_NUTOKENS:
 | |
| 	{ at->att_def.list = istrndup(buf, len);
 | |
| 	  break;
 | |
| 	}
 | |
| 	default:
 | |
| 	{ free_attribute(at);
 | |
| 	  return gripe(p, ERC_REPRESENTATION, L"No default for type");
 | |
| 	}
 | |
|       }
 | |
| 
 | |
|       decl = end;
 | |
|     }
 | |
| 
 | |
| 					/* add to list */
 | |
|     at->references = 0;
 | |
|     for(i=0; i<en; i++)
 | |
|     { dtd_element *e = def_element(dtd, eid[i]);
 | |
| 
 | |
|       add_attribute(p, e, at);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *    GENERIC TAG PROCESSING	*
 | |
| 		 *******************************/
 | |
| 
 | |
/* Result of in_or_excluded(): how the inclusion and exclusion lists of
   the open elements treat a given element.
*/

typedef enum
{ IE_NORMAL = 0,			/* on neither list */
  IE_INCLUDED,				/* is included */
  IE_EXCLUDED				/* is excluded */
} includetype;
 | |
| 
 | |
| 
 | |
| static includetype
 | |
| in_or_excluded(sgml_environment *env, dtd_element *e)
 | |
| { for(; env; env=env->parent)
 | |
|   { if ( env->element->structure )
 | |
|     { dtd_edef *def = env->element->structure;
 | |
|       dtd_element_list *el;
 | |
| 
 | |
|       for(el=def->excluded; el; el=el->next)
 | |
|       { if ( el->value == e )
 | |
| 	  return IE_EXCLUDED;
 | |
|       }
 | |
|       for(el=def->included; el; el=el->next)
 | |
|       { if ( el->value == e )
 | |
| 	  return IE_INCLUDED;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return IE_NORMAL;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| complete(sgml_environment *env)
 | |
| { if ( env->element->structure &&
 | |
|        !env->element->undefined &&
 | |
|        env->element->structure->type != C_ANY )
 | |
|   { dtd_edef *def = env->element->structure;
 | |
| 
 | |
|     if ( !same_state(def->final_state, env->state) )
 | |
|       return FALSE;
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| validate_completeness(dtd_parser *p, sgml_environment *env)
 | |
| { if ( !complete(env) )
 | |
|   { wchar_t buf[MAXNMLEN+50];
 | |
| 
 | |
|     swprintf(buf, MAXNMLEN+50, L"Incomplete element: <%s>",
 | |
| 	     env->element->name->name);
 | |
| 
 | |
|     gripe(p, ERC_VALIDATE, buf);		/* TBD: expected */
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static sgml_environment *
 | |
| push_element(dtd_parser *p, dtd_element *e, int callback)
 | |
| { if ( e != CDATA_ELEMENT )
 | |
|   { sgml_environment *env = sgml_calloc(1, sizeof(*env));
 | |
| 
 | |
|     emit_cdata(p, FALSE);
 | |
| 
 | |
|     env->element = e;
 | |
|     env->state = make_state_engine(e);
 | |
|     env->space_mode = (p->environments ? p->environments->space_mode
 | |
| 				       : p->dtd->space_mode);
 | |
|     env->parent = p->environments;
 | |
|     p->environments = env;
 | |
| 
 | |
|     if ( p->dtd->shorttag )
 | |
|     { env->saved_waiting_for_net = p->waiting_for_net;
 | |
| 
 | |
|       if ( p->event_class == EV_SHORTTAG )
 | |
|       { p->waiting_for_net = TRUE;
 | |
| 	env->wants_net = TRUE;
 | |
|       } else
 | |
|       { env->wants_net = FALSE;
 | |
| 	if ( e->structure && e->structure->omit_close == FALSE )
 | |
| 	  p->waiting_for_net = FALSE;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     if ( e->map )
 | |
|       p->map = env->map = e->map;
 | |
|     else if ( env->parent )
 | |
|       p->map = env->map = env->parent->map;
 | |
| 
 | |
|     p->first = TRUE;
 | |
|     if ( callback && p->on_begin_element )
 | |
|     { sgml_attribute atts[MAXATTRIBUTES];
 | |
|       int natts = 0;
 | |
| 
 | |
|       if ( !(p->flags & SGML_PARSER_NODEFS) )
 | |
| 	natts = add_default_attributes(p, e, natts, atts);
 | |
| 
 | |
|       (*p->on_begin_element)(p, e, natts, atts);
 | |
|     }
 | |
| 
 | |
|     if ( e->structure )
 | |
|     { if ( e->structure->type == C_CDATA ||
 | |
| 	   e->structure->type == C_RCDATA )
 | |
|       { p->state = (e->structure->type == C_CDATA ? S_CDATA : S_RCDATA);
 | |
| 	p->cdata_state = p->state;
 | |
| 	p->etag = e->name->name;
 | |
| 	p->etaglen = (int)istrlen(p->etag);
 | |
| 	sgml_cplocation(&p->startcdata, &p->location);
 | |
|       } else
 | |
| 	p->cdata_state = S_PCDATA;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return p->environments;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_environment(sgml_environment *env)
 | |
| {
 | |
| #ifdef XMLNS
 | |
|   if ( env->xmlns )
 | |
|     xmlns_free(env->xmlns);
 | |
| #endif
 | |
| 
 | |
|   sgml_free(env);
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Pop the stack,  closing  all  environments  up to  `to'.  The close was
 | |
| initiated by pushing the element `e0'.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static int
 | |
| pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0)
 | |
| { sgml_environment *env, *parent;
 | |
| 
 | |
|   for(env = p->environments; env != to; env=parent)
 | |
|   { dtd_element *e = env->element;
 | |
| 
 | |
|     validate_completeness(p, env);
 | |
|     parent = env->parent;
 | |
| 
 | |
|     if ( e->structure && !e->structure->omit_close )
 | |
|       gripe(p, ERC_OMITTED_CLOSE, e->name->name);
 | |
| 
 | |
|     if ( e0 != CDATA_ELEMENT )
 | |
|       emit_cdata(p, TRUE);
 | |
| 
 | |
|     p->first = FALSE;
 | |
|     p->environments = env;
 | |
|     if ( p->dtd->shorttag )
 | |
|       p->waiting_for_net = env->saved_waiting_for_net;
 | |
| 
 | |
|     WITH_CLASS(p, EV_OMITTED,
 | |
| 	       if ( p->on_end_element )
 | |
| 	         (*p->on_end_element)(p, e));
 | |
|     free_environment(env);
 | |
|   }
 | |
|   p->environments = to;
 | |
|   p->map = to->map;
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| allow_for(dtd_element *in, dtd_element *e)
 | |
| { dtd_edef *def = in->structure;
 | |
|   dtd_model *g;
 | |
| 
 | |
|   if ( def->type == C_EMPTY )
 | |
|   { def->type = C_PCDATA;
 | |
|     def->content = sgml_calloc(1, sizeof(*def->content));
 | |
|     def->content->type = MT_OR;
 | |
|     def->content->cardinality = MC_REP;
 | |
|   }
 | |
|   assert(def->content->type == MT_OR);
 | |
| 
 | |
|   g = def->content->content.group;
 | |
| 
 | |
|   if ( e == CDATA_ELEMENT )
 | |
|   { dtd_model *m;
 | |
| 
 | |
|     for(; g; g = g->next)
 | |
|     { if ( g->type == MT_PCDATA )
 | |
| 	return;
 | |
|     }
 | |
|     m = sgml_calloc(1, sizeof(*m));
 | |
|     m->type	   = MT_PCDATA;
 | |
|     m->cardinality = MC_ONE;		/* ignored */
 | |
|     add_submodel(def->content, m);
 | |
|   } else
 | |
|   { dtd_model *m;
 | |
| 
 | |
|     for(; g; g = g->next)
 | |
|     { if ( g->type == MT_ELEMENT && g->content.element == e )
 | |
| 	return;
 | |
|     }
 | |
|     m = sgml_calloc(1, sizeof(*m));
 | |
|     m->type	   = MT_ELEMENT;
 | |
|     m->cardinality = MC_ONE;		/* ignored */
 | |
|     m->content.element = e;
 | |
|     add_submodel(def->content, m);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static int
 | |
| open_element(dtd_parser *p, dtd_element *e, int warn)
 | |
| { if ( !p->environments && p->enforce_outer_element )
 | |
|   { dtd_element *f = p->enforce_outer_element->element;
 | |
| 
 | |
|     if ( f && f != e )
 | |
|     { if ( !f->structure ||
 | |
| 	   !f->structure->omit_open )
 | |
| 	gripe(p, ERC_OMITTED_OPEN, f->name->name);
 | |
| 
 | |
|       WITH_CLASS(p, EV_OMITTED,
 | |
| 		 { open_element(p, f, TRUE);
 | |
| 		   if ( p->on_begin_element )
 | |
| 		   { sgml_attribute atts[MAXATTRIBUTES];
 | |
| 		     int natts = 0;
 | |
| 
 | |
| 		     if ( !(p->flags & SGML_PARSER_NODEFS) )
 | |
| 		       natts = add_default_attributes(p, f, natts, atts);
 | |
| 
 | |
| 		     (*p->on_begin_element)(p, f, natts, atts);
 | |
| 		   }
 | |
| 		 });
 | |
|     }
 | |
|   }
 | |
| 
 | |
| 					/* no DTD available yet */
 | |
|   if ( !p->environments && !p->dtd->doctype && e != CDATA_ELEMENT )
 | |
|   { const ichar *file;
 | |
| 
 | |
|     file = find_in_catalogue(CAT_DOCTYPE, e->name->name, NULL, NULL,
 | |
| 			     p->dtd->dialect != DL_SGML);
 | |
|     if ( file )
 | |
|     { dtd_parser *clone = clone_dtd_parser(p);
 | |
| 
 | |
|       gripe(p, ERC_NO_DOCTYPE, e->name->name, file);
 | |
| 
 | |
|       if ( load_dtd_from_file(clone, file) )
 | |
| 	p->dtd->doctype = istrdup(e->name->name);
 | |
|       else
 | |
| 	gripe(p, ERC_EXISTENCE, L"file", file);
 | |
| 
 | |
|       free_dtd_parser(clone);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( p->environments )
 | |
|   { sgml_environment *env = p->environments;
 | |
| 
 | |
|     if ( env->element->undefined )
 | |
|     { allow_for(env->element, e);	/* <!ELEMENT x - - (model) +(y)> */
 | |
|       push_element(p, e, FALSE);
 | |
|       return TRUE;
 | |
|     }
 | |
| 
 | |
|     if ( env->element->structure &&
 | |
| 	 env->element->structure->type == C_ANY )
 | |
|     { if ( e != CDATA_ELEMENT && e->undefined )
 | |
| 	gripe(p, ERC_EXISTENCE, L"Element", e->name->name);
 | |
|       push_element(p, e, FALSE);
 | |
|       return TRUE;
 | |
|     }
 | |
| 
 | |
|     switch(in_or_excluded(env, e))
 | |
|     { case IE_INCLUDED:
 | |
|         push_element(p, e, FALSE);
 | |
| 	return TRUE;
 | |
|       case IE_EXCLUDED:
 | |
| 	if ( warn )
 | |
| 	  gripe(p, ERC_NOT_ALLOWED, e->name->name);
 | |
| 	/*FALLTHROUGH*/
 | |
|       case IE_NORMAL:
 | |
| 	for(; env; env=env->parent)
 | |
| 	{ dtd_state *new;
 | |
| 
 | |
| 	  if ( (new = make_dtd_transition(env->state, e)) )
 | |
| 	  { env->state = new;
 | |
| 	    pop_to(p, env, e);
 | |
| 	    push_element(p, e, FALSE);
 | |
| 	    return TRUE;
 | |
| 	  } else
 | |
| 	  { dtd_element *oe[MAXOMITTED]; /* omitted open */
 | |
| 	    int olen;
 | |
| 	    int i;
 | |
| 
 | |
| 	    if ( (olen=find_omitted_path(env->state, e, oe)) > 0 )
 | |
| 	    { pop_to(p, env, e);
 | |
| 	      WITH_CLASS(p, EV_OMITTED,
 | |
| 	      for(i=0; i<olen; i++)
 | |
| 	      { env->state = make_dtd_transition(env->state, oe[i]);
 | |
| 		env = push_element(p, oe[i], TRUE);
 | |
| 	      })
 | |
| 	      env->state = make_dtd_transition(env->state, e);
 | |
| 	      push_element(p, e, FALSE);
 | |
| 	      return TRUE;
 | |
| 	    }
 | |
| 	  }
 | |
| 
 | |
| 	  if ( !env->element->structure ||
 | |
| 	       !env->element->structure->omit_close )
 | |
| 	    break;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     if ( warn )
 | |
|     { if ( e == CDATA_ELEMENT )
 | |
| 	gripe(p, ERC_VALIDATE, L"#PCDATA not allowed here");
 | |
|       else if ( e->undefined )
 | |
| 	gripe(p, ERC_EXISTENCE, L"Element", e->name->name);
 | |
|       else
 | |
| 	gripe(p, ERC_NOT_ALLOWED, e->name->name);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( warn )
 | |
|   { push_element(p, e, FALSE);
 | |
|     return TRUE;
 | |
|   } else
 | |
|     return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| close_element(dtd_parser *p, dtd_element *e, int conref)
 | |
| { sgml_environment *env;
 | |
| 
 | |
|   for(env = p->environments; env; env=env->parent)
 | |
|   { if ( env->element == e )		/* element is open */
 | |
|     { sgml_environment *parent;
 | |
| 
 | |
|       for(env = p->environments; ; env=parent)
 | |
|       {	dtd_element *ce	= env->element;
 | |
| 
 | |
| 	if ( !(conref && env == p->environments) )
 | |
| 	  validate_completeness(p, env);
 | |
| 	parent = env->parent;
 | |
| 
 | |
| 	p->first = FALSE;
 | |
| 	if ( p->on_end_element )
 | |
| 	  (*p->on_end_element)(p, env->element);
 | |
| 	free_environment(env);
 | |
| 	p->environments = parent;
 | |
| 
 | |
| 	if ( ce == e )			/* closing current element */
 | |
| 	{ p->map = (parent ? parent->map : NULL);
 | |
| 	  return TRUE;
 | |
| 	} else				/* omited close */
 | |
| 	{ if ( ce->structure && !ce->structure->omit_close )
 | |
| 	    gripe(p, ERC_OMITTED_CLOSE, ce->name->name);
 | |
| 	}
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return gripe(p, ERC_NOT_OPEN, e->name->name);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| close_current_element(dtd_parser *p)
 | |
| { if ( p->environments )
 | |
|   { dtd_element *e = p->environments->element;
 | |
| 
 | |
|     emit_cdata(p, TRUE);
 | |
|     return close_element(p, e, FALSE);
 | |
|   }
 | |
| 
 | |
|   return gripe(p, ERC_SYNTAX_ERROR, L"No element to close", "");
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| get_attribute_value()
 | |
| 
 | |
| Get the value for an attribute.  Once   I  thought  this was simple, but
 | |
| Richard O'Keefe pointed to the complex   handling of white-space in SGML
 | |
| attributes. Basically, if the attribute is quoted, we need:
 | |
| 
 | |
| 	* If CDATA, map all blank to space characters, then expand
 | |
| 	  entities
 | |
| 
 | |
| 	* If !CDATA expand all entities, canonise white space by
 | |
| 	  deleting leading and trailing space and squishing multiple
 | |
| 	  space characters to a single (lower for us) case.
 | |
| 
 | |
| This almost, but not completely matches the XML definition. This however
 | |
| is so complex we will ignore it for now.
 | |
| 
 | |
| [Rewritten by Richard O'Keefe with these additional comments]
 | |
| Reads a value, the  attribute  name   and  value  indicator  having been
 | |
| processed already. It calls itake_string() to   read  quoted values, and
 | |
| itake_unquoted() to read unquoted values.
 | |
| 
 | |
| itake_string(dtd, in, buf, size)
 | |
| 	- skips layout INCLUDING comments,
 | |
| 	- returns NULL if the next character is not ' or ",
 | |
| 	- copies characters from in to buf until a matching ' or " is found,
 | |
| 	- adds a terminating \0,
 | |
| 	- skips more layout INCLUDING comments, and
 | |
| 	- returns the new input position.
 | |
| It is quite wrong to skip leading comments here.  In the tag
 | |
| 
 | |
|     <foo bar = --ugh-- zoo>
 | |
| 
 | |
| the characters "--ugh--" *are the value*.  They are not a comment.
 | |
| Comments are not in fact allowed inside tags, unfortunately.
 | |
| This tag is equivalent to
 | |
| 
 | |
|     <foo bar="--ugh--" something="zoo">
 | |
| 
 | |
| where something is an attribute that has zoo as one of its enumerals.
 | |
| 
 | |
| Because itake_string() is called in many other places, this bug has
 | |
| not yet been fixed.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static ichar const *
 | |
| get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att)
 | |
| { ichar tmp[MAXSTRINGLEN];
 | |
|   ichar *buf = tmp;
 | |
|   ichar const *s;
 | |
|   ichar c;
 | |
|   dtd *dtd = p->dtd;
 | |
|   ichar const *end;
 | |
|   ichar *start; int len;
 | |
| 
 | |
|   enum
 | |
|   { DIG_FIRST = 8,		/* any token start with digit? */
 | |
|     NAM_FIRST = 4,		/* any token start with non-digit name char? */
 | |
|     NAM_LATER = 2,		/* any token have non-digit name char later? */
 | |
|     ANY_OTHER = 1,		/* any token have illegal character? */
 | |
|     YET_EMPTY = 0
 | |
|   }
 | |
|   token = YET_EMPTY;
 | |
| 
 | |
|   att->value.textW = NULL;		/* UCS text */
 | |
|   att->value.number = 0;
 | |
|   att->flags = 0;
 | |
| 
 | |
|   end = itake_string(dtd, decl, &start, &len);
 | |
| 
 | |
|   if ( end != NULL )
 | |
|   { ocharbuf out;
 | |
| 
 | |
|     init_ocharbuf(&out);
 | |
|     expand_entities(p, start, len, &out);
 | |
| 
 | |
|     if ( att->definition->type == AT_CDATA )
 | |
|     { malloc_ocharbuf(&out);
 | |
| 
 | |
|       att->value.number = out.size;
 | |
|       att->value.textW  = out.data.w;
 | |
| 
 | |
|       return end;
 | |
|     } else
 | |
|     { ichar *d;
 | |
| 
 | |
|       buf = out.data.w;
 | |
| 
 | |
|       /* canonicalise blanks */
 | |
|       s = buf;
 | |
|       while ((c = *s++) != '\0' && HasClass(dtd, c, CH_BLANK))
 | |
| 	;
 | |
|       d = buf;
 | |
|       while ( c != '\0' )
 | |
|       { token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST
 | |
| 	  : HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER;
 | |
| 	if ( d != buf )
 | |
| 	  *d++ = ' ';
 | |
| 	if ( dtd->case_sensitive )
 | |
| 	{ *d++ = c;
 | |
| 	  while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
 | |
| 	  { token |= HasClass(dtd, c, CH_DIGIT) ? 0
 | |
| 	      : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
 | |
| 	    *d++ = c;
 | |
| 	  }
 | |
| 	} else
 | |
| 	{ *d++ = towlower(c);
 | |
| 	  while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
 | |
| 	  { token |= HasClass(dtd, c, CH_DIGIT) ? 0
 | |
| 	      : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
 | |
| 	    *d++ = towlower(c);
 | |
| 	  }
 | |
| 	}
 | |
| 	while (c != '\0' && HasClass(dtd, c, CH_BLANK))
 | |
| 	  c = *s++;
 | |
|       }
 | |
|       *d = '\0';
 | |
|     }
 | |
|   } else
 | |
|   { end = itake_unquoted(p, decl, tmp, sizeof(tmp)/sizeof(ichar));
 | |
|     if (end == NULL)
 | |
|       return NULL;
 | |
| 
 | |
|     s = buf;
 | |
|     c = *s++;
 | |
|     if (c != '\0')
 | |
|     { token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST
 | |
| 	: HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER;
 | |
|       while ((c = *s++) != 0)
 | |
|       { token |= HasClass(dtd, c, CH_DIGIT) ? 0
 | |
| 	  : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
 | |
|       }
 | |
|     }
 | |
|     if ( token == YET_EMPTY || (token & ANY_OTHER) != 0)
 | |
|       gripe(p, ERC_SYNTAX_WARNING, L"Attribute value requires quotes", buf);
 | |
| 
 | |
|     if (!dtd->case_sensitive && att->definition->type != AT_CDATA)
 | |
|       istrlower(buf);
 | |
|   }
 | |
| 
 | |
|   switch (att->definition->type)
 | |
|   { case AT_NUMBER:		/* number */
 | |
|       if (token != DIG_FIRST)
 | |
|       { gripe(p, ERC_SYNTAX_WARNING, L"NUMBER expected", decl);
 | |
|       } else if (dtd->number_mode == NU_INTEGER)
 | |
|       { (void) istrtol(buf, &att->value.number);
 | |
|       } else
 | |
|       { att->value.textW  = istrdup(buf);
 | |
| 	att->value.number = (long)istrlen(buf);
 | |
|       }
 | |
|       return end;
 | |
|     case AT_CDATA:		/* CDATA attribute */
 | |
|       att->value.textW  = istrdup(buf);
 | |
|       att->value.number = (long)istrlen(buf);
 | |
|       return end;
 | |
|     case AT_ID:		/* identifier */
 | |
|     case AT_IDREF:		/* identifier reference */
 | |
|     case AT_NAME:		/* name token */
 | |
|     case AT_NOTATION:		/* notation-name */
 | |
|       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"NAME expected", decl);
 | |
|       break;
 | |
|     case AT_NAMEOF:		/* one of these names */
 | |
|     case AT_NMTOKEN:		/* name-token */
 | |
|       if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"NMTOKEN expected", decl);
 | |
|       if ( att->definition->type == AT_NAMEOF )
 | |
|       { dtd_name_list *nl;
 | |
| 
 | |
| 	for(nl=att->definition->typeex.nameof; nl; nl = nl->next)
 | |
| 	{ if ( istreq(nl->value->name, buf) )
 | |
| 	    goto passed;
 | |
| 	}
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"unexpected value", decl);
 | |
|       }
 | |
|       break;
 | |
|     case AT_NUTOKEN:		/* number token */
 | |
|       if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"NUTOKEN expected", decl);
 | |
|       break;
 | |
|     case AT_ENTITY:		/* entity-name */
 | |
|       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"entity NAME expected", decl);
 | |
|       break;
 | |
|     case AT_NAMES:		/* list of names */
 | |
|     case AT_IDREFS:		/* list of identifier references */
 | |
|       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"NAMES expected", decl);
 | |
|       break;
 | |
|     case AT_ENTITIES:		/* entity-name list */
 | |
|       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"entity NAMES expected", decl);
 | |
|       break;
 | |
|     case AT_NMTOKENS:		/* name-token list */
 | |
|       if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"NMTOKENS expected", decl);
 | |
|       break;
 | |
|     case AT_NUMBERS:		/* number list */
 | |
|       if (token != DIG_FIRST)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"NUMBERS expected", decl);
 | |
|       break;
 | |
|     case AT_NUTOKENS:
 | |
|       if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
 | |
| 	gripe(p, ERC_SYNTAX_WARNING, L"NUTOKENS expected", decl);
 | |
|       break;
 | |
|     default:
 | |
|       assert(0);
 | |
|       return NULL;
 | |
|   }
 | |
| 
 | |
| passed:
 | |
|   att->value.textW  = istrdup(buf);	/* TBD: more validation */
 | |
|   att->value.number = (long)istrlen(buf);
 | |
|   return end;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const ichar *
 | |
| process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl,
 | |
| 		   sgml_attribute *atts, int *argc)
 | |
| { int attn = 0;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   decl = iskip_layout(dtd, decl);
 | |
|   while(decl && *decl)
 | |
|   { dtd_symbol *nm;
 | |
|     const ichar *s;
 | |
| 
 | |
|     if ( (s=itake_nmtoken(p, decl, &nm)) )
 | |
|     { decl = s;
 | |
| 
 | |
|       if ( (s=isee_func(dtd, decl, CF_VI)) ) /* name= */
 | |
|       { dtd_attr *a;
 | |
| 
 | |
| 	if ( !HasClass(dtd, nm->name[0], CH_NMSTART) )
 | |
| 	  gripe(p, ERC_SYNTAX_WARNING,
 | |
| 		"Illegal start of attribute-name", decl);
 | |
| 
 | |
| 	decl = s;
 | |
| 	if ( !(a=find_attribute(e, nm)) )
 | |
| 	{ a = sgml_calloc(1, sizeof(*a));
 | |
| 
 | |
| 	  a->name = nm;
 | |
| 	  a->type = AT_CDATA;
 | |
| 	  a->def  = AT_IMPLIED;
 | |
| 	  add_attribute(p, e, a);
 | |
| 
 | |
| 	  if ( !e->undefined &&
 | |
| 	       !(dtd->dialect != DL_SGML &&
 | |
| 		 (istreq(L"xmlns", nm->name) ||
 | |
| 		  istrprefix(L"xmlns:", nm->name))) )
 | |
| 	    gripe(p, ERC_NO_ATTRIBUTE, e->name->name, nm->name);
 | |
| 	}
 | |
| 	atts[attn].definition = a;
 | |
| 	if ( (decl=get_attribute_value(p, decl, atts+attn)) )
 | |
| 	{ attn++;
 | |
| 	  continue;
 | |
| 	}
 | |
|       } else if ( e->structure )
 | |
|       { dtd_attr_list *al;		/* value shorthand */
 | |
| 
 | |
| 	for(al=e->attributes; al; al=al->next)
 | |
| 	{ dtd_attr *a = al->attribute;
 | |
| 
 | |
| 	  if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
 | |
| 	  { dtd_name_list *nl;
 | |
| 
 | |
| 	    for(nl=a->typeex.nameof; nl; nl = nl->next)
 | |
| 	    { if ( nl->value == nm )
 | |
| 	      { if ( dtd->dialect != DL_SGML )
 | |
| 		  gripe(p, ERC_SYNTAX_WARNING,
 | |
| 			"Value short-hand in XML mode", decl);
 | |
| 		atts[attn].flags	= 0;
 | |
| 		atts[attn].definition   = a;
 | |
| 		atts[attn].value.textW  = istrdup(nm->name);
 | |
| 		atts[attn].value.number = (long)istrlen(nm->name);
 | |
| 		attn++;
 | |
| 		goto next;
 | |
| 	      }
 | |
| 	    }
 | |
| 	  }
 | |
| 	}
 | |
| 	gripe(p, ERC_NO_ATTRIBUTE_VALUE, e->name->name, nm->name);
 | |
| 	decl = s;
 | |
|       } else
 | |
|       { gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute", decl);
 | |
| 	decl = s;
 | |
|       }
 | |
|     } else
 | |
|     { *argc = attn;
 | |
|       return decl;
 | |
|     }
 | |
| 
 | |
|   next:
 | |
|     ;
 | |
|   }
 | |
| 
 | |
|   *argc = attn;
 | |
|   return decl;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| sgml_add_default_attributes()
 | |
| 
 | |
| This function adds attributes for omitted  default and fixed attributes.
 | |
| These attributes are added to  the  end   of  the  attribute  list. This
 | |
| function returns the new  number  of   attributes.  The  `atts' array is
 | |
| assumed   to   be   MAXATTRIBUTES    long,     normally    passed   from
 | |
| process_begin_element.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static int
 | |
| add_default_attributes(dtd_parser *p, dtd_element *e,
 | |
| 		       int natts, sgml_attribute *atts)
 | |
| { dtd_attr_list *al;
 | |
| 
 | |
|   if ( e == CDATA_ELEMENT )
 | |
|     return natts;
 | |
| 
 | |
|   for(al=e->attributes; al; al=al->next)
 | |
|   { dtd_attr *a = al->attribute;
 | |
| 
 | |
|     switch(a->def)
 | |
|     { case AT_REQUIRED:			/* TBD: check if present */
 | |
|       case AT_CURRENT:			/* TBD: register in DTD and reuse */
 | |
|       case AT_CONREF:
 | |
|       case AT_IMPLIED:
 | |
| 	goto next;
 | |
|       case AT_FIXED:
 | |
|       case AT_DEFAULT:
 | |
|       { int i;
 | |
| 	sgml_attribute *ap;
 | |
| 
 | |
| 	for(i=0, ap=atts; i<natts; i++, ap++)
 | |
| 	{ if ( ap->definition == a )
 | |
| 	    goto next;
 | |
| 	}
 | |
| 
 | |
|         ap->definition   = a;
 | |
| 	ap->value.textW  = NULL;
 | |
| 	ap->value.number = 0;
 | |
| 	ap->flags        = SGML_AT_DEFAULT;
 | |
| 
 | |
| 	switch(a->type)
 | |
| 	{ case AT_CDATA:
 | |
| 	    ap->value.textW = a->att_def.cdata;
 | |
| 	    ap->value.number = (long)istrlen(ap->value.textW);
 | |
| 	    break;
 | |
| 	  case AT_NUMBER:
 | |
| 	    if ( p->dtd->number_mode == NU_TOKEN )
 | |
| 	    { ap->value.textW  = (ichar*)a->att_def.name->name;
 | |
| 	      ap->value.number = (long)istrlen(ap->value.textW);
 | |
| 	    } else
 | |
| 	    { ap->value.number = a->att_def.number;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	  default:
 | |
| 	    if ( a->islist )
 | |
| 	    { ap->value.textW = a->att_def.list;
 | |
| 	    } else
 | |
| 	    { ap->value.textW = (ichar*)a->att_def.name->name;
 | |
| 	    }
 | |
| 	    ap->value.number = (long)istrlen(ap->value.textW);
 | |
| 	}
 | |
| 
 | |
| 	natts++;
 | |
|       }
 | |
|     }
 | |
|   next:;
 | |
|   }
 | |
| 
 | |
|   return natts;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_attribute_values(int argc, sgml_attribute *argv)
 | |
| { int i;
 | |
| 
 | |
|   for(i=0; i<argc; i++, argv++)
 | |
|   { if ( (argv->flags & SGML_AT_DEFAULT) )
 | |
|       continue;				/* shared with the DTD */
 | |
| 
 | |
|     if ( argv->value.textW )
 | |
|       sgml_free(argv->value.textW);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_begin_element(dtd_parser *p, const ichar *decl)
 | |
| { dtd *dtd = p->dtd;
 | |
|   dtd_symbol *id;
 | |
|   const ichar *s;
 | |
| 
 | |
|   if ( (s=itake_name(p, decl, &id)) )
 | |
|   { sgml_attribute atts[MAXATTRIBUTES];
 | |
|     int natts;
 | |
|     dtd_element *e = find_element(dtd, id);
 | |
|     int empty = FALSE;
 | |
|     int conref = FALSE;
 | |
|     int rc = TRUE;
 | |
| 
 | |
|     if ( !e->structure )
 | |
|     { dtd_edef *def;
 | |
|       e->undefined = TRUE;
 | |
|       STAT(edefs_implicit++);
 | |
|       def_element(dtd, id);
 | |
|       def = e->structure;
 | |
|       def->type = C_EMPTY;
 | |
|     }
 | |
| 
 | |
|     open_element(p, e, TRUE);
 | |
| 
 | |
|     decl=s;
 | |
|     if ( (s=process_attributes(p, e, decl, atts, &natts)) )
 | |
|       decl=s;
 | |
| 
 | |
|     if ( dtd->dialect != DL_SGML )
 | |
|     { if ( (s=isee_func(dtd, decl, CF_ETAGO2)) )
 | |
|       { empty = TRUE;			/* XML <tag/> */
 | |
| 	decl = s;
 | |
|       }
 | |
| #ifdef XMLNS
 | |
|       if ( dtd->dialect == DL_XMLNS )
 | |
| 	update_xmlns(p, e, natts, atts);
 | |
| #endif
 | |
|       if ( dtd->dialect != DL_SGML )
 | |
| 	update_space_mode(p, e, natts, atts);
 | |
|     } else
 | |
|     { int i;
 | |
| 
 | |
|       for(i=0; i<natts; i++)
 | |
|       { if ( atts[i].definition->def == AT_CONREF )
 | |
| 	{ empty = TRUE;
 | |
| 	  conref = TRUE;
 | |
| 	}
 | |
|       }
 | |
|     }
 | |
|     if ( *decl )
 | |
|       gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute list", decl);
 | |
| 
 | |
|     if ( !(p->flags & SGML_PARSER_NODEFS) )
 | |
|       natts = add_default_attributes(p, e, natts, atts);
 | |
| 
 | |
|     if ( empty ||
 | |
| 	 (dtd->dialect == DL_SGML &&
 | |
| 	  e->structure &&
 | |
| 	  e->structure->type == C_EMPTY &&
 | |
| 	  !e->undefined) )
 | |
|       p->empty_element = e;
 | |
|     else
 | |
|       p->empty_element = NULL;
 | |
| 
 | |
|     if ( p->on_begin_element )
 | |
|       rc = (*p->on_begin_element)(p, e, natts, atts);
 | |
| 
 | |
|     free_attribute_values(natts, atts);
 | |
| 
 | |
|     if ( p->empty_element )
 | |
|     { p->empty_element = NULL;
 | |
|       close_element(p, e, conref);
 | |
|       if ( conref )	/* might be S_CDATA due to declared content */
 | |
| 	p->cdata_state = p->state = S_PCDATA;
 | |
|     }
 | |
| 
 | |
|     return rc;
 | |
|   }
 | |
| 
 | |
|   return gripe(p, ERC_SYNTAX_ERROR, L"Bad open-element tag", decl);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_end_element(dtd_parser *p, const ichar *decl)
 | |
| { dtd *dtd = p->dtd;
 | |
|   dtd_symbol *id;
 | |
|   const ichar *s;
 | |
| 
 | |
|   emit_cdata(p, TRUE);
 | |
|   if ( (s=itake_name(p, decl, &id)) && *s == '\0' )
 | |
|     return close_element(p, find_element(dtd, id), FALSE);
 | |
| 
 | |
|   if ( p->dtd->shorttag && *decl == '\0' ) /* </>: close current element */
 | |
|     return close_current_element(p);
 | |
| 
 | |
|   return gripe(p, ERC_SYNTAX_ERROR, L"Bad close-element tag", decl);
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| process_net(dtd_parser *p)
 | |
|     We've seen a / of a shorttag element.  Close this one.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static int
 | |
| process_net(dtd_parser *p)
 | |
| { sgml_environment *env;
 | |
| 
 | |
|   prepare_cdata(p);
 | |
|   for(env = p->environments; env; env=env->parent)
 | |
|   { if ( env->wants_net )
 | |
|     { sgml_environment *parent;
 | |
| 
 | |
|       pop_to(p, env, NULL);		/* close parents */
 | |
|       validate_completeness(p, env);
 | |
|       parent = env->parent;
 | |
| 
 | |
|       emit_cdata(p, TRUE);
 | |
|       p->first = FALSE;
 | |
| 
 | |
|       if ( p->on_end_element )
 | |
|       { WITH_CLASS(p, EV_SHORTTAG,
 | |
| 		   (*p->on_end_element)(p, env->element));
 | |
|       }
 | |
| 
 | |
|       free_environment(env);
 | |
|       p->environments = parent;
 | |
|       p->map = (parent ? parent->map : NULL);
 | |
| 
 | |
|       return TRUE;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int				/* <!DOCTYPE ...> */
 | |
| process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0)
 | |
| { dtd *dtd = p->dtd;
 | |
|   dtd_symbol *id;
 | |
|   const ichar *s;
 | |
|   dtd_entity *et = NULL;
 | |
| 
 | |
|   if ( !(s=itake_name(p, decl, &id)) )
 | |
|     return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | |
|   decl = s;
 | |
| 
 | |
|   if ( (s=isee_identifier(dtd, decl, "system")) )
 | |
|   { et = sgml_calloc(1, sizeof(*et));
 | |
|     et->type = ET_SYSTEM;
 | |
|     decl = s;
 | |
|   } else if ( (s=isee_identifier(dtd, decl, "public")) )
 | |
|   { et = sgml_calloc(1, sizeof(*et));
 | |
|     et->type = ET_PUBLIC;
 | |
|     decl = s;
 | |
|   } else if ( isee_func(dtd, decl, CF_DSO) )
 | |
|     goto local;
 | |
| 
 | |
|   if ( et )
 | |
|   { et->name = id;
 | |
|     et->catalog_location = CAT_DOCTYPE;
 | |
|     if ( !(s=process_entity_value_declaration(p, decl, et)) )
 | |
|       return FALSE;
 | |
|     decl = s;
 | |
|   }
 | |
| 
 | |
|   if ( !dtd->doctype )			/* i.e. anonymous DTD */
 | |
|   { ichar *file;
 | |
|     dtd_parser *clone;
 | |
| 
 | |
|     dtd->doctype = istrdup(id->name);	/* Fill it */
 | |
|     if ( et )
 | |
|       file = entity_file(dtd, et);
 | |
|     else
 | |
|       file = istrdup(find_in_catalogue(CAT_DOCTYPE,
 | |
| 				       dtd->doctype, NULL, NULL,
 | |
| 				       dtd->dialect != DL_SGML));
 | |
| 
 | |
|     if ( !file )
 | |
|     { gripe(p, ERC_EXISTENCE, L"DTD", dtd->doctype);
 | |
|     } else
 | |
|     { clone = clone_dtd_parser(p);
 | |
|       if ( !load_dtd_from_file(clone, file) )
 | |
| 	gripe(p, ERC_EXISTENCE, L"file", file);
 | |
|       free_dtd_parser(clone);
 | |
|       sgml_free(file);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( et )
 | |
|     free_entity_list(et);
 | |
| 
 | |
| local:
 | |
|   if ( (s=isee_func(dtd, decl, CF_DSO)) ) /* [...] */
 | |
|   { int grouplevel = 1;
 | |
|     data_mode oldmode  = p->dmode;
 | |
|     dtdstate  oldstate = p->state;
 | |
|     locbuf oldloc;
 | |
|     const ichar *q;
 | |
|     icharbuf *saved_ibuf = p->buffer;
 | |
| 
 | |
|     push_location(p, &oldloc);
 | |
| 					/* try to find start-location. */
 | |
| 					/* fails if there is comment before */
 | |
| 					/* the []! */
 | |
|     sgml_cplocation(&p->location, &p->startloc);
 | |
|     inc_location(&p->location, '<');
 | |
|     for(q=decl0; q < s; q++)
 | |
|       inc_location(&p->location, *q);
 | |
|     p->dmode = DM_DTD;
 | |
|     p->state = S_PCDATA;
 | |
|     p->buffer = new_icharbuf();
 | |
| 
 | |
|     for( ; *s; s++ )
 | |
|     { if ( isee_func(dtd, s, CF_LIT) ||	/* skip quoted strings */
 | |
| 	   isee_func(dtd, s, CF_LITA) )
 | |
|       { ichar q = *s;
 | |
| 
 | |
| 	putchar_dtd_parser(p, *s++);	/* pass open quote */
 | |
| 
 | |
| 	for( ; *s && *s != q; s++ )
 | |
| 	  putchar_dtd_parser(p, *s);
 | |
| 
 | |
| 	if ( *s == q )			/* pass closing quote */
 | |
| 	  putchar_dtd_parser(p, *s);
 | |
| 	continue;
 | |
|       }
 | |
| 
 | |
|       if ( isee_func(dtd, s, CF_DSO) )
 | |
| 	grouplevel++;
 | |
|       else if ( isee_func(dtd, s, CF_DSC) && --grouplevel == 0 )
 | |
| 	break;
 | |
|       putchar_dtd_parser(p, *s);
 | |
|     }
 | |
|     p->dtd->implicit = FALSE;
 | |
| 
 | |
|     p->state    = oldstate;
 | |
|     p->dmode    = oldmode;
 | |
|     free_icharbuf(p->buffer);
 | |
|     p->buffer = saved_ibuf;
 | |
|     pop_location(p, &oldloc);
 | |
|   }
 | |
| 
 | |
|   p->enforce_outer_element = id;	/* make this the outer element */
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| init_decoding(dtd_parser *p)
 | |
| {
 | |
| #ifdef UTF8
 | |
|   int decode;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   if ( dtd->encoding == SGML_ENC_UTF8 &&
 | |
|        p->encoded    == TRUE )
 | |
|     decode = TRUE;
 | |
|   else
 | |
|     decode = FALSE;
 | |
| 
 | |
|   if ( p->utf8_decode != decode )
 | |
|   { DEBUG(fprintf(stderr, "%s UTF-8 decoding on %p\n",
 | |
| 		  decode ? "Enable" : "Disable",
 | |
| 		  p));
 | |
| 
 | |
|     p->utf8_decode = decode;
 | |
|   }
 | |
| #endif
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| xml_set_encoding() is the public interface to   set the encoding for the
 | |
| parser.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
/* Compare s1 and s2 ignoring the case of the ASCII letters A-Z,
   independent of the current locale.  Bytes are compared as unsigned
   values throughout, so the sign of the result does not depend on the
   signedness of `char'.  Returns 0 if the strings are equal, a negative
   value if s1 sorts before s2 and a positive value otherwise.
*/

static int                              /* strcasecmp() with C locale */
posix_strcasecmp(const char *s1, const char *s2)
{ for(;; s1++, s2++)
  { int c1 = (unsigned char)*s1;
    int c2 = (unsigned char)*s2;

    if ( c1 >= 'A' && c1 <= 'Z' ) c1 += 'a'-'A';
    if ( c2 >= 'A' && c2 <= 'Z' ) c2 += 'a'-'A';

    if ( c1 != c2 )
      return c1-c2;
    if ( c1 == 0 )                      /* both strings ended */
      return 0;
  }
}
 | |
| 
 | |
| 
 | |
| int
 | |
| xml_set_encoding(dtd_parser *p, const char *enc)
 | |
| { dtd *dtd = p->dtd;
 | |
| 
 | |
|   if ( posix_strcasecmp(enc, "iso-8859-1") == 0 )
 | |
|   { dtd->encoding = SGML_ENC_ISO_LATIN1;
 | |
|   } else if ( posix_strcasecmp(enc, "us-ascii") == 0 )
 | |
|   { dtd->encoding = SGML_ENC_ISO_LATIN1; 	/* doesn't make a difference */
 | |
|   } else if ( posix_strcasecmp(enc, "utf-8") == 0 )
 | |
|   { dtd->encoding = SGML_ENC_UTF8;
 | |
|   } else
 | |
|     return FALSE;
 | |
| 
 | |
|   init_decoding(p);
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| set_encoding() sets the encoding from the encoding="..." field of the
 | |
| XML header.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static void
 | |
| set_encoding(dtd_parser *p, const ichar *enc)
 | |
| { char buf[32];
 | |
|   char *e = buf+sizeof(buf)-1;
 | |
|   char *o;
 | |
|   const ichar *i;
 | |
| 
 | |
|   for(i=enc, o=buf; *i; )
 | |
|   { if ( *i < 128 && o < e )
 | |
|     { *o++ = (char)*i++;
 | |
|     } else
 | |
|     { goto error;
 | |
|     }
 | |
|   }
 | |
|   *o = '\0';
 | |
| 
 | |
|   if ( !xml_set_encoding(p, buf) )
 | |
|   { error:
 | |
|     gripe(p, ERC_EXISTENCE, L"character encoding", enc);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Process <? ... ?>
 | |
| 
 | |
| Should deal with character encoding for XML documents.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static int
 | |
| process_pi(dtd_parser *p, const ichar *decl)
 | |
| { const ichar *s;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   if ( (s=isee_identifier(dtd, decl, "xml")) ) /* <?xml version="1.0"?> */
 | |
|   { decl = s;
 | |
| 
 | |
|     switch(dtd->dialect)
 | |
|     { case DL_SGML:
 | |
| 	set_dialect_dtd(dtd, DL_XML);
 | |
|         break;
 | |
|       case DL_XML:
 | |
|       case DL_XMLNS:
 | |
| 	break;
 | |
|     }
 | |
| 
 | |
|     while(*decl)
 | |
|     { dtd_symbol *nm;
 | |
| 
 | |
|       if ( (s=itake_name(p, decl, &nm)) &&
 | |
| 	   (s=isee_func(dtd, s, CF_VI)) ) 		/* = */
 | |
|       { ichar *start;
 | |
| 	int len;
 | |
| 	ichar buf[MAXSTRINGLEN];
 | |
| 	const ichar *end;
 | |
| 
 | |
| 	if ( !(end=itake_string(dtd, s, &start, &len)) )
 | |
| 	{ end=itake_nmtoken_chars(p, s, buf, sizeof(buf)/sizeof(ichar));
 | |
| 	  start = buf;
 | |
| 	  len = (int)istrlen(buf);
 | |
| 	}
 | |
| 
 | |
| 	if ( end )
 | |
| 	{ decl = end;
 | |
| 
 | |
| 	  if ( istrcaseeq(nm->name, L"encoding") )
 | |
| 	  { ichar tmp[32];
 | |
| 
 | |
| 	    if ( len < (int)(sizeof(tmp)/sizeof(ichar)-1) )
 | |
| 	    { istrncpy(tmp, start, len);
 | |
| 	      tmp[len] = 0;
 | |
| 
 | |
| 	      set_encoding(p, tmp);
 | |
| 	    } else
 | |
| 	    { gripe(p, ERC_SYNTAX_ERROR, L"Unterminated encoding?", decl);
 | |
| 	    }
 | |
| 	  }
 | |
| 
 | |
| 	  /* fprintf(stderr, "XML %s = %s\n", nm->name, buf); */
 | |
| 
 | |
| 	  continue;
 | |
| 	}
 | |
|       }
 | |
| 
 | |
|       gripe(p, ERC_SYNTAX_ERROR, L"Illegal XML parameter", decl);
 | |
|       break;
 | |
|     }
 | |
| 
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   if ( p->on_pi )
 | |
|     (*p->on_pi)(p, decl);
 | |
| 
 | |
|   return FALSE;				/* Warn? */
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_sgml_declaration(dtd_parser *p, const ichar *decl)
 | |
| { return gripe(p, ERC_SYNTAX_WARNING, L"Ignored <!SGML ...> declaration", NULL);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_declaration(dtd_parser *p, const ichar *decl)
 | |
| { const ichar *s;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   if ( p->dmode != DM_DTD )
 | |
|   { if ( (s=isee_func(dtd, decl, CF_ETAGO2)) ) /* </ ... > */
 | |
|     { return process_end_element(p, s);
 | |
|     } else if ( HasClass(dtd, *decl, CH_NAME) ) /* <letter */
 | |
|     { return process_begin_element(p, decl);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( (s=isee_func(dtd, decl, CF_MDO2)) ) /* <! ... >*/
 | |
|   { decl = s;
 | |
| 
 | |
|     if ( p->on_decl )
 | |
|       (*p->on_decl)(p, decl);
 | |
| 
 | |
|     if ( (s = isee_identifier(dtd, decl, "entity")) )
 | |
|       process_entity_declaration(p, s);
 | |
|     else if ( (s = isee_identifier(dtd, decl, "element")) )
 | |
|       process_element_declaraction(p, s);
 | |
|     else if ( (s = isee_identifier(dtd, decl, "attlist")) )
 | |
|       process_attlist_declaraction(p, s);
 | |
|     else if ( (s = isee_identifier(dtd, decl, "notation")) )
 | |
|       process_notation_declaration(p, s);
 | |
|     else if ( (s = isee_identifier(dtd, decl, "shortref")) )
 | |
|       process_shortref_declaration(p, s);
 | |
|     else if ( (s = isee_identifier(dtd, decl, "usemap")) )
 | |
|       process_usemap_declaration(p, s);
 | |
|     else if ( (s = isee_identifier(dtd, decl, "sgml")) )
 | |
|       process_sgml_declaration(p, s);
 | |
|     else if ( (s = isee_identifier(dtd, decl, "doctype")) )
 | |
|     { if ( p->dmode != DM_DTD )
 | |
| 	process_doctype(p, s, decl-1);
 | |
|     } else
 | |
|     { s = iskip_layout(dtd, decl);
 | |
| 
 | |
|       if ( *s )
 | |
| 	gripe(p, ERC_SYNTAX_ERROR, L"Invalid declaration", s);
 | |
|     }
 | |
| 
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   return gripe(p, ERC_SYNTAX_ERROR, L"Invalid declaration", decl);
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	  STREAM BINDING	*
 | |
| 		 *******************************/
 | |
| 
 | |
| void
 | |
| set_file_dtd_parser(dtd_parser *p, input_type type, const ichar *name)
 | |
| { p->location.type      = type;
 | |
|   p->location.name.file = name;
 | |
|   p->location.line      = 1;
 | |
|   p->location.linepos   = 0;
 | |
|   p->location.charpos   = 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| set_src_dtd_parser(dtd_parser *p, input_type type, const ichar *name)
 | |
| { p->location.type        = type;
 | |
|   p->location.name.entity = name;
 | |
|   p->location.line        = 1;
 | |
|   p->location.linepos     = 0;
 | |
|   p->location.charpos     = 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| void
 | |
| set_mode_dtd_parser(dtd_parser *p, data_mode m)
 | |
| { p->dmode = m;				/* DM_DTD or DM_DATA */
 | |
|   p->state = S_PCDATA;
 | |
|   p->blank_cdata = TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| dtd_parser *
 | |
| new_dtd_parser(dtd *dtd)
 | |
| { dtd_parser *p = sgml_calloc(1, sizeof(*p));
 | |
| 
 | |
|   if ( !dtd )
 | |
|     dtd = new_dtd(NULL);
 | |
|   dtd->references++;
 | |
| 
 | |
|   p->magic       = SGML_PARSER_MAGIC;
 | |
|   p->dtd	 = dtd;
 | |
|   p->state	 = S_PCDATA;
 | |
|   p->mark_state	 = MS_INCLUDE;
 | |
|   p->dmode       = DM_DTD;
 | |
|   p->encoded	 = TRUE;		/* encoded octet stream */
 | |
|   p->buffer	 = new_icharbuf();
 | |
|   p->cdata	 = new_ocharbuf();
 | |
|   p->event_class = EV_EXPLICIT;
 | |
|   set_src_dtd_parser(p, IN_NONE, NULL);
 | |
| 
 | |
|   return p;
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_parser *
 | |
| clone_dtd_parser(dtd_parser *p)
 | |
| { dtd_parser *clone = sgml_calloc(1, sizeof(*p));
 | |
| 
 | |
|   *clone = *p;
 | |
|   clone->dtd->references++;
 | |
|   clone->environments =	NULL;
 | |
|   clone->marked	      =	NULL;
 | |
|   clone->etag	      =	NULL;
 | |
|   clone->grouplevel   =	0;
 | |
|   clone->state	      =	S_PCDATA;
 | |
|   clone->mark_state   =	MS_INCLUDE;
 | |
|   clone->dmode	      =	DM_DTD;
 | |
|   clone->buffer	      =	new_icharbuf();
 | |
|   clone->cdata	      =	new_ocharbuf();
 | |
| 
 | |
|   return clone;
 | |
| }
 | |
| 
 | |
| 
 | |
| void
 | |
| free_dtd_parser(dtd_parser *p)
 | |
| { free_icharbuf(p->buffer);
 | |
|   free_ocharbuf(p->cdata);
 | |
| #ifdef XMLNS
 | |
|   xmlns_free(p->xmlns);
 | |
| #endif
 | |
|   free_dtd(p->dtd);
 | |
| 
 | |
|   sgml_free(p);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s)
 | |
| { locbuf old;
 | |
| 
 | |
|   push_location(p, &old);
 | |
|   set_src_dtd_parser(p, in, name);
 | |
|   empty_icharbuf(p->buffer);		/* dubious */
 | |
|   for(; *s; s++)
 | |
|     putchar_dtd_parser(p, *s);
 | |
|   pop_location(p, &old);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_include(dtd_parser *p, const ichar *entity_name)
 | |
| { dtd_symbol *id;
 | |
|   dtd_entity *pe;
 | |
|   dtd *dtd = p->dtd;
 | |
| 
 | |
|   if ( (id=dtd_find_entity_symbol(dtd, entity_name)) &&
 | |
|        (pe=find_pentity(p->dtd, id)) )
 | |
|   { ichar *file;
 | |
| 
 | |
|     if ( (file = entity_file(dtd, pe)) )
 | |
|     { int rc = sgml_process_file(p, file, SGML_SUB_DOCUMENT);
 | |
|       sgml_free(file);
 | |
| 
 | |
|       return rc;
 | |
|     } else
 | |
|     { const ichar *text = entity_value(p, pe, NULL);
 | |
| 
 | |
|       if ( !text )
 | |
| 	return gripe(p, ERC_NO_VALUE, pe->name->name);
 | |
| 
 | |
|       return process_chars(p, IN_ENTITY, entity_name, text);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return gripe(p, ERC_EXISTENCE, L"parameter entity", entity_name);
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Process <![ KEYWORD [
 | |
| 
 | |
| Switches ->mark_state according to KEYWORD. Processes the rest in normal
 | |
| S_PCDATA style, which pops the mark-stack on seeing ]]>
 | |
| 
 | |
| For the purpose of <!DOCTYPE spec [additions]> we switch to S_GROUP if
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static void
 | |
| process_marked_section(dtd_parser *p)
 | |
| { ichar buf[MAXDECL];
 | |
|   dtd *dtd = p->dtd;
 | |
|   const ichar *decl = p->buffer->data;
 | |
|   const ichar *s;
 | |
| 
 | |
|   if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */
 | |
|        (decl=isee_func(dtd, decl, CF_DSO)) && /* [ */
 | |
|        expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
 | |
|   { dtd_symbol *kwd;
 | |
| 
 | |
|     decl = buf;
 | |
|     if ( (s=itake_name(p, decl, &kwd)) &&
 | |
| 	 isee_func(dtd, s, CF_DSO) )	/* [ */
 | |
|     { dtd_marked *m = sgml_calloc(1, sizeof(*m));
 | |
| 
 | |
|       m->keyword = kwd;			/* push on the stack */
 | |
|       m->parent = p->marked;
 | |
|       p->marked = m;
 | |
| 
 | |
|       if ( istrcaseeq(kwd->name, L"IGNORE") )
 | |
| 	m->type = MS_IGNORE;
 | |
|       else if ( istrcaseeq(kwd->name, L"INCLUDE") )
 | |
| 	m->type = MS_INCLUDE;
 | |
|       else if ( istrcaseeq(kwd->name, L"TEMP") )
 | |
| 	m->type = MS_INCLUDE;
 | |
|       else if ( istrcaseeq(kwd->name, L"CDATA") )
 | |
| 	m->type = MS_CDATA;
 | |
|       else if ( istrcaseeq(kwd->name, L"RCDATA") )
 | |
| 	m->type = MS_RCDATA;
 | |
|       else
 | |
| 	m->type = MS_INCLUDE;		/* default */
 | |
| 
 | |
|       empty_icharbuf(p->buffer);
 | |
|       if ( m->type == MS_CDATA )
 | |
| 	p->state = S_MSCDATA;
 | |
|       else
 | |
| 	p->state = S_PCDATA;
 | |
|       if ( p->mark_state != MS_IGNORE )
 | |
| 	p->mark_state = m->type;
 | |
|     }
 | |
|   } else
 | |
|   { decl = p->buffer->data;
 | |
| 
 | |
|     if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */
 | |
| 	 !isee_func(dtd, decl, CF_DSO) ) /* [ */
 | |
|     { p->state = S_GROUP;
 | |
|       p->grouplevel = 1;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| pop_marked_section(dtd_parser *p)
 | |
| { dtd_marked *m = p->marked;
 | |
| 
 | |
|   if ( m )
 | |
|   { p->marked = m->parent;
 | |
|     sgml_free(m);
 | |
|     p->mark_state = (p->marked ? p->marked->type : MS_INCLUDE);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Update the space-mode for the current element.  The space mode defines
 | |
| how spaces are handled in the CDATA output.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static dtd_space_mode
 | |
| istr_to_space_mode(const ichar *val)
 | |
| { if ( istreq(val, L"default") )
 | |
|     return SP_DEFAULT;
 | |
|   if ( istreq(val, L"preserve") )
 | |
|     return SP_PRESERVE;
 | |
|   if ( istreq(val, L"sgml") )
 | |
|     return SP_SGML;
 | |
|   if ( istreq(val, L"remove") )
 | |
|     return SP_REMOVE;
 | |
| 
 | |
|   return SP_INHERIT;			/* interpret as error */
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| update_space_mode(dtd_parser *p, dtd_element *e,
 | |
| 		  int natts, sgml_attribute *atts)
 | |
| { for( ; natts-- > 0; atts++ )
 | |
|   { const ichar *name = atts->definition->name->name;
 | |
| 
 | |
|     if ( istreq(name, L"xml:space") &&
 | |
| 	 atts->definition->type == AT_CDATA &&
 | |
| 	 atts->value.textW )
 | |
|     { dtd_space_mode m = istr_to_space_mode(atts->value.textW);
 | |
| 
 | |
|       if ( m != SP_INHERIT )
 | |
| 	p->environments->space_mode = m;
 | |
|       else
 | |
| 	gripe(p, ERC_EXISTENCE, L"xml:space-mode", atts->value.textW);
 | |
| 
 | |
|       return;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( e->space_mode != SP_INHERIT )
 | |
|     p->environments->space_mode = e->space_mode;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| empty_cdata(dtd_parser *p)
 | |
| { if ( p->dmode == DM_DATA )
 | |
|   { empty_ocharbuf(p->cdata);
 | |
|     p->blank_cdata = TRUE;
 | |
|     p->cdata_must_be_empty = FALSE;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| cb_cdata(dtd_parser *p, ocharbuf *buf, int offset, int size)
 | |
| { if ( p->on_data )
 | |
|     (*p->on_data)(p, EC_CDATA, size, buf->data.w+offset);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| emit_cdata(dtd_parser *p, int last)
 | |
| { dtd *dtd = p->dtd;
 | |
|   locbuf locsafe;
 | |
|   ocharbuf *cdata = p->cdata;
 | |
|   int offset = 0;
 | |
|   int size = cdata->size;
 | |
| 
 | |
|   if ( size == 0 )
 | |
|     return TRUE;			/* empty or done */
 | |
| 
 | |
|   push_location(p, &locsafe);
 | |
|   sgml_cplocation(&p->location, &p->startloc);   /* start of markup */
 | |
|   sgml_cplocation(&p->startloc, &p->startcdata); /* real start of CDATA */
 | |
| 
 | |
|   if ( p->environments )
 | |
|   { switch(p->environments->space_mode)
 | |
|     { case SP_SGML:
 | |
|       case SP_DEFAULT:
 | |
| 	if ( p->first )
 | |
| 	{ wint_t c = fetch_ocharbuf(cdata, offset);
 | |
| 
 | |
| 	  if ( HasClass(dtd, c, CH_RE) )
 | |
| 	  { inc_location(&p->startloc, c);
 | |
| 	    offset++;
 | |
| 	    size--;
 | |
| 	    c = fetch_ocharbuf(cdata, offset);
 | |
| 	  }
 | |
| 
 | |
| 	  if ( HasClass(dtd, c, CH_RS) )
 | |
| 	  { inc_location(&p->startloc, c);
 | |
| 	    offset++;
 | |
| 	    size--;
 | |
| 	  }
 | |
| 	}
 | |
| 	if ( last && size > 0 )
 | |
| 	{ wint_t c = fetch_ocharbuf(cdata, offset+size-1);
 | |
| 
 | |
| 	  if ( HasClass(dtd, c, CH_RS) )
 | |
| 	  { dec_location(&p->location, c);
 | |
| 	    size--;
 | |
| 	    poke_ocharbuf(cdata, offset+size, '\0');
 | |
| 	    if ( size > 0 )
 | |
| 	      c = fetch_ocharbuf(cdata, offset+size-1);
 | |
| 	    else
 | |
| 	      c = 0;			/* HasClass(CH_RE) must fail */
 | |
| 	  }
 | |
| 	  if ( HasClass(dtd, c, CH_RE) )
 | |
| 	  { dec_location(&p->location, c);
 | |
| 	    size--;
 | |
| 	    poke_ocharbuf(cdata, offset+size, '\0');
 | |
| 	  }
 | |
| 	}
 | |
| 	if ( p->environments->space_mode == SP_DEFAULT )
 | |
| 	{ int o = 0;
 | |
| 	  int i;
 | |
| 
 | |
| 	  for(i=0; i<size; i++)
 | |
| 	  { wint_t c = fetch_ocharbuf(cdata, offset+i);
 | |
| 
 | |
| 	    if ( HasClass(dtd, c, CH_BLANK) )
 | |
| 	    { for(i++; i<size; i++)
 | |
| 	      { wint_t c = fetch_ocharbuf(cdata, offset+i);
 | |
| 
 | |
| 		if ( !HasClass(dtd, c, CH_BLANK) )
 | |
| 		  break;
 | |
| 	      }
 | |
| 	      i--;
 | |
| 	      poke_ocharbuf(cdata, o++, ' ');
 | |
| 	      continue;
 | |
| 	    }
 | |
| 	    poke_ocharbuf(cdata, o++, c);
 | |
| 	  }
 | |
| 	  poke_ocharbuf(cdata, o, '\0');
 | |
| 	  offset = 0;			/* wrote new output from offset=0 */
 | |
| 	  size = o;
 | |
| 	}
 | |
| 	break;
 | |
|       case SP_REMOVE:
 | |
|       { int o = 0;
 | |
| 	int i;
 | |
| 	int end = 0;
 | |
| 
 | |
| 	for(i=0; i<size; i++)
 | |
| 	{ wint_t c = fetch_ocharbuf(cdata, offset+i);
 | |
| 
 | |
| 	  if ( HasClass(dtd, c, CH_BLANK) )
 | |
| 	    inc_location(&p->startloc, c);
 | |
| 	  else
 | |
| 	    break;
 | |
| 	}
 | |
| 
 | |
| 	if ( i<size )
 | |
| 	{ for(; i<size; i++)
 | |
| 	  { wint_t c = fetch_ocharbuf(cdata, offset+i);
 | |
| 
 | |
| 	    if ( HasClass(dtd, c, CH_BLANK) )
 | |
| 	    { i++;
 | |
| 
 | |
| 	      while(i<size && HasClass(dtd,
 | |
| 				       (wint_t)fetch_ocharbuf(cdata, offset+i),
 | |
| 				       CH_BLANK))
 | |
| 		i++;
 | |
| 	      i--;
 | |
| 	      poke_ocharbuf(cdata, o++, ' ');
 | |
| 	      continue;
 | |
| 	    }
 | |
| 	    poke_ocharbuf(cdata, o++, c);
 | |
| 	    end = o;
 | |
| 	  }
 | |
| 	}
 | |
| 					/* TBD: adjust end */
 | |
| 	poke_ocharbuf(cdata, end, '\0');
 | |
| 	size = end;
 | |
| 	break;
 | |
|       }
 | |
|       case SP_PRESERVE:
 | |
| 	break;
 | |
|       case SP_INHERIT:
 | |
| 	assert(0);
 | |
| 	return FALSE;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( size == 0 )
 | |
|   { pop_location(p, &locsafe);
 | |
|     empty_cdata(p);
 | |
| 
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   assert(size > 0);
 | |
| 
 | |
|   if ( !p->blank_cdata )
 | |
|   { if ( p->cdata_must_be_empty )
 | |
|     { gripe(p, ERC_NOT_ALLOWED_PCDATA, p->cdata); /* TBD: now passes buffer! */
 | |
|     }
 | |
|     cb_cdata(p, cdata, offset, size);
 | |
|   } else if ( p->environments )
 | |
|   { sgml_environment *env = p->environments;
 | |
|     dtd_state *new;
 | |
| 
 | |
| 				/* If an element is not in the DTD we must */
 | |
| 				/* assume mixed content and emit spaces */
 | |
| 
 | |
|     if ( (new=make_dtd_transition(env->state, CDATA_ELEMENT)) )
 | |
|     { env->state = new;
 | |
|       cb_cdata(p, cdata, offset, size);
 | |
|     } else if ( env->element->undefined &&
 | |
| 		p->environments->space_mode == SP_PRESERVE )
 | |
|     { cb_cdata(p, cdata, offset, size);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   pop_location(p, &locsafe);
 | |
| 
 | |
|   empty_cdata(p);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| prepare_cdata(dtd_parser *p)
 | |
| { if ( p->cdata->size == 0 )
 | |
|     return TRUE;
 | |
| 
 | |
|   terminate_ocharbuf(p->cdata);
 | |
| 
 | |
|   if ( p->mark_state == MS_INCLUDE )
 | |
|   { dtd *dtd = p->dtd;
 | |
| 
 | |
|     if ( p->environments )		/* needed for <img> <img> */
 | |
|     { dtd_element *e = p->environments->element;
 | |
| 
 | |
|       if ( e->structure && e->structure->type == C_EMPTY && !e->undefined )
 | |
| 	close_element(p, e, FALSE);
 | |
|     }
 | |
| 
 | |
|     if ( p->blank_cdata == TRUE )
 | |
|     { int blank = TRUE;
 | |
|       int i;
 | |
| 
 | |
|       for(i=0; i<p->cdata->size; i++)
 | |
|       { wint_t c = fetch_ocharbuf(p->cdata, i);
 | |
| 
 | |
| 	if ( !HasClass(dtd, c, CH_BLANK) )
 | |
| 	{ blank = FALSE;
 | |
| 	  break;
 | |
| 	}
 | |
|       }
 | |
| 
 | |
|       p->blank_cdata = blank;
 | |
|       if ( !blank )
 | |
|       { if ( p->dmode == DM_DTD )
 | |
| 	  gripe(p, ERC_SYNTAX_ERROR, L"CDATA in DTD", p->cdata->data);
 | |
| 	else
 | |
| 	  open_element(p, CDATA_ELEMENT, TRUE);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_cdata(dtd_parser *p, int last)
 | |
| { prepare_cdata(p);
 | |
| 
 | |
|   return emit_cdata(p, last);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| process_entity(dtd_parser *p, const ichar *name)
 | |
| { if ( name[0] == '#' )			/* #charcode: character entity */
 | |
|   { int v = char_entity_value(name);
 | |
| 
 | |
|     if ( v <= 0 )
 | |
|       return gripe(p, ERC_SYNTAX_ERROR, L"Bad character entity", name);
 | |
| 
 | |
|     add_ocharbuf(p->cdata, v);
 | |
|   } else
 | |
|   { dtd_symbol *id;
 | |
|     dtd_entity *e;
 | |
|     dtd *dtd = p->dtd;
 | |
|     int len;
 | |
|     const ichar *text;
 | |
|     const ichar *s;
 | |
|     int   chr;
 | |
|     ichar *file;
 | |
| 
 | |
|     if ( !(id=dtd_find_entity_symbol(dtd, name)) ||
 | |
| 	 !(e=id->entity) )
 | |
|     { if ( dtd->default_entity )
 | |
| 	e = dtd->default_entity;
 | |
|       else
 | |
| 	return gripe(p, ERC_EXISTENCE, L"entity", name);
 | |
|     }
 | |
| 
 | |
|     if ( !e->value &&
 | |
| 	 e->content == EC_SGML &&
 | |
| 	 (file=entity_file(p->dtd, e)) )
 | |
|     { int rc;
 | |
| 
 | |
|       empty_icharbuf(p->buffer);		/* dubious */
 | |
|       rc = sgml_process_file(p, file, SGML_SUB_DOCUMENT);
 | |
|       sgml_free(file);
 | |
|       return rc;
 | |
|     }
 | |
| 
 | |
|     if ( !(text = entity_value(p, e, &len)) )
 | |
|       return gripe(p, ERC_NO_VALUE, e->name->name);
 | |
| 
 | |
|     switch ( e->content )
 | |
|     { case EC_SGML:
 | |
|       case EC_CDATA:
 | |
| 	if ( (s=isee_character_entity(dtd, text, &chr)) && *s == '\0' )
 | |
| 	{ if ( chr == 0 )
 | |
| 	    return gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", text);
 | |
| 
 | |
| 	  if ( p->blank_cdata == TRUE &&
 | |
| 	       !HasClass(dtd, (wint_t)chr, CH_BLANK) )
 | |
| 	  { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
 | |
| 	    p->blank_cdata = FALSE;
 | |
| 	  }
 | |
| 
 | |
| 	  add_ocharbuf(p->cdata, chr);
 | |
| 	  return TRUE;
 | |
| 	}
 | |
| 	if ( e->content == EC_SGML )
 | |
| 	{ locbuf oldloc;
 | |
| 	  int decode = p->utf8_decode;
 | |
| 
 | |
| 	  push_location(p, &oldloc);
 | |
| 	  p->utf8_decode = FALSE;
 | |
| 	  set_src_dtd_parser(p, IN_ENTITY, e->name->name);
 | |
| 	  empty_icharbuf(p->buffer);		/* dubious */
 | |
| 	  for(s=text; *s; s++)
 | |
| 	    putchar_dtd_parser(p, *s);
 | |
| 	  p->utf8_decode = decode;
 | |
| 	  pop_location(p, &oldloc);
 | |
| 	} else if ( *text )
 | |
| 	{ const ichar *o;
 | |
| 
 | |
| 	  if ( p->blank_cdata == TRUE )
 | |
| 	  { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
 | |
| 	    p->blank_cdata = FALSE;
 | |
| 	  }
 | |
| 
 | |
| 	  for(o=text; *o; o++)
 | |
| 	    add_ocharbuf(p->cdata, *o);
 | |
| 	}
 | |
| 	break;
 | |
|       case EC_SDATA:
 | |
|       case EC_NDATA:
 | |
| 	process_cdata(p, FALSE);
 | |
| 	if ( p->on_data )
 | |
| 	  (*p->on_data)(p, e->content, len, text);
 | |
| 	break;
 | |
|       case EC_PI:
 | |
| 	process_cdata(p, FALSE);
 | |
| 	if ( p->on_pi )
 | |
| 	  (*p->on_pi)(p, text);
 | |
|       case EC_STARTTAG:
 | |
| #if 0
 | |
| 	prepare_cdata(p);
 | |
| 	process_begin_element(p, text);
 | |
| #endif
 | |
| 	break;
 | |
|       case EC_ENDTAG:
 | |
| #if 0
 | |
| 	prepare_cdata(p);
 | |
| 	process_end_element(p, text);
 | |
| #endif
 | |
| 	break;
 | |
|     }
 | |
| 
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Deal with end of input.  We should give a proper error message depending
 | |
| on the state and the start-location of the error.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| int
 | |
| end_document_dtd_parser(dtd_parser *p)
 | |
| { int rval;
 | |
| 
 | |
|   switch(p->state)
 | |
|   { case S_RCDATA:
 | |
|     case S_CDATA:
 | |
|     case S_PCDATA:
 | |
|       rval = TRUE;
 | |
|       break;
 | |
|     case S_CMT:
 | |
|     case S_CMT1:
 | |
|     case S_CMTE0:
 | |
|     case S_CMTE1:
 | |
|     case S_DECLCMT0:
 | |
|     case S_DECLCMT:
 | |
|     case S_DECLCMTE0:
 | |
|       rval = gripe(p, ERC_SYNTAX_ERROR,
 | |
| 		   L"Unexpected end-of-file in comment", L"");
 | |
|       break;
 | |
|     case S_ECDATA1:
 | |
|     case S_ECDATA2:
 | |
|     case S_EMSC1:
 | |
|     case S_EMSC2:
 | |
|     case S_DECL0:
 | |
|     case S_DECL:
 | |
|     case S_MDECL0:
 | |
|     case S_STRING:
 | |
|     case S_CMTO:
 | |
|     case S_GROUP:
 | |
|     case S_PENT:
 | |
|     case S_ENT:
 | |
|     case S_ENT0:
 | |
|       rval = gripe(p, ERC_SYNTAX_ERROR,
 | |
| 		   L"Unexpected end-of-file", L"");
 | |
|       break;
 | |
| #ifdef UTF8
 | |
|     case S_UTF8:
 | |
|       rval = gripe(p, ERC_SYNTAX_ERROR,
 | |
| 		   L"Unexpected end-of-file in UTF-8 sequence", L"");
 | |
|       break;
 | |
| #endif
 | |
|     case S_MSCDATA:
 | |
|     case S_EMSCDATA1:
 | |
|     case S_EMSCDATA2:
 | |
|       rval = gripe(p, ERC_SYNTAX_ERROR,
 | |
| 		   L"Unexpected end-of-file in CDATA marked section", L"");
 | |
|       break;
 | |
|     case S_PI:
 | |
|     case S_PI2:
 | |
|       rval = gripe(p, ERC_SYNTAX_ERROR,
 | |
| 		   L"Unexpected end-of-file in processing instruction", L"");
 | |
|       break;
 | |
|     default:
 | |
|       rval = gripe(p, ERC_SYNTAX_ERROR,
 | |
| 		   L"Unexpected end-of-file in ???");
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   if ( p->dmode == DM_DATA )
 | |
|   { sgml_environment *env;
 | |
| 
 | |
|     if ( p->cdata->size > 0 &&
 | |
| 	 fetch_ocharbuf(p->cdata, p->cdata->size-1) == CR )
 | |
|       del_ocharbuf(p->cdata);
 | |
| 
 | |
|     process_cdata(p, TRUE);
 | |
| 
 | |
|     if ( (env=p->environments) )
 | |
|     { dtd_element *e;
 | |
| 
 | |
|       while(env->parent)
 | |
| 	env = env->parent;
 | |
| 
 | |
|       pop_to(p, env, CDATA_ELEMENT);
 | |
|       e = env->element;
 | |
|       if ( e->structure && !e->structure->omit_close )
 | |
| 	gripe(p, ERC_OMITTED_CLOSE, e->name->name);
 | |
|       close_element(p, e, FALSE);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return rval;
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| begin_document_dtd_parser(dtd_parser *p)
 | |
| { init_decoding(p);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| void
 | |
| reset_document_dtd_parser(dtd_parser *p)
 | |
| { if ( p->environments )
 | |
|   { sgml_environment *env, *parent;
 | |
| 
 | |
|     for(env = p->environments; env; env=parent)
 | |
|     { parent = env->parent;
 | |
| 
 | |
|       free_environment(env);
 | |
|     }
 | |
| 
 | |
|     p->environments = NULL;
 | |
|   }
 | |
| 
 | |
|   while(p->marked)
 | |
|     pop_marked_section(p);
 | |
| 
 | |
|   empty_icharbuf(p->buffer);
 | |
|   empty_ocharbuf(p->cdata);
 | |
| 
 | |
|   p->mark_state	   = MS_INCLUDE;
 | |
|   p->state	   = S_PCDATA;
 | |
|   p->grouplevel	   = 0;
 | |
|   p->blank_cdata   = TRUE;
 | |
|   p->event_class   = EV_EXPLICIT;
 | |
|   p->dmode	   = DM_DATA;
 | |
| 
 | |
|   begin_document_dtd_parser(p);
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Set the UTF-8 state
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
#ifdef UTF8
/* Enter the S_UTF8 state after reading the first byte of a multi-byte
   sequence.  Starting at bit 0x20, each further set bit in `chr' adds
   one to the number of bytes still expected; the bits below the first
   clear one form the initial value of the character.  The current state
   is saved so putchar_dtd_parser() can return to it when the sequence
   is complete.
*/

static void
process_utf8(dtd_parser *p, int chr)
{ int expected = 1;
  int bit = 0x20;

  while( chr & bit )
  { expected++;
    bit >>= 1;
  }

  p->utf8_saved_state = p->state;	/* state to return to */
  p->state            = S_UTF8;
  p->utf8_char        = chr & (bit-1);	/* e.g. 0x20 --> mask 0x1f */
  p->utf8_left        = expected;
}
#endif
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| add_cdata() adds a character to the output  data. It also maps \r\n onto
 | |
| a single \n for Windows newline conventions.
 | |
| 
 | |
| There is a problem here in shortref  handling. We open the CDATA_ELEMENT
 | |
| as soon as we find a character as   this may open other elements through
 | |
| omitted tags and thus install a new shortref map.
 | |
| 
 | |
| If, at a later stage, all CDATA read so far turns out to be a shortref we
 | |
| have  incorrectly  opened   the   CDATA_ELEMENT.    As   `undoing'   the
 | |
| open_element() is not an option (it may  already have caused `events' on
 | |
| omitted tags) we are in trouble.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static void
 | |
| add_cdata(dtd_parser *p, int chr)
 | |
| { if ( p->mark_state == MS_INCLUDE )
 | |
|   { ocharbuf *buf = p->cdata;
 | |
| 
 | |
|     if ( p->blank_cdata == TRUE &&
 | |
| 	 !HasClass(p->dtd, (wint_t)chr, CH_BLANK) )
 | |
|     { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
 | |
|       p->blank_cdata = FALSE;
 | |
|     }
 | |
| 
 | |
|     if ( chr == '\n' )			/* insert missing CR */
 | |
|     { int sz;
 | |
| 
 | |
|       if ( (sz=buf->size) == 0 ||
 | |
| 	   fetch_ocharbuf(buf, sz-1) != CR )
 | |
| 	add_cdata(p, CR);
 | |
|     }
 | |
| 
 | |
|     add_ocharbuf(buf, chr);
 | |
| 
 | |
|     if ( p->map &&
 | |
| 	 chr <= 0xff && p->map->ends[chr] &&
 | |
| 	 match_shortref(p) )
 | |
|       return;
 | |
| 
 | |
|     if ( chr == '\n' )			/* dubious.  Whould we do that */
 | |
|     { int sz;				/* here or in space-handling? */
 | |
| 
 | |
|       if ( (sz=buf->size) > 1 &&
 | |
| 	   fetch_ocharbuf(buf, sz-1) == LF &&
 | |
| 	   fetch_ocharbuf(buf, sz-2) == CR )
 | |
|       { poke_ocharbuf(buf, sz-2, LF);
 | |
| 	buf->size--;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| add_verbatim_cdata(dtd_parser *p, int chr)
 | |
| { if ( p->mark_state != MS_IGNORE )
 | |
|   { ocharbuf *buf = p->cdata;
 | |
| 
 | |
|     if ( p->blank_cdata == TRUE &&
 | |
| 	 !HasClass(p->dtd, (wint_t)chr, CH_BLANK) )
 | |
|     { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
 | |
|       p->blank_cdata = FALSE;
 | |
|     }
 | |
| 
 | |
|     if ( chr == '\n' && buf->size > 0 &&
 | |
| 	 fetch_ocharbuf(buf, buf->size-1) == '\r' )
 | |
|       buf->size--;
 | |
| 
 | |
|     add_ocharbuf(buf, chr);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /* We discovered illegal markup and now process it as normal CDATA
 | |
| */
 | |
| 
 | |
| static void
 | |
| recover_parser(dtd_parser *p)
 | |
| { const ichar *s;
 | |
| 
 | |
|   terminate_icharbuf(p->buffer);
 | |
|   add_cdata(p, p->saved);
 | |
|   for(s=p->buffer->data; *s; s++)
 | |
|     add_cdata(p, *s);
 | |
|   p->state = S_PCDATA;
 | |
| }
 | |
| 
 | |
| 
 | |
| static inline void
 | |
| setlocation(dtd_srcloc *d, dtd_srcloc *loc, int line, int lpos)
 | |
| { d->line    = line;
 | |
|   d->linepos = lpos;
 | |
|   d->charpos = loc->charpos - 1;
 | |
|   d->type    = loc->type;
 | |
|   d->name    = loc->name;
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| putchar_dtd_parser(dtd_parser *p, int chr)
 | |
| { dtd *dtd = p->dtd;
 | |
|   const ichar *f = dtd->charfunc->func;
 | |
|   int line = p->location.line;
 | |
|   int lpos = p->location.linepos;
 | |
| 
 | |
|   p->location.charpos++;		/* TBD: actually `bytepos' */
 | |
| 
 | |
| #ifdef UTF8
 | |
|   if ( p->state == S_UTF8 )
 | |
|   { if ( (chr & 0xc0) != 0x80 )	/* TBD: recover */
 | |
|       gripe(p, ERC_SYNTAX_ERROR, L"Bad UTF-8 sequence", L"");
 | |
|     p->utf8_char <<= 6;
 | |
|     p->utf8_char |= (chr & ~0xc0);
 | |
|     if ( --p->utf8_left == 0 )
 | |
|     { chr = p->utf8_char;
 | |
|       p->state = p->utf8_saved_state;
 | |
|     } else
 | |
|     { return TRUE;
 | |
|     }
 | |
|   } else if ( ISUTF8_MB(chr) && p->utf8_decode )
 | |
|   { process_utf8(p, chr);
 | |
|     return TRUE;
 | |
|   }
 | |
| #endif
 | |
| 
 | |
|   if ( f[CF_RS] == chr )
 | |
|   { p->location.line++;
 | |
|     p->location.linepos = 0;
 | |
|   } else
 | |
|   { if ( f[CF_RE] == chr )
 | |
|       p->location.linepos = 0;
 | |
|     else
 | |
|       p->location.linepos++;
 | |
|   }
 | |
| 
 | |
| reprocess:
 | |
|   switch(p->state)
 | |
|   { case S_PCDATA:
 | |
|     { if ( f[CF_MDO1] == chr )		/* < */
 | |
|       { setlocation(&p->startloc, &p->location, line, lpos);
 | |
| 	p->state = S_DECL0;
 | |
| 	empty_icharbuf(p->buffer);
 | |
| 	return TRUE;
 | |
|       }
 | |
|       if ( p->dmode == DM_DTD )
 | |
|       { if ( f[CF_PERO] == chr )	/* % */
 | |
| 	{ setlocation(&p->startloc, &p->location, line, lpos);
 | |
| 	  p->state = S_PENT;
 | |
| 	  return TRUE;
 | |
| 	}
 | |
|       } else
 | |
|       { if ( f[CF_ERO] == chr )		/* & */
 | |
| 	{ setlocation(&p->startloc, &p->location, line, lpos);
 | |
| 	  p->state = S_ENT0;
 | |
| 	  return TRUE;
 | |
| 	}
 | |
|       }
 | |
| 
 | |
|       if ( p->marked && f[CF_DSC] == chr ) /* ] in marked section */
 | |
|       { empty_icharbuf(p->buffer);
 | |
| 	p->state = S_EMSC1;
 | |
| 	p->saved = chr;			/* for recovery */
 | |
| 	return TRUE;
 | |
|       }
 | |
| 
 | |
|       if ( p->waiting_for_net && f[CF_ETAGO2] == chr ) /* shorttag */
 | |
|       { setlocation(&p->startloc, &p->location, line, lpos);
 | |
| 	process_net(p);
 | |
| 	return TRUE;
 | |
|       }
 | |
| 
 | |
| 					/* Real character data */
 | |
|       if ( p->cdata->size == 0 )
 | |
|         setlocation(&p->startcdata, &p->location, line, lpos);
 | |
| 
 | |
|       add_cdata(p, chr);
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_ECDATA2:			/* Seen </ in CDATA/RCDATA */
 | |
|     { if ( f[CF_MDC] == chr &&
 | |
| 	   p->etaglen == p->buffer->size &&
 | |
| 	   istrncaseeq(p->buffer->data, p->etag, p->etaglen) )
 | |
|       { p->cdata->size -= p->etaglen+2;	/* 2 for </ */
 | |
| 	terminate_ocharbuf(p->cdata);
 | |
| 	terminate_icharbuf(p->buffer);
 | |
| 	if ( p->mark_state == MS_INCLUDE )
 | |
| 	{ process_cdata(p, TRUE);
 | |
| 	  process_end_element(p, p->buffer->data);
 | |
| 	  empty_cdata(p);
 | |
| 	}
 | |
| 	empty_icharbuf(p->buffer);
 | |
| 	p->cdata_state = p->state = S_PCDATA;
 | |
|       } else
 | |
|       { add_verbatim_cdata(p, chr);
 | |
| 	if ( p->etaglen < p->buffer->size ||
 | |
| 	     !HasClass(dtd, (wint_t)chr, CH_NAME))
 | |
| 	{ empty_icharbuf(p->buffer);	/* mismatch */
 | |
| 	  p->state = p->cdata_state;
 | |
| 	} else
 | |
| 	  add_icharbuf(p->buffer, chr);
 | |
|       }
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_ECDATA1:			/* seen < in CDATA */
 | |
|     { add_verbatim_cdata(p, chr);
 | |
|       if ( f[CF_ETAGO2] == chr )	/* / */
 | |
|       { empty_icharbuf(p->buffer);
 | |
| 	p->state = S_ECDATA2;
 | |
|       } else if ( f[CF_ETAGO1] != chr )	/* <: do not change state */
 | |
| 	p->state = p->cdata_state;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_RCDATA:
 | |
|     { if ( f[CF_ERO] == chr ) /* & */
 | |
|       { setlocation(&p->startloc, &p->location, line, lpos);
 | |
| 	p->state = S_ENT0;
 | |
| 	return TRUE;
 | |
|       }
 | |
|       /*FALLTHROUGH*/
 | |
|     }
 | |
|     case S_CDATA:
 | |
|     { add_verbatim_cdata(p, chr);
 | |
| 
 | |
|       if ( f[CF_MDO1] == chr )		/* < */
 | |
|       { setlocation(&p->startloc, &p->location, line, lpos);
 | |
| 	p->state = S_ECDATA1;
 | |
|       }
 | |
| 
 | |
| 					/* / in CDATA shorttag element */
 | |
|       if ( p->waiting_for_net && f[CF_ETAGO2] == chr )
 | |
|       { setlocation(&p->startloc, &p->location, line, lpos);
 | |
| 	p->cdata->size--;
 | |
| 	terminate_ocharbuf(p->cdata);
 | |
| 	terminate_icharbuf(p->buffer);
 | |
| 	if ( p->mark_state == MS_INCLUDE )
 | |
| 	{ process_cdata(p, TRUE);
 | |
| 	  process_net(p);
 | |
| 	  empty_cdata(p);
 | |
| 	}
 | |
| 	empty_icharbuf(p->buffer);
 | |
| 	p->cdata_state = p->state = S_PCDATA;
 | |
|       }
 | |
| 
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_MSCDATA:
 | |
|     { add_verbatim_cdata(p, chr);
 | |
|       if ( f[CF_DSC] == chr )		/* ] */
 | |
|         p->state = S_EMSCDATA1;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_EMSCDATA1:
 | |
|     { add_verbatim_cdata(p, chr);
 | |
|       if ( f[CF_DSC] == chr )		/* ]] */
 | |
|         p->state = S_EMSCDATA2;
 | |
|       else
 | |
|         p->state = S_MSCDATA;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_EMSCDATA2:
 | |
|     { add_verbatim_cdata(p, chr);
 | |
|       if ( f[CF_MDC] == chr )		/* ]]> */
 | |
|       { p->cdata->size -= 3;		/* Delete chars for ]] */
 | |
| 	pop_marked_section(p);
 | |
| 	p->state = S_PCDATA;
 | |
|       } else if ( f[CF_DSC] != chr )	/* if ]]], stay in this state */
 | |
|         p->state = S_MSCDATA;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_EMSC1:
 | |
|     { if ( f[CF_DSC] == chr )		/* ]] in marked section */
 | |
|       { p->state = S_EMSC2;
 | |
| 	return TRUE;
 | |
|       } else
 | |
|       { add_icharbuf(p->buffer, chr);
 | |
| 	recover_parser(p);
 | |
| 	return TRUE;
 | |
|       }
 | |
|     }
 | |
|     case S_EMSC2:
 | |
|     { if ( f[CF_MDC] == chr )		/* ]]> in marked section */
 | |
|       { pop_marked_section(p);
 | |
| 	p->state = S_PCDATA;
 | |
| 	return TRUE;
 | |
|       } else
 | |
|       { add_icharbuf(p->buffer, chr);
 | |
| 	recover_parser(p);
 | |
| 	return TRUE;
 | |
|       }
 | |
|     }
 | |
|     case S_PENT:			/* %parameter entity; */
 | |
|     { if ( f[CF_ERC] == chr )
 | |
|       { p->state = S_PCDATA;
 | |
| 	terminate_icharbuf(p->buffer);
 | |
| 	if ( p->mark_state == MS_INCLUDE )
 | |
| 	{ process_include(p, p->buffer->data);
 | |
| 	}
 | |
| 	empty_icharbuf(p->buffer);
 | |
| 	return TRUE;
 | |
|       }
 | |
|       if ( HasClass(dtd, (wint_t)chr, CH_NAME) )
 | |
|       { add_icharbuf(p->buffer, chr);
 | |
| 	return TRUE;
 | |
|       }
 | |
| 
 | |
|       terminate_icharbuf(p->buffer);
 | |
|       return gripe(p, ERC_SYNTAX_ERROR,
 | |
| 		   L"Illegal parameter entity", p->buffer->data);
 | |
|     }
 | |
|     case S_ENT0:			/* Seen & */
 | |
|     { if ( chr == '#' || HasClass(dtd, (wint_t)chr, CH_NAME) )
 | |
|       { empty_icharbuf(p->buffer);
 | |
| 	add_icharbuf(p->buffer, chr);
 | |
| 	p->state = S_ENT;
 | |
|       } else
 | |
|       {	if ( dtd->dialect != DL_SGML )
 | |
| 	{ wchar_t buf[3];
 | |
| 	  buf[0] = '&';
 | |
| 	  buf[1] = chr;
 | |
| 	  buf[2] = '\0';
 | |
| 	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal entity", buf);
 | |
| 	}
 | |
| 
 | |
| 	add_cdata(p, f[CF_ERO]);
 | |
| 	p->state = p->cdata_state;
 | |
| 	goto reprocess;
 | |
|       }
 | |
| 
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_ENT:				/* &entity; */
 | |
|     { if ( HasClass(dtd, (wint_t)chr, CH_NAME) )
 | |
|       { add_icharbuf(p->buffer, chr);
 | |
| 	return TRUE;
 | |
|       }
 | |
| 
 | |
|       terminate_icharbuf(p->buffer);
 | |
|       p->state = p->cdata_state;
 | |
|       if ( p->mark_state == MS_INCLUDE )
 | |
|       { process_entity(p, p->buffer->data);
 | |
|       }
 | |
|       empty_icharbuf(p->buffer);
 | |
| 
 | |
|       if ( chr == CR )
 | |
| 	p->state = S_ENTCR;
 | |
|       else if ( f[CF_ERC] != chr && chr != '\n' )
 | |
| 	goto reprocess;
 | |
| 
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_ENTCR:			/* seen &entCR, eat the LF */
 | |
|     { p->state = p->cdata_state;
 | |
|       if ( chr != LF )
 | |
| 	goto reprocess;
 | |
| 
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_DECL0:			/* Seen < */
 | |
|     { if ( f[CF_ETAGO2] == chr )	/* </ */
 | |
|       { add_icharbuf(p->buffer, chr);
 | |
| 	p->state = S_DECL;
 | |
|       } else if ( HasClass(dtd, (wint_t)chr, CH_NAME) ) /* <letter */
 | |
|       { add_icharbuf(p->buffer, chr);
 | |
| 	p->state = S_DECL;
 | |
|       } else if ( f[CF_MDO2] == chr )	/* <! */
 | |
|       { p->state = S_MDECL0;
 | |
|       } else if ( f[CF_PRO2] == chr )	/* <? */
 | |
|       { p->state = S_PI;
 | |
|       } else				/* recover */
 | |
|       { add_cdata(p, f[CF_MDO1]);
 | |
| 	add_cdata(p, chr);
 | |
| 	p->state = S_PCDATA;
 | |
|       }
 | |
| 
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_MDECL0:			/* Seen <! */
 | |
|     { if ( f[CF_CMT] == chr )		/* <!- */
 | |
|       { p->state = S_CMTO;
 | |
| 	return TRUE;
 | |
|       }
 | |
|       add_icharbuf(p->buffer, f[CF_MDO2]);
 | |
|       add_icharbuf(p->buffer, chr);
 | |
|       p->state = S_DECL;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_DECL:			/* <...> */
 | |
|     { if ( f[CF_MDC] == chr )		/* > */
 | |
|       { prepare_cdata(p);
 | |
| 	p->state = S_PCDATA;
 | |
| 	terminate_icharbuf(p->buffer);
 | |
| 	if ( p->mark_state == MS_INCLUDE )
 | |
| 	{ process_declaration(p, p->buffer->data);
 | |
| 	}
 | |
| 	empty_icharbuf(p->buffer);
 | |
| 	return TRUE;
 | |
|       }
 | |
|       if ( dtd->shorttag && f[CF_ETAGO2] == chr && p->buffer->size > 0 )
 | |
|       { prepare_cdata(p);
 | |
| 	p->state = S_PCDATA;
 | |
| 	terminate_icharbuf(p->buffer);
 | |
| 	if ( p->mark_state == MS_INCLUDE )
 | |
| 	{ WITH_CLASS(p, EV_SHORTTAG,
 | |
| 		     process_declaration(p, p->buffer->data));
 | |
| 	}
 | |
| 	empty_icharbuf(p->buffer);
 | |
| 	p->waiting_for_net = TRUE;
 | |
| 	return TRUE;
 | |
|       }
 | |
| 
 | |
|       add_icharbuf(p->buffer, chr);
 | |
| 
 | |
|       if ( f[CF_LIT] == chr )		/* " */
 | |
|       { p->state = S_STRING;
 | |
| 	p->saved = chr;
 | |
| 	p->lit_saved_state = S_DECL;
 | |
|       } else if ( f[CF_LITA] == chr )	/* ' */
 | |
|       { p->state = S_STRING;
 | |
| 	p->saved = chr;
 | |
| 	p->lit_saved_state = S_DECL;
 | |
| 	return TRUE;
 | |
|       } else if ( f[CF_CMT] == chr &&	/* - */
 | |
| 		  p->buffer->data[0] == f[CF_MDO2] ) /* Started <! */
 | |
|       { p->state = S_DECLCMT0;
 | |
|       } else if ( f[CF_DSO] == chr )	/* [: marked section */
 | |
|       { terminate_icharbuf(p->buffer);
 | |
| 
 | |
| 	process_marked_section(p);
 | |
|       }
 | |
| 
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_DECLCMT0:			/* <...- */
 | |
|     { if ( f[CF_CMT] == chr )
 | |
|       { p->buffer->size--;
 | |
| 	p->state = S_DECLCMT;
 | |
|       } else
 | |
|       { add_icharbuf(p->buffer, chr);
 | |
| 	p->state = S_DECL;
 | |
|       }
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_DECLCMT:			/* <...--.. */
 | |
|     { if ( f[CF_CMT] == chr )
 | |
| 	p->state = S_DECLCMTE0;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_DECLCMTE0:			/* <...--..- */
 | |
|     { if ( f[CF_CMT] == chr )
 | |
| 	p->state = S_DECL;
 | |
|       else
 | |
| 	p->state = S_DECLCMT;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_PI:
 | |
|     { add_icharbuf(p->buffer, chr);
 | |
|       if ( f[CF_PRO2] == chr )		/* <? ... ? */
 | |
| 	p->state = S_PI2;
 | |
|       if ( f[CF_PRC] == chr )		/* no ? is ok too (XML/SGML) */
 | |
| 	goto pi;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_PI2:
 | |
|     { if ( f[CF_PRC] == chr )
 | |
|       { pi:
 | |
| 	process_cdata(p, FALSE);
 | |
| 	p->state = S_PCDATA;
 | |
| 	p->buffer->size--;
 | |
| 	terminate_icharbuf(p->buffer);
 | |
| 	if ( p->mark_state == MS_INCLUDE )
 | |
| 	{ process_pi(p, p->buffer->data);
 | |
| 	}
 | |
| 	empty_icharbuf(p->buffer);
 | |
| 	return TRUE;
 | |
|       }
 | |
|       add_icharbuf(p->buffer, chr);
 | |
|       p->state = S_PI;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_STRING:
 | |
|     { add_icharbuf(p->buffer, chr);
 | |
|       if ( chr == p->saved )
 | |
| 	p->state = p->lit_saved_state;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_CMTO:			/* Seen <!- */
 | |
|     { if ( f[CF_CMT] == chr )		/* - */
 | |
|       { p->state = S_CMT1;
 | |
| 	return TRUE;
 | |
|       } else
 | |
|       { add_cdata(p, f[CF_MDO1]);
 | |
| 	add_cdata(p, f[CF_MDO2]);
 | |
| 	add_cdata(p, f[CF_CMT]);
 | |
| 	add_cdata(p, chr);
 | |
| 	p->state = S_PCDATA;
 | |
| 	return TRUE;
 | |
|       }
 | |
|     }
 | |
|     case S_CMT1:			/* <!-- */
 | |
|     { if ( f[CF_CMT] == chr )		/* <!--- */
 | |
|       { if ( dtd->dialect != DL_SGML )
 | |
| 	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal comment", L"<!---");
 | |
|       }
 | |
|       p->state = S_CMT;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_CMT:
 | |
|     { if ( f[CF_CMT] == chr )
 | |
| 	p->state = S_CMTE0;		/* <!--...- */
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_CMTE0:			/* <!--... -- */
 | |
|     { if ( f[CF_CMT] == chr )
 | |
| 	p->state = S_CMTE1;
 | |
|       else
 | |
| 	p->state = S_CMT;
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_CMTE1:			/* <!--...-- seen */
 | |
|     { if ( f[CF_MDC] == chr )		/* > */
 | |
|       { if ( p->on_decl )
 | |
| 	  (*p->on_decl)(p, (ichar*)"");
 | |
| 	p->state = S_PCDATA;
 | |
|       } else
 | |
|       { if ( dtd->dialect != DL_SGML )
 | |
| 	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal comment", L"");
 | |
| 	if ( f[CF_CMT] != chr )
 | |
| 	  p->state = S_CMT;
 | |
|       }
 | |
|       return TRUE;
 | |
|     }
 | |
|     case S_GROUP:			/* [...] in declaration */
 | |
|     { add_icharbuf(p->buffer, chr);
 | |
|       if ( f[CF_DSO] == chr )
 | |
|       { p->grouplevel++;
 | |
|       } else if ( f[CF_DSC] == chr )
 | |
|       { if ( --p->grouplevel == 0 )
 | |
| 	  p->state = S_DECL;
 | |
|       } else if ( f[CF_LIT] == chr )	/* " */
 | |
|       { p->state = S_STRING;
 | |
| 	p->saved = chr;
 | |
| 	p->lit_saved_state = S_GROUP;
 | |
|       } else if ( f[CF_LITA] == chr )	/* ' */
 | |
|       { p->state = S_STRING;
 | |
| 	p->saved = chr;
 | |
| 	p->lit_saved_state = S_GROUP;
 | |
| 	return TRUE;
 | |
|       }
 | |
|       return TRUE;
 | |
|     }
 | |
| #ifdef UTF8
 | |
|     case S_UTF8:
 | |
| #endif
 | |
|     default:
 | |
|       assert(0);
 | |
|       return FALSE;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	     TOPLEVEL		*
 | |
| 		 *******************************/
 | |
| 
 | |
| int
 | |
| load_dtd_from_file(dtd_parser *p, const ichar *file)
 | |
| { FILE *fd;
 | |
|   int rval;
 | |
|   data_mode   oldmode  = p->dmode;
 | |
|   dtdstate    oldstate = p->state;
 | |
|   locbuf      oldloc;
 | |
| 
 | |
|   push_location(p, &oldloc);
 | |
|   p->dmode = DM_DTD;
 | |
|   p->state = S_PCDATA;
 | |
|   empty_icharbuf(p->buffer);		/* dubious */
 | |
|   set_file_dtd_parser(p, IN_FILE, file);
 | |
| 
 | |
|   if ( (fd = wfopen(file, "rb")) )
 | |
|   { int chr;
 | |
| 
 | |
|     while( (chr = getc(fd)) != EOF )
 | |
|       putchar_dtd_parser(p, chr);
 | |
| 
 | |
|     fclose(fd);
 | |
| 
 | |
|     p->dtd->implicit = FALSE;
 | |
|     rval = TRUE;
 | |
|   } else
 | |
|     rval = FALSE;
 | |
| 
 | |
|   pop_location(p, &oldloc);
 | |
|   p->dmode = oldmode;
 | |
|   p->state = oldstate;
 | |
| 
 | |
|   return rval;
 | |
| }
 | |
| 
 | |
| 
 | |
| dtd *
 | |
| file_to_dtd(const ichar *file, const ichar *doctype, dtd_dialect dialect)
 | |
| { dtd_parser *p = new_dtd_parser(new_dtd(doctype));
 | |
| 
 | |
|   set_dialect_dtd(p->dtd, dialect);
 | |
| 
 | |
|   if ( load_dtd_from_file(p, file) )
 | |
|   { dtd *dtd = p->dtd;
 | |
| 
 | |
|     dtd->references++;			/* avoid deletion */
 | |
|     free_dtd_parser(p);
 | |
|     return dtd;
 | |
|   } else
 | |
|   { free_dtd_parser(p);
 | |
| 
 | |
|     return NULL;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| SGML sees a file as
 | |
| 
 | |
| [<LF>]Line 1<CR>
 | |
|  <LF> Line 2<CR>
 | |
| 
 | |
| I.e. the newline  appearing  just  before   the  end-of-file  should  be
 | |
| ignored. In addition, Unix-style files are   mapped  to CR-LF. Thanks to
 | |
| Richard O'Keefe.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| int
 | |
| sgml_process_stream(dtd_parser *p, FILE *fd, unsigned flags)
 | |
| { int p0, p1;
 | |
| 
 | |
|   if ( (p0 = getc(fd)) == EOF )
 | |
|     return TRUE;
 | |
|   if ( (p1 = getc(fd)) == EOF )
 | |
|   { putchar_dtd_parser(p, p0);
 | |
|     return end_document_dtd_parser(p);
 | |
|   }
 | |
| 
 | |
|   for(;;)
 | |
|   { int p2 = getc(fd);
 | |
| 
 | |
|     if ( p2 == EOF )
 | |
|     { putchar_dtd_parser(p, p0);
 | |
|       if ( p1 != LF )
 | |
| 	putchar_dtd_parser(p, p1);
 | |
|       else if ( p0 != CR )
 | |
| 	putchar_dtd_parser(p, CR);
 | |
| 
 | |
|       if ( flags & SGML_SUB_DOCUMENT )
 | |
| 	return TRUE;
 | |
|       else
 | |
| 	return end_document_dtd_parser(p);
 | |
|     }
 | |
| 
 | |
|     putchar_dtd_parser(p, p0);
 | |
|     p0 = p1;
 | |
|     p1 = p2;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| sgml_process_file(dtd_parser *p, const ichar *file, unsigned flags)
 | |
| { FILE *fd;
 | |
|   int rval;
 | |
|   locbuf oldloc;
 | |
| 
 | |
|   push_location(p, &oldloc);
 | |
|   set_file_dtd_parser(p, IN_FILE, file);
 | |
|   if ( !(flags & SGML_SUB_DOCUMENT) )
 | |
|     set_mode_dtd_parser(p, DM_DATA);
 | |
| 
 | |
|   if ( (fd = wfopen(file, "rb")) )
 | |
|   { rval = sgml_process_stream(p, fd, flags);
 | |
|     fclose(fd);
 | |
|   } else
 | |
|     rval = FALSE;
 | |
| 
 | |
|   pop_location(p, &oldloc);
 | |
| 
 | |
|   return rval;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	       ERRORS		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static wchar_t *
 | |
| format_location(wchar_t *s, size_t len, dtd_srcloc *l)
 | |
| { int first = TRUE;
 | |
| 
 | |
|   if ( !l || l->type == IN_NONE )
 | |
|     return s;
 | |
| 
 | |
|   for( ; l && l->type != IN_NONE;
 | |
|          l = l->parent, first = FALSE )
 | |
|   { if ( !first )
 | |
|     { swprintf(s, len, L" (from ");
 | |
|       s += wcslen(s);
 | |
|     }
 | |
| 
 | |
|     switch(l->type)
 | |
|     { case IN_NONE:
 | |
| 	assert(0);
 | |
|       case IN_FILE:
 | |
| 	swprintf(s, len, L"%ls:%d:%d", l->name.file, l->line, l->linepos);
 | |
|         break;
 | |
|       case IN_ENTITY:
 | |
|         swprintf(s, len, L"&%ls;%d:%d", l->name.entity, l->line, l->linepos);
 | |
|         break;
 | |
|     }
 | |
| 
 | |
|     s += wcslen(s);
 | |
|     if ( !first )
 | |
|     { *s++ = L')';
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   *s++ = L':';
 | |
|   *s++ = L' ';
 | |
| 
 | |
|   return s;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| format_message(dtd_error *e)
 | |
| { wchar_t buf[1024];
 | |
|   wchar_t *s;
 | |
|   int prefix_len;
 | |
|   int left;
 | |
| 
 | |
|   switch(e->severity)
 | |
|   { case ERS_ERROR:
 | |
|       wcscpy(buf, L"Error: ");
 | |
|       break;
 | |
|     case ERS_WARNING:
 | |
|       wcscpy(buf, L"Warning: ");
 | |
|       break;
 | |
|     default:
 | |
|       buf[0] = '\0';
 | |
|   }
 | |
|   s = buf+wcslen(buf);
 | |
| 
 | |
|   s = format_location(s, 1024-(s-buf), e->location);
 | |
|   prefix_len = (int)(s-buf);
 | |
|   left = 1024-prefix_len;
 | |
| 
 | |
|   switch(e->id)
 | |
|   { case ERC_REPRESENTATION:
 | |
|       swprintf(s, left, L"Cannot represent due to %ls", e->argv[0]);
 | |
|       break;
 | |
|     case ERC_RESOURCE:
 | |
|       swprintf(s, left, L"Insufficient %ls resources", e->argv[0]);
 | |
|       break;
 | |
|     case ERC_LIMIT:
 | |
|       swprintf(s, left, L"%ls limit exceeded", e->argv[0]);
 | |
|       break;
 | |
|     case ERC_VALIDATE:
 | |
|       swprintf(s, left, L"%ls", e->argv[0]);
 | |
|       break;
 | |
|     case ERC_SYNTAX_ERROR:
 | |
|       swprintf(s, left, L"Syntax error: %ls", e->argv[0]);
 | |
|       break;
 | |
|     case ERC_EXISTENCE:
 | |
|       swprintf(s, left, L"%ls \"%ls\" does not exist", e->argv[0], e->argv[1]);
 | |
|       break;
 | |
|     case ERC_REDEFINED:
 | |
|       swprintf(s, left, L"Redefined %ls \"%ls\"", e->argv[0], e->argv[1]);
 | |
|       break;
 | |
|     default:
 | |
|       ;
 | |
|   }
 | |
| 
 | |
|   e->message = str2ring(buf);
 | |
|   e->plain_message = e->message + prefix_len;
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| gripe(dtd_parser *p, dtd_error_id e, ...)
 | |
| { va_list args;
 | |
|   wchar_t buf[1024];
 | |
|   dtd_error error;
 | |
|   int dtdmode = FALSE;
 | |
|   void *freeme = NULL;
 | |
| 
 | |
|   va_start(args, e);
 | |
| 
 | |
|   memset(&error, 0, sizeof(error));
 | |
|   error.minor = e;			/* detailed error code */
 | |
| 
 | |
|   if ( p )
 | |
|   { error.location = &p->location;
 | |
|     if ( p->dmode == DM_DTD )
 | |
|       dtdmode = TRUE;
 | |
|   } else
 | |
|   { error.location = NULL;
 | |
|   }
 | |
| 
 | |
|   switch(e)
 | |
|   { case ERC_REPRESENTATION:
 | |
|     case ERC_RESOURCE:
 | |
|       error.severity = ERS_ERROR;
 | |
|       error.argv[0]  = va_arg(args, wchar_t *);
 | |
|       break;
 | |
|     case ERC_LIMIT:
 | |
|       error.severity = ERS_WARNING;
 | |
|       error.argv[0]  = va_arg(args, wchar_t *);
 | |
|       break;
 | |
|     case ERC_SYNTAX_ERROR:
 | |
|     case ERC_SYNTAX_WARNING:
 | |
|     { wchar_t *m       = va_arg(args, wchar_t *);
 | |
|       const wchar_t *s = va_arg(args, const wchar_t *);
 | |
| 
 | |
|       if ( s && *s )
 | |
|       { swprintf(buf, 1024, L"%ls, found \"%ls\"", m, str_summary(s, 25));
 | |
| 	error.argv[0] = buf;
 | |
|       } else
 | |
| 	error.argv[0] = m;
 | |
| 
 | |
|       error.severity = (e == ERC_SYNTAX_WARNING ? ERS_WARNING : ERS_ERROR);
 | |
|       e = ERC_SYNTAX_ERROR;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_DOMAIN:
 | |
|     { const wchar_t *expected = va_arg(args, const wchar_t *);
 | |
|       const wchar_t *found    = str_summary(va_arg(args, const wchar_t *), 25);
 | |
| 
 | |
|       swprintf(buf, 1024, L"Expected type %ls, found \"%ls\"", expected, found);
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_ERROR;
 | |
|       e = (dtdmode ? ERC_SYNTAX_ERROR : ERC_VALIDATE);
 | |
|       break;
 | |
|     }
 | |
|     case ERC_REDEFINED:
 | |
|     { dtd_symbol *name;
 | |
|       error.argv[0] = va_arg(args, wchar_t *); /* type */
 | |
|       name = va_arg(args, dtd_symbol *); /* name */
 | |
|       error.argv[1]  = (ichar*)name->name;
 | |
|       error.severity = ERS_STYLE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_EXISTENCE:
 | |
|     { error.argv[0] = va_arg(args, wchar_t *); /* type */
 | |
|       error.argv[1] = va_arg(args, wchar_t *); /* name */
 | |
|       error.severity = ERS_ERROR;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_VALIDATE:
 | |
|     { error.argv[0] = va_arg(args, wchar_t *); /* message */
 | |
|       error.severity = ERS_WARNING;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_OMITTED_CLOSE:
 | |
|     { const wchar_t *element = va_arg(args, const wchar_t *);
 | |
| 
 | |
|       swprintf(buf, 1024, L"Inserted omitted end-tag for \"%ls\"", element);
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_WARNING;
 | |
|       e = ERC_VALIDATE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_OMITTED_OPEN:
 | |
|     { const wchar_t *element = va_arg(args, const wchar_t *);
 | |
| 
 | |
|       swprintf(buf, 1024, L"Inserted omitted start-tag for \"%ls\"", element);
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_WARNING;
 | |
|       e = ERC_VALIDATE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_NOT_OPEN:
 | |
|     { const wchar_t *element = va_arg(args, const wchar_t *);
 | |
| 
 | |
|       swprintf(buf, 1024, L"Ignored end-tag for \"%ls\" which is not open",
 | |
| 	       element);
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_WARNING;
 | |
|       e = ERC_VALIDATE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_NOT_ALLOWED:
 | |
|     { const wchar_t *element = va_arg(args, const wchar_t *);
 | |
| 
 | |
|       swprintf(buf, 1024, L"Element \"%ls\" not allowed here", element);
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_WARNING;
 | |
|       e = ERC_VALIDATE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_NOT_ALLOWED_PCDATA:
 | |
|     { const ocharbuf *cdata = va_arg(args, const ocharbuf *);
 | |
| 
 | |
|       swprintf(buf, 1024, L"#PCDATA (\"%ls\") not allowed here",
 | |
| 	       str_summary(cdata->data.w, 25));
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_WARNING;
 | |
|       e = ERC_VALIDATE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_NO_ATTRIBUTE:
 | |
|     { const wchar_t *elem = va_arg(args, wchar_t *); /* element */
 | |
|       const wchar_t *attr = va_arg(args, wchar_t *); /* attribute */
 | |
| 
 | |
|       swprintf(buf, 1024, L"Element \"%ls\" has no attribute \"%ls\"",
 | |
| 	       elem, attr);
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_WARNING;
 | |
| 
 | |
|       e = ERC_VALIDATE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_NO_ATTRIBUTE_VALUE:
 | |
|     { const wchar_t *elem  = va_arg(args, wchar_t *); /* element */
 | |
|       const wchar_t *value = va_arg(args, wchar_t *); /* attribute value */
 | |
| 
 | |
|       swprintf(buf, 1024, L"Element \"%ls\" has no attribute with value \"%ls\"",
 | |
| 	       elem, value);
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_WARNING;
 | |
| 
 | |
|       e = ERC_VALIDATE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_NO_VALUE:
 | |
|     { error.argv[0] = L"entity value";
 | |
|       error.argv[1] = va_arg(args, wchar_t *); /* entity */
 | |
| 
 | |
|       error.severity = ERS_ERROR;
 | |
|       e = ERC_EXISTENCE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_NO_DOCTYPE:
 | |
|     { const wchar_t *doctype = va_arg(args, wchar_t *); /* element */
 | |
|       const wchar_t *file    = va_arg(args, wchar_t *); /* DTD file */
 | |
| 
 | |
|       swprintf(buf, 1024, L"No <!DOCTYPE ...>, assuming \"%ls\" from DTD file \"%s\"",
 | |
| 	      doctype, file);
 | |
|       error.argv[0] = buf;
 | |
|       error.severity = ERS_WARNING;
 | |
| 
 | |
|       e = ERC_VALIDATE;
 | |
|       break;
 | |
|     }
 | |
|     case ERC_NO_CATALOGUE:
 | |
|     { char *file = va_arg(args, char *); /* catalogue file */
 | |
| 
 | |
|       error.argv[0] = L"catalogue file";
 | |
|       freeme = error.argv[1] = utf8towcs(file);
 | |
|       error.severity = ERS_WARNING;
 | |
|       e = ERC_EXISTENCE;
 | |
| 
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   error.id      = e;
 | |
|   format_message(&error);
 | |
| 
 | |
|   if ( p && p->on_error )
 | |
|     (*p->on_error)(p, &error);
 | |
|   else
 | |
|     fwprintf(stderr, L"SGML: %ls\n", error.message);
 | |
| 
 | |
|   if ( freeme )
 | |
|     sgml_free(freeme);
 | |
| 
 | |
|   va_end(args);
 | |
| 
 | |
|   return FALSE;
 | |
| }
 |