5599 lines
		
	
	
		
			127 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			5599 lines
		
	
	
		
			127 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*  $Id$
 | 
						|
 | 
						|
    Part of SWI-Prolog
 | 
						|
 | 
						|
    Author:        Jan Wielemaker
 | 
						|
    E-mail:        wielemak@science.uva.nl
 | 
						|
    WWW:           http://www.swi-prolog.org
 | 
						|
    Copyright (C): 1985-2006, University of Amsterdam
 | 
						|
 | 
						|
    This library is free software; you can redistribute it and/or
 | 
						|
    modify it under the terms of the GNU Lesser General Public
 | 
						|
    License as published by the Free Software Foundation; either
 | 
						|
    version 2.1 of the License, or (at your option) any later version.
 | 
						|
 | 
						|
    This library is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
    Lesser General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU Lesser General Public
 | 
						|
    License along with this library; if not, write to the Free Software
 | 
						|
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
*/
 | 
						|
 | 
						|
#define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
 | 
						|
 | 
						|
#define DTD_IMPLEMENTATION 1
 | 
						|
#include <stdio.h>
 | 
						|
#include <wchar.h>
 | 
						|
#include "dtd.h"
 | 
						|
#include "model.h"
 | 
						|
#include "util.h"
 | 
						|
#include "catalog.h"
 | 
						|
#include "parser.h"
 | 
						|
#include <stdlib.h>
 | 
						|
#include <assert.h>
 | 
						|
#include <stdarg.h>
 | 
						|
#include <ctype.h>
 | 
						|
#include <string.h>
 | 
						|
#include "utf8.h"
 | 
						|
#include <errno.h>
 | 
						|
#include <wctype.h>
 | 
						|
#include "xml_unicode.h"
 | 
						|
 | 
						|
#define DEBUG(g) ((void)0)
 | 
						|
#define ZERO_TERM_LEN (-1)		/* terminated by nul */
 | 
						|
 | 
						|
#ifdef __WINDOWS__
 | 
						|
#define inline __inline
 | 
						|
#define swprintf _snwprintf
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    LOCAL TYPES		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
typedef struct locbuf
 | 
						|
{ dtd_srcloc start;			/* p->startloc */
 | 
						|
  dtd_srcloc here;			/* p->location */
 | 
						|
} locbuf;
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	     PROTOTYPES		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static const ichar *	itake_name(dtd_parser *p,
 | 
						|
				   const ichar *in, dtd_symbol **id);
 | 
						|
static const ichar *	itake_entity_name(dtd_parser *p, const ichar *in,
 | 
						|
					  dtd_symbol **id);
 | 
						|
static const ichar *	itake_namegroup(dtd_parser *p, const ichar *decl,
 | 
						|
					dtd_symbol **names, int *n);
 | 
						|
static const ichar *	iskip_layout(dtd *dtd, const ichar *in);
 | 
						|
static dtd_parser *	clone_dtd_parser(dtd_parser *p);
 | 
						|
static void		free_model(dtd_model *m);
 | 
						|
static int		process_entity_declaration(dtd_parser *p,
 | 
						|
						    const ichar *decl);
 | 
						|
static void		free_notations(dtd_notation *n);
 | 
						|
static void		free_shortrefs(dtd_shortref *sr);
 | 
						|
static int		process_cdata(dtd_parser *p, int last);
 | 
						|
static int		process_entity(dtd_parser *p, const ichar *name);
 | 
						|
static int		emit_cdata(dtd_parser *p, int last);
 | 
						|
static dtd_space_mode	istr_to_space_mode(const ichar *val);
 | 
						|
static void		update_space_mode(dtd_parser *p, dtd_element *e,
 | 
						|
					  int natts, sgml_attribute *atts);
 | 
						|
static dtd_model *	make_model(dtd_parser *p, const ichar *decl,
 | 
						|
				   const ichar **end);
 | 
						|
static void		for_elements_in_model(dtd_model *m,
 | 
						|
					      void (*f)(dtd_element *e,
 | 
						|
							void *closure),
 | 
						|
					      void *closure);
 | 
						|
int			putchar_dtd_parser(dtd_parser *p, int chr);
 | 
						|
void			free_dtd_parser(dtd_parser *p);
 | 
						|
static const ichar *	isee_character_entity(dtd *dtd, const ichar *in,
 | 
						|
					      int *chr);
 | 
						|
static int		add_default_attributes(dtd_parser *p, dtd_element *e,
 | 
						|
					       int natts,
 | 
						|
					       sgml_attribute *atts);
 | 
						|
static int		prepare_cdata(dtd_parser *p);
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      MACROS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
/* WITH_CLASS(p, c, g)
   Execute `g' with p->event_class temporarily set to `c' and restore
   the previous class afterwards.

   The expansion is a plain brace block (not do-while), so existing
   uses with or without a trailing `;' keep compiling.  `p' and `c' are
   parenthesized so that arbitrary expressions can be passed safely.
   Note that `p' is evaluated three times and that `g' must not leave
   the block (return/goto/break), or the old class is not restored.
*/
#define WITH_CLASS(p, c, g) \
	{ sgml_event_class _oc = (p)->event_class; \
	  (p)->event_class = (c); \
	  g; \
	  (p)->event_class = _oc; \
	}
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	     STATISTICS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
#ifdef O_STATISTICS

/* Global counters; they are updated through STAT(...) */
int edefs_created  = 0;
int edefs_freed    = 0;
int edefs_implicit = 0;
int edefs_atts     = 0;
int edefs_decl     = 0;
int dtd_created    = 0;
int dtd_freed      = 0;

/* Print all counters to stderr */
void
sgml_statistics(void)
{ fprintf(stderr,
	  "EDEFS: created %d; freed %d\n"
	  "EDEFS: implicit %d; atts %d; decl %d\n"
	  "DTDs: created: %d; freed: %d\n",
	  edefs_created, edefs_freed,
	  edefs_implicit, edefs_atts, edefs_decl,
	  dtd_created, dtd_freed);
}

#define STAT(g) g			/* statistics enabled: run g */

#else /*O_STATISTICS*/

#define STAT(g) ((void)0)		/* statistics disabled: no-op */

#endif /*O_STATISTICS*/
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	   SRC LOCATION		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
 | 
						|
static void				/* TBD: also handle startloc */
 | 
						|
push_location(dtd_parser *p, locbuf *save)
 | 
						|
{ save->here  = p->location;
 | 
						|
  save->start = p->startloc;
 | 
						|
 | 
						|
  p->location.parent = &save->here;
 | 
						|
  p->startloc.parent = &save->start;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
pop_location(dtd_parser *p, locbuf *saved)
 | 
						|
{ p->location = saved->here;
 | 
						|
  p->startloc = saved->start;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static inline void
 | 
						|
_sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc)
 | 
						|
{ d->type    = loc->type;
 | 
						|
  d->name.file = loc->name.file;
 | 
						|
  d->line    = loc->line;
 | 
						|
  d->linepos = loc->linepos;
 | 
						|
  d->charpos = loc->charpos;
 | 
						|
					/* but not the parent! */
 | 
						|
}
 | 
						|
 | 
						|
void
 | 
						|
sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc)
 | 
						|
{ _sgml_cplocation(d, loc);
 | 
						|
}
 | 
						|
 | 
						|
#define sgml_cplocation(d,s) _sgml_cplocation(d, s)
 | 
						|
 | 
						|
static void
 | 
						|
inc_location(dtd_srcloc *l, int chr)
 | 
						|
{ if ( chr == '\n' )
 | 
						|
  { l->linepos = 0;
 | 
						|
    l->line++;
 | 
						|
  }
 | 
						|
 | 
						|
  l->linepos++;
 | 
						|
  l->charpos++;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
dec_location(dtd_srcloc *l, int chr)
 | 
						|
{ if ( chr == '\n' )
 | 
						|
  { l->linepos = 2;			/* not good! */
 | 
						|
    l->line--;
 | 
						|
  }
 | 
						|
  l->linepos--;
 | 
						|
  l->charpos--;
 | 
						|
}
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *   CLASSIFICATION PRIMITIVES	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static inline int
 | 
						|
HasClass(dtd *dtd, wint_t chr, int mask)
 | 
						|
{ if ( chr <= 0xff )
 | 
						|
    return (dtd->charclass->class[(chr)] & (mask));
 | 
						|
  else
 | 
						|
  { switch(mask)
 | 
						|
    { case CH_NAME:
 | 
						|
	return ( xml_basechar(chr) ||
 | 
						|
		 xml_digit(chr) ||
 | 
						|
		 xml_ideographic(chr) ||
 | 
						|
		 xml_combining_char(chr) ||
 | 
						|
		 xml_extender(chr)
 | 
						|
	       );
 | 
						|
      case CH_NMSTART:
 | 
						|
	return ( xml_basechar(chr) ||
 | 
						|
		 xml_ideographic(chr) );
 | 
						|
      case CH_WHITE:
 | 
						|
	return FALSE;			/* only ' ' and '\t' */
 | 
						|
      case CH_BLANK:
 | 
						|
	return iswspace(chr);
 | 
						|
      case CH_DIGIT:
 | 
						|
	return xml_digit(chr);
 | 
						|
      case CH_RS:
 | 
						|
      case CH_RE:
 | 
						|
	return FALSE;
 | 
						|
      default:
 | 
						|
	assert(0);
 | 
						|
        return FALSE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
isee_func(dtd *dtd, const ichar *in, charfunc func)
 | 
						|
{ if ( dtd->charfunc->func[func] == *in )
 | 
						|
    return ++in;
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      SYMBOLS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static dtd_symbol_table *
 | 
						|
new_symbol_table()
 | 
						|
{ dtd_symbol_table *t = sgml_calloc(1, sizeof(*t));
 | 
						|
  t->size    = SYMBOLHASHSIZE;
 | 
						|
  t->entries = sgml_calloc(t->size, sizeof(dtd_symbol*));
 | 
						|
 | 
						|
  return t;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_symbol_table(dtd_symbol_table *t)
 | 
						|
{ int i;
 | 
						|
 | 
						|
  for(i=0; i<t->size; i++)
 | 
						|
  { dtd_symbol *s, *next;
 | 
						|
 | 
						|
    for(s=t->entries[i]; s; s=next)
 | 
						|
    { next = s->next;
 | 
						|
 | 
						|
      sgml_free((ichar*)s->name);
 | 
						|
      sgml_free(s);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  sgml_free(t->entries);
 | 
						|
  sgml_free(t);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dtd_symbol *
 | 
						|
dtd_find_symbol(dtd *dtd, const ichar *name)
 | 
						|
{ dtd_symbol_table *t = dtd->symbols;
 | 
						|
 | 
						|
  if ( dtd->case_sensitive )
 | 
						|
  { int k = istrhash(name, t->size);
 | 
						|
    dtd_symbol *s;
 | 
						|
 | 
						|
    for(s=t->entries[k]; s; s = s->next)
 | 
						|
    { if ( istreq(s->name, name) )
 | 
						|
	return s;
 | 
						|
    }
 | 
						|
  } else
 | 
						|
  { int k = istrcasehash(name, t->size);
 | 
						|
    dtd_symbol *s;
 | 
						|
 | 
						|
    for(s=t->entries[k]; s; s = s->next)
 | 
						|
    { if ( istrcaseeq(s->name, name) )
 | 
						|
	return s;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static dtd_symbol *
 | 
						|
dtd_find_entity_symbol(dtd *dtd, const ichar *name)
 | 
						|
{ dtd_symbol_table *t = dtd->symbols;
 | 
						|
 | 
						|
  if ( dtd->ent_case_sensitive )
 | 
						|
  { int k = istrhash(name, t->size);
 | 
						|
    dtd_symbol *s;
 | 
						|
 | 
						|
    for(s=t->entries[k]; s; s = s->next)
 | 
						|
    { if ( istreq(s->name, name) )
 | 
						|
	return s;
 | 
						|
    }
 | 
						|
  } else
 | 
						|
  { int k = istrcasehash(name, t->size);
 | 
						|
    dtd_symbol *s;
 | 
						|
 | 
						|
    for(s=t->entries[k]; s; s = s->next)
 | 
						|
    { if ( istrcaseeq(s->name, name) )
 | 
						|
	return s;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dtd_symbol *
 | 
						|
dtd_add_symbol(dtd *dtd, const ichar *name)
 | 
						|
{ dtd_symbol_table *t = dtd->symbols;
 | 
						|
  int k = istrhash(name, t->size);
 | 
						|
  dtd_symbol *s;
 | 
						|
 | 
						|
  for(s=t->entries[k]; s; s = s->next)
 | 
						|
  { if ( istreq(s->name, name) )
 | 
						|
      return s;
 | 
						|
  }
 | 
						|
 | 
						|
  s = sgml_calloc(1, sizeof(*s));
 | 
						|
  s->name = istrdup(name);
 | 
						|
  s->next = t->entries[k];
 | 
						|
  t->entries[k] = s;
 | 
						|
 | 
						|
  return s;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    ENTITIES		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static void
 | 
						|
free_entity_list(dtd_entity *e)
 | 
						|
{ dtd_entity *next;
 | 
						|
 | 
						|
  for( ; e; e=next)
 | 
						|
  { next = e->next;
 | 
						|
 | 
						|
    if ( e->value )   sgml_free(e->value);
 | 
						|
    if ( e->extid )   sgml_free(e->extid);
 | 
						|
    if ( e->exturl )  sgml_free(e->exturl);
 | 
						|
    if ( e->baseurl ) sgml_free(e->baseurl);
 | 
						|
 | 
						|
    sgml_free(e);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static dtd_entity *
 | 
						|
find_pentity(dtd *dtd, dtd_symbol *id)
 | 
						|
{ dtd_entity *e;
 | 
						|
 | 
						|
  for(e = dtd->pentities; e; e=e->next)
 | 
						|
  { if ( e->name == id )
 | 
						|
      return e;
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* returned path must be freed when done */
 | 
						|
 | 
						|
static ichar *
 | 
						|
entity_file(dtd *dtd, dtd_entity *e)
 | 
						|
{ switch(e->type)
 | 
						|
  { case ET_SYSTEM:
 | 
						|
    case ET_PUBLIC:
 | 
						|
    { const ichar *f;
 | 
						|
 | 
						|
      f = find_in_catalogue(e->catalog_location,
 | 
						|
			    e->name->name,
 | 
						|
			    e->extid,
 | 
						|
			    e->exturl,
 | 
						|
			    dtd->dialect != DL_SGML);
 | 
						|
 | 
						|
      if ( f )				/* owned by catalog */
 | 
						|
      { ichar *file;
 | 
						|
 | 
						|
	if ( is_absolute_path(f) || !e->baseurl )
 | 
						|
	  file = istrdup(f);
 | 
						|
	else
 | 
						|
	  file = localpath(e->baseurl, f);
 | 
						|
 | 
						|
	return file;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    default:
 | 
						|
      return NULL;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
entity_value(dtd_parser *p, dtd_entity *e, int *len)
 | 
						|
{ ichar *file;
 | 
						|
 | 
						|
  if ( !e->value && (file=entity_file(p->dtd, e)) )
 | 
						|
  { int normalise = (e->content == EC_SGML || e->content == EC_CDATA);
 | 
						|
    size_t l;
 | 
						|
 | 
						|
    e->value = load_sgml_file_to_charp(file, normalise, &l);
 | 
						|
    e->length = (long)l;
 | 
						|
    sgml_free(file);
 | 
						|
  }
 | 
						|
 | 
						|
  if ( len )
 | 
						|
    *len = e->length;
 | 
						|
 | 
						|
  return e->value;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
expand_pentities(dtd_parser *p, const ichar *in, int ilen, ichar *out, int len)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  int pero = dtd->charfunc->func[CF_PERO]; /* % */
 | 
						|
  int ero = dtd->charfunc->func[CF_ERO]; /* & */
 | 
						|
  const ichar *s;
 | 
						|
  const ichar *end;
 | 
						|
 | 
						|
  if ( ilen == ZERO_TERM_LEN )
 | 
						|
  { end = in + wcslen(in);
 | 
						|
  } else
 | 
						|
  { end = &in[ilen];
 | 
						|
  }
 | 
						|
 | 
						|
  while(in < end)
 | 
						|
  { if ( *in == pero )
 | 
						|
    { dtd_symbol *id;
 | 
						|
 | 
						|
      if ( (s = itake_entity_name(p, in+1, &id)) )
 | 
						|
      { dtd_entity *e = find_pentity(dtd, id);
 | 
						|
	const ichar *eval;
 | 
						|
	int l;
 | 
						|
 | 
						|
	in = s;
 | 
						|
	if ( (s=isee_func(dtd, s, CF_ERC)) ) /* ; is not obligatory? */
 | 
						|
	  in = s;
 | 
						|
 | 
						|
	if ( !e )
 | 
						|
	  return gripe(p, ERC_EXISTENCE, L"parameter entity", id->name);
 | 
						|
 | 
						|
	if ( !(eval = entity_value(p, e, NULL)) )
 | 
						|
	  return FALSE;
 | 
						|
 | 
						|
	if ( !expand_pentities(p, eval, ZERO_TERM_LEN, out, len) )
 | 
						|
	  return FALSE;
 | 
						|
	l = (int)istrlen(out);		/* could be better */
 | 
						|
	out += l;
 | 
						|
	len -= l;
 | 
						|
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    if ( --len <= 0 )
 | 
						|
    { gripe(p, ERC_REPRESENTATION, L"Declaration too long");
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
 | 
						|
    if ( *in == ero && in[1] == '#' )	/* &# */
 | 
						|
    { int chr;
 | 
						|
 | 
						|
      if ( (s=isee_character_entity(dtd, in, &chr)) )
 | 
						|
      { if ( chr == 0 )
 | 
						|
	{ gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", in);
 | 
						|
	} else
 | 
						|
	{ *out++ = chr;
 | 
						|
	  in = s;
 | 
						|
	  continue;
 | 
						|
	}
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    *out++ = *in++;
 | 
						|
  }
 | 
						|
 | 
						|
  *out = '\0';
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
char_entity_value(const ichar *decl)
 | 
						|
{ if ( *decl == '#' )
 | 
						|
  { const ichar *s = decl+1;
 | 
						|
    ichar *end;
 | 
						|
    long v;
 | 
						|
 | 
						|
					/* do octal too? */
 | 
						|
    if ( s[0] == 'x' || s[0] == 'X' )
 | 
						|
      v = wcstoul(s+1, &end, 16);
 | 
						|
    else
 | 
						|
      v = wcstoul(s, &end, 10);
 | 
						|
 | 
						|
    if ( *end == '\0' )
 | 
						|
    { return (int)v;
 | 
						|
    } else if ( istreq(s, L"RS") )
 | 
						|
    { return '\n';
 | 
						|
    } else if ( istreq(s, L"RE") )
 | 
						|
    { return '\r';
 | 
						|
    } else if ( istreq(s, L"TAB") )
 | 
						|
    { return '\t';
 | 
						|
    } else if ( istreq(s, L"SPACE") )
 | 
						|
    { return ' ';
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return -1;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
isee_character_entity(dtd *dtd, const ichar *in, int *chr)
 | 
						|
{ const ichar *s;
 | 
						|
 | 
						|
  if ( (s=isee_func(dtd, in, CF_ERO)) && *s == '#' )
 | 
						|
  { ichar e[32];
 | 
						|
    ichar *o = e;
 | 
						|
    int v;
 | 
						|
 | 
						|
    *o++ = *s++;
 | 
						|
    while(o < e+sizeof(e)/sizeof(ichar)-1 && HasClass(dtd, *s, CH_NAME))
 | 
						|
      *o++ = *s++;
 | 
						|
    if ( isee_func(dtd, s, CF_ERC))	/* skip ; */
 | 
						|
      s++;
 | 
						|
 | 
						|
    *o = '\0';
 | 
						|
    if ( (v=char_entity_value(e)) >= 0 )
 | 
						|
    { *chr = v;
 | 
						|
      return s;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Expand entities in a string.  Used to expand CDATA attribute values.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static int
 | 
						|
expand_entities(dtd_parser *p, const ichar *in, int len, ocharbuf *out)
 | 
						|
{ const ichar *s;
 | 
						|
  const ichar *end = &in[len];
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
  int ero = dtd->charfunc->func[CF_ERO]; /* & */
 | 
						|
 | 
						|
  while(in < end)
 | 
						|
  { if ( *in == ero )
 | 
						|
    { const ichar *estart = in;		/* for recovery */
 | 
						|
      int chr;
 | 
						|
 | 
						|
      if ( (s=isee_character_entity(dtd, in, &chr)) )
 | 
						|
      { if ( chr == 0 )
 | 
						|
	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", in);
 | 
						|
 | 
						|
	add_ocharbuf(out, chr);
 | 
						|
	in = s;
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
 | 
						|
      if ( HasClass(dtd, in[1], CH_NMSTART) )
 | 
						|
      { dtd_symbol *id;
 | 
						|
	dtd_entity *e;
 | 
						|
	const ichar *eval;
 | 
						|
 | 
						|
	if ( !(in = itake_name(p, in+1, &id)) )
 | 
						|
	{ in = estart;
 | 
						|
	  goto recover;
 | 
						|
	}
 | 
						|
	if ( isee_func(dtd, in, CF_ERC) || *in == '\n' )
 | 
						|
	  in++;
 | 
						|
 | 
						|
	if ( !(e = id->entity) && !(e=dtd->default_entity) )
 | 
						|
	{ gripe(p, ERC_EXISTENCE, L"entity", id->name);
 | 
						|
	  in = estart;
 | 
						|
	  goto recover;
 | 
						|
	}
 | 
						|
 | 
						|
	if ( !(eval = entity_value(p, e, NULL)) )
 | 
						|
	{ gripe(p, ERC_NO_VALUE, e->name->name);
 | 
						|
	  in = estart;
 | 
						|
	  goto recover;
 | 
						|
	}
 | 
						|
 | 
						|
	if ( e->content == EC_SGML )
 | 
						|
	{ if ( !expand_entities(p, eval, (int)istrlen(eval), out) )
 | 
						|
	    return FALSE;
 | 
						|
	} else
 | 
						|
	{ const ichar *s;
 | 
						|
 | 
						|
	  for(s=eval; *s; s++)
 | 
						|
	    add_ocharbuf(out, *s);
 | 
						|
	}
 | 
						|
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
 | 
						|
      if ( dtd->dialect != DL_SGML )
 | 
						|
	gripe(p, ERC_SYNTAX_ERROR, L"Illegal entity", estart);
 | 
						|
    }
 | 
						|
 | 
						|
  recover:
 | 
						|
 | 
						|
    if ( *in == CR && in[1] == LF )
 | 
						|
      in++;
 | 
						|
 | 
						|
    if ( HasClass(dtd, *in, CH_BLANK) )
 | 
						|
    { add_ocharbuf(out, ' ');
 | 
						|
      in++;
 | 
						|
    } else
 | 
						|
    { add_ocharbuf(out, *in++);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  terminate_ocharbuf(out);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      ELEMENTS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static dtd_element *
 | 
						|
find_element(dtd *dtd, dtd_symbol *id)
 | 
						|
{ dtd_element *e;
 | 
						|
 | 
						|
  if ( id->element )
 | 
						|
    return id->element;			/* must check */
 | 
						|
 | 
						|
  e = sgml_calloc(1, sizeof(*e));
 | 
						|
  e->space_mode = SP_INHERIT;
 | 
						|
  e->undefined = TRUE;
 | 
						|
  e->name = id;
 | 
						|
  id->element = e;
 | 
						|
 | 
						|
  e->next = dtd->elements;
 | 
						|
  dtd->elements = e;
 | 
						|
 | 
						|
  return e;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static dtd_edef *
 | 
						|
new_element_definition(dtd *dtd)
 | 
						|
{ dtd_edef *def = sgml_calloc(1, sizeof(*def));
 | 
						|
 | 
						|
  STAT(edefs_created++);
 | 
						|
 | 
						|
  return def;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static dtd_element *
 | 
						|
def_element(dtd *dtd, dtd_symbol *id)
 | 
						|
{ dtd_element *e = find_element(dtd, id);
 | 
						|
 | 
						|
  if ( !e->structure )
 | 
						|
  { e->structure = new_element_definition(dtd);
 | 
						|
    e->structure->references = 1;
 | 
						|
    e->structure->type = C_EMPTY;
 | 
						|
  }
 | 
						|
 | 
						|
  return e;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_name_list(dtd_name_list *nl)
 | 
						|
{ dtd_name_list *next;
 | 
						|
 | 
						|
  for( ; nl; nl=next)
 | 
						|
  { next = nl->next;
 | 
						|
 | 
						|
    sgml_free(nl);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
#define REFS_VIRGIN (-42)
 | 
						|
 | 
						|
static void
 | 
						|
free_attribute(dtd_attr *a)
 | 
						|
{ if ( a->references == REFS_VIRGIN || --a->references == 0 )
 | 
						|
  { switch(a->type)
 | 
						|
    { case AT_NAMEOF:
 | 
						|
      case AT_NOTATION:
 | 
						|
	free_name_list(a->typeex.nameof);
 | 
						|
      default:
 | 
						|
	;
 | 
						|
    }
 | 
						|
    switch(a->def)
 | 
						|
    { case AT_DEFAULT:
 | 
						|
      case AT_FIXED:
 | 
						|
      { if ( a->islist )
 | 
						|
	  sgml_free(a->att_def.list);
 | 
						|
	else if ( a->type == AT_CDATA && a->att_def.cdata )
 | 
						|
	  sgml_free(a->att_def.cdata);
 | 
						|
      }
 | 
						|
      default:
 | 
						|
	;
 | 
						|
    }
 | 
						|
 | 
						|
    sgml_free(a);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_attribute_list(dtd_attr_list *l)
 | 
						|
{ dtd_attr_list *next;
 | 
						|
 | 
						|
  for(; l; l=next)
 | 
						|
  { next = l->next;
 | 
						|
 | 
						|
    free_attribute(l->attribute);
 | 
						|
    sgml_free(l);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_element_list(dtd_element_list *l)
 | 
						|
{ dtd_element_list *next;
 | 
						|
 | 
						|
  for( ; l; l=next)
 | 
						|
  { next = l->next;
 | 
						|
 | 
						|
    sgml_free(l);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
free_element_definition(dtd_edef *def)
 | 
						|
{ if ( --def->references == 0 )
 | 
						|
  { STAT(edefs_freed++);
 | 
						|
    if ( def->content )
 | 
						|
      free_model(def->content);
 | 
						|
    free_element_list(def->included);
 | 
						|
    free_element_list(def->excluded);
 | 
						|
    free_state_engine(def->initial_state);
 | 
						|
 | 
						|
    sgml_free(def);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_elements(dtd_element *e)
 | 
						|
{ dtd_element *next;
 | 
						|
 | 
						|
  for( ; e; e=next)
 | 
						|
  { next = e->next;
 | 
						|
 | 
						|
    if ( e->structure )
 | 
						|
      free_element_definition(e->structure);
 | 
						|
    free_attribute_list(e->attributes);
 | 
						|
 | 
						|
    sgml_free(e);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    ATTRIBUTES		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static dtd_attr *
 | 
						|
find_attribute(dtd_element *e, dtd_symbol *name)
 | 
						|
{ dtd_attr_list *a;
 | 
						|
 | 
						|
  for(a=e->attributes; a; a=a->next)
 | 
						|
  { if ( a->attribute->name == name )
 | 
						|
      return a->attribute;
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	  PARSE PRIMITIVES	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static const ichar *
 | 
						|
iskip_layout(dtd *dtd, const ichar *in)
 | 
						|
{ ichar cmt = dtd->charfunc->func[CF_CMT]; /* also skips comment */
 | 
						|
 | 
						|
  for( ; *in; in++ )
 | 
						|
  { if ( HasClass(dtd, *in, CH_BLANK) )
 | 
						|
      continue;
 | 
						|
 | 
						|
    if ( in[0] == cmt && in[1] == cmt )
 | 
						|
    { in += 2;
 | 
						|
 | 
						|
      for( ; *in; in++ )
 | 
						|
      { if ( in[0] == cmt && in[1] == cmt )
 | 
						|
	  break;
 | 
						|
      }
 | 
						|
      in++;
 | 
						|
      continue;
 | 
						|
    }
 | 
						|
 | 
						|
    return in;
 | 
						|
  }
 | 
						|
 | 
						|
  return in;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
See whether we are looking at identifier   "id". "id" must be lowercase!
 | 
						|
This is only used for reserved words, which are parsed case-insensitively in both
 | 
						|
XML and SGML modes.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static const ichar *
 | 
						|
isee_identifier(dtd *dtd, const ichar *in, char *id)
 | 
						|
{ in = iskip_layout(dtd, in);
 | 
						|
 | 
						|
					/* match */
 | 
						|
  while (*id && (wint_t)*id == towlower(*in) )
 | 
						|
    id++, in++;
 | 
						|
  if ( *id == 0 && !HasClass(dtd, *in, CH_NAME) )
 | 
						|
    return iskip_layout(dtd, in);
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_name(dtd_parser *p, const ichar *in, dtd_symbol **id)
 | 
						|
{ ichar buf[MAXNMLEN];
 | 
						|
  ichar *o = buf;
 | 
						|
  ichar *e = &buf[MAXNMLEN]-1;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  in = iskip_layout(dtd, in);
 | 
						|
  if ( !HasClass(dtd, *in, CH_NMSTART) )
 | 
						|
    return NULL;
 | 
						|
 | 
						|
  if ( dtd->case_sensitive )
 | 
						|
  { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | 
						|
      *o++ = *in++;
 | 
						|
  } else
 | 
						|
  { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | 
						|
      *o++ = towlower(*in++);
 | 
						|
  }
 | 
						|
 | 
						|
  if ( o == e )
 | 
						|
  { gripe(p, ERC_REPRESENTATION, L"NAME too long");
 | 
						|
    return NULL;
 | 
						|
  }
 | 
						|
 | 
						|
  *o++ = '\0';
 | 
						|
 | 
						|
  *id = dtd_add_symbol(dtd, buf);
 | 
						|
 | 
						|
  return iskip_layout(dtd, in);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_entity_name(dtd_parser *p, const ichar *in, dtd_symbol **id)
 | 
						|
{ ichar buf[MAXNMLEN];
 | 
						|
  ichar *o = buf;
 | 
						|
  ichar *e = &buf[MAXNMLEN]-1;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  in = iskip_layout(dtd, in);
 | 
						|
  if ( !HasClass(dtd, *in, CH_NMSTART) )
 | 
						|
    return NULL;
 | 
						|
 | 
						|
  if ( dtd->ent_case_sensitive )
 | 
						|
  { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | 
						|
      *o++ = *in++;
 | 
						|
  } else
 | 
						|
  { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | 
						|
      *o++ = towlower(*in++);
 | 
						|
  }
 | 
						|
  if ( o == e )
 | 
						|
  { gripe(p, ERC_REPRESENTATION, L"Entity NAME too long");
 | 
						|
    return NULL;
 | 
						|
  }
 | 
						|
 | 
						|
  *o++ = '\0';
 | 
						|
 | 
						|
  *id = dtd_add_symbol(dtd, buf);
 | 
						|
 | 
						|
  return in;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_nmtoken(dtd_parser *p, const ichar *in, dtd_symbol **id)
 | 
						|
{ ichar buf[MAXNMLEN];
 | 
						|
  ichar *o = buf;
 | 
						|
  ichar *e = &buf[MAXNMLEN]-1;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  in = iskip_layout(dtd, in);
 | 
						|
  if ( !HasClass(dtd, *in, CH_NAME) )
 | 
						|
    return NULL;
 | 
						|
  if ( dtd->case_sensitive )
 | 
						|
  { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | 
						|
      *o++ = *in++;
 | 
						|
  } else
 | 
						|
  { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | 
						|
      *o++ = towlower(*in++);
 | 
						|
  }
 | 
						|
  if ( o == e )
 | 
						|
  { gripe(p, ERC_REPRESENTATION, L"NMTOKEN too long");
 | 
						|
    return NULL;
 | 
						|
  }
 | 
						|
 | 
						|
  *o = '\0';
 | 
						|
 | 
						|
  *id = dtd_add_symbol(dtd, buf);
 | 
						|
 | 
						|
  return iskip_layout(dtd, in);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_nutoken(dtd_parser *p, const ichar *in, dtd_symbol **id)
 | 
						|
{ ichar buf[MAXNMLEN];
 | 
						|
  ichar *o = buf;
 | 
						|
  ichar *e = &buf[MAXNMLEN]-1;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  in = iskip_layout(dtd, in);
 | 
						|
  if ( !HasClass(dtd, *in, CH_DIGIT) )
 | 
						|
    return NULL;
 | 
						|
 | 
						|
  if ( dtd->case_sensitive )
 | 
						|
  { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | 
						|
      *o++ = *in++;
 | 
						|
  } else
 | 
						|
  { while( HasClass(dtd, *in, CH_NAME) && o < e )
 | 
						|
      *o++ = towlower(*in++);
 | 
						|
  }
 | 
						|
 | 
						|
  if ( o == e )
 | 
						|
  { gripe(p, ERC_REPRESENTATION, L"NUTOKEN too long");
 | 
						|
    return NULL;
 | 
						|
  }
 | 
						|
 | 
						|
  *o = '\0';
 | 
						|
  if ( o - buf > 8 )
 | 
						|
    gripe(p, ERC_LIMIT, L"nutoken length");
 | 
						|
 | 
						|
  *id = dtd_add_symbol(dtd, buf);
 | 
						|
 | 
						|
  return iskip_layout(dtd, in);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_number(dtd_parser *p, const ichar *in, dtd_attr *at)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  in = iskip_layout(dtd, in);
 | 
						|
 | 
						|
  switch(dtd->number_mode)
 | 
						|
  { case NU_TOKEN:
 | 
						|
    { ichar buf[MAXNMLEN];
 | 
						|
      ichar *o = buf;
 | 
						|
 | 
						|
      while( HasClass(dtd, *in, CH_DIGIT) )
 | 
						|
	*o++ = *in++;
 | 
						|
      if ( o == buf )
 | 
						|
	return NULL;			/* empty */
 | 
						|
      *o = '\0';
 | 
						|
      at->att_def.name = dtd_add_symbol(dtd, buf);
 | 
						|
 | 
						|
      return iskip_layout(dtd, (const ichar *)in);
 | 
						|
    }
 | 
						|
    case NU_INTEGER:
 | 
						|
    { ichar *end;
 | 
						|
 | 
						|
      at->att_def.number = wcstol(in, &end, 10);
 | 
						|
      if ( end > in && errno != ERANGE )
 | 
						|
	return iskip_layout(dtd, end);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Get a quoted value. After successful return,  *start points to the start
 | 
						|
of the string in the input and  *len   to  the length. The data is *not*
 | 
						|
nul terminated.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_string(dtd *dtd, const ichar *in, ichar **start, int *len)
 | 
						|
{ in = iskip_layout(dtd, in);
 | 
						|
 | 
						|
  if ( isee_func(dtd, in, CF_LIT) ||
 | 
						|
       isee_func(dtd, in, CF_LITA) )
 | 
						|
  { ichar q = *in++;
 | 
						|
 | 
						|
    *start = (ichar *)in;
 | 
						|
    while( *in && *in != q )
 | 
						|
      in++;
 | 
						|
    if ( *in )
 | 
						|
    { *len = (int)(in - (*start));
 | 
						|
 | 
						|
      return iskip_layout(dtd, ++in);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_dubbed_string(dtd *dtd, const ichar *in, ichar **out)
 | 
						|
{ ichar *start;
 | 
						|
  int len;
 | 
						|
  const ichar *end;
 | 
						|
 | 
						|
  if ( (end=itake_string(dtd, in, &start, &len)) )
 | 
						|
    *out = istrndup(start, len);
 | 
						|
 | 
						|
  return end;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
itake_url() is used to get the argument of a SYSTEM or 2nd argument of a
 | 
						|
PUBLIC reference. Once upon a  time  it   tried  to  tag the argument as
 | 
						|
file:<path>, but this job cannot be done before lookup in the catalogue. It
 | 
						|
is now the same as itake_dubbed_string(), so we simply call this one.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_url(dtd *dtd, const ichar *in, ichar **out)
 | 
						|
{ return itake_dubbed_string(dtd, in, out);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_nmtoken_chars(dtd_parser *p, const ichar *in, ichar *out, int len)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  in = iskip_layout(dtd, in);
 | 
						|
  if ( !HasClass(dtd, *in, CH_NAME) )
 | 
						|
    return NULL;
 | 
						|
  while( HasClass(dtd, *in, CH_NAME) )
 | 
						|
  { if ( --len <= 0 )
 | 
						|
      gripe(p, ERC_REPRESENTATION, L"Name token too long");
 | 
						|
    *out++ = (dtd->case_sensitive ? *in++ : (ichar)towlower(*in++));
 | 
						|
  }
 | 
						|
  *out++ = '\0';
 | 
						|
 | 
						|
  return iskip_layout(dtd, in);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/*  There used to be a function
 | 
						|
 | 
						|
    itake_nonblank_chars(dtd, in, out, len) -> new end
 | 
						|
 | 
						|
    which
 | 
						|
    - skipped layout,
 | 
						|
    - copied characters from in[] to out[] until layout or \0 was found,
 | 
						|
    - added a terminating \0 to out[],
 | 
						|
    - skipped any following layout, and
 | 
						|
    - returned the new position.
 | 
						|
 | 
						|
    That function was only called by get_attribute_value(), which used
 | 
						|
    it to parse an unquoted attribute value.  According to SGML, that's
 | 
						|
    not right:  unquoted attribute values must look like NMTOKENs (but
 | 
						|
    have a different length bound).  In particular, elements like
 | 
						|
	<foo a=bar>zoo</foo>
 | 
						|
	<foo a=ugh/zip/
 | 
						|
    are perfectly legal, so scanning an unquoted attribute value MUST
 | 
						|
    stop at a '/' or '>'.  According to HTML practice, pretty much any
 | 
						|
    old junk will be accepted, and some HTML parsers will allow bare
 | 
						|
    slashes in such an attribute.
 | 
						|
 | 
						|
    Typical HTML is *so* bad that it doesn't agree with *any* part of
 | 
						|
    the HTML specifications (e.g., <FONT> is commonly wrapped around
 | 
						|
    block-level elements, which has never been legal).  It's not clear
 | 
						|
    that there is much point in trying to accommodate bad HTML; if you
 | 
						|
    really need to do that, use the free program HTML Tidy (from the
 | 
						|
    http://www.w3c.org/ site) to clean up, and parse its output instead.
 | 
						|
 | 
						|
    However, in order to break as little as possible, the new (sgml-1.0.14)
 | 
						|
    function accepts anything except > / \0 and blanks.
 | 
						|
 | 
						|
JW: I decided to accept / as part of an unquoted value in SGML-mode if
 | 
						|
    shorttag is disabled as well as in XML mode if it is not the
 | 
						|
    end of the begin-element
 | 
						|
*/
 | 
						|
 | 
						|
static ichar const *
 | 
						|
itake_unquoted(dtd_parser *p, ichar const *in, ichar *out, int len)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  ichar const end2 = dtd->charfunc->func[CF_ETAGO2];	/* / */
 | 
						|
  ichar c;
 | 
						|
 | 
						|
  /* skip leading layout.  Do NOT skip comments! --x-- is a value! */
 | 
						|
  while (c = *in, HasClass(dtd, c, CH_BLANK))
 | 
						|
    in++;
 | 
						|
 | 
						|
  /* copy the attribute to out[] */
 | 
						|
  while ( !HasClass(dtd, c, CH_BLANK) &&
 | 
						|
	  c != '\0' )
 | 
						|
  { if ( c == end2 && (dtd->shorttag ||
 | 
						|
		       (in[1] == '\0' && dtd->dialect != DL_SGML)) )
 | 
						|
      break;
 | 
						|
 | 
						|
    if ( --len > 0 )
 | 
						|
      *out++ = c;
 | 
						|
    else if ( len == 0 )
 | 
						|
      gripe(p, ERC_REPRESENTATION, L"Attribute too long");
 | 
						|
    c = *++in;
 | 
						|
  }
 | 
						|
  *out = '\0';
 | 
						|
 | 
						|
  /* skip trailing layout.  While it is kind to skip comments here,
 | 
						|
     it is technically wrong to do so.  Tags may not contain comments.
 | 
						|
   */
 | 
						|
 | 
						|
  return iskip_layout(dtd, in);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *		DTD		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
dtd *
 | 
						|
new_dtd(const ichar *doctype)
 | 
						|
{ dtd *dtd = sgml_calloc(1, sizeof(*dtd));
 | 
						|
 | 
						|
  STAT(dtd_created++);
 | 
						|
  dtd->magic	 = SGML_DTD_MAGIC;
 | 
						|
  dtd->implicit  = TRUE;
 | 
						|
  dtd->dialect   = DL_SGML;
 | 
						|
  if ( doctype )
 | 
						|
    dtd->doctype = istrdup(doctype);
 | 
						|
  dtd->symbols	 = new_symbol_table();
 | 
						|
  dtd->charclass = new_charclass();
 | 
						|
  dtd->charfunc	 = new_charfunc();
 | 
						|
  dtd->space_mode = SP_SGML;
 | 
						|
  dtd->ent_case_sensitive = TRUE;	/* case-sensitive entities */
 | 
						|
  dtd->shorttag    = TRUE;		/* allow for <tag/value/ */
 | 
						|
  dtd->number_mode = NU_TOKEN;
 | 
						|
 | 
						|
  return dtd;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void
 | 
						|
free_dtd(dtd *dtd)
 | 
						|
{ if ( --dtd->references == 0 )
 | 
						|
  { STAT(dtd_freed++);
 | 
						|
 | 
						|
    if ( dtd->doctype )
 | 
						|
      sgml_free(dtd->doctype);
 | 
						|
 | 
						|
    free_entity_list(dtd->entities);
 | 
						|
    free_entity_list(dtd->pentities);
 | 
						|
    free_notations(dtd->notations);
 | 
						|
    free_shortrefs(dtd->shortrefs);
 | 
						|
    free_elements(dtd->elements);
 | 
						|
    free_symbol_table(dtd->symbols);
 | 
						|
    sgml_free(dtd->charfunc);
 | 
						|
    sgml_free(dtd->charclass);
 | 
						|
    dtd->magic = 0;
 | 
						|
 | 
						|
    sgml_free(dtd);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Entity declarations handed to process_entity_declaration() when the
   dialect is switched to XML.  The values are written as numeric
   character references so that the declaration text itself contains no
   markup or quote characters; the trailing comment shows the character
   each reference denotes.
*/

static const wchar_t *xml_entities[] =
{ L"lt CDATA \"&#60;\"",                /* < */
  L"gt CDATA \"&#62;\"",                /* > */
  L"amp CDATA \"&#38;\"",               /* & */
  L"apos CDATA \"&#39;\"",              /* ' */
  L"quot CDATA \"&#34;\"",              /* " */
  NULL
};
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
set_dialect_dtd(dtd *dtd, dtd_dialect dialect)
 | 
						|
{ if ( dtd->dialect != dialect )
 | 
						|
  { dtd->dialect = dialect;
 | 
						|
 | 
						|
    switch(dialect)
 | 
						|
    { case DL_SGML:
 | 
						|
      { dtd->case_sensitive = FALSE;
 | 
						|
	dtd->space_mode = SP_SGML;
 | 
						|
	dtd->shorttag = TRUE;
 | 
						|
	break;
 | 
						|
      }
 | 
						|
      case DL_XML:
 | 
						|
      case DL_XMLNS:
 | 
						|
      { const ichar **el;
 | 
						|
	dtd_parser p;
 | 
						|
 | 
						|
	dtd->case_sensitive = TRUE;
 | 
						|
	dtd->encoding = SGML_ENC_UTF8;
 | 
						|
	dtd->space_mode = SP_PRESERVE;
 | 
						|
	dtd->shorttag = FALSE;
 | 
						|
 | 
						|
	memset(&p, 0, sizeof(p));
 | 
						|
	p.dtd = dtd;
 | 
						|
	for(el = xml_entities; *el; el++)
 | 
						|
	  process_entity_declaration(&p, *el);
 | 
						|
 | 
						|
	break;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
set_option_dtd(dtd *dtd, dtd_option option, int set)
 | 
						|
{ switch(option)
 | 
						|
  { case OPT_SHORTTAG:
 | 
						|
      dtd->shorttag = set;
 | 
						|
      break;
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
baseurl(dtd_parser *p)
 | 
						|
{ if ( p->location.type == IN_FILE && p->location.name.file )
 | 
						|
  { return p->location.name.file;
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
process_entity_value_declaration(dtd_parser *p,
 | 
						|
				 const ichar *decl, dtd_entity *e)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  const ichar *s;
 | 
						|
 | 
						|
  if ( e->type == ET_SYSTEM )
 | 
						|
  { if ( (s=itake_url(dtd, decl, &e->exturl)) )
 | 
						|
    { e->baseurl = istrdup(baseurl(p));
 | 
						|
      return s;
 | 
						|
    }
 | 
						|
 | 
						|
    goto string_expected;
 | 
						|
  } else
 | 
						|
  { ichar *start; int len;
 | 
						|
    ichar val[MAXSTRINGLEN];
 | 
						|
 | 
						|
    if ( !(s = itake_string(dtd, decl, &start, &len)) )
 | 
						|
      goto string_expected;
 | 
						|
    decl = s;
 | 
						|
 | 
						|
    expand_pentities(p, start, len, val, sizeof(val)/sizeof(ichar));
 | 
						|
 | 
						|
    switch ( e->type )
 | 
						|
    { case ET_PUBLIC:
 | 
						|
      { e->extid = istrdup(val);
 | 
						|
	if ( isee_func(dtd, decl, CF_LIT) ||
 | 
						|
	     isee_func(dtd, decl, CF_LITA) )
 | 
						|
	{ if ( (s=itake_url(dtd, decl, &e->exturl)) )
 | 
						|
	  { e->baseurl = istrdup(baseurl(p));
 | 
						|
	    decl = s;
 | 
						|
	  }
 | 
						|
	}
 | 
						|
	return decl;
 | 
						|
      }
 | 
						|
      case ET_LITERAL:
 | 
						|
      { e->value = istrdup(val);
 | 
						|
	e->length = (int)wcslen(e->value);
 | 
						|
	return decl;
 | 
						|
      }
 | 
						|
      default:
 | 
						|
	assert(0);
 | 
						|
	return NULL;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
string_expected:
 | 
						|
  gripe(p, ERC_SYNTAX_ERROR, L"String expected", decl);
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
The sgml-standard tells us to accept the  first definition of an entity,
 | 
						|
silently suppressing any further attempt to redefine the entity.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static int
 | 
						|
process_entity_declaration(dtd_parser *p, const ichar *decl)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  const ichar *s;
 | 
						|
  dtd_symbol *id;
 | 
						|
  dtd_entity *e;
 | 
						|
  int isparam;
 | 
						|
  int isdef = FALSE;
 | 
						|
					/* parameter entity */
 | 
						|
  if ( (s=isee_func(dtd, decl, CF_PERO)) )
 | 
						|
  { isparam = TRUE;
 | 
						|
    decl = s;
 | 
						|
  } else
 | 
						|
    isparam = FALSE;
 | 
						|
 | 
						|
  if ( !(s = itake_entity_name(p, decl, &id)) )
 | 
						|
  { if ( !(s = isee_identifier(dtd, decl, "#default")) )
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | 
						|
    id = dtd_add_symbol(dtd, (ichar*)"#DEFAULT");
 | 
						|
    isdef = TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( isparam && find_pentity(dtd, id) )
 | 
						|
  { gripe(p, ERC_REDEFINED, L"parameter entity", id);
 | 
						|
    return TRUE;			/* already defined parameter entity */
 | 
						|
  }
 | 
						|
  if ( id->entity )
 | 
						|
  { gripe(p, ERC_REDEFINED, L"entity", id);
 | 
						|
    return TRUE;			/* already defined normal entity */
 | 
						|
  }
 | 
						|
 | 
						|
  decl = iskip_layout(dtd, s);
 | 
						|
  e = sgml_calloc(1, sizeof(*e));
 | 
						|
  e->name = id;
 | 
						|
  e->catalog_location = (isparam ? CAT_PENTITY : CAT_ENTITY);
 | 
						|
 | 
						|
  if ( (s = isee_identifier(dtd, decl, "system")) )
 | 
						|
  { e->type = ET_SYSTEM;
 | 
						|
    e->content = EC_SGML;
 | 
						|
    decl = s;
 | 
						|
  } else if ( (s = isee_identifier(dtd, decl, "public")) )
 | 
						|
  { e->type = ET_PUBLIC;
 | 
						|
    e->content = EC_SGML;
 | 
						|
    decl = s;
 | 
						|
  } else
 | 
						|
  { e->type = ET_LITERAL;
 | 
						|
 | 
						|
    if ( !isparam )
 | 
						|
    { if ( (s=isee_identifier(dtd, decl, "cdata")) )
 | 
						|
      { decl = s;
 | 
						|
	e->content = EC_CDATA;
 | 
						|
      } else if ( (s=isee_identifier(dtd, decl, "sdata")) )
 | 
						|
      { decl = s;
 | 
						|
	e->content = EC_SDATA;
 | 
						|
      } else if ( (s=isee_identifier(dtd, decl, "pi")) )
 | 
						|
      { decl = s;
 | 
						|
	e->content = EC_PI;
 | 
						|
      } else if ( (s=isee_identifier(dtd, decl, "starttag")) )
 | 
						|
      { decl = s;
 | 
						|
	e->content = EC_STARTTAG;
 | 
						|
      } else if ( (s=isee_identifier(dtd, decl, "endtag")) )
 | 
						|
      { decl = s;
 | 
						|
	e->content = EC_ENDTAG;
 | 
						|
      } else
 | 
						|
	e->content = EC_SGML;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if ( (decl=process_entity_value_declaration(p, decl, e)) )
 | 
						|
  { if ( e->type == ET_LITERAL )
 | 
						|
    { switch(e->content)
 | 
						|
      { case EC_STARTTAG:
 | 
						|
	{ ichar *buf = sgml_malloc((e->length + 3)*sizeof(ichar));
 | 
						|
 | 
						|
	  buf[0] = dtd->charfunc->func[CF_STAGO];
 | 
						|
	  istrcpy(&buf[1], e->value);
 | 
						|
	  buf[++e->length] = dtd->charfunc->func[CF_STAGC];
 | 
						|
	  buf[++e->length] = 0;
 | 
						|
 | 
						|
	  sgml_free(e->value);
 | 
						|
	  e->value = buf;
 | 
						|
	  e->content = EC_SGML;
 | 
						|
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	case EC_ENDTAG:
 | 
						|
	{ ichar *buf = sgml_malloc((e->length + 4)*sizeof(ichar));
 | 
						|
 | 
						|
	  buf[0] = dtd->charfunc->func[CF_ETAGO1];
 | 
						|
	  buf[1] = dtd->charfunc->func[CF_ETAGO2];
 | 
						|
	  istrcpy(&buf[2], e->value);
 | 
						|
	  e->length++;
 | 
						|
	  buf[++e->length] = dtd->charfunc->func[CF_STAGC];
 | 
						|
	  buf[++e->length] = 0;
 | 
						|
 | 
						|
	  sgml_free(e->value);
 | 
						|
	  e->value = buf;
 | 
						|
	  e->content = EC_SGML;
 | 
						|
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	default:
 | 
						|
	  break;
 | 
						|
      }
 | 
						|
    } else
 | 
						|
    { if ( *decl )
 | 
						|
      { dtd_symbol *nname;
 | 
						|
 | 
						|
	if ( (s=isee_identifier(dtd, decl, "cdata")) )
 | 
						|
	{ decl = s;
 | 
						|
	  e->content = EC_CDATA;
 | 
						|
	} else if ( (s=isee_identifier(dtd, decl, "sdata")) )
 | 
						|
	{ decl = s;
 | 
						|
	  e->content = EC_SDATA;
 | 
						|
	} else if ( (s=isee_identifier(dtd, decl, "ndata")) )
 | 
						|
	{ decl = s;
 | 
						|
	  e->content = EC_NDATA;
 | 
						|
	} else
 | 
						|
	  return gripe(p, ERC_SYNTAX_ERROR, L"Bad datatype declaration", decl);
 | 
						|
 | 
						|
	if ( (s=itake_name(p, decl, &nname)) ) /* what is this? */
 | 
						|
	{ decl = s;
 | 
						|
	} else
 | 
						|
	  return gripe(p, ERC_SYNTAX_ERROR, L"Bad notation declaration", decl);
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    if ( *decl )
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl);
 | 
						|
  }
 | 
						|
 | 
						|
  if ( isparam )
 | 
						|
  { e->next = dtd->pentities;
 | 
						|
    dtd->pentities = e;
 | 
						|
  } else
 | 
						|
  { e->name->entity = e;
 | 
						|
    e->next = dtd->entities;
 | 
						|
    dtd->entities = e;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( isdef )
 | 
						|
    dtd->default_entity = e;
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      NOTATIONS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static dtd_notation *
 | 
						|
find_notation(dtd *dtd, dtd_symbol *name)
 | 
						|
{ dtd_notation *n;
 | 
						|
 | 
						|
  for(n=dtd->notations; n; n = n->next)
 | 
						|
  { if ( n->name == name )
 | 
						|
      return n;
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
add_notation(dtd *dtd, dtd_notation *not)
 | 
						|
{ dtd_notation **n = &dtd->notations;
 | 
						|
 | 
						|
  for( ; *n; n = &(*n)->next)
 | 
						|
    ;
 | 
						|
  *n = not;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
process_notation_declaration(dtd_parser *p, const ichar *decl)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  dtd_symbol *nname;
 | 
						|
  const ichar *s;
 | 
						|
  ichar *system = NULL, *public = NULL;
 | 
						|
  dtd_notation *not;
 | 
						|
 | 
						|
  if ( !(s=itake_name(p, decl, &nname)) )
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"Notation name expected", decl);
 | 
						|
  decl = s;
 | 
						|
 | 
						|
  if ( find_notation(dtd, nname) )
 | 
						|
  { gripe(p, ERC_REDEFINED, L"notation", nname);
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( (s=isee_identifier(dtd, decl, "system")) )
 | 
						|
  { ;
 | 
						|
  } else if ( (s=isee_identifier(dtd, decl, "public")) )
 | 
						|
  { decl = s;
 | 
						|
    if ( !(s=itake_dubbed_string(dtd, decl, &public)) )
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR, L"Public identifier expected", decl);
 | 
						|
  } else
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"SYSTEM or PUBLIC expected", decl);
 | 
						|
 | 
						|
  decl = s;
 | 
						|
  if ( (s=itake_dubbed_string(dtd, decl, &system)) )
 | 
						|
    decl = s;
 | 
						|
 | 
						|
  if ( *decl )
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl);
 | 
						|
 | 
						|
  not = sgml_calloc(1, sizeof(*not));
 | 
						|
  not->name = nname;
 | 
						|
  not->system = system;
 | 
						|
  not->public = public;
 | 
						|
  not->next = NULL;
 | 
						|
  add_notation(dtd, not);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_notations(dtd_notation *n)
 | 
						|
{ dtd_notation *next;
 | 
						|
 | 
						|
  for( ; n; n=next)
 | 
						|
  { next = n->next;
 | 
						|
 | 
						|
    sgml_free(n->system);
 | 
						|
    sgml_free(n->public);
 | 
						|
 | 
						|
    sgml_free(n);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	       SHORTREF		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static void
 | 
						|
free_maps(dtd_map *map)
 | 
						|
{ dtd_map *next;
 | 
						|
 | 
						|
  for( ; map; map=next)
 | 
						|
  { next = map->next;
 | 
						|
    if ( map->from )
 | 
						|
      sgml_free(map->from);
 | 
						|
    sgml_free(map);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_shortrefs(dtd_shortref *sr)
 | 
						|
{ dtd_shortref *next;
 | 
						|
 | 
						|
  for( ; sr; sr=next)
 | 
						|
  { next = sr->next;
 | 
						|
    free_maps(sr->map);
 | 
						|
    sgml_free(sr);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
shortref_add_map(dtd_parser *p, const ichar *decl, dtd_shortref *sr)
 | 
						|
{ ichar *start; int len;
 | 
						|
  ichar from[MAXMAPLEN];
 | 
						|
  ichar *f = from;
 | 
						|
  dtd_symbol *to;
 | 
						|
  const ichar *s;
 | 
						|
  const ichar *end;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
  dtd_map **prev;
 | 
						|
  dtd_map *m;
 | 
						|
 | 
						|
  if ( !(s=itake_string(dtd, decl, &start, &len)) )
 | 
						|
  { gripe(p, ERC_SYNTAX_ERROR, L"map-string expected", decl);
 | 
						|
    return NULL;
 | 
						|
  }
 | 
						|
  decl = s;
 | 
						|
  if ( !(s=itake_entity_name(p, decl, &to)) )
 | 
						|
  { gripe(p, ERC_SYNTAX_ERROR, L"map-to name expected", decl);
 | 
						|
    return NULL;
 | 
						|
  }
 | 
						|
  end = s;
 | 
						|
 | 
						|
  for(decl=start; len > 0;)
 | 
						|
  { if ( *decl == 'B' )		/* blank */
 | 
						|
    { if ( decl[1] == 'B' )
 | 
						|
      { *f++ = CHR_DBLANK;
 | 
						|
	decl += 2;
 | 
						|
	len -= 2;
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      *f++ = CHR_BLANK;
 | 
						|
      decl++;
 | 
						|
      len--;
 | 
						|
    } else
 | 
						|
    { *f++ = *decl++;			/* any other character */
 | 
						|
      len--;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  *f = 0;
 | 
						|
 | 
						|
  for(prev=&sr->map; *prev; prev = &(*prev)->next)
 | 
						|
    ;
 | 
						|
 | 
						|
  m = sgml_calloc(1, sizeof(*m));
 | 
						|
  m->from = istrdup(from);
 | 
						|
  m->len  = (int)istrlen(from);
 | 
						|
  m->to   = to;
 | 
						|
 | 
						|
  *prev = m;
 | 
						|
 | 
						|
  return end;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static dtd_shortref *
 | 
						|
def_shortref(dtd_parser *p, dtd_symbol *name)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  dtd_shortref *sr, **pr;
 | 
						|
 | 
						|
  for(pr=&dtd->shortrefs; *pr; pr = &(*pr)->next)
 | 
						|
  { dtd_shortref *r = *pr;
 | 
						|
 | 
						|
    if ( r->name == name )
 | 
						|
      return r;
 | 
						|
  }
 | 
						|
 | 
						|
  sr = sgml_calloc(1, sizeof(*sr));
 | 
						|
  sr->name = name;
 | 
						|
  *pr = sr;
 | 
						|
 | 
						|
  return sr;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Create an array with TRUE in any character   that can be the last of the
 | 
						|
shortref map.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static void
 | 
						|
compile_map(dtd *dtd, dtd_shortref *sr)
 | 
						|
{ dtd_map *map;
 | 
						|
 | 
						|
  for(map = sr->map; map; map = map->next)
 | 
						|
  { ichar last = map->from[map->len-1];
 | 
						|
 | 
						|
    switch( last )
 | 
						|
    { case CHR_BLANK:
 | 
						|
      case CHR_DBLANK:
 | 
						|
      { wint_t i;
 | 
						|
 | 
						|
	for( i=0; i< SHORTMAP_SIZE; i++)
 | 
						|
	{ if ( HasClass(dtd, i, CH_BLANK) )
 | 
						|
	    sr->ends[i] = TRUE;
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      default:
 | 
						|
	sr->ends[last] = TRUE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_shortref_declaration(dtd_parser *p, const ichar *decl)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  ichar buf[MAXDECL];
 | 
						|
  dtd_shortref *sr;
 | 
						|
  dtd_symbol *name;
 | 
						|
  const ichar *s;
 | 
						|
 | 
						|
  if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
 | 
						|
    return FALSE;
 | 
						|
  decl = buf;
 | 
						|
 | 
						|
  if ( !(s=itake_name(p, decl, &name)) )
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | 
						|
  decl = s;
 | 
						|
 | 
						|
  sr = def_shortref(p, name);
 | 
						|
  if ( sr->defined )
 | 
						|
  { gripe(p, ERC_REDEFINED, L"shortref", name);
 | 
						|
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  sr->defined = TRUE;
 | 
						|
 | 
						|
  while( *(decl = iskip_layout(dtd, decl)) != '\0'
 | 
						|
	 && (s=shortref_add_map(p, decl, sr)) )
 | 
						|
    decl = s;
 | 
						|
  compile_map(dtd, sr);
 | 
						|
 | 
						|
  if ( *decl )
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"Map expected", decl);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Find the named map.  The name NULL stands for the #empty map
 | 
						|
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static dtd_shortref *
 | 
						|
find_map(dtd *dtd, dtd_symbol *name)
 | 
						|
{ dtd_shortref *sr;
 | 
						|
 | 
						|
  if ( !name )
 | 
						|
  { static dtd_shortref *empty;
 | 
						|
 | 
						|
    if ( !empty )
 | 
						|
    { empty = sgml_calloc(1, sizeof(*empty));
 | 
						|
      empty->name = dtd_add_symbol(dtd, (ichar*)"#EMPTY");
 | 
						|
      empty->defined = TRUE;
 | 
						|
    }
 | 
						|
 | 
						|
    return empty;
 | 
						|
  }
 | 
						|
 | 
						|
  for( sr = dtd->shortrefs; sr; sr = sr->next )
 | 
						|
  { if ( sr->name == name )
 | 
						|
    { if ( !sr->defined )
 | 
						|
	break;
 | 
						|
 | 
						|
      return sr;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
set_map_element(dtd_element *e, void *closure)
 | 
						|
{ e->map = closure;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_usemap_declaration(dtd_parser *p, const ichar *decl)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  ichar buf[MAXDECL];
 | 
						|
  dtd_symbol *name;
 | 
						|
  const ichar *s;
 | 
						|
  dtd_symbol *ename;
 | 
						|
  dtd_element *e;
 | 
						|
  dtd_shortref *map;
 | 
						|
 | 
						|
  if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
 | 
						|
    return FALSE;
 | 
						|
  decl = buf;
 | 
						|
 | 
						|
  if ( !(s=itake_name(p, decl, &name)) )
 | 
						|
  { if ( (s=isee_identifier(dtd, decl, "#empty")) )
 | 
						|
      name = NULL;
 | 
						|
    else
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR, L"map-name expected", decl);
 | 
						|
  }
 | 
						|
 | 
						|
  decl = s;
 | 
						|
  if ( !(map = find_map(dtd, name)) )
 | 
						|
    map = def_shortref(p, name);	/* make undefined map */
 | 
						|
 | 
						|
  if ( isee_func(dtd, decl, CF_GRPO) )	/* ( */
 | 
						|
  { dtd_model *model;
 | 
						|
 | 
						|
    if ( (model = make_model(p, decl, &s)) )
 | 
						|
    { for_elements_in_model(model, set_map_element, map);
 | 
						|
      free_model(model);
 | 
						|
      decl = s;
 | 
						|
    } else
 | 
						|
      return FALSE;
 | 
						|
  } else if ( (s=itake_name(p, decl, &ename)) )
 | 
						|
  { e = find_element(dtd, ename);
 | 
						|
    e->map = map;
 | 
						|
    decl = s;
 | 
						|
  } else if ( p->environments )
 | 
						|
  { if ( !map->defined )
 | 
						|
      gripe(p, ERC_EXISTENCE, L"map", name->name);
 | 
						|
 | 
						|
    p->environments->map = map;
 | 
						|
    p->map = p->environments->map;
 | 
						|
  } else
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"element-name expected", decl);
 | 
						|
 | 
						|
  if ( *decl )
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"Unparsed", decl);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
match_map(dtd *dtd, dtd_map *map, ocharbuf *buf)
 | 
						|
{ wchar_t *data = buf->data.w;
 | 
						|
  wchar_t *e    = data+buf->size-1;
 | 
						|
  ichar *m      = map->from+map->len-1;
 | 
						|
 | 
						|
  while( m >= map->from )
 | 
						|
  { if ( e < data )
 | 
						|
      return 0;
 | 
						|
 | 
						|
    if ( *m == *e )
 | 
						|
    { m--;
 | 
						|
      e--;
 | 
						|
      continue;
 | 
						|
    }
 | 
						|
    if ( *m == CHR_DBLANK )
 | 
						|
    { if ( e>data && HasClass(dtd, *e, CH_WHITE) )
 | 
						|
	e--;
 | 
						|
      else
 | 
						|
	return FALSE;
 | 
						|
      goto wblank;
 | 
						|
    }
 | 
						|
    if ( *m == CHR_BLANK )
 | 
						|
    { wblank:
 | 
						|
      while( e>data && HasClass(dtd, *e, CH_WHITE) )
 | 
						|
	e--;
 | 
						|
      m--;
 | 
						|
      continue;
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
  }
 | 
						|
 | 
						|
  return (int)(data+buf->size-1-e);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
match_shortref(dtd_parser *p)
 | 
						|
{ dtd_map *map;
 | 
						|
 | 
						|
  for(map = p->map->map; map; map = map->next)
 | 
						|
  { int len;
 | 
						|
 | 
						|
    if ( (len=match_map(p->dtd, map, p->cdata)) )
 | 
						|
    { p->cdata->size -= len;
 | 
						|
 | 
						|
      if ( p->cdata_must_be_empty )
 | 
						|
      { int blank = TRUE;
 | 
						|
	const wchar_t *s;
 | 
						|
	int i;
 | 
						|
 | 
						|
	for(s = p->cdata->data.w, i=0; i++ < p->cdata->size; s++)
 | 
						|
	{ if ( !iswspace(*s) )
 | 
						|
	  { blank = FALSE;
 | 
						|
	    break;
 | 
						|
	  }
 | 
						|
	}
 | 
						|
 | 
						|
	p->blank_cdata = blank;
 | 
						|
      }
 | 
						|
 | 
						|
      WITH_CLASS(p, EV_SHORTREF,
 | 
						|
		 { sgml_cplocation(&p->startloc, &p->location);
 | 
						|
		   p->startloc.charpos -= len;
 | 
						|
		   p->startloc.linepos -= len;
 | 
						|
		   if ( p->startloc.linepos < 0 )
 | 
						|
		   { p->startloc.line--;
 | 
						|
		     p->startloc.linepos = 0; /* not correct! */
 | 
						|
		   }
 | 
						|
		   DEBUG(printf("%d-%d: Matched map '%s' --> %s, len = %d\n",
 | 
						|
				p->startloc.charpos,
 | 
						|
				p->location.charpos,
 | 
						|
				map->from, map->to->name, len));
 | 
						|
 | 
						|
		   process_entity(p, map->to->name);
 | 
						|
		 })			/* TBD: optimise */
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	       ELEMENTS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static void
 | 
						|
add_submodel(dtd_model *m, dtd_model *sub)
 | 
						|
{ dtd_model **d;
 | 
						|
 | 
						|
  for( d = &m->content.group; *d; d = &(*d)->next )
 | 
						|
    ;
 | 
						|
  *d = sub;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* for_elements_in_model()
 | 
						|
   Walk along the model, calling f(e, closure) for any element found
 | 
						|
   in the model.  Used for <!SHORTREF name model>
 | 
						|
*/
 | 
						|
 | 
						|
static void
 | 
						|
for_elements_in_model(dtd_model *m,
 | 
						|
		      void (*f)(dtd_element *e, void *closure),
 | 
						|
		      void *closure)
 | 
						|
{ switch(m->type)
 | 
						|
  { case MT_SEQ:
 | 
						|
    case MT_AND:
 | 
						|
    case MT_OR:
 | 
						|
    { dtd_model *sub = m->content.group;
 | 
						|
 | 
						|
      for(; sub; sub = sub->next)
 | 
						|
	for_elements_in_model(sub, f, closure);
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case MT_ELEMENT:
 | 
						|
      (*f)(m->content.element, closure);
 | 
						|
      break;
 | 
						|
    default:
 | 
						|
      ;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_model(dtd_model *m)
 | 
						|
{ switch(m->type)
 | 
						|
  { case MT_SEQ:
 | 
						|
    case MT_AND:
 | 
						|
    case MT_OR:
 | 
						|
    { dtd_model *sub = m->content.group;
 | 
						|
      dtd_model *next;
 | 
						|
 | 
						|
      for(; sub; sub = next)
 | 
						|
      { next = sub->next;
 | 
						|
 | 
						|
	free_model(sub);
 | 
						|
      }
 | 
						|
    }
 | 
						|
    default:
 | 
						|
      ;
 | 
						|
  }
 | 
						|
 | 
						|
  sgml_free(m);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static dtd_model *
 | 
						|
make_model(dtd_parser *p, const ichar *decl, const ichar **end)
 | 
						|
{ const ichar *s;
 | 
						|
  dtd_model *m = sgml_calloc(1, sizeof(*m));
 | 
						|
  dtd_symbol *id;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  decl = iskip_layout(dtd, decl);
 | 
						|
 | 
						|
  if ( (s=isee_identifier(dtd, decl, "#pcdata")) )
 | 
						|
  { m->type = MT_PCDATA;
 | 
						|
    m->cardinality = MC_ONE;		/* actually don't care */
 | 
						|
    *end = s;
 | 
						|
    return m;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( (s=itake_name(p, decl, &id)) )
 | 
						|
  { m->type = MT_ELEMENT;
 | 
						|
    m->content.element = find_element(dtd, id);
 | 
						|
    decl = s;
 | 
						|
  } else
 | 
						|
  { if ( !(s=isee_func(dtd, decl, CF_GRPO)) )
 | 
						|
    { gripe(p, ERC_SYNTAX_ERROR, L"Name group expected", decl);
 | 
						|
      free_model(m);
 | 
						|
      return NULL;
 | 
						|
    }
 | 
						|
    decl = s;
 | 
						|
 | 
						|
    for(;;)
 | 
						|
    { dtd_model *sub;
 | 
						|
      modeltype mt;
 | 
						|
 | 
						|
      if ( !(sub = make_model(p, decl, &s)) )
 | 
						|
      { free_model(sub);
 | 
						|
	return NULL;
 | 
						|
      }
 | 
						|
      decl = s;
 | 
						|
      add_submodel(m, sub);
 | 
						|
 | 
						|
      if ( (s = isee_func(dtd, decl, CF_OR)) )
 | 
						|
      { decl = s;
 | 
						|
	mt = MT_OR;
 | 
						|
      } else if ( (s = isee_func(dtd, decl, CF_SEQ)) )
 | 
						|
      { decl = s;
 | 
						|
	mt = MT_SEQ;
 | 
						|
      } else if ( (s = isee_func(dtd, decl, CF_AND)) )
 | 
						|
      { decl = s;
 | 
						|
	mt = MT_AND;
 | 
						|
      } else if ( (s = isee_func(dtd, decl, CF_GRPC)) )
 | 
						|
      { decl = s;
 | 
						|
	break;
 | 
						|
      } else
 | 
						|
      { gripe(p, ERC_SYNTAX_ERROR, L"Connector ('|', ',' or '&') expected", decl);
 | 
						|
	free_model(m);
 | 
						|
	return NULL;
 | 
						|
      }
 | 
						|
      decl = iskip_layout(dtd, decl);
 | 
						|
 | 
						|
      if ( m->type != mt )
 | 
						|
      { if ( !m->type )
 | 
						|
	  m->type = mt;
 | 
						|
	else
 | 
						|
	{ gripe(p, ERC_SYNTAX_ERROR, L"Different connector types in model", decl);
 | 
						|
	  free_model(m);
 | 
						|
	  return NULL;
 | 
						|
	}
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if ( (s = isee_func(dtd, decl, CF_OPT)) )
 | 
						|
  { decl = s;
 | 
						|
    m->cardinality = MC_OPT;
 | 
						|
  } else if ( (s=isee_func(dtd, decl, CF_REP)) )
 | 
						|
  { decl = s;
 | 
						|
    m->cardinality = MC_REP;
 | 
						|
  } else if ( (s=isee_func(dtd, decl, CF_PLUS)) )
 | 
						|
  {					/* ROK: watch out for (x) +(y) */
 | 
						|
    if ( isee_func(dtd, iskip_layout(dtd, s), CF_GRPO) == NULL )
 | 
						|
    { decl = s;
 | 
						|
      m->cardinality = MC_PLUS;
 | 
						|
    }
 | 
						|
  } else
 | 
						|
    m->cardinality = MC_ONE;
 | 
						|
 | 
						|
  if ( m->type == MT_UNDEF )		/* simplify (e+), etc. */
 | 
						|
  { dtd_model *sub = m->content.group;
 | 
						|
    modelcard card;
 | 
						|
 | 
						|
    assert(!sub->next);
 | 
						|
    if ( sub->cardinality == MC_ONE )
 | 
						|
      card = m->cardinality;
 | 
						|
    else if ( m->cardinality == MC_ONE )
 | 
						|
      card = sub->cardinality;
 | 
						|
    else
 | 
						|
    { m->type = MT_OR;
 | 
						|
      goto out;
 | 
						|
    }
 | 
						|
 | 
						|
    *m = *sub;
 | 
						|
    m->cardinality = card;
 | 
						|
    sgml_free(sub);
 | 
						|
  }
 | 
						|
 | 
						|
out:
 | 
						|
  *end = iskip_layout(dtd, decl);
 | 
						|
  return m;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
process_model(dtd_parser *p, dtd_edef *e, const ichar *decl)
 | 
						|
{ const ichar *s;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  decl = iskip_layout(dtd, decl);
 | 
						|
  if ( (s = isee_identifier(dtd, decl, "empty")) )
 | 
						|
  { e->type = C_EMPTY;
 | 
						|
    return s;
 | 
						|
  }
 | 
						|
  if ( (s = isee_identifier(dtd, decl, "cdata")) )
 | 
						|
  { e->type = C_CDATA;
 | 
						|
    return s;
 | 
						|
  }
 | 
						|
  if ( (s = isee_identifier(dtd, decl, "rcdata")) )
 | 
						|
  { e->type = C_RCDATA;
 | 
						|
    return s;
 | 
						|
  }
 | 
						|
  if ( (s = isee_identifier(dtd, decl, "any")) )
 | 
						|
  { e->type = C_ANY;
 | 
						|
    return s;
 | 
						|
  }
 | 
						|
 | 
						|
  e->type = C_PCDATA;
 | 
						|
  if ( !(e->content = make_model(p, decl, &decl)) )
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
  return decl;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
See a name-group separator.  As long as we haven't decided, this can be
 | 
						|
CF_NG.  If we have decided they must all be the same.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static const ichar *
 | 
						|
isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep)
 | 
						|
{ const ichar *s;
 | 
						|
 | 
						|
  if ( (s=isee_func(dtd, decl, *sep)) )
 | 
						|
    return iskip_layout(dtd, s);
 | 
						|
  if ( *sep == CF_NG )			/* undecided */
 | 
						|
  { static const charfunc ng[] = { CF_SEQ, CF_OR, CF_AND };
 | 
						|
    int n;
 | 
						|
 | 
						|
    for(n=0; n<3; n++)
 | 
						|
    { if ( (s=isee_func(dtd, decl, ng[n])) )
 | 
						|
      { *sep = ng[n];
 | 
						|
        return iskip_layout(dtd, s);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_namegroup(dtd_parser *p, const ichar *decl,
 | 
						|
		dtd_symbol **names, int *n)
 | 
						|
{ const ichar *s;
 | 
						|
  int en = 0;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  if ( (s=isee_func(dtd, decl, CF_GRPO)) )
 | 
						|
  { charfunc ngs = CF_NG;
 | 
						|
 | 
						|
    for(;;)
 | 
						|
    { if ( !(decl=itake_name(p, s, &names[en++])) )
 | 
						|
      { gripe(p, ERC_SYNTAX_ERROR, L"Name expected", s);
 | 
						|
	return NULL;
 | 
						|
      }
 | 
						|
      if ( (s=isee_ngsep(dtd, decl, &ngs)) )
 | 
						|
      { decl = iskip_layout(dtd, s);
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
      if ( (s=isee_func(dtd, decl, CF_GRPC)) )
 | 
						|
      { *n = en;
 | 
						|
        decl = s;
 | 
						|
	return iskip_layout(dtd, decl);
 | 
						|
      }
 | 
						|
      gripe(p, ERC_SYNTAX_ERROR, L"Bad name-group", decl);
 | 
						|
      return NULL;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
typedef struct
 | 
						|
{ dtd_symbol **list;
 | 
						|
  int size;
 | 
						|
} namelist;
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
add_list_element(dtd_element *e, void *closure)
 | 
						|
{ namelist *nl = closure;
 | 
						|
 | 
						|
  nl->list[nl->size++] = e->name;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
itake_el_or_model_element_list(dtd_parser *p,
 | 
						|
			       const ichar *decl, dtd_symbol **names, int *n)
 | 
						|
{ const ichar *s;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  if ( isee_func(dtd, decl, CF_GRPO) )
 | 
						|
  { dtd_model *model;
 | 
						|
 | 
						|
    if ( (model = make_model(p, decl, &s)) )
 | 
						|
    { namelist nl;
 | 
						|
 | 
						|
      nl.list = names;
 | 
						|
      nl.size = 0;
 | 
						|
      for_elements_in_model(model, add_list_element, &nl);
 | 
						|
      free_model(model);
 | 
						|
 | 
						|
      *n = nl.size;
 | 
						|
      return s;
 | 
						|
    } else
 | 
						|
      return NULL;
 | 
						|
  } else
 | 
						|
  { if ( !(s = itake_name(p, decl, &names[0])) )
 | 
						|
    { gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | 
						|
      return NULL;
 | 
						|
    }
 | 
						|
    *n = 1;
 | 
						|
    return s;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
add_element_list(dtd_element_list **l, dtd_element *e)
 | 
						|
{ dtd_element_list *n = sgml_calloc(1, sizeof(*n));
 | 
						|
 | 
						|
  n->value = e;
 | 
						|
 | 
						|
  for( ; *l; l = &(*l)->next )
 | 
						|
    ;
 | 
						|
  *l = n;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_element_declaraction(dtd_parser *p, const ichar *decl)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  ichar buf[MAXDECL];
 | 
						|
  const ichar *s;
 | 
						|
  dtd_symbol *eid[MAXATTELEM];
 | 
						|
  dtd_edef *def;
 | 
						|
  int en;
 | 
						|
  int i;
 | 
						|
 | 
						|
					/* expand parameter entities */
 | 
						|
  if ( !expand_pentities(p, decl, ZERO_TERM_LEN,
 | 
						|
			 buf, sizeof(buf)/sizeof(ichar)) )
 | 
						|
    return FALSE;
 | 
						|
  decl = buf;
 | 
						|
 | 
						|
  if ( !(s=itake_el_or_model_element_list(p, decl, eid, &en)) )
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"Name or name-group expected", decl);
 | 
						|
  decl = s;
 | 
						|
  if ( en == 0 )
 | 
						|
    return TRUE;			/* 0 elements */
 | 
						|
 | 
						|
  STAT(edefs_decl++);
 | 
						|
  def = new_element_definition(dtd);
 | 
						|
  for(i=0; i<en; i++)
 | 
						|
  { find_element(dtd, eid[i]);
 | 
						|
    assert(eid[i]->element->structure == NULL);
 | 
						|
    eid[i]->element->structure = def;
 | 
						|
    eid[i]->element->undefined = FALSE;
 | 
						|
  }
 | 
						|
  def->references = en;			/* for GC */
 | 
						|
 | 
						|
					/* omitted tag declarations (opt) */
 | 
						|
  if ( (s = isee_identifier(dtd, decl, "-")) )
 | 
						|
  { def->omit_close = FALSE;
 | 
						|
    goto seeclose;
 | 
						|
  } else if ( (s = isee_identifier(dtd, decl, "o")) )
 | 
						|
  { def->omit_open = TRUE;
 | 
						|
 | 
						|
  seeclose:
 | 
						|
    decl = s;
 | 
						|
    if ( (s = isee_identifier(dtd, decl, "-")) )
 | 
						|
    { def->omit_close = FALSE;
 | 
						|
    } else if ( (s = isee_identifier(dtd, decl, "o")) )
 | 
						|
    { for(i=0; i<en; i++)
 | 
						|
	def->omit_close = TRUE;
 | 
						|
    } else
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR, L"Bad omit-tag declaration", decl);
 | 
						|
 | 
						|
    decl = s;
 | 
						|
  }
 | 
						|
 | 
						|
					/* content model */
 | 
						|
  if ( !(decl=process_model(p, def, decl)) )
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
					/* in/excluded elements */
 | 
						|
  if ( decl[0] == '-' || decl[0] == '+' )
 | 
						|
  { dtd_symbol *ng[MAXNAMEGROUP];
 | 
						|
    int ns;
 | 
						|
    dtd_element_list **l;
 | 
						|
 | 
						|
    if ( decl[0] == '-' )
 | 
						|
      l = &def->excluded;
 | 
						|
    else
 | 
						|
      l = &def->included;
 | 
						|
 | 
						|
    decl++;
 | 
						|
    if ( (s=itake_namegroup(p, decl, ng, &ns)) )
 | 
						|
    { int i;
 | 
						|
 | 
						|
      decl = s;
 | 
						|
 | 
						|
      for(i=0; i<ns; i++)
 | 
						|
	add_element_list(l, find_element(dtd, ng[i]));
 | 
						|
    } else
 | 
						|
    { return gripe(p, ERC_SYNTAX_ERROR, L"Name group expected", decl);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if (*decl)
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaration", decl);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
add_name_list(dtd_name_list **nl, dtd_symbol *s)
 | 
						|
{ dtd_name_list *n = sgml_calloc(1, sizeof(*n));
 | 
						|
 | 
						|
  n->value = s;
 | 
						|
 | 
						|
  for( ; *nl; nl = &(*nl)->next )
 | 
						|
    ;
 | 
						|
 | 
						|
  *nl = n;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
set_element_properties(dtd_element *e, dtd_attr *a)
 | 
						|
{ if ( istreq(a->name->name, L"xml:space") )
 | 
						|
  { switch(a->def)
 | 
						|
    { case AT_FIXED:
 | 
						|
      case AT_DEFAULT:
 | 
						|
	break;
 | 
						|
      default:
 | 
						|
	return;
 | 
						|
    }
 | 
						|
 | 
						|
    switch (a->type )
 | 
						|
    { case AT_NAMEOF:
 | 
						|
      case AT_NAME:
 | 
						|
      case AT_NMTOKEN:
 | 
						|
	e->space_mode = istr_to_space_mode(a->att_def.name->name);
 | 
						|
	break;
 | 
						|
      case AT_CDATA:
 | 
						|
	e->space_mode = istr_to_space_mode((ichar *)a->att_def.cdata);
 | 
						|
	break;
 | 
						|
      default:
 | 
						|
	break;
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
add_attribute(dtd_parser *p, dtd_element *e, dtd_attr *a)
 | 
						|
{ dtd_attr_list **l;
 | 
						|
  dtd_attr_list *n;
 | 
						|
 | 
						|
  for(l = &e->attributes; *l; l = &(*l)->next)
 | 
						|
  { if ( (*l)->attribute->name == a->name )
 | 
						|
    { gripe(p, ERC_REDEFINED, L"attribute", a->name);
 | 
						|
      a->references++;			/* attempt to redefine attribute: */
 | 
						|
      free_attribute(a);		/* first wins according to standard */
 | 
						|
 | 
						|
      return;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  n = sgml_calloc(1, sizeof(*n));
 | 
						|
 | 
						|
  n->attribute = a;
 | 
						|
  a->references++;
 | 
						|
  *l = n;
 | 
						|
  set_element_properties(e, a);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_attlist_declaraction(dtd_parser *p, const ichar *decl)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  dtd_symbol *eid[MAXATTELEM];
 | 
						|
  int i, en;
 | 
						|
  ichar buf[MAXDECL];
 | 
						|
  const ichar *s;
 | 
						|
 | 
						|
					/* expand parameter entities */
 | 
						|
  if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
 | 
						|
    return FALSE;
 | 
						|
  decl = iskip_layout(dtd, buf);
 | 
						|
  DEBUG(printf("Expanded to %s\n", decl));
 | 
						|
 | 
						|
  if ( !(decl=itake_el_or_model_element_list(p, decl, eid, &en)) )
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
					/* fetch attributes */
 | 
						|
  while(*decl)
 | 
						|
  { dtd_attr *at = sgml_calloc(1, sizeof(*at));
 | 
						|
    at->references = REFS_VIRGIN;
 | 
						|
 | 
						|
					/* name of attribute */
 | 
						|
    if ( !(s = itake_name(p, decl, &at->name)) )
 | 
						|
    { free_attribute(at);
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | 
						|
    }
 | 
						|
    decl = s;
 | 
						|
 | 
						|
					/* (name1|name2|...) type */
 | 
						|
    if ( (s=isee_func(dtd, decl, CF_GRPO)) )
 | 
						|
    { charfunc ngs = CF_NG;
 | 
						|
 | 
						|
      at->type = AT_NAMEOF;
 | 
						|
      decl=s;
 | 
						|
 | 
						|
      for(;;)
 | 
						|
      { dtd_symbol *nm;
 | 
						|
 | 
						|
	if ( !(s = itake_nmtoken(p, decl, &nm)) )
 | 
						|
	{ free_attribute(at);
 | 
						|
	  return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | 
						|
	}
 | 
						|
	decl = s;
 | 
						|
	add_name_list(&at->typeex.nameof, nm);
 | 
						|
	if ( (s=isee_ngsep(dtd, decl, &ngs)) )
 | 
						|
	{ decl = s;
 | 
						|
	  continue;
 | 
						|
	}
 | 
						|
	if ( (s = isee_func(dtd, decl, CF_GRPC)) )
 | 
						|
	{ decl=s;
 | 
						|
	  decl = iskip_layout(dtd, decl);
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	free_attribute(at);
 | 
						|
	return gripe(p, ERC_SYNTAX_ERROR, L"Illegal name-group", decl);
 | 
						|
      }
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "cdata")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_CDATA;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "entity")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_ENTITY;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "entities")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_ENTITIES;
 | 
						|
      at->islist = TRUE;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "id")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_ID;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "idref")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_IDREF;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "idrefs")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_IDREFS;
 | 
						|
      at->islist = TRUE;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "name")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_NAME;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "names")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_NAMES;
 | 
						|
      at->islist = TRUE;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "nmtoken")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_NMTOKEN;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "nmtokens")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_NMTOKENS;
 | 
						|
      at->islist = TRUE;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "number")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_NUMBER;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "numbers")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_NUMBERS;
 | 
						|
      at->islist = TRUE;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "nutoken")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_NUTOKEN;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "nutokens")) )
 | 
						|
    { decl = s;
 | 
						|
      at->type = AT_NUTOKENS;
 | 
						|
      at->islist = TRUE;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "notation")) )
 | 
						|
    { dtd_symbol *ng[MAXNAMEGROUP];
 | 
						|
      int ns;
 | 
						|
 | 
						|
      at->type = AT_NOTATION;
 | 
						|
      decl=s;
 | 
						|
      if ( (s=itake_namegroup(p, decl, ng, &ns)) )
 | 
						|
      { decl = s;
 | 
						|
 | 
						|
	for(i=0; i<ns; i++)
 | 
						|
	  add_name_list(&at->typeex.nameof, ng[i]);
 | 
						|
      } else
 | 
						|
      { free_attribute(at);
 | 
						|
	return gripe(p, ERC_SYNTAX_ERROR, L"name-group expected", decl);
 | 
						|
      }
 | 
						|
    } else
 | 
						|
    { free_attribute(at);
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR, L"Attribute-type expected", decl);
 | 
						|
    }
 | 
						|
 | 
						|
					/* Attribute Defaults */
 | 
						|
    if ( (s=isee_identifier(dtd, decl, "#fixed")) )
 | 
						|
    { decl = s;
 | 
						|
      at->def = AT_FIXED;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "#required")) )
 | 
						|
    { decl = s;
 | 
						|
      at->def = AT_REQUIRED;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "#current")) )
 | 
						|
    { decl = s;
 | 
						|
      at->def = AT_CURRENT;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "#conref")) )
 | 
						|
    { decl = s;
 | 
						|
      at->def = AT_CONREF;
 | 
						|
    } else if ( (s=isee_identifier(dtd, decl, "#implied")) )
 | 
						|
    { decl = s;
 | 
						|
      at->def = AT_IMPLIED;
 | 
						|
    } else				/* real default */
 | 
						|
      at->def = AT_DEFAULT;
 | 
						|
 | 
						|
    if ( at->def == AT_DEFAULT || at->def == AT_FIXED )
 | 
						|
    { ichar buf[MAXSTRINGLEN];
 | 
						|
      ichar *start; int len;
 | 
						|
      const ichar *end;
 | 
						|
 | 
						|
      if ( !(end=itake_string(dtd, decl, &start, &len)) )
 | 
						|
      { end=itake_nmtoken_chars(p, decl, buf, sizeof(buf)/sizeof(ichar));
 | 
						|
	start = buf;
 | 
						|
	len = (int)istrlen(buf);
 | 
						|
      }
 | 
						|
      if ( !end )
 | 
						|
	return gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute default", decl);
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Note: itake_name(), etc. work on nul-terminated   strings. The result of
 | 
						|
itake_string() is a  pointer  in  a   nul-terminated  string  and  these
 | 
						|
functions will stop scanning at the  quote   anyway,  so  we can use the
 | 
						|
length of the parsed data to verify we parsed all of it.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
      switch(at->type)
 | 
						|
      { case AT_CDATA:
 | 
						|
	{ at->att_def.cdata = istrndup(start, len);
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	case AT_ENTITY:
 | 
						|
	case AT_NOTATION:
 | 
						|
	case AT_NAME:
 | 
						|
	{ if ( !(s=itake_name(p, start, &at->att_def.name)) ||
 | 
						|
	       (s-start) != len )
 | 
						|
	    return gripe(p, ERC_DOMAIN, L"name", decl);
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	case AT_NMTOKEN:
 | 
						|
	case AT_NAMEOF:
 | 
						|
	{ if ( !(s=itake_nmtoken(p, start, &at->att_def.name)) ||
 | 
						|
	       (s-start) != len )
 | 
						|
	    return gripe(p, ERC_DOMAIN, L"nmtoken", decl);
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	case AT_NUTOKEN:
 | 
						|
	{ if ( !(s=itake_nutoken(p, start, &at->att_def.name)) ||
 | 
						|
	       (s-start) != len )
 | 
						|
	    return gripe(p, ERC_DOMAIN, L"nutoken", decl);
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	case AT_NUMBER:
 | 
						|
	{ if ( !(s=itake_number(p, start, at)) ||
 | 
						|
	       (s-start) != len )
 | 
						|
	     return gripe(p, ERC_DOMAIN, L"number", decl);
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	case AT_NAMES:
 | 
						|
	case AT_ENTITIES:
 | 
						|
	case AT_IDREFS:
 | 
						|
	case AT_NMTOKENS:
 | 
						|
	case AT_NUMBERS:
 | 
						|
	case AT_NUTOKENS:
 | 
						|
	{ at->att_def.list = istrndup(buf, len);
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
	default:
 | 
						|
	{ free_attribute(at);
 | 
						|
	  return gripe(p, ERC_REPRESENTATION, L"No default for type");
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      decl = end;
 | 
						|
    }
 | 
						|
 | 
						|
					/* add to list */
 | 
						|
    at->references = 0;
 | 
						|
    for(i=0; i<en; i++)
 | 
						|
    { dtd_element *e = def_element(dtd, eid[i]);
 | 
						|
 | 
						|
      add_attribute(p, e, at);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *    GENERIC TAG PROCESSING	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
/* Result of in_or_excluded(): how an element relates to the inclusion
   and exclusion lists of the open elements.
*/

typedef enum
{ IE_NORMAL,                            /* on neither list */
  IE_INCLUDED,                          /* is included */
  IE_EXCLUDED                           /* is excluded */
} includetype;
 | 
						|
 | 
						|
 | 
						|
static includetype
 | 
						|
in_or_excluded(sgml_environment *env, dtd_element *e)
 | 
						|
{ for(; env; env=env->parent)
 | 
						|
  { if ( env->element->structure )
 | 
						|
    { dtd_edef *def = env->element->structure;
 | 
						|
      dtd_element_list *el;
 | 
						|
 | 
						|
      for(el=def->excluded; el; el=el->next)
 | 
						|
      { if ( el->value == e )
 | 
						|
	  return IE_EXCLUDED;
 | 
						|
      }
 | 
						|
      for(el=def->included; el; el=el->next)
 | 
						|
      { if ( el->value == e )
 | 
						|
	  return IE_INCLUDED;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return IE_NORMAL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
complete(sgml_environment *env)
 | 
						|
{ if ( env->element->structure &&
 | 
						|
       !env->element->undefined &&
 | 
						|
       env->element->structure->type != C_ANY )
 | 
						|
  { dtd_edef *def = env->element->structure;
 | 
						|
 | 
						|
    if ( !same_state(def->final_state, env->state) )
 | 
						|
      return FALSE;
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
validate_completeness(dtd_parser *p, sgml_environment *env)
 | 
						|
{ if ( !complete(env) )
 | 
						|
  { wchar_t buf[MAXNMLEN+50];
 | 
						|
 | 
						|
    swprintf(buf, MAXNMLEN+50, L"Incomplete element: <%s>",
 | 
						|
	     env->element->name->name);
 | 
						|
 | 
						|
    gripe(p, ERC_VALIDATE, buf);		/* TBD: expected */
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static sgml_environment *
 | 
						|
push_element(dtd_parser *p, dtd_element *e, int callback)
 | 
						|
{ if ( e != CDATA_ELEMENT )
 | 
						|
  { sgml_environment *env = sgml_calloc(1, sizeof(*env));
 | 
						|
 | 
						|
    emit_cdata(p, FALSE);
 | 
						|
 | 
						|
    env->element = e;
 | 
						|
    env->state = make_state_engine(e);
 | 
						|
    env->space_mode = (p->environments ? p->environments->space_mode
 | 
						|
				       : p->dtd->space_mode);
 | 
						|
    env->parent = p->environments;
 | 
						|
    p->environments = env;
 | 
						|
 | 
						|
    if ( p->dtd->shorttag )
 | 
						|
    { env->saved_waiting_for_net = p->waiting_for_net;
 | 
						|
 | 
						|
      if ( p->event_class == EV_SHORTTAG )
 | 
						|
      { p->waiting_for_net = TRUE;
 | 
						|
	env->wants_net = TRUE;
 | 
						|
      } else
 | 
						|
      { env->wants_net = FALSE;
 | 
						|
	if ( e->structure && e->structure->omit_close == FALSE )
 | 
						|
	  p->waiting_for_net = FALSE;
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    if ( e->map )
 | 
						|
      p->map = env->map = e->map;
 | 
						|
    else if ( env->parent )
 | 
						|
      p->map = env->map = env->parent->map;
 | 
						|
 | 
						|
    p->first = TRUE;
 | 
						|
    if ( callback && p->on_begin_element )
 | 
						|
    { sgml_attribute atts[MAXATTRIBUTES];
 | 
						|
      int natts = 0;
 | 
						|
 | 
						|
      if ( !(p->flags & SGML_PARSER_NODEFS) )
 | 
						|
	natts = add_default_attributes(p, e, natts, atts);
 | 
						|
 | 
						|
      (*p->on_begin_element)(p, e, natts, atts);
 | 
						|
    }
 | 
						|
 | 
						|
    if ( e->structure )
 | 
						|
    { if ( e->structure->type == C_CDATA ||
 | 
						|
	   e->structure->type == C_RCDATA )
 | 
						|
      { p->state = (e->structure->type == C_CDATA ? S_CDATA : S_RCDATA);
 | 
						|
	p->cdata_state = p->state;
 | 
						|
	p->etag = e->name->name;
 | 
						|
	p->etaglen = (int)istrlen(p->etag);
 | 
						|
	sgml_cplocation(&p->startcdata, &p->location);
 | 
						|
      } else
 | 
						|
	p->cdata_state = S_PCDATA;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return p->environments;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_environment(sgml_environment *env)
 | 
						|
{
 | 
						|
#ifdef XMLNS
 | 
						|
  if ( env->xmlns )
 | 
						|
    xmlns_free(env->xmlns);
 | 
						|
#endif
 | 
						|
 | 
						|
  sgml_free(env);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Pop the stack,  closing  all  environment   uptil  `to'.  The  close was
 | 
						|
initiated by pushing the element `e'.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static int
 | 
						|
pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0)
 | 
						|
{ sgml_environment *env, *parent;
 | 
						|
 | 
						|
  for(env = p->environments; env != to; env=parent)
 | 
						|
  { dtd_element *e = env->element;
 | 
						|
 | 
						|
    validate_completeness(p, env);
 | 
						|
    parent = env->parent;
 | 
						|
 | 
						|
    if ( e->structure && !e->structure->omit_close )
 | 
						|
      gripe(p, ERC_OMITTED_CLOSE, e->name->name);
 | 
						|
 | 
						|
    if ( e0 != CDATA_ELEMENT )
 | 
						|
      emit_cdata(p, TRUE);
 | 
						|
 | 
						|
    p->first = FALSE;
 | 
						|
    p->environments = env;
 | 
						|
    if ( p->dtd->shorttag )
 | 
						|
      p->waiting_for_net = env->saved_waiting_for_net;
 | 
						|
 | 
						|
    WITH_CLASS(p, EV_OMITTED,
 | 
						|
	       if ( p->on_end_element )
 | 
						|
	         (*p->on_end_element)(p, e));
 | 
						|
    free_environment(env);
 | 
						|
  }
 | 
						|
  p->environments = to;
 | 
						|
  p->map = to->map;
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
allow_for(dtd_element *in, dtd_element *e)
 | 
						|
{ dtd_edef *def = in->structure;
 | 
						|
  dtd_model *g;
 | 
						|
 | 
						|
  if ( def->type == C_EMPTY )
 | 
						|
  { def->type = C_PCDATA;
 | 
						|
    def->content = sgml_calloc(1, sizeof(*def->content));
 | 
						|
    def->content->type = MT_OR;
 | 
						|
    def->content->cardinality = MC_REP;
 | 
						|
  }
 | 
						|
  assert(def->content->type == MT_OR);
 | 
						|
 | 
						|
  g = def->content->content.group;
 | 
						|
 | 
						|
  if ( e == CDATA_ELEMENT )
 | 
						|
  { dtd_model *m;
 | 
						|
 | 
						|
    for(; g; g = g->next)
 | 
						|
    { if ( g->type == MT_PCDATA )
 | 
						|
	return;
 | 
						|
    }
 | 
						|
    m = sgml_calloc(1, sizeof(*m));
 | 
						|
    m->type	   = MT_PCDATA;
 | 
						|
    m->cardinality = MC_ONE;		/* ignored */
 | 
						|
    add_submodel(def->content, m);
 | 
						|
  } else
 | 
						|
  { dtd_model *m;
 | 
						|
 | 
						|
    for(; g; g = g->next)
 | 
						|
    { if ( g->type == MT_ELEMENT && g->content.element == e )
 | 
						|
	return;
 | 
						|
    }
 | 
						|
    m = sgml_calloc(1, sizeof(*m));
 | 
						|
    m->type	   = MT_ELEMENT;
 | 
						|
    m->cardinality = MC_ONE;		/* ignored */
 | 
						|
    m->content.element = e;
 | 
						|
    add_submodel(def->content, m);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
open_element(dtd_parser *p, dtd_element *e, int warn)
 | 
						|
{ if ( !p->environments && p->enforce_outer_element )
 | 
						|
  { dtd_element *f = p->enforce_outer_element->element;
 | 
						|
 | 
						|
    if ( f && f != e )
 | 
						|
    { if ( !f->structure ||
 | 
						|
	   !f->structure->omit_open )
 | 
						|
	gripe(p, ERC_OMITTED_OPEN, f->name->name);
 | 
						|
 | 
						|
      WITH_CLASS(p, EV_OMITTED,
 | 
						|
		 { open_element(p, f, TRUE);
 | 
						|
		   if ( p->on_begin_element )
 | 
						|
		   { sgml_attribute atts[MAXATTRIBUTES];
 | 
						|
		     int natts = 0;
 | 
						|
 | 
						|
		     if ( !(p->flags & SGML_PARSER_NODEFS) )
 | 
						|
		       natts = add_default_attributes(p, f, natts, atts);
 | 
						|
 | 
						|
		     (*p->on_begin_element)(p, f, natts, atts);
 | 
						|
		   }
 | 
						|
		 });
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
					/* no DTD available yet */
 | 
						|
  if ( !p->environments && !p->dtd->doctype && e != CDATA_ELEMENT )
 | 
						|
  { const ichar *file;
 | 
						|
 | 
						|
    file = find_in_catalogue(CAT_DOCTYPE, e->name->name, NULL, NULL,
 | 
						|
			     p->dtd->dialect != DL_SGML);
 | 
						|
    if ( file )
 | 
						|
    { dtd_parser *clone = clone_dtd_parser(p);
 | 
						|
 | 
						|
      gripe(p, ERC_NO_DOCTYPE, e->name->name, file);
 | 
						|
 | 
						|
      if ( load_dtd_from_file(clone, file) )
 | 
						|
	p->dtd->doctype = istrdup(e->name->name);
 | 
						|
      else
 | 
						|
	gripe(p, ERC_EXISTENCE, L"file", file);
 | 
						|
 | 
						|
      free_dtd_parser(clone);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if ( p->environments )
 | 
						|
  { sgml_environment *env = p->environments;
 | 
						|
 | 
						|
    if ( env->element->undefined )
 | 
						|
    { allow_for(env->element, e);	/* <!ELEMENT x - - (model) +(y)> */
 | 
						|
      push_element(p, e, FALSE);
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
 | 
						|
    if ( env->element->structure &&
 | 
						|
	 env->element->structure->type == C_ANY )
 | 
						|
    { if ( e != CDATA_ELEMENT && e->undefined )
 | 
						|
	gripe(p, ERC_EXISTENCE, L"Element", e->name->name);
 | 
						|
      push_element(p, e, FALSE);
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
 | 
						|
    switch(in_or_excluded(env, e))
 | 
						|
    { case IE_INCLUDED:
 | 
						|
        push_element(p, e, FALSE);
 | 
						|
	return TRUE;
 | 
						|
      case IE_EXCLUDED:
 | 
						|
	if ( warn )
 | 
						|
	  gripe(p, ERC_NOT_ALLOWED, e->name->name);
 | 
						|
	/*FALLTHROUGH*/
 | 
						|
      case IE_NORMAL:
 | 
						|
	for(; env; env=env->parent)
 | 
						|
	{ dtd_state *new;
 | 
						|
 | 
						|
	  if ( (new = make_dtd_transition(env->state, e)) )
 | 
						|
	  { env->state = new;
 | 
						|
	    pop_to(p, env, e);
 | 
						|
	    push_element(p, e, FALSE);
 | 
						|
	    return TRUE;
 | 
						|
	  } else
 | 
						|
	  { dtd_element *oe[MAXOMITTED]; /* omitted open */
 | 
						|
	    int olen;
 | 
						|
	    int i;
 | 
						|
 | 
						|
	    if ( (olen=find_omitted_path(env->state, e, oe)) > 0 )
 | 
						|
	    { pop_to(p, env, e);
 | 
						|
	      WITH_CLASS(p, EV_OMITTED,
 | 
						|
	      for(i=0; i<olen; i++)
 | 
						|
	      { env->state = make_dtd_transition(env->state, oe[i]);
 | 
						|
		env = push_element(p, oe[i], TRUE);
 | 
						|
	      })
 | 
						|
	      env->state = make_dtd_transition(env->state, e);
 | 
						|
	      push_element(p, e, FALSE);
 | 
						|
	      return TRUE;
 | 
						|
	    }
 | 
						|
	  }
 | 
						|
 | 
						|
	  if ( !env->element->structure ||
 | 
						|
	       !env->element->structure->omit_close )
 | 
						|
	    break;
 | 
						|
	}
 | 
						|
    }
 | 
						|
 | 
						|
    if ( warn )
 | 
						|
    { if ( e == CDATA_ELEMENT )
 | 
						|
	gripe(p, ERC_VALIDATE, L"#PCDATA not allowed here");
 | 
						|
      else if ( e->undefined )
 | 
						|
	gripe(p, ERC_EXISTENCE, L"Element", e->name->name);
 | 
						|
      else
 | 
						|
	gripe(p, ERC_NOT_ALLOWED, e->name->name);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if ( warn )
 | 
						|
  { push_element(p, e, FALSE);
 | 
						|
    return TRUE;
 | 
						|
  } else
 | 
						|
    return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
close_element(dtd_parser *p, dtd_element *e, int conref)
 | 
						|
{ sgml_environment *env;
 | 
						|
 | 
						|
  for(env = p->environments; env; env=env->parent)
 | 
						|
  { if ( env->element == e )		/* element is open */
 | 
						|
    { sgml_environment *parent;
 | 
						|
 | 
						|
      for(env = p->environments; ; env=parent)
 | 
						|
      {	dtd_element *ce	= env->element;
 | 
						|
 | 
						|
	if ( !(conref && env == p->environments) )
 | 
						|
	  validate_completeness(p, env);
 | 
						|
	parent = env->parent;
 | 
						|
 | 
						|
	p->first = FALSE;
 | 
						|
	if ( p->on_end_element )
 | 
						|
	  (*p->on_end_element)(p, env->element);
 | 
						|
	free_environment(env);
 | 
						|
	p->environments = parent;
 | 
						|
 | 
						|
	if ( ce == e )			/* closing current element */
 | 
						|
	{ p->map = (parent ? parent->map : NULL);
 | 
						|
	  return TRUE;
 | 
						|
	} else				/* omited close */
 | 
						|
	{ if ( ce->structure && !ce->structure->omit_close )
 | 
						|
	    gripe(p, ERC_OMITTED_CLOSE, ce->name->name);
 | 
						|
	}
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return gripe(p, ERC_NOT_OPEN, e->name->name);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
close_current_element(dtd_parser *p)
 | 
						|
{ if ( p->environments )
 | 
						|
  { dtd_element *e = p->environments->element;
 | 
						|
 | 
						|
    emit_cdata(p, TRUE);
 | 
						|
    return close_element(p, e, FALSE);
 | 
						|
  }
 | 
						|
 | 
						|
  return gripe(p, ERC_SYNTAX_ERROR, L"No element to close", "");
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
get_attribute_value()
 | 
						|
 | 
						|
Get the value for an attribute.  Once   I  thought  this was simple, but
 | 
						|
Richard O'Keefe pointed to the complex   handling of white-space in SGML
 | 
						|
attributes. Basically, if the attribute is quoted, we need:
 | 
						|
 | 
						|
	* If CDATA, map all blank to space characters, then expand
 | 
						|
	  entities
 | 
						|
 | 
						|
	* If !CDATA expand all entities, canonise white space by
 | 
						|
	  deleting leading and trailing space and squishing multiple
 | 
						|
	  space characters to a single (lower for us) case.
 | 
						|
 | 
						|
This almost, but not completely matches the XML definition. This however
 | 
						|
is so complex we will ignore it for now.
 | 
						|
 | 
						|
[Rewritten by Richard O'Keefe with these additional comments]
 | 
						|
Reads a value, the  attribute  name   and  value  indicator  having been
 | 
						|
processed already. It calls itake_string() to   read  quoted values, and
 | 
						|
itake_unquoted() to read unquoted values.
 | 
						|
 | 
						|
itake_string(dtd, in, buf, size)
 | 
						|
	- skips layout INCLUDING comments,
 | 
						|
	- returns NULL if the next character is not ' or ",
 | 
						|
	- copies characters from in to buf until a matching ' or " is found,
 | 
						|
	- adds a terminating \0,
 | 
						|
	- skips more layout INCLUDING comments, and
 | 
						|
	- returns the new input position.
 | 
						|
It is quite wrong to skip leading comments here.  In the tag
 | 
						|
 | 
						|
    <foo bar = --ugh-- zoo>
 | 
						|
 | 
						|
the characters "--ugh--" *are the value*.  They are not a comment.
 | 
						|
Comments are not in fact allowed inside tags, unfortunately.
 | 
						|
This tag is equivalent to
 | 
						|
 | 
						|
    <foo bar="--ugh--" something="zoo">
 | 
						|
 | 
						|
where something is an attribute that has zoo as one of its enumerals.
 | 
						|
 | 
						|
Because itake_string() is called in many other places, this bug has
 | 
						|
not yet been fixed.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static ichar const *
 | 
						|
get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att)
 | 
						|
{ ichar tmp[MAXSTRINGLEN];
 | 
						|
  ichar *buf = tmp;
 | 
						|
  ichar const *s;
 | 
						|
  ichar c;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
  ichar const *end;
 | 
						|
  ichar *start; int len;
 | 
						|
 | 
						|
  enum
 | 
						|
  { DIG_FIRST = 8,		/* any token start with digit? */
 | 
						|
    NAM_FIRST = 4,		/* any token start with non-digit name char? */
 | 
						|
    NAM_LATER = 2,		/* any token have non-digit name char later? */
 | 
						|
    ANY_OTHER = 1,		/* any token have illegal character? */
 | 
						|
    YET_EMPTY = 0
 | 
						|
  }
 | 
						|
  token = YET_EMPTY;
 | 
						|
 | 
						|
  att->value.textW = NULL;		/* UCS text */
 | 
						|
  att->value.number = 0;
 | 
						|
  att->flags = 0;
 | 
						|
 | 
						|
  end = itake_string(dtd, decl, &start, &len);
 | 
						|
 | 
						|
  if ( end != NULL )
 | 
						|
  { ocharbuf out;
 | 
						|
 | 
						|
    init_ocharbuf(&out);
 | 
						|
    expand_entities(p, start, len, &out);
 | 
						|
 | 
						|
    if ( att->definition->type == AT_CDATA )
 | 
						|
    { malloc_ocharbuf(&out);
 | 
						|
 | 
						|
      att->value.number = out.size;
 | 
						|
      att->value.textW  = out.data.w;
 | 
						|
 | 
						|
      return end;
 | 
						|
    } else
 | 
						|
    { ichar *d;
 | 
						|
 | 
						|
      buf = out.data.w;
 | 
						|
 | 
						|
      /* canonicalise blanks */
 | 
						|
      s = buf;
 | 
						|
      while ((c = *s++) != '\0' && HasClass(dtd, c, CH_BLANK))
 | 
						|
	;
 | 
						|
      d = buf;
 | 
						|
      while ( c != '\0' )
 | 
						|
      { token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST
 | 
						|
	  : HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER;
 | 
						|
	if ( d != buf )
 | 
						|
	  *d++ = ' ';
 | 
						|
	if ( dtd->case_sensitive )
 | 
						|
	{ *d++ = c;
 | 
						|
	  while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
 | 
						|
	  { token |= HasClass(dtd, c, CH_DIGIT) ? 0
 | 
						|
	      : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
 | 
						|
	    *d++ = c;
 | 
						|
	  }
 | 
						|
	} else
 | 
						|
	{ *d++ = towlower(c);
 | 
						|
	  while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
 | 
						|
	  { token |= HasClass(dtd, c, CH_DIGIT) ? 0
 | 
						|
	      : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
 | 
						|
	    *d++ = towlower(c);
 | 
						|
	  }
 | 
						|
	}
 | 
						|
	while (c != '\0' && HasClass(dtd, c, CH_BLANK))
 | 
						|
	  c = *s++;
 | 
						|
      }
 | 
						|
      *d = '\0';
 | 
						|
    }
 | 
						|
  } else
 | 
						|
  { end = itake_unquoted(p, decl, tmp, sizeof(tmp)/sizeof(ichar));
 | 
						|
    if (end == NULL)
 | 
						|
      return NULL;
 | 
						|
 | 
						|
    s = buf;
 | 
						|
    c = *s++;
 | 
						|
    if (c != '\0')
 | 
						|
    { token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST
 | 
						|
	: HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER;
 | 
						|
      while ((c = *s++) != 0)
 | 
						|
      { token |= HasClass(dtd, c, CH_DIGIT) ? 0
 | 
						|
	  : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    if ( token == YET_EMPTY || (token & ANY_OTHER) != 0)
 | 
						|
      gripe(p, ERC_SYNTAX_WARNING, L"Attribute value requires quotes", buf);
 | 
						|
 | 
						|
    if (!dtd->case_sensitive && att->definition->type != AT_CDATA)
 | 
						|
      istrlower(buf);
 | 
						|
  }
 | 
						|
 | 
						|
  switch (att->definition->type)
 | 
						|
  { case AT_NUMBER:		/* number */
 | 
						|
      if (token != DIG_FIRST)
 | 
						|
      { gripe(p, ERC_SYNTAX_WARNING, L"NUMBER expected", decl);
 | 
						|
      } else if (dtd->number_mode == NU_INTEGER)
 | 
						|
      { (void) istrtol(buf, &att->value.number);
 | 
						|
      } else
 | 
						|
      { att->value.textW  = istrdup(buf);
 | 
						|
	att->value.number = (long)istrlen(buf);
 | 
						|
      }
 | 
						|
      return end;
 | 
						|
    case AT_CDATA:		/* CDATA attribute */
 | 
						|
      att->value.textW  = istrdup(buf);
 | 
						|
      att->value.number = (long)istrlen(buf);
 | 
						|
      return end;
 | 
						|
    case AT_ID:		/* identifier */
 | 
						|
    case AT_IDREF:		/* identifier reference */
 | 
						|
    case AT_NAME:		/* name token */
 | 
						|
    case AT_NOTATION:		/* notation-name */
 | 
						|
      if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"NAME expected", decl);
 | 
						|
      break;
 | 
						|
    case AT_NAMEOF:		/* one of these names */
 | 
						|
    case AT_NMTOKEN:		/* name-token */
 | 
						|
      if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"NMTOKEN expected", decl);
 | 
						|
      if ( att->definition->type == AT_NAMEOF )
 | 
						|
      { dtd_name_list *nl;
 | 
						|
 | 
						|
	for(nl=att->definition->typeex.nameof; nl; nl = nl->next)
 | 
						|
	{ if ( istreq(nl->value->name, buf) )
 | 
						|
	    goto passed;
 | 
						|
	}
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"unexpected value", decl);
 | 
						|
      }
 | 
						|
      break;
 | 
						|
    case AT_NUTOKEN:		/* number token */
 | 
						|
      if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"NUTOKEN expected", decl);
 | 
						|
      break;
 | 
						|
    case AT_ENTITY:		/* entity-name */
 | 
						|
      if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"entity NAME expected", decl);
 | 
						|
      break;
 | 
						|
    case AT_NAMES:		/* list of names */
 | 
						|
    case AT_IDREFS:		/* list of identifier references */
 | 
						|
      if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"NAMES expected", decl);
 | 
						|
      break;
 | 
						|
    case AT_ENTITIES:		/* entity-name list */
 | 
						|
      if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"entity NAMES expected", decl);
 | 
						|
      break;
 | 
						|
    case AT_NMTOKENS:		/* name-token list */
 | 
						|
      if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"NMTOKENS expected", decl);
 | 
						|
      break;
 | 
						|
    case AT_NUMBERS:		/* number list */
 | 
						|
      if (token != DIG_FIRST)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"NUMBERS expected", decl);
 | 
						|
      break;
 | 
						|
    case AT_NUTOKENS:
 | 
						|
      if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
 | 
						|
	gripe(p, ERC_SYNTAX_WARNING, L"NUTOKENS expected", decl);
 | 
						|
      break;
 | 
						|
    default:
 | 
						|
      assert(0);
 | 
						|
      return NULL;
 | 
						|
  }
 | 
						|
 | 
						|
passed:
 | 
						|
  att->value.textW  = istrdup(buf);	/* TBD: more validation */
 | 
						|
  att->value.number = (long)istrlen(buf);
 | 
						|
  return end;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const ichar *
 | 
						|
process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl,
 | 
						|
		   sgml_attribute *atts, int *argc)
 | 
						|
{ int attn = 0;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  decl = iskip_layout(dtd, decl);
 | 
						|
  while(decl && *decl)
 | 
						|
  { dtd_symbol *nm;
 | 
						|
    const ichar *s;
 | 
						|
 | 
						|
    if ( (s=itake_nmtoken(p, decl, &nm)) )
 | 
						|
    { decl = s;
 | 
						|
 | 
						|
      if ( (s=isee_func(dtd, decl, CF_VI)) ) /* name= */
 | 
						|
      { dtd_attr *a;
 | 
						|
 | 
						|
	if ( !HasClass(dtd, nm->name[0], CH_NMSTART) )
 | 
						|
	  gripe(p, ERC_SYNTAX_WARNING,
 | 
						|
		"Illegal start of attribute-name", decl);
 | 
						|
 | 
						|
	decl = s;
 | 
						|
	if ( !(a=find_attribute(e, nm)) )
 | 
						|
	{ a = sgml_calloc(1, sizeof(*a));
 | 
						|
 | 
						|
	  a->name = nm;
 | 
						|
	  a->type = AT_CDATA;
 | 
						|
	  a->def  = AT_IMPLIED;
 | 
						|
	  add_attribute(p, e, a);
 | 
						|
 | 
						|
	  if ( !e->undefined &&
 | 
						|
	       !(dtd->dialect != DL_SGML &&
 | 
						|
		 (istreq(L"xmlns", nm->name) ||
 | 
						|
		  istrprefix(L"xmlns:", nm->name))) )
 | 
						|
	    gripe(p, ERC_NO_ATTRIBUTE, e->name->name, nm->name);
 | 
						|
	}
 | 
						|
	atts[attn].definition = a;
 | 
						|
	if ( (decl=get_attribute_value(p, decl, atts+attn)) )
 | 
						|
	{ attn++;
 | 
						|
	  continue;
 | 
						|
	}
 | 
						|
      } else if ( e->structure )
 | 
						|
      { dtd_attr_list *al;		/* value shorthand */
 | 
						|
 | 
						|
	for(al=e->attributes; al; al=al->next)
 | 
						|
	{ dtd_attr *a = al->attribute;
 | 
						|
 | 
						|
	  if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
 | 
						|
	  { dtd_name_list *nl;
 | 
						|
 | 
						|
	    for(nl=a->typeex.nameof; nl; nl = nl->next)
 | 
						|
	    { if ( nl->value == nm )
 | 
						|
	      { if ( dtd->dialect != DL_SGML )
 | 
						|
		  gripe(p, ERC_SYNTAX_WARNING,
 | 
						|
			"Value short-hand in XML mode", decl);
 | 
						|
		atts[attn].flags	= 0;
 | 
						|
		atts[attn].definition   = a;
 | 
						|
		atts[attn].value.textW  = istrdup(nm->name);
 | 
						|
		atts[attn].value.number = (long)istrlen(nm->name);
 | 
						|
		attn++;
 | 
						|
		goto next;
 | 
						|
	      }
 | 
						|
	    }
 | 
						|
	  }
 | 
						|
	}
 | 
						|
	gripe(p, ERC_NO_ATTRIBUTE_VALUE, e->name->name, nm->name);
 | 
						|
	decl = s;
 | 
						|
      } else
 | 
						|
      { gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute", decl);
 | 
						|
	decl = s;
 | 
						|
      }
 | 
						|
    } else
 | 
						|
    { *argc = attn;
 | 
						|
      return decl;
 | 
						|
    }
 | 
						|
 | 
						|
  next:
 | 
						|
    ;
 | 
						|
  }
 | 
						|
 | 
						|
  *argc = attn;
 | 
						|
  return decl;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
add_default_attributes()
 | 
						|
 | 
						|
This function adds attributes for omitted  default and fixed attributes.
 | 
						|
These attributes are added to  the  end   of  the  attribute  list. This
 | 
						|
function returns the new  number  of   attributes.  The  `atts' array is
 | 
						|
assumed   to   be   MAXATTRIBUTES    long,     normally    passed   from
 | 
						|
process_begin_element.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static int
 | 
						|
add_default_attributes(dtd_parser *p, dtd_element *e,
 | 
						|
		       int natts, sgml_attribute *atts)
 | 
						|
{ dtd_attr_list *al;
 | 
						|
 | 
						|
  if ( e == CDATA_ELEMENT )
 | 
						|
    return natts;
 | 
						|
 | 
						|
  for(al=e->attributes; al; al=al->next)
 | 
						|
  { dtd_attr *a = al->attribute;
 | 
						|
 | 
						|
    switch(a->def)
 | 
						|
    { case AT_REQUIRED:			/* TBD: check if present */
 | 
						|
      case AT_CURRENT:			/* TBD: register in DTD and reuse */
 | 
						|
      case AT_CONREF:
 | 
						|
      case AT_IMPLIED:
 | 
						|
	goto next;
 | 
						|
      case AT_FIXED:
 | 
						|
      case AT_DEFAULT:
 | 
						|
      { int i;
 | 
						|
	sgml_attribute *ap;
 | 
						|
 | 
						|
	for(i=0, ap=atts; i<natts; i++, ap++)
 | 
						|
	{ if ( ap->definition == a )
 | 
						|
	    goto next;
 | 
						|
	}
 | 
						|
 | 
						|
        ap->definition   = a;
 | 
						|
	ap->value.textW  = NULL;
 | 
						|
	ap->value.number = 0;
 | 
						|
	ap->flags        = SGML_AT_DEFAULT;
 | 
						|
 | 
						|
	switch(a->type)
 | 
						|
	{ case AT_CDATA:
 | 
						|
	    ap->value.textW = a->att_def.cdata;
 | 
						|
	    ap->value.number = (long)istrlen(ap->value.textW);
 | 
						|
	    break;
 | 
						|
	  case AT_NUMBER:
 | 
						|
	    if ( p->dtd->number_mode == NU_TOKEN )
 | 
						|
	    { ap->value.textW  = (ichar*)a->att_def.name->name;
 | 
						|
	      ap->value.number = (long)istrlen(ap->value.textW);
 | 
						|
	    } else
 | 
						|
	    { ap->value.number = a->att_def.number;
 | 
						|
	    }
 | 
						|
	    break;
 | 
						|
	  default:
 | 
						|
	    if ( a->islist )
 | 
						|
	    { ap->value.textW = a->att_def.list;
 | 
						|
	    } else
 | 
						|
	    { ap->value.textW = (ichar*)a->att_def.name->name;
 | 
						|
	    }
 | 
						|
	    ap->value.number = (long)istrlen(ap->value.textW);
 | 
						|
	}
 | 
						|
 | 
						|
	natts++;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  next:;
 | 
						|
  }
 | 
						|
 | 
						|
  return natts;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_attribute_values(int argc, sgml_attribute *argv)
 | 
						|
{ int i;
 | 
						|
 | 
						|
  for(i=0; i<argc; i++, argv++)
 | 
						|
  { if ( (argv->flags & SGML_AT_DEFAULT) )
 | 
						|
      continue;				/* shared with the DTD */
 | 
						|
 | 
						|
    if ( argv->value.textW )
 | 
						|
      sgml_free(argv->value.textW);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_begin_element(dtd_parser *p, const ichar *decl)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  dtd_symbol *id;
 | 
						|
  const ichar *s;
 | 
						|
 | 
						|
  if ( (s=itake_name(p, decl, &id)) )
 | 
						|
  { sgml_attribute atts[MAXATTRIBUTES];
 | 
						|
    int natts;
 | 
						|
    dtd_element *e = find_element(dtd, id);
 | 
						|
    int empty = FALSE;
 | 
						|
    int conref = FALSE;
 | 
						|
    int rc = TRUE;
 | 
						|
 | 
						|
    if ( !e->structure )
 | 
						|
    { dtd_edef *def;
 | 
						|
      e->undefined = TRUE;
 | 
						|
      STAT(edefs_implicit++);
 | 
						|
      def_element(dtd, id);
 | 
						|
      def = e->structure;
 | 
						|
      def->type = C_EMPTY;
 | 
						|
    }
 | 
						|
 | 
						|
    open_element(p, e, TRUE);
 | 
						|
 | 
						|
    decl=s;
 | 
						|
    if ( (s=process_attributes(p, e, decl, atts, &natts)) )
 | 
						|
      decl=s;
 | 
						|
 | 
						|
    if ( dtd->dialect != DL_SGML )
 | 
						|
    { if ( (s=isee_func(dtd, decl, CF_ETAGO2)) )
 | 
						|
      { empty = TRUE;			/* XML <tag/> */
 | 
						|
	decl = s;
 | 
						|
      }
 | 
						|
#ifdef XMLNS
 | 
						|
      if ( dtd->dialect == DL_XMLNS )
 | 
						|
	update_xmlns(p, e, natts, atts);
 | 
						|
#endif
 | 
						|
      if ( dtd->dialect != DL_SGML )
 | 
						|
	update_space_mode(p, e, natts, atts);
 | 
						|
    } else
 | 
						|
    { int i;
 | 
						|
 | 
						|
      for(i=0; i<natts; i++)
 | 
						|
      { if ( atts[i].definition->def == AT_CONREF )
 | 
						|
	{ empty = TRUE;
 | 
						|
	  conref = TRUE;
 | 
						|
	}
 | 
						|
      }
 | 
						|
    }
 | 
						|
    if ( *decl )
 | 
						|
      gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute list", decl);
 | 
						|
 | 
						|
    if ( !(p->flags & SGML_PARSER_NODEFS) )
 | 
						|
      natts = add_default_attributes(p, e, natts, atts);
 | 
						|
 | 
						|
    if ( empty ||
 | 
						|
	 (dtd->dialect == DL_SGML &&
 | 
						|
	  e->structure &&
 | 
						|
	  e->structure->type == C_EMPTY &&
 | 
						|
	  !e->undefined) )
 | 
						|
      p->empty_element = e;
 | 
						|
    else
 | 
						|
      p->empty_element = NULL;
 | 
						|
 | 
						|
    if ( p->on_begin_element )
 | 
						|
      rc = (*p->on_begin_element)(p, e, natts, atts);
 | 
						|
 | 
						|
    free_attribute_values(natts, atts);
 | 
						|
 | 
						|
    if ( p->empty_element )
 | 
						|
    { p->empty_element = NULL;
 | 
						|
      close_element(p, e, conref);
 | 
						|
      if ( conref )	/* might be S_CDATA due to declared content */
 | 
						|
	p->cdata_state = p->state = S_PCDATA;
 | 
						|
    }
 | 
						|
 | 
						|
    return rc;
 | 
						|
  }
 | 
						|
 | 
						|
  return gripe(p, ERC_SYNTAX_ERROR, L"Bad open-element tag", decl);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_end_element(dtd_parser *p, const ichar *decl)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  dtd_symbol *id;
 | 
						|
  const ichar *s;
 | 
						|
 | 
						|
  emit_cdata(p, TRUE);
 | 
						|
  if ( (s=itake_name(p, decl, &id)) && *s == '\0' )
 | 
						|
    return close_element(p, find_element(dtd, id), FALSE);
 | 
						|
 | 
						|
  if ( p->dtd->shorttag && *decl == '\0' ) /* </>: close current element */
 | 
						|
    return close_current_element(p);
 | 
						|
 | 
						|
  return gripe(p, ERC_SYNTAX_ERROR, L"Bad close-element tag", decl);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
process_net(dtd_parser *p)
 | 
						|
    We've seen a / of a shorttag element.  Close this one.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static int
 | 
						|
process_net(dtd_parser *p)
 | 
						|
{ sgml_environment *env;
 | 
						|
 | 
						|
  prepare_cdata(p);
 | 
						|
  for(env = p->environments; env; env=env->parent)
 | 
						|
  { if ( env->wants_net )
 | 
						|
    { sgml_environment *parent;
 | 
						|
 | 
						|
      pop_to(p, env, NULL);		/* close parents */
 | 
						|
      validate_completeness(p, env);
 | 
						|
      parent = env->parent;
 | 
						|
 | 
						|
      emit_cdata(p, TRUE);
 | 
						|
      p->first = FALSE;
 | 
						|
 | 
						|
      if ( p->on_end_element )
 | 
						|
      { WITH_CLASS(p, EV_SHORTTAG,
 | 
						|
		   (*p->on_end_element)(p, env->element));
 | 
						|
      }
 | 
						|
 | 
						|
      free_environment(env);
 | 
						|
      p->environments = parent;
 | 
						|
      p->map = (parent ? parent->map : NULL);
 | 
						|
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int				/* <!DOCTYPE ...> */
 | 
						|
process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  dtd_symbol *id;
 | 
						|
  const ichar *s;
 | 
						|
  dtd_entity *et = NULL;
 | 
						|
 | 
						|
  if ( !(s=itake_name(p, decl, &id)) )
 | 
						|
    return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl);
 | 
						|
  decl = s;
 | 
						|
 | 
						|
  if ( (s=isee_identifier(dtd, decl, "system")) )
 | 
						|
  { et = sgml_calloc(1, sizeof(*et));
 | 
						|
    et->type = ET_SYSTEM;
 | 
						|
    decl = s;
 | 
						|
  } else if ( (s=isee_identifier(dtd, decl, "public")) )
 | 
						|
  { et = sgml_calloc(1, sizeof(*et));
 | 
						|
    et->type = ET_PUBLIC;
 | 
						|
    decl = s;
 | 
						|
  } else if ( isee_func(dtd, decl, CF_DSO) )
 | 
						|
    goto local;
 | 
						|
 | 
						|
  if ( et )
 | 
						|
  { et->name = id;
 | 
						|
    et->catalog_location = CAT_DOCTYPE;
 | 
						|
    if ( !(s=process_entity_value_declaration(p, decl, et)) )
 | 
						|
      return FALSE;
 | 
						|
    decl = s;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( !dtd->doctype )			/* i.e. anonymous DTD */
 | 
						|
  { ichar *file;
 | 
						|
    dtd_parser *clone;
 | 
						|
 | 
						|
    dtd->doctype = istrdup(id->name);	/* Fill it */
 | 
						|
    if ( et )
 | 
						|
      file = entity_file(dtd, et);
 | 
						|
    else
 | 
						|
      file = istrdup(find_in_catalogue(CAT_DOCTYPE,
 | 
						|
				       dtd->doctype, NULL, NULL,
 | 
						|
				       dtd->dialect != DL_SGML));
 | 
						|
 | 
						|
    if ( !file )
 | 
						|
    { gripe(p, ERC_EXISTENCE, L"DTD", dtd->doctype);
 | 
						|
    } else
 | 
						|
    { clone = clone_dtd_parser(p);
 | 
						|
      if ( !load_dtd_from_file(clone, file) )
 | 
						|
	gripe(p, ERC_EXISTENCE, L"file", file);
 | 
						|
      free_dtd_parser(clone);
 | 
						|
      sgml_free(file);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if ( et )
 | 
						|
    free_entity_list(et);
 | 
						|
 | 
						|
local:
 | 
						|
  if ( (s=isee_func(dtd, decl, CF_DSO)) ) /* [...] */
 | 
						|
  { int grouplevel = 1;
 | 
						|
    data_mode oldmode  = p->dmode;
 | 
						|
    dtdstate  oldstate = p->state;
 | 
						|
    locbuf oldloc;
 | 
						|
    const ichar *q;
 | 
						|
    icharbuf *saved_ibuf = p->buffer;
 | 
						|
 | 
						|
    push_location(p, &oldloc);
 | 
						|
					/* try to find start-location. */
 | 
						|
					/* fails if there is comment before */
 | 
						|
					/* the []! */
 | 
						|
    sgml_cplocation(&p->location, &p->startloc);
 | 
						|
    inc_location(&p->location, '<');
 | 
						|
    for(q=decl0; q < s; q++)
 | 
						|
      inc_location(&p->location, *q);
 | 
						|
    p->dmode = DM_DTD;
 | 
						|
    p->state = S_PCDATA;
 | 
						|
    p->buffer = new_icharbuf();
 | 
						|
 | 
						|
    for( ; *s; s++ )
 | 
						|
    { if ( isee_func(dtd, s, CF_LIT) ||	/* skip quoted strings */
 | 
						|
	   isee_func(dtd, s, CF_LITA) )
 | 
						|
      { ichar q = *s;
 | 
						|
 | 
						|
	putchar_dtd_parser(p, *s++);	/* pass open quote */
 | 
						|
 | 
						|
	for( ; *s && *s != q; s++ )
 | 
						|
	  putchar_dtd_parser(p, *s);
 | 
						|
 | 
						|
	if ( *s == q )			/* pass closing quote */
 | 
						|
	  putchar_dtd_parser(p, *s);
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
 | 
						|
      if ( isee_func(dtd, s, CF_DSO) )
 | 
						|
	grouplevel++;
 | 
						|
      else if ( isee_func(dtd, s, CF_DSC) && --grouplevel == 0 )
 | 
						|
	break;
 | 
						|
      putchar_dtd_parser(p, *s);
 | 
						|
    }
 | 
						|
    p->dtd->implicit = FALSE;
 | 
						|
 | 
						|
    p->state    = oldstate;
 | 
						|
    p->dmode    = oldmode;
 | 
						|
    free_icharbuf(p->buffer);
 | 
						|
    p->buffer = saved_ibuf;
 | 
						|
    pop_location(p, &oldloc);
 | 
						|
  }
 | 
						|
 | 
						|
  p->enforce_outer_element = id;	/* make this the outer element */
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
init_decoding(dtd_parser *p)
 | 
						|
{
 | 
						|
#ifdef UTF8
 | 
						|
  int decode;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  if ( dtd->encoding == SGML_ENC_UTF8 &&
 | 
						|
       p->encoded    == TRUE )
 | 
						|
    decode = TRUE;
 | 
						|
  else
 | 
						|
    decode = FALSE;
 | 
						|
 | 
						|
  if ( p->utf8_decode != decode )
 | 
						|
  { DEBUG(fprintf(stderr, "%s UTF-8 decoding on %p\n",
 | 
						|
		  decode ? "Enable" : "Disable",
 | 
						|
		  p));
 | 
						|
 | 
						|
    p->utf8_decode = decode;
 | 
						|
  }
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
xml_set_encoding() is the public interface to   set the encoding for the
 | 
						|
parser.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
/* strcasecmp() with C locale: compare two NUL-terminated byte strings,
   folding only ASCII 'A'..'Z' to lower case.

   Returns 0 if the strings are equal, a negative value if s1 sorts
   before s2 and a positive value otherwise.  Bytes are compared as
   unsigned values (0..255) throughout, including the final comparison
   that is reached when one of the strings ends.
*/
static int
posix_strcasecmp(const char *s1, const char *s2)
{ for(; *s1 && *s2; s1++, s2++)
  { int c1 = *s1&0xff;
    int c2 = *s2&0xff;

    if ( c1 >= 'A' && c1 <= 'Z' ) c1 += 'a'-'A';
    if ( c2 >= 'A' && c2 <= 'Z' ) c2 += 'a'-'A';

    if ( c1 != c2 )
      return c1-c2;
  }

  /* At least one string ended.  Mask to 0..255 as in the loop so the
     sign of the result does not depend on whether plain char is signed.
  */
  return (*s1&0xff) - (*s2&0xff);
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
xml_set_encoding(dtd_parser *p, const char *enc)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  if ( posix_strcasecmp(enc, "iso-8859-1") == 0 )
 | 
						|
  { dtd->encoding = SGML_ENC_ISO_LATIN1;
 | 
						|
  } else if ( posix_strcasecmp(enc, "us-ascii") == 0 )
 | 
						|
  { dtd->encoding = SGML_ENC_ISO_LATIN1; 	/* doesn't make a difference */
 | 
						|
  } else if ( posix_strcasecmp(enc, "utf-8") == 0 )
 | 
						|
  { dtd->encoding = SGML_ENC_UTF8;
 | 
						|
  } else
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
  init_decoding(p);
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
set_encoding() sets the encoding from the encoding="..." field of the
 | 
						|
XML header.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static void
 | 
						|
set_encoding(dtd_parser *p, const ichar *enc)
 | 
						|
{ char buf[32];
 | 
						|
  char *e = buf+sizeof(buf)-1;
 | 
						|
  char *o;
 | 
						|
  const ichar *i;
 | 
						|
 | 
						|
  for(i=enc, o=buf; *i; )
 | 
						|
  { if ( *i < 128 && o < e )
 | 
						|
    { *o++ = (char)*i++;
 | 
						|
    } else
 | 
						|
    { goto error;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  *o = '\0';
 | 
						|
 | 
						|
  if ( !xml_set_encoding(p, buf) )
 | 
						|
  { error:
 | 
						|
    gripe(p, ERC_EXISTENCE, L"character encoding", enc);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Process <? ... ?>
 | 
						|
 | 
						|
Should deal with character encoding for XML documents.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static int
 | 
						|
process_pi(dtd_parser *p, const ichar *decl)
 | 
						|
{ const ichar *s;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  if ( (s=isee_identifier(dtd, decl, "xml")) ) /* <?xml version="1.0"?> */
 | 
						|
  { decl = s;
 | 
						|
 | 
						|
    switch(dtd->dialect)
 | 
						|
    { case DL_SGML:
 | 
						|
	set_dialect_dtd(dtd, DL_XML);
 | 
						|
        break;
 | 
						|
      case DL_XML:
 | 
						|
      case DL_XMLNS:
 | 
						|
	break;
 | 
						|
    }
 | 
						|
 | 
						|
    while(*decl)
 | 
						|
    { dtd_symbol *nm;
 | 
						|
 | 
						|
      if ( (s=itake_name(p, decl, &nm)) &&
 | 
						|
	   (s=isee_func(dtd, s, CF_VI)) ) 		/* = */
 | 
						|
      { ichar *start;
 | 
						|
	int len;
 | 
						|
	ichar buf[MAXSTRINGLEN];
 | 
						|
	const ichar *end;
 | 
						|
 | 
						|
	if ( !(end=itake_string(dtd, s, &start, &len)) )
 | 
						|
	{ end=itake_nmtoken_chars(p, s, buf, sizeof(buf)/sizeof(ichar));
 | 
						|
	  start = buf;
 | 
						|
	  len = (int)istrlen(buf);
 | 
						|
	}
 | 
						|
 | 
						|
	if ( end )
 | 
						|
	{ decl = end;
 | 
						|
 | 
						|
	  if ( istrcaseeq(nm->name, L"encoding") )
 | 
						|
	  { ichar tmp[32];
 | 
						|
 | 
						|
	    if ( len < (int)(sizeof(tmp)/sizeof(ichar)-1) )
 | 
						|
	    { istrncpy(tmp, start, len);
 | 
						|
	      tmp[len] = 0;
 | 
						|
 | 
						|
	      set_encoding(p, tmp);
 | 
						|
	    } else
 | 
						|
	    { gripe(p, ERC_SYNTAX_ERROR, L"Unterminated encoding?", decl);
 | 
						|
	    }
 | 
						|
	  }
 | 
						|
 | 
						|
	  /* fprintf(stderr, "XML %s = %s\n", nm->name, buf); */
 | 
						|
 | 
						|
	  continue;
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      gripe(p, ERC_SYNTAX_ERROR, L"Illegal XML parameter", decl);
 | 
						|
      break;
 | 
						|
    }
 | 
						|
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( p->on_pi )
 | 
						|
    (*p->on_pi)(p, decl);
 | 
						|
 | 
						|
  return FALSE;				/* Warn? */
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_sgml_declaration(dtd_parser *p, const ichar *decl)
 | 
						|
{ return gripe(p, ERC_SYNTAX_WARNING, L"Ignored <!SGML ...> declaration", NULL);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_declaration(dtd_parser *p, const ichar *decl)
 | 
						|
{ const ichar *s;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  if ( p->dmode != DM_DTD )
 | 
						|
  { if ( (s=isee_func(dtd, decl, CF_ETAGO2)) ) /* </ ... > */
 | 
						|
    { return process_end_element(p, s);
 | 
						|
    } else if ( HasClass(dtd, *decl, CH_NAME) ) /* <letter */
 | 
						|
    { return process_begin_element(p, decl);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if ( (s=isee_func(dtd, decl, CF_MDO2)) ) /* <! ... >*/
 | 
						|
  { decl = s;
 | 
						|
 | 
						|
    if ( p->on_decl )
 | 
						|
      (*p->on_decl)(p, decl);
 | 
						|
 | 
						|
    if ( (s = isee_identifier(dtd, decl, "entity")) )
 | 
						|
      process_entity_declaration(p, s);
 | 
						|
    else if ( (s = isee_identifier(dtd, decl, "element")) )
 | 
						|
      process_element_declaraction(p, s);
 | 
						|
    else if ( (s = isee_identifier(dtd, decl, "attlist")) )
 | 
						|
      process_attlist_declaraction(p, s);
 | 
						|
    else if ( (s = isee_identifier(dtd, decl, "notation")) )
 | 
						|
      process_notation_declaration(p, s);
 | 
						|
    else if ( (s = isee_identifier(dtd, decl, "shortref")) )
 | 
						|
      process_shortref_declaration(p, s);
 | 
						|
    else if ( (s = isee_identifier(dtd, decl, "usemap")) )
 | 
						|
      process_usemap_declaration(p, s);
 | 
						|
    else if ( (s = isee_identifier(dtd, decl, "sgml")) )
 | 
						|
      process_sgml_declaration(p, s);
 | 
						|
    else if ( (s = isee_identifier(dtd, decl, "doctype")) )
 | 
						|
    { if ( p->dmode != DM_DTD )
 | 
						|
	process_doctype(p, s, decl-1);
 | 
						|
    } else
 | 
						|
    { s = iskip_layout(dtd, decl);
 | 
						|
 | 
						|
      if ( *s )
 | 
						|
	gripe(p, ERC_SYNTAX_ERROR, L"Invalid declaration", s);
 | 
						|
    }
 | 
						|
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  return gripe(p, ERC_SYNTAX_ERROR, L"Invalid declaration", decl);
 | 
						|
}
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	  STREAM BINDING	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
void
 | 
						|
set_file_dtd_parser(dtd_parser *p, input_type type, const ichar *name)
 | 
						|
{ p->location.type      = type;
 | 
						|
  p->location.name.file = name;
 | 
						|
  p->location.line      = 1;
 | 
						|
  p->location.linepos   = 0;
 | 
						|
  p->location.charpos   = 0;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
set_src_dtd_parser(dtd_parser *p, input_type type, const ichar *name)
 | 
						|
{ p->location.type        = type;
 | 
						|
  p->location.name.entity = name;
 | 
						|
  p->location.line        = 1;
 | 
						|
  p->location.linepos     = 0;
 | 
						|
  p->location.charpos     = 0;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void
 | 
						|
set_mode_dtd_parser(dtd_parser *p, data_mode m)
 | 
						|
{ p->dmode = m;				/* DM_DTD or DM_DATA */
 | 
						|
  p->state = S_PCDATA;
 | 
						|
  p->blank_cdata = TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dtd_parser *
 | 
						|
new_dtd_parser(dtd *dtd)
 | 
						|
{ dtd_parser *p = sgml_calloc(1, sizeof(*p));
 | 
						|
 | 
						|
  if ( !dtd )
 | 
						|
    dtd = new_dtd(NULL);
 | 
						|
  dtd->references++;
 | 
						|
 | 
						|
  p->magic       = SGML_PARSER_MAGIC;
 | 
						|
  p->dtd	 = dtd;
 | 
						|
  p->state	 = S_PCDATA;
 | 
						|
  p->mark_state	 = MS_INCLUDE;
 | 
						|
  p->dmode       = DM_DTD;
 | 
						|
  p->encoded	 = TRUE;		/* encoded octet stream */
 | 
						|
  p->buffer	 = new_icharbuf();
 | 
						|
  p->cdata	 = new_ocharbuf();
 | 
						|
  p->event_class = EV_EXPLICIT;
 | 
						|
  set_src_dtd_parser(p, IN_NONE, NULL);
 | 
						|
 | 
						|
  return p;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Create a new parser as a member-wise copy of p, sharing its DTD (the
   reference count is incremented).  The copy gets its own empty input
   and CDATA buffers, and its environment stack, marked-section stack,
   etag, group level, scanner state, mark state and data mode are reset
   to their initial values.
*/
static dtd_parser *
clone_dtd_parser(dtd_parser *p)
{ dtd_parser *copy = sgml_calloc(1, sizeof(*copy));

  *copy = *p;                           /* start from all settings of p */
  copy->dtd->references++;

  copy->buffer       = new_icharbuf();  /* do not share the buffers */
  copy->cdata        = new_ocharbuf();

  copy->dmode        = DM_DTD;
  copy->state        = S_PCDATA;
  copy->mark_state   = MS_INCLUDE;
  copy->grouplevel   = 0;
  copy->etag         = NULL;
  copy->marked       = NULL;
  copy->environments = NULL;

  return copy;
}
 | 
						|
 | 
						|
 | 
						|
void
 | 
						|
free_dtd_parser(dtd_parser *p)
 | 
						|
{ free_icharbuf(p->buffer);
 | 
						|
  free_ocharbuf(p->cdata);
 | 
						|
#ifdef XMLNS
 | 
						|
  xmlns_free(p->xmlns);
 | 
						|
#endif
 | 
						|
  free_dtd(p->dtd);
 | 
						|
 | 
						|
  sgml_free(p);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s)
 | 
						|
{ locbuf old;
 | 
						|
 | 
						|
  push_location(p, &old);
 | 
						|
  set_src_dtd_parser(p, in, name);
 | 
						|
  empty_icharbuf(p->buffer);		/* dubious */
 | 
						|
  for(; *s; s++)
 | 
						|
    putchar_dtd_parser(p, *s);
 | 
						|
  pop_location(p, &old);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_include(dtd_parser *p, const ichar *entity_name)
 | 
						|
{ dtd_symbol *id;
 | 
						|
  dtd_entity *pe;
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
 | 
						|
  if ( (id=dtd_find_entity_symbol(dtd, entity_name)) &&
 | 
						|
       (pe=find_pentity(p->dtd, id)) )
 | 
						|
  { ichar *file;
 | 
						|
 | 
						|
    if ( (file = entity_file(dtd, pe)) )
 | 
						|
    { int rc = sgml_process_file(p, file, SGML_SUB_DOCUMENT);
 | 
						|
      sgml_free(file);
 | 
						|
 | 
						|
      return rc;
 | 
						|
    } else
 | 
						|
    { const ichar *text = entity_value(p, pe, NULL);
 | 
						|
 | 
						|
      if ( !text )
 | 
						|
	return gripe(p, ERC_NO_VALUE, pe->name->name);
 | 
						|
 | 
						|
      return process_chars(p, IN_ENTITY, entity_name, text);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return gripe(p, ERC_EXISTENCE, L"parameter entity", entity_name);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Process <![ KEYWORD [
 | 
						|
 | 
						|
Switches ->mark_state according to KEYWORD. Processes the rest in normal
 | 
						|
S_PCDATA style, which pops the mark-stack on seeing ]]>
 | 
						|
 | 
						|
For the purpose of <!DOCTYPE spec [additions]> we switch to S_GROUP if
the buffered text starts with ! that is not directly followed by [.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static void
 | 
						|
process_marked_section(dtd_parser *p)
 | 
						|
{ ichar buf[MAXDECL];
 | 
						|
  dtd *dtd = p->dtd;
 | 
						|
  const ichar *decl = p->buffer->data;
 | 
						|
  const ichar *s;
 | 
						|
 | 
						|
  if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */
 | 
						|
       (decl=isee_func(dtd, decl, CF_DSO)) && /* [ */
 | 
						|
       expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
 | 
						|
  { dtd_symbol *kwd;
 | 
						|
 | 
						|
    decl = buf;
 | 
						|
    if ( (s=itake_name(p, decl, &kwd)) &&
 | 
						|
	 isee_func(dtd, s, CF_DSO) )	/* [ */
 | 
						|
    { dtd_marked *m = sgml_calloc(1, sizeof(*m));
 | 
						|
 | 
						|
      m->keyword = kwd;			/* push on the stack */
 | 
						|
      m->parent = p->marked;
 | 
						|
      p->marked = m;
 | 
						|
 | 
						|
      if ( istrcaseeq(kwd->name, L"IGNORE") )
 | 
						|
	m->type = MS_IGNORE;
 | 
						|
      else if ( istrcaseeq(kwd->name, L"INCLUDE") )
 | 
						|
	m->type = MS_INCLUDE;
 | 
						|
      else if ( istrcaseeq(kwd->name, L"TEMP") )
 | 
						|
	m->type = MS_INCLUDE;
 | 
						|
      else if ( istrcaseeq(kwd->name, L"CDATA") )
 | 
						|
	m->type = MS_CDATA;
 | 
						|
      else if ( istrcaseeq(kwd->name, L"RCDATA") )
 | 
						|
	m->type = MS_RCDATA;
 | 
						|
      else
 | 
						|
	m->type = MS_INCLUDE;		/* default */
 | 
						|
 | 
						|
      empty_icharbuf(p->buffer);
 | 
						|
      if ( m->type == MS_CDATA )
 | 
						|
	p->state = S_MSCDATA;
 | 
						|
      else
 | 
						|
	p->state = S_PCDATA;
 | 
						|
      if ( p->mark_state != MS_IGNORE )
 | 
						|
	p->mark_state = m->type;
 | 
						|
    }
 | 
						|
  } else
 | 
						|
  { decl = p->buffer->data;
 | 
						|
 | 
						|
    if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */
 | 
						|
	 !isee_func(dtd, decl, CF_DSO) ) /* [ */
 | 
						|
    { p->state = S_GROUP;
 | 
						|
      p->grouplevel = 1;
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
pop_marked_section(dtd_parser *p)
 | 
						|
{ dtd_marked *m = p->marked;
 | 
						|
 | 
						|
  if ( m )
 | 
						|
  { p->marked = m->parent;
 | 
						|
    sgml_free(m);
 | 
						|
    p->mark_state = (p->marked ? p->marked->type : MS_INCLUDE);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Update the space-mode for the current element.  The space mode defines
 | 
						|
how spaces are handled in the CDATA output.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static dtd_space_mode
 | 
						|
istr_to_space_mode(const ichar *val)
 | 
						|
{ if ( istreq(val, L"default") )
 | 
						|
    return SP_DEFAULT;
 | 
						|
  if ( istreq(val, L"preserve") )
 | 
						|
    return SP_PRESERVE;
 | 
						|
  if ( istreq(val, L"sgml") )
 | 
						|
    return SP_SGML;
 | 
						|
  if ( istreq(val, L"remove") )
 | 
						|
    return SP_REMOVE;
 | 
						|
 | 
						|
  return SP_INHERIT;			/* interpret as error */
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
update_space_mode(dtd_parser *p, dtd_element *e,
 | 
						|
		  int natts, sgml_attribute *atts)
 | 
						|
{ for( ; natts-- > 0; atts++ )
 | 
						|
  { const ichar *name = atts->definition->name->name;
 | 
						|
 | 
						|
    if ( istreq(name, L"xml:space") &&
 | 
						|
	 atts->definition->type == AT_CDATA &&
 | 
						|
	 atts->value.textW )
 | 
						|
    { dtd_space_mode m = istr_to_space_mode(atts->value.textW);
 | 
						|
 | 
						|
      if ( m != SP_INHERIT )
 | 
						|
	p->environments->space_mode = m;
 | 
						|
      else
 | 
						|
	gripe(p, ERC_EXISTENCE, L"xml:space-mode", atts->value.textW);
 | 
						|
 | 
						|
      return;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if ( e->space_mode != SP_INHERIT )
 | 
						|
    p->environments->space_mode = e->space_mode;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
empty_cdata(dtd_parser *p)
 | 
						|
{ if ( p->dmode == DM_DATA )
 | 
						|
  { empty_ocharbuf(p->cdata);
 | 
						|
    p->blank_cdata = TRUE;
 | 
						|
    p->cdata_must_be_empty = FALSE;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
cb_cdata(dtd_parser *p, ocharbuf *buf, int offset, int size)
 | 
						|
{ if ( p->on_data )
 | 
						|
    (*p->on_data)(p, EC_CDATA, size, buf->data.w+offset);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
emit_cdata(dtd_parser *p, int last)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  locbuf locsafe;
 | 
						|
  ocharbuf *cdata = p->cdata;
 | 
						|
  int offset = 0;
 | 
						|
  int size = cdata->size;
 | 
						|
 | 
						|
  if ( size == 0 )
 | 
						|
    return TRUE;			/* empty or done */
 | 
						|
 | 
						|
  push_location(p, &locsafe);
 | 
						|
  sgml_cplocation(&p->location, &p->startloc);   /* start of markup */
 | 
						|
  sgml_cplocation(&p->startloc, &p->startcdata); /* real start of CDATA */
 | 
						|
 | 
						|
  if ( p->environments )
 | 
						|
  { switch(p->environments->space_mode)
 | 
						|
    { case SP_SGML:
 | 
						|
      case SP_DEFAULT:
 | 
						|
	if ( p->first )
 | 
						|
	{ wint_t c = fetch_ocharbuf(cdata, offset);
 | 
						|
 | 
						|
	  if ( HasClass(dtd, c, CH_RE) )
 | 
						|
	  { inc_location(&p->startloc, c);
 | 
						|
	    offset++;
 | 
						|
	    size--;
 | 
						|
	    c = fetch_ocharbuf(cdata, offset);
 | 
						|
	  }
 | 
						|
 | 
						|
	  if ( HasClass(dtd, c, CH_RS) )
 | 
						|
	  { inc_location(&p->startloc, c);
 | 
						|
	    offset++;
 | 
						|
	    size--;
 | 
						|
	  }
 | 
						|
	}
 | 
						|
	if ( last && size > 0 )
 | 
						|
	{ wint_t c = fetch_ocharbuf(cdata, offset+size-1);
 | 
						|
 | 
						|
	  if ( HasClass(dtd, c, CH_RS) )
 | 
						|
	  { dec_location(&p->location, c);
 | 
						|
	    size--;
 | 
						|
	    poke_ocharbuf(cdata, offset+size, '\0');
 | 
						|
	    if ( size > 0 )
 | 
						|
	      c = fetch_ocharbuf(cdata, offset+size-1);
 | 
						|
	    else
 | 
						|
	      c = 0;			/* HasClass(CH_RE) must fail */
 | 
						|
	  }
 | 
						|
	  if ( HasClass(dtd, c, CH_RE) )
 | 
						|
	  { dec_location(&p->location, c);
 | 
						|
	    size--;
 | 
						|
	    poke_ocharbuf(cdata, offset+size, '\0');
 | 
						|
	  }
 | 
						|
	}
 | 
						|
	if ( p->environments->space_mode == SP_DEFAULT )
 | 
						|
	{ int o = 0;
 | 
						|
	  int i;
 | 
						|
 | 
						|
	  for(i=0; i<size; i++)
 | 
						|
	  { wint_t c = fetch_ocharbuf(cdata, offset+i);
 | 
						|
 | 
						|
	    if ( HasClass(dtd, c, CH_BLANK) )
 | 
						|
	    { for(i++; i<size; i++)
 | 
						|
	      { wint_t c = fetch_ocharbuf(cdata, offset+i);
 | 
						|
 | 
						|
		if ( !HasClass(dtd, c, CH_BLANK) )
 | 
						|
		  break;
 | 
						|
	      }
 | 
						|
	      i--;
 | 
						|
	      poke_ocharbuf(cdata, o++, ' ');
 | 
						|
	      continue;
 | 
						|
	    }
 | 
						|
	    poke_ocharbuf(cdata, o++, c);
 | 
						|
	  }
 | 
						|
	  poke_ocharbuf(cdata, o, '\0');
 | 
						|
	  offset = 0;			/* wrote new output from offset=0 */
 | 
						|
	  size = o;
 | 
						|
	}
 | 
						|
	break;
 | 
						|
      case SP_REMOVE:
 | 
						|
      { int o = 0;
 | 
						|
	int i;
 | 
						|
	int end = 0;
 | 
						|
 | 
						|
	for(i=0; i<size; i++)
 | 
						|
	{ wint_t c = fetch_ocharbuf(cdata, offset+i);
 | 
						|
 | 
						|
	  if ( HasClass(dtd, c, CH_BLANK) )
 | 
						|
	    inc_location(&p->startloc, c);
 | 
						|
	  else
 | 
						|
	    break;
 | 
						|
	}
 | 
						|
 | 
						|
	if ( i<size )
 | 
						|
	{ for(; i<size; i++)
 | 
						|
	  { wint_t c = fetch_ocharbuf(cdata, offset+i);
 | 
						|
 | 
						|
	    if ( HasClass(dtd, c, CH_BLANK) )
 | 
						|
	    { i++;
 | 
						|
 | 
						|
	      while(i<size && HasClass(dtd,
 | 
						|
				       (wint_t)fetch_ocharbuf(cdata, offset+i),
 | 
						|
				       CH_BLANK))
 | 
						|
		i++;
 | 
						|
	      i--;
 | 
						|
	      poke_ocharbuf(cdata, o++, ' ');
 | 
						|
	      continue;
 | 
						|
	    }
 | 
						|
	    poke_ocharbuf(cdata, o++, c);
 | 
						|
	    end = o;
 | 
						|
	  }
 | 
						|
	}
 | 
						|
					/* TBD: adjust end */
 | 
						|
	poke_ocharbuf(cdata, end, '\0');
 | 
						|
	size = end;
 | 
						|
	break;
 | 
						|
      }
 | 
						|
      case SP_PRESERVE:
 | 
						|
	break;
 | 
						|
      case SP_INHERIT:
 | 
						|
	assert(0);
 | 
						|
	return FALSE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  if ( size == 0 )
 | 
						|
  { pop_location(p, &locsafe);
 | 
						|
    empty_cdata(p);
 | 
						|
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  assert(size > 0);
 | 
						|
 | 
						|
  if ( !p->blank_cdata )
 | 
						|
  { if ( p->cdata_must_be_empty )
 | 
						|
    { gripe(p, ERC_NOT_ALLOWED_PCDATA, p->cdata); /* TBD: now passes buffer! */
 | 
						|
    }
 | 
						|
    cb_cdata(p, cdata, offset, size);
 | 
						|
  } else if ( p->environments )
 | 
						|
  { sgml_environment *env = p->environments;
 | 
						|
    dtd_state *new;
 | 
						|
 | 
						|
				/* If an element is not in the DTD we must */
 | 
						|
				/* assume mixed content and emit spaces */
 | 
						|
 | 
						|
    if ( (new=make_dtd_transition(env->state, CDATA_ELEMENT)) )
 | 
						|
    { env->state = new;
 | 
						|
      cb_cdata(p, cdata, offset, size);
 | 
						|
    } else if ( env->element->undefined &&
 | 
						|
		p->environments->space_mode == SP_PRESERVE )
 | 
						|
    { cb_cdata(p, cdata, offset, size);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  pop_location(p, &locsafe);
 | 
						|
 | 
						|
  empty_cdata(p);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
prepare_cdata(dtd_parser *p)
 | 
						|
{ if ( p->cdata->size == 0 )
 | 
						|
    return TRUE;
 | 
						|
 | 
						|
  terminate_ocharbuf(p->cdata);
 | 
						|
 | 
						|
  if ( p->mark_state == MS_INCLUDE )
 | 
						|
  { dtd *dtd = p->dtd;
 | 
						|
 | 
						|
    if ( p->environments )		/* needed for <img> <img> */
 | 
						|
    { dtd_element *e = p->environments->element;
 | 
						|
 | 
						|
      if ( e->structure && e->structure->type == C_EMPTY && !e->undefined )
 | 
						|
	close_element(p, e, FALSE);
 | 
						|
    }
 | 
						|
 | 
						|
    if ( p->blank_cdata == TRUE )
 | 
						|
    { int blank = TRUE;
 | 
						|
      int i;
 | 
						|
 | 
						|
      for(i=0; i<p->cdata->size; i++)
 | 
						|
      { wint_t c = fetch_ocharbuf(p->cdata, i);
 | 
						|
 | 
						|
	if ( !HasClass(dtd, c, CH_BLANK) )
 | 
						|
	{ blank = FALSE;
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      p->blank_cdata = blank;
 | 
						|
      if ( !blank )
 | 
						|
      { if ( p->dmode == DM_DTD )
 | 
						|
	  gripe(p, ERC_SYNTAX_ERROR, L"CDATA in DTD", p->cdata->data);
 | 
						|
	else
 | 
						|
	  open_element(p, CDATA_ELEMENT, TRUE);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_cdata(dtd_parser *p, int last)
 | 
						|
{ prepare_cdata(p);
 | 
						|
 | 
						|
  return emit_cdata(p, last);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
process_entity(dtd_parser *p, const ichar *name)
 | 
						|
{ if ( name[0] == '#' )			/* #charcode: character entity */
 | 
						|
  { int v = char_entity_value(name);
 | 
						|
 | 
						|
    if ( v <= 0 )
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR, L"Bad character entity", name);
 | 
						|
 | 
						|
    add_ocharbuf(p->cdata, v);
 | 
						|
  } else
 | 
						|
  { dtd_symbol *id;
 | 
						|
    dtd_entity *e;
 | 
						|
    dtd *dtd = p->dtd;
 | 
						|
    int len;
 | 
						|
    const ichar *text;
 | 
						|
    const ichar *s;
 | 
						|
    int   chr;
 | 
						|
    ichar *file;
 | 
						|
 | 
						|
    if ( !(id=dtd_find_entity_symbol(dtd, name)) ||
 | 
						|
	 !(e=id->entity) )
 | 
						|
    { if ( dtd->default_entity )
 | 
						|
	e = dtd->default_entity;
 | 
						|
      else
 | 
						|
	return gripe(p, ERC_EXISTENCE, L"entity", name);
 | 
						|
    }
 | 
						|
 | 
						|
    if ( !e->value &&
 | 
						|
	 e->content == EC_SGML &&
 | 
						|
	 (file=entity_file(p->dtd, e)) )
 | 
						|
    { int rc;
 | 
						|
 | 
						|
      empty_icharbuf(p->buffer);		/* dubious */
 | 
						|
      rc = sgml_process_file(p, file, SGML_SUB_DOCUMENT);
 | 
						|
      sgml_free(file);
 | 
						|
      return rc;
 | 
						|
    }
 | 
						|
 | 
						|
    if ( !(text = entity_value(p, e, &len)) )
 | 
						|
      return gripe(p, ERC_NO_VALUE, e->name->name);
 | 
						|
 | 
						|
    switch ( e->content )
 | 
						|
    { case EC_SGML:
 | 
						|
      case EC_CDATA:
 | 
						|
	if ( (s=isee_character_entity(dtd, text, &chr)) && *s == '\0' )
 | 
						|
	{ if ( chr == 0 )
 | 
						|
	    return gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", text);
 | 
						|
 | 
						|
	  if ( p->blank_cdata == TRUE &&
 | 
						|
	       !HasClass(dtd, (wint_t)chr, CH_BLANK) )
 | 
						|
	  { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
 | 
						|
	    p->blank_cdata = FALSE;
 | 
						|
	  }
 | 
						|
 | 
						|
	  add_ocharbuf(p->cdata, chr);
 | 
						|
	  return TRUE;
 | 
						|
	}
 | 
						|
	if ( e->content == EC_SGML )
 | 
						|
	{ locbuf oldloc;
 | 
						|
	  int decode = p->utf8_decode;
 | 
						|
 | 
						|
	  push_location(p, &oldloc);
 | 
						|
	  p->utf8_decode = FALSE;
 | 
						|
	  set_src_dtd_parser(p, IN_ENTITY, e->name->name);
 | 
						|
	  empty_icharbuf(p->buffer);		/* dubious */
 | 
						|
	  for(s=text; *s; s++)
 | 
						|
	    putchar_dtd_parser(p, *s);
 | 
						|
	  p->utf8_decode = decode;
 | 
						|
	  pop_location(p, &oldloc);
 | 
						|
	} else if ( *text )
 | 
						|
	{ const ichar *o;
 | 
						|
 | 
						|
	  if ( p->blank_cdata == TRUE )
 | 
						|
	  { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
 | 
						|
	    p->blank_cdata = FALSE;
 | 
						|
	  }
 | 
						|
 | 
						|
	  for(o=text; *o; o++)
 | 
						|
	    add_ocharbuf(p->cdata, *o);
 | 
						|
	}
 | 
						|
	break;
 | 
						|
      case EC_SDATA:
 | 
						|
      case EC_NDATA:
 | 
						|
	process_cdata(p, FALSE);
 | 
						|
	if ( p->on_data )
 | 
						|
	  (*p->on_data)(p, e->content, len, text);
 | 
						|
	break;
 | 
						|
      case EC_PI:
 | 
						|
	process_cdata(p, FALSE);
 | 
						|
	if ( p->on_pi )
 | 
						|
	  (*p->on_pi)(p, text);
 | 
						|
      case EC_STARTTAG:
 | 
						|
#if 0
 | 
						|
	prepare_cdata(p);
 | 
						|
	process_begin_element(p, text);
 | 
						|
#endif
 | 
						|
	break;
 | 
						|
      case EC_ENDTAG:
 | 
						|
#if 0
 | 
						|
	prepare_cdata(p);
 | 
						|
	process_end_element(p, text);
 | 
						|
#endif
 | 
						|
	break;
 | 
						|
    }
 | 
						|
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Deal with end of input.  We should give a proper error message depending
 | 
						|
on the state and the start-location of the error.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
int
 | 
						|
end_document_dtd_parser(dtd_parser *p)
 | 
						|
{ int rval;
 | 
						|
 | 
						|
  switch(p->state)
 | 
						|
  { case S_RCDATA:
 | 
						|
    case S_CDATA:
 | 
						|
    case S_PCDATA:
 | 
						|
      rval = TRUE;
 | 
						|
      break;
 | 
						|
    case S_CMT:
 | 
						|
    case S_CMT1:
 | 
						|
    case S_CMTE0:
 | 
						|
    case S_CMTE1:
 | 
						|
    case S_DECLCMT0:
 | 
						|
    case S_DECLCMT:
 | 
						|
    case S_DECLCMTE0:
 | 
						|
      rval = gripe(p, ERC_SYNTAX_ERROR,
 | 
						|
		   L"Unexpected end-of-file in comment", L"");
 | 
						|
      break;
 | 
						|
    case S_ECDATA1:
 | 
						|
    case S_ECDATA2:
 | 
						|
    case S_EMSC1:
 | 
						|
    case S_EMSC2:
 | 
						|
    case S_DECL0:
 | 
						|
    case S_DECL:
 | 
						|
    case S_MDECL0:
 | 
						|
    case S_STRING:
 | 
						|
    case S_CMTO:
 | 
						|
    case S_GROUP:
 | 
						|
    case S_PENT:
 | 
						|
    case S_ENT:
 | 
						|
    case S_ENT0:
 | 
						|
      rval = gripe(p, ERC_SYNTAX_ERROR,
 | 
						|
		   L"Unexpected end-of-file", L"");
 | 
						|
      break;
 | 
						|
#ifdef UTF8
 | 
						|
    case S_UTF8:
 | 
						|
      rval = gripe(p, ERC_SYNTAX_ERROR,
 | 
						|
		   L"Unexpected end-of-file in UTF-8 sequence", L"");
 | 
						|
      break;
 | 
						|
#endif
 | 
						|
    case S_MSCDATA:
 | 
						|
    case S_EMSCDATA1:
 | 
						|
    case S_EMSCDATA2:
 | 
						|
      rval = gripe(p, ERC_SYNTAX_ERROR,
 | 
						|
		   L"Unexpected end-of-file in CDATA marked section", L"");
 | 
						|
      break;
 | 
						|
    case S_PI:
 | 
						|
    case S_PI2:
 | 
						|
      rval = gripe(p, ERC_SYNTAX_ERROR,
 | 
						|
		   L"Unexpected end-of-file in processing instruction", L"");
 | 
						|
      break;
 | 
						|
    default:
 | 
						|
      rval = gripe(p, ERC_SYNTAX_ERROR,
 | 
						|
		   L"Unexpected end-of-file in ???");
 | 
						|
      break;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( p->dmode == DM_DATA )
 | 
						|
  { sgml_environment *env;
 | 
						|
 | 
						|
    if ( p->cdata->size > 0 &&
 | 
						|
	 fetch_ocharbuf(p->cdata, p->cdata->size-1) == CR )
 | 
						|
      del_ocharbuf(p->cdata);
 | 
						|
 | 
						|
    process_cdata(p, TRUE);
 | 
						|
 | 
						|
    if ( (env=p->environments) )
 | 
						|
    { dtd_element *e;
 | 
						|
 | 
						|
      while(env->parent)
 | 
						|
	env = env->parent;
 | 
						|
 | 
						|
      pop_to(p, env, CDATA_ELEMENT);
 | 
						|
      e = env->element;
 | 
						|
      if ( e->structure && !e->structure->omit_close )
 | 
						|
	gripe(p, ERC_OMITTED_CLOSE, e->name->name);
 | 
						|
      close_element(p, e, FALSE);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return rval;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
begin_document_dtd_parser(dtd_parser *p)
 | 
						|
{ init_decoding(p);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void
 | 
						|
reset_document_dtd_parser(dtd_parser *p)
 | 
						|
{ if ( p->environments )
 | 
						|
  { sgml_environment *env, *parent;
 | 
						|
 | 
						|
    for(env = p->environments; env; env=parent)
 | 
						|
    { parent = env->parent;
 | 
						|
 | 
						|
      free_environment(env);
 | 
						|
    }
 | 
						|
 | 
						|
    p->environments = NULL;
 | 
						|
  }
 | 
						|
 | 
						|
  while(p->marked)
 | 
						|
    pop_marked_section(p);
 | 
						|
 | 
						|
  empty_icharbuf(p->buffer);
 | 
						|
  empty_ocharbuf(p->cdata);
 | 
						|
 | 
						|
  p->mark_state	   = MS_INCLUDE;
 | 
						|
  p->state	   = S_PCDATA;
 | 
						|
  p->grouplevel	   = 0;
 | 
						|
  p->blank_cdata   = TRUE;
 | 
						|
  p->event_class   = EV_EXPLICIT;
 | 
						|
  p->dmode	   = DM_DATA;
 | 
						|
 | 
						|
  begin_document_dtd_parser(p);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Set the UTF-8 state
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
#ifdef UTF8
 | 
						|
static void
 | 
						|
process_utf8(dtd_parser *p, int chr)
 | 
						|
{ int bytes;
 | 
						|
  int mask;
 | 
						|
 | 
						|
  for( bytes=1, mask=0x20; chr&mask; bytes++, mask >>= 1 )
 | 
						|
    ;
 | 
						|
  mask--;				/* 0x20 --> 0x1f */
 | 
						|
 | 
						|
  p->utf8_saved_state = p->state;		/* state to return to */
 | 
						|
  p->state = S_UTF8;
 | 
						|
  p->utf8_char = chr & mask;
 | 
						|
  p->utf8_left = bytes;
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
add_cdata() adds a character to the output  data. It also maps \r\n onto
 | 
						|
a single \n for Windows newline conventions.
 | 
						|
 | 
						|
There is a problem here in shortref  handling. We open the CDATA_ELEMENT
 | 
						|
as soon as we find a character as   this may open other elements through
 | 
						|
omitted tags and thus install a new shortref map.
 | 
						|
 | 
						|
If, at a later stage, all CDATA read so far turns out to be a shortref we
 | 
						|
have  incorrectly  opened   the   CDATA_ELEMENT.    As   `undoing'   the
 | 
						|
open_element() is not an option (it may  already have caused `events' on
 | 
						|
omitted tags) we are in trouble.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static void
 | 
						|
add_cdata(dtd_parser *p, int chr)
 | 
						|
{ if ( p->mark_state == MS_INCLUDE )
 | 
						|
  { ocharbuf *buf = p->cdata;
 | 
						|
 | 
						|
    if ( p->blank_cdata == TRUE &&
 | 
						|
	 !HasClass(p->dtd, (wint_t)chr, CH_BLANK) )
 | 
						|
    { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
 | 
						|
      p->blank_cdata = FALSE;
 | 
						|
    }
 | 
						|
 | 
						|
    if ( chr == '\n' )			/* insert missing CR */
 | 
						|
    { int sz;
 | 
						|
 | 
						|
      if ( (sz=buf->size) == 0 ||
 | 
						|
	   fetch_ocharbuf(buf, sz-1) != CR )
 | 
						|
	add_cdata(p, CR);
 | 
						|
    }
 | 
						|
 | 
						|
    add_ocharbuf(buf, chr);
 | 
						|
 | 
						|
    if ( p->map &&
 | 
						|
	 chr <= 0xff && p->map->ends[chr] &&
 | 
						|
	 match_shortref(p) )
 | 
						|
      return;
 | 
						|
 | 
						|
    if ( chr == '\n' )			/* dubious.  Whould we do that */
 | 
						|
    { int sz;				/* here or in space-handling? */
 | 
						|
 | 
						|
      if ( (sz=buf->size) > 1 &&
 | 
						|
	   fetch_ocharbuf(buf, sz-1) == LF &&
 | 
						|
	   fetch_ocharbuf(buf, sz-2) == CR )
 | 
						|
      { poke_ocharbuf(buf, sz-2, LF);
 | 
						|
	buf->size--;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
add_verbatim_cdata(dtd_parser *p, int chr)
 | 
						|
{ if ( p->mark_state != MS_IGNORE )
 | 
						|
  { ocharbuf *buf = p->cdata;
 | 
						|
 | 
						|
    if ( p->blank_cdata == TRUE &&
 | 
						|
	 !HasClass(p->dtd, (wint_t)chr, CH_BLANK) )
 | 
						|
    { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
 | 
						|
      p->blank_cdata = FALSE;
 | 
						|
    }
 | 
						|
 | 
						|
    if ( chr == '\n' && buf->size > 0 &&
 | 
						|
	 fetch_ocharbuf(buf, buf->size-1) == '\r' )
 | 
						|
      buf->size--;
 | 
						|
 | 
						|
    add_ocharbuf(buf, chr);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* We discovered illegal markup and now process it as normal CDATA
 | 
						|
*/
 | 
						|
 | 
						|
static void
 | 
						|
recover_parser(dtd_parser *p)
 | 
						|
{ const ichar *s;
 | 
						|
 | 
						|
  terminate_icharbuf(p->buffer);
 | 
						|
  add_cdata(p, p->saved);
 | 
						|
  for(s=p->buffer->data; *s; s++)
 | 
						|
    add_cdata(p, *s);
 | 
						|
  p->state = S_PCDATA;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static inline void
 | 
						|
setlocation(dtd_srcloc *d, dtd_srcloc *loc, int line, int lpos)
 | 
						|
{ d->line    = line;
 | 
						|
  d->linepos = lpos;
 | 
						|
  d->charpos = loc->charpos - 1;
 | 
						|
  d->type    = loc->type;
 | 
						|
  d->name    = loc->name;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
putchar_dtd_parser(dtd_parser *p, int chr)
 | 
						|
{ dtd *dtd = p->dtd;
 | 
						|
  const ichar *f = dtd->charfunc->func;
 | 
						|
  int line = p->location.line;
 | 
						|
  int lpos = p->location.linepos;
 | 
						|
 | 
						|
  p->location.charpos++;		/* TBD: actually `bytepos' */
 | 
						|
 | 
						|
#ifdef UTF8
 | 
						|
  if ( p->state == S_UTF8 )
 | 
						|
  { if ( (chr & 0xc0) != 0x80 )	/* TBD: recover */
 | 
						|
      gripe(p, ERC_SYNTAX_ERROR, L"Bad UTF-8 sequence", L"");
 | 
						|
    p->utf8_char <<= 6;
 | 
						|
    p->utf8_char |= (chr & ~0xc0);
 | 
						|
    if ( --p->utf8_left == 0 )
 | 
						|
    { chr = p->utf8_char;
 | 
						|
      p->state = p->utf8_saved_state;
 | 
						|
    } else
 | 
						|
    { return TRUE;
 | 
						|
    }
 | 
						|
  } else if ( ISUTF8_MB(chr) && p->utf8_decode )
 | 
						|
  { process_utf8(p, chr);
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
#endif
 | 
						|
 | 
						|
  if ( f[CF_RS] == chr )
 | 
						|
  { p->location.line++;
 | 
						|
    p->location.linepos = 0;
 | 
						|
  } else
 | 
						|
  { if ( f[CF_RE] == chr )
 | 
						|
      p->location.linepos = 0;
 | 
						|
    else
 | 
						|
      p->location.linepos++;
 | 
						|
  }
 | 
						|
 | 
						|
reprocess:
 | 
						|
  switch(p->state)
 | 
						|
  { case S_PCDATA:
 | 
						|
    { if ( f[CF_MDO1] == chr )		/* < */
 | 
						|
      { setlocation(&p->startloc, &p->location, line, lpos);
 | 
						|
	p->state = S_DECL0;
 | 
						|
	empty_icharbuf(p->buffer);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
      if ( p->dmode == DM_DTD )
 | 
						|
      { if ( f[CF_PERO] == chr )	/* % */
 | 
						|
	{ setlocation(&p->startloc, &p->location, line, lpos);
 | 
						|
	  p->state = S_PENT;
 | 
						|
	  return TRUE;
 | 
						|
	}
 | 
						|
      } else
 | 
						|
      { if ( f[CF_ERO] == chr )		/* & */
 | 
						|
	{ setlocation(&p->startloc, &p->location, line, lpos);
 | 
						|
	  p->state = S_ENT0;
 | 
						|
	  return TRUE;
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      if ( p->marked && f[CF_DSC] == chr ) /* ] in marked section */
 | 
						|
      { empty_icharbuf(p->buffer);
 | 
						|
	p->state = S_EMSC1;
 | 
						|
	p->saved = chr;			/* for recovery */
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
 | 
						|
      if ( p->waiting_for_net && f[CF_ETAGO2] == chr ) /* shorttag */
 | 
						|
      { setlocation(&p->startloc, &p->location, line, lpos);
 | 
						|
	process_net(p);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
 | 
						|
					/* Real character data */
 | 
						|
      if ( p->cdata->size == 0 )
 | 
						|
        setlocation(&p->startcdata, &p->location, line, lpos);
 | 
						|
 | 
						|
      add_cdata(p, chr);
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_ECDATA2:			/* Seen </ in CDATA/RCDATA */
 | 
						|
    { if ( f[CF_MDC] == chr &&
 | 
						|
	   p->etaglen == p->buffer->size &&
 | 
						|
	   istrncaseeq(p->buffer->data, p->etag, p->etaglen) )
 | 
						|
      { p->cdata->size -= p->etaglen+2;	/* 2 for </ */
 | 
						|
	terminate_ocharbuf(p->cdata);
 | 
						|
	terminate_icharbuf(p->buffer);
 | 
						|
	if ( p->mark_state == MS_INCLUDE )
 | 
						|
	{ process_cdata(p, TRUE);
 | 
						|
	  process_end_element(p, p->buffer->data);
 | 
						|
	  empty_cdata(p);
 | 
						|
	}
 | 
						|
	empty_icharbuf(p->buffer);
 | 
						|
	p->cdata_state = p->state = S_PCDATA;
 | 
						|
      } else
 | 
						|
      { add_verbatim_cdata(p, chr);
 | 
						|
	if ( p->etaglen < p->buffer->size ||
 | 
						|
	     !HasClass(dtd, (wint_t)chr, CH_NAME))
 | 
						|
	{ empty_icharbuf(p->buffer);	/* mismatch */
 | 
						|
	  p->state = p->cdata_state;
 | 
						|
	} else
 | 
						|
	  add_icharbuf(p->buffer, chr);
 | 
						|
      }
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_ECDATA1:			/* seen < in CDATA */
 | 
						|
    { add_verbatim_cdata(p, chr);
 | 
						|
      if ( f[CF_ETAGO2] == chr )	/* / */
 | 
						|
      { empty_icharbuf(p->buffer);
 | 
						|
	p->state = S_ECDATA2;
 | 
						|
      } else if ( f[CF_ETAGO1] != chr )	/* <: do not change state */
 | 
						|
	p->state = p->cdata_state;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_RCDATA:
 | 
						|
    { if ( f[CF_ERO] == chr ) /* & */
 | 
						|
      { setlocation(&p->startloc, &p->location, line, lpos);
 | 
						|
	p->state = S_ENT0;
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
      /*FALLTHROUGH*/
 | 
						|
    }
 | 
						|
    case S_CDATA:
 | 
						|
    { add_verbatim_cdata(p, chr);
 | 
						|
 | 
						|
      if ( f[CF_MDO1] == chr )		/* < */
 | 
						|
      { setlocation(&p->startloc, &p->location, line, lpos);
 | 
						|
	p->state = S_ECDATA1;
 | 
						|
      }
 | 
						|
 | 
						|
					/* / in CDATA shorttag element */
 | 
						|
      if ( p->waiting_for_net && f[CF_ETAGO2] == chr )
 | 
						|
      { setlocation(&p->startloc, &p->location, line, lpos);
 | 
						|
	p->cdata->size--;
 | 
						|
	terminate_ocharbuf(p->cdata);
 | 
						|
	terminate_icharbuf(p->buffer);
 | 
						|
	if ( p->mark_state == MS_INCLUDE )
 | 
						|
	{ process_cdata(p, TRUE);
 | 
						|
	  process_net(p);
 | 
						|
	  empty_cdata(p);
 | 
						|
	}
 | 
						|
	empty_icharbuf(p->buffer);
 | 
						|
	p->cdata_state = p->state = S_PCDATA;
 | 
						|
      }
 | 
						|
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_MSCDATA:
 | 
						|
    { add_verbatim_cdata(p, chr);
 | 
						|
      if ( f[CF_DSC] == chr )		/* ] */
 | 
						|
        p->state = S_EMSCDATA1;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_EMSCDATA1:
 | 
						|
    { add_verbatim_cdata(p, chr);
 | 
						|
      if ( f[CF_DSC] == chr )		/* ]] */
 | 
						|
        p->state = S_EMSCDATA2;
 | 
						|
      else
 | 
						|
        p->state = S_MSCDATA;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_EMSCDATA2:
 | 
						|
    { add_verbatim_cdata(p, chr);
 | 
						|
      if ( f[CF_MDC] == chr )		/* ]]> */
 | 
						|
      { p->cdata->size -= 3;		/* Delete chars for ]] */
 | 
						|
	pop_marked_section(p);
 | 
						|
	p->state = S_PCDATA;
 | 
						|
      } else if ( f[CF_DSC] != chr )	/* if ]]], stay in this state */
 | 
						|
        p->state = S_MSCDATA;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_EMSC1:
 | 
						|
    { if ( f[CF_DSC] == chr )		/* ]] in marked section */
 | 
						|
      { p->state = S_EMSC2;
 | 
						|
	return TRUE;
 | 
						|
      } else
 | 
						|
      { add_icharbuf(p->buffer, chr);
 | 
						|
	recover_parser(p);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    case S_EMSC2:
 | 
						|
    { if ( f[CF_MDC] == chr )		/* ]]> in marked section */
 | 
						|
      { pop_marked_section(p);
 | 
						|
	p->state = S_PCDATA;
 | 
						|
	return TRUE;
 | 
						|
      } else
 | 
						|
      { add_icharbuf(p->buffer, chr);
 | 
						|
	recover_parser(p);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    case S_PENT:			/* %parameter entity; */
 | 
						|
    { if ( f[CF_ERC] == chr )
 | 
						|
      { p->state = S_PCDATA;
 | 
						|
	terminate_icharbuf(p->buffer);
 | 
						|
	if ( p->mark_state == MS_INCLUDE )
 | 
						|
	{ process_include(p, p->buffer->data);
 | 
						|
	}
 | 
						|
	empty_icharbuf(p->buffer);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
      if ( HasClass(dtd, (wint_t)chr, CH_NAME) )
 | 
						|
      { add_icharbuf(p->buffer, chr);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
 | 
						|
      terminate_icharbuf(p->buffer);
 | 
						|
      return gripe(p, ERC_SYNTAX_ERROR,
 | 
						|
		   L"Illegal parameter entity", p->buffer->data);
 | 
						|
    }
 | 
						|
    case S_ENT0:			/* Seen & */
 | 
						|
    { if ( chr == '#' || HasClass(dtd, (wint_t)chr, CH_NAME) )
 | 
						|
      { empty_icharbuf(p->buffer);
 | 
						|
	add_icharbuf(p->buffer, chr);
 | 
						|
	p->state = S_ENT;
 | 
						|
      } else
 | 
						|
      {	if ( dtd->dialect != DL_SGML )
 | 
						|
	{ wchar_t buf[3];
 | 
						|
	  buf[0] = '&';
 | 
						|
	  buf[1] = chr;
 | 
						|
	  buf[2] = '\0';
 | 
						|
	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal entity", buf);
 | 
						|
	}
 | 
						|
 | 
						|
	add_cdata(p, f[CF_ERO]);
 | 
						|
	p->state = p->cdata_state;
 | 
						|
	goto reprocess;
 | 
						|
      }
 | 
						|
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_ENT:				/* &entity; */
 | 
						|
    { if ( HasClass(dtd, (wint_t)chr, CH_NAME) )
 | 
						|
      { add_icharbuf(p->buffer, chr);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
 | 
						|
      terminate_icharbuf(p->buffer);
 | 
						|
      p->state = p->cdata_state;
 | 
						|
      if ( p->mark_state == MS_INCLUDE )
 | 
						|
      { process_entity(p, p->buffer->data);
 | 
						|
      }
 | 
						|
      empty_icharbuf(p->buffer);
 | 
						|
 | 
						|
      if ( chr == CR )
 | 
						|
	p->state = S_ENTCR;
 | 
						|
      else if ( f[CF_ERC] != chr && chr != '\n' )
 | 
						|
	goto reprocess;
 | 
						|
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_ENTCR:			/* seen &entCR, eat the LF */
 | 
						|
    { p->state = p->cdata_state;
 | 
						|
      if ( chr != LF )
 | 
						|
	goto reprocess;
 | 
						|
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_DECL0:			/* Seen < */
 | 
						|
    { if ( f[CF_ETAGO2] == chr )	/* </ */
 | 
						|
      { add_icharbuf(p->buffer, chr);
 | 
						|
	p->state = S_DECL;
 | 
						|
      } else if ( HasClass(dtd, (wint_t)chr, CH_NAME) ) /* <letter */
 | 
						|
      { add_icharbuf(p->buffer, chr);
 | 
						|
	p->state = S_DECL;
 | 
						|
      } else if ( f[CF_MDO2] == chr )	/* <! */
 | 
						|
      { p->state = S_MDECL0;
 | 
						|
      } else if ( f[CF_PRO2] == chr )	/* <? */
 | 
						|
      { p->state = S_PI;
 | 
						|
      } else				/* recover */
 | 
						|
      { add_cdata(p, f[CF_MDO1]);
 | 
						|
	add_cdata(p, chr);
 | 
						|
	p->state = S_PCDATA;
 | 
						|
      }
 | 
						|
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_MDECL0:			/* Seen <! */
 | 
						|
    { if ( f[CF_CMT] == chr )		/* <!- */
 | 
						|
      { p->state = S_CMTO;
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
      add_icharbuf(p->buffer, f[CF_MDO2]);
 | 
						|
      add_icharbuf(p->buffer, chr);
 | 
						|
      p->state = S_DECL;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_DECL:			/* <...> */
 | 
						|
    { if ( f[CF_MDC] == chr )		/* > */
 | 
						|
      { prepare_cdata(p);
 | 
						|
	p->state = S_PCDATA;
 | 
						|
	terminate_icharbuf(p->buffer);
 | 
						|
	if ( p->mark_state == MS_INCLUDE )
 | 
						|
	{ process_declaration(p, p->buffer->data);
 | 
						|
	}
 | 
						|
	empty_icharbuf(p->buffer);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
      if ( dtd->shorttag && f[CF_ETAGO2] == chr && p->buffer->size > 0 )
 | 
						|
      { prepare_cdata(p);
 | 
						|
	p->state = S_PCDATA;
 | 
						|
	terminate_icharbuf(p->buffer);
 | 
						|
	if ( p->mark_state == MS_INCLUDE )
 | 
						|
	{ WITH_CLASS(p, EV_SHORTTAG,
 | 
						|
		     process_declaration(p, p->buffer->data));
 | 
						|
	}
 | 
						|
	empty_icharbuf(p->buffer);
 | 
						|
	p->waiting_for_net = TRUE;
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
 | 
						|
      add_icharbuf(p->buffer, chr);
 | 
						|
 | 
						|
      if ( f[CF_LIT] == chr )		/* " */
 | 
						|
      { p->state = S_STRING;
 | 
						|
	p->saved = chr;
 | 
						|
	p->lit_saved_state = S_DECL;
 | 
						|
      } else if ( f[CF_LITA] == chr )	/* ' */
 | 
						|
      { p->state = S_STRING;
 | 
						|
	p->saved = chr;
 | 
						|
	p->lit_saved_state = S_DECL;
 | 
						|
	return TRUE;
 | 
						|
      } else if ( f[CF_CMT] == chr &&	/* - */
 | 
						|
		  p->buffer->data[0] == f[CF_MDO2] ) /* Started <! */
 | 
						|
      { p->state = S_DECLCMT0;
 | 
						|
      } else if ( f[CF_DSO] == chr )	/* [: marked section */
 | 
						|
      { terminate_icharbuf(p->buffer);
 | 
						|
 | 
						|
	process_marked_section(p);
 | 
						|
      }
 | 
						|
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_DECLCMT0:			/* <...- */
 | 
						|
    { if ( f[CF_CMT] == chr )
 | 
						|
      { p->buffer->size--;
 | 
						|
	p->state = S_DECLCMT;
 | 
						|
      } else
 | 
						|
      { add_icharbuf(p->buffer, chr);
 | 
						|
	p->state = S_DECL;
 | 
						|
      }
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_DECLCMT:			/* <...--.. */
 | 
						|
    { if ( f[CF_CMT] == chr )
 | 
						|
	p->state = S_DECLCMTE0;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_DECLCMTE0:			/* <...--..- */
 | 
						|
    { if ( f[CF_CMT] == chr )
 | 
						|
	p->state = S_DECL;
 | 
						|
      else
 | 
						|
	p->state = S_DECLCMT;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_PI:
 | 
						|
    { add_icharbuf(p->buffer, chr);
 | 
						|
      if ( f[CF_PRO2] == chr )		/* <? ... ? */
 | 
						|
	p->state = S_PI2;
 | 
						|
      if ( f[CF_PRC] == chr )		/* no ? is ok too (XML/SGML) */
 | 
						|
	goto pi;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_PI2:
 | 
						|
    { if ( f[CF_PRC] == chr )
 | 
						|
      { pi:
 | 
						|
	process_cdata(p, FALSE);
 | 
						|
	p->state = S_PCDATA;
 | 
						|
	p->buffer->size--;
 | 
						|
	terminate_icharbuf(p->buffer);
 | 
						|
	if ( p->mark_state == MS_INCLUDE )
 | 
						|
	{ process_pi(p, p->buffer->data);
 | 
						|
	}
 | 
						|
	empty_icharbuf(p->buffer);
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
      add_icharbuf(p->buffer, chr);
 | 
						|
      p->state = S_PI;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_STRING:
 | 
						|
    { add_icharbuf(p->buffer, chr);
 | 
						|
      if ( chr == p->saved )
 | 
						|
	p->state = p->lit_saved_state;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_CMTO:			/* Seen <!- */
 | 
						|
    { if ( f[CF_CMT] == chr )		/* - */
 | 
						|
      { p->state = S_CMT1;
 | 
						|
	return TRUE;
 | 
						|
      } else
 | 
						|
      { add_cdata(p, f[CF_MDO1]);
 | 
						|
	add_cdata(p, f[CF_MDO2]);
 | 
						|
	add_cdata(p, f[CF_CMT]);
 | 
						|
	add_cdata(p, chr);
 | 
						|
	p->state = S_PCDATA;
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    case S_CMT1:			/* <!-- */
 | 
						|
    { if ( f[CF_CMT] == chr )		/* <!--- */
 | 
						|
      { if ( dtd->dialect != DL_SGML )
 | 
						|
	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal comment", L"<!---");
 | 
						|
      }
 | 
						|
      p->state = S_CMT;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_CMT:
 | 
						|
    { if ( f[CF_CMT] == chr )
 | 
						|
	p->state = S_CMTE0;		/* <!--...- */
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_CMTE0:			/* <!--... -- */
 | 
						|
    { if ( f[CF_CMT] == chr )
 | 
						|
	p->state = S_CMTE1;
 | 
						|
      else
 | 
						|
	p->state = S_CMT;
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_CMTE1:			/* <!--...-- seen */
 | 
						|
    { if ( f[CF_MDC] == chr )		/* > */
 | 
						|
      { if ( p->on_decl )
 | 
						|
	  (*p->on_decl)(p, (ichar*)"");
 | 
						|
	p->state = S_PCDATA;
 | 
						|
      } else
 | 
						|
      { if ( dtd->dialect != DL_SGML )
 | 
						|
	  gripe(p, ERC_SYNTAX_ERROR, L"Illegal comment", L"");
 | 
						|
	if ( f[CF_CMT] != chr )
 | 
						|
	  p->state = S_CMT;
 | 
						|
      }
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
    case S_GROUP:			/* [...] in declaration */
 | 
						|
    { add_icharbuf(p->buffer, chr);
 | 
						|
      if ( f[CF_DSO] == chr )
 | 
						|
      { p->grouplevel++;
 | 
						|
      } else if ( f[CF_DSC] == chr )
 | 
						|
      { if ( --p->grouplevel == 0 )
 | 
						|
	  p->state = S_DECL;
 | 
						|
      } else if ( f[CF_LIT] == chr )	/* " */
 | 
						|
      { p->state = S_STRING;
 | 
						|
	p->saved = chr;
 | 
						|
	p->lit_saved_state = S_GROUP;
 | 
						|
      } else if ( f[CF_LITA] == chr )	/* ' */
 | 
						|
      { p->state = S_STRING;
 | 
						|
	p->saved = chr;
 | 
						|
	p->lit_saved_state = S_GROUP;
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
      return TRUE;
 | 
						|
    }
 | 
						|
#ifdef UTF8
 | 
						|
    case S_UTF8:
 | 
						|
#endif
 | 
						|
    default:
 | 
						|
      assert(0);
 | 
						|
      return FALSE;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	     TOPLEVEL		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
int
 | 
						|
load_dtd_from_file(dtd_parser *p, const ichar *file)
 | 
						|
{ FILE *fd;
 | 
						|
  int rval;
 | 
						|
  data_mode   oldmode  = p->dmode;
 | 
						|
  dtdstate    oldstate = p->state;
 | 
						|
  locbuf      oldloc;
 | 
						|
 | 
						|
  push_location(p, &oldloc);
 | 
						|
  p->dmode = DM_DTD;
 | 
						|
  p->state = S_PCDATA;
 | 
						|
  empty_icharbuf(p->buffer);		/* dubious */
 | 
						|
  set_file_dtd_parser(p, IN_FILE, file);
 | 
						|
 | 
						|
  if ( (fd = wfopen(file, "rb")) )
 | 
						|
  { int chr;
 | 
						|
 | 
						|
    while( (chr = getc(fd)) != EOF )
 | 
						|
      putchar_dtd_parser(p, chr);
 | 
						|
 | 
						|
    fclose(fd);
 | 
						|
 | 
						|
    p->dtd->implicit = FALSE;
 | 
						|
    rval = TRUE;
 | 
						|
  } else
 | 
						|
    rval = FALSE;
 | 
						|
 | 
						|
  pop_location(p, &oldloc);
 | 
						|
  p->dmode = oldmode;
 | 
						|
  p->state = oldstate;
 | 
						|
 | 
						|
  return rval;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dtd *
 | 
						|
file_to_dtd(const ichar *file, const ichar *doctype, dtd_dialect dialect)
 | 
						|
{ dtd_parser *p = new_dtd_parser(new_dtd(doctype));
 | 
						|
 | 
						|
  set_dialect_dtd(p->dtd, dialect);
 | 
						|
 | 
						|
  if ( load_dtd_from_file(p, file) )
 | 
						|
  { dtd *dtd = p->dtd;
 | 
						|
 | 
						|
    dtd->references++;			/* avoid deletion */
 | 
						|
    free_dtd_parser(p);
 | 
						|
    return dtd;
 | 
						|
  } else
 | 
						|
  { free_dtd_parser(p);
 | 
						|
 | 
						|
    return NULL;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
SGML sees a file as
 | 
						|
 | 
						|
[<LF>]Line 1<CR>
 | 
						|
 <LF> Line 2<CR>
 | 
						|
 | 
						|
I.e. the newline  appearing  just  before   the  end-of-file  should  be
 | 
						|
ignored. In addition, Unix-style files are   mapped  to CR-LF. Thanks to
 | 
						|
Richard O'Keefe.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
int
 | 
						|
sgml_process_stream(dtd_parser *p, FILE *fd, unsigned flags)
 | 
						|
{ int p0, p1;
 | 
						|
 | 
						|
  if ( (p0 = getc(fd)) == EOF )
 | 
						|
    return TRUE;
 | 
						|
  if ( (p1 = getc(fd)) == EOF )
 | 
						|
  { putchar_dtd_parser(p, p0);
 | 
						|
    return end_document_dtd_parser(p);
 | 
						|
  }
 | 
						|
 | 
						|
  for(;;)
 | 
						|
  { int p2 = getc(fd);
 | 
						|
 | 
						|
    if ( p2 == EOF )
 | 
						|
    { putchar_dtd_parser(p, p0);
 | 
						|
      if ( p1 != LF )
 | 
						|
	putchar_dtd_parser(p, p1);
 | 
						|
      else if ( p0 != CR )
 | 
						|
	putchar_dtd_parser(p, CR);
 | 
						|
 | 
						|
      if ( flags & SGML_SUB_DOCUMENT )
 | 
						|
	return TRUE;
 | 
						|
      else
 | 
						|
	return end_document_dtd_parser(p);
 | 
						|
    }
 | 
						|
 | 
						|
    putchar_dtd_parser(p, p0);
 | 
						|
    p0 = p1;
 | 
						|
    p1 = p2;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
sgml_process_file(dtd_parser *p, const ichar *file, unsigned flags)
 | 
						|
{ FILE *fd;
 | 
						|
  int rval;
 | 
						|
  locbuf oldloc;
 | 
						|
 | 
						|
  push_location(p, &oldloc);
 | 
						|
  set_file_dtd_parser(p, IN_FILE, file);
 | 
						|
  if ( !(flags & SGML_SUB_DOCUMENT) )
 | 
						|
    set_mode_dtd_parser(p, DM_DATA);
 | 
						|
 | 
						|
  if ( (fd = wfopen(file, "rb")) )
 | 
						|
  { rval = sgml_process_stream(p, fd, flags);
 | 
						|
    fclose(fd);
 | 
						|
  } else
 | 
						|
    rval = FALSE;
 | 
						|
 | 
						|
  pop_location(p, &oldloc);
 | 
						|
 | 
						|
  return rval;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	       ERRORS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static wchar_t *
 | 
						|
format_location(wchar_t *s, size_t len, dtd_srcloc *l)
 | 
						|
{ int first = TRUE;
 | 
						|
 | 
						|
  if ( !l || l->type == IN_NONE )
 | 
						|
    return s;
 | 
						|
 | 
						|
  for( ; l && l->type != IN_NONE;
 | 
						|
         l = l->parent, first = FALSE )
 | 
						|
  { if ( !first )
 | 
						|
    { swprintf(s, len, L" (from ");
 | 
						|
      s += wcslen(s);
 | 
						|
    }
 | 
						|
 | 
						|
    switch(l->type)
 | 
						|
    { case IN_NONE:
 | 
						|
	assert(0);
 | 
						|
      case IN_FILE:
 | 
						|
	swprintf(s, len, L"%ls:%d:%d", l->name.file, l->line, l->linepos);
 | 
						|
        break;
 | 
						|
      case IN_ENTITY:
 | 
						|
        swprintf(s, len, L"&%ls;%d:%d", l->name.entity, l->line, l->linepos);
 | 
						|
        break;
 | 
						|
    }
 | 
						|
 | 
						|
    s += wcslen(s);
 | 
						|
    if ( !first )
 | 
						|
    { *s++ = L')';
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  *s++ = L':';
 | 
						|
  *s++ = L' ';
 | 
						|
 | 
						|
  return s;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
format_message(dtd_error *e)
 | 
						|
{ wchar_t buf[1024];
 | 
						|
  wchar_t *s;
 | 
						|
  int prefix_len;
 | 
						|
  int left;
 | 
						|
 | 
						|
  switch(e->severity)
 | 
						|
  { case ERS_ERROR:
 | 
						|
      wcscpy(buf, L"Error: ");
 | 
						|
      break;
 | 
						|
    case ERS_WARNING:
 | 
						|
      wcscpy(buf, L"Warning: ");
 | 
						|
      break;
 | 
						|
    default:
 | 
						|
      buf[0] = '\0';
 | 
						|
  }
 | 
						|
  s = buf+wcslen(buf);
 | 
						|
 | 
						|
  s = format_location(s, 1024-(s-buf), e->location);
 | 
						|
  prefix_len = (int)(s-buf);
 | 
						|
  left = 1024-prefix_len;
 | 
						|
 | 
						|
  switch(e->id)
 | 
						|
  { case ERC_REPRESENTATION:
 | 
						|
      swprintf(s, left, L"Cannot represent due to %ls", e->argv[0]);
 | 
						|
      break;
 | 
						|
    case ERC_RESOURCE:
 | 
						|
      swprintf(s, left, L"Insufficient %ls resources", e->argv[0]);
 | 
						|
      break;
 | 
						|
    case ERC_LIMIT:
 | 
						|
      swprintf(s, left, L"%ls limit exceeded", e->argv[0]);
 | 
						|
      break;
 | 
						|
    case ERC_VALIDATE:
 | 
						|
      swprintf(s, left, L"%ls", e->argv[0]);
 | 
						|
      break;
 | 
						|
    case ERC_SYNTAX_ERROR:
 | 
						|
      swprintf(s, left, L"Syntax error: %ls", e->argv[0]);
 | 
						|
      break;
 | 
						|
    case ERC_EXISTENCE:
 | 
						|
      swprintf(s, left, L"%ls \"%ls\" does not exist", e->argv[0], e->argv[1]);
 | 
						|
      break;
 | 
						|
    case ERC_REDEFINED:
 | 
						|
      swprintf(s, left, L"Redefined %ls \"%ls\"", e->argv[0], e->argv[1]);
 | 
						|
      break;
 | 
						|
    default:
 | 
						|
      ;
 | 
						|
  }
 | 
						|
 | 
						|
  e->message = str2ring(buf);
 | 
						|
  e->plain_message = e->message + prefix_len;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
gripe(dtd_parser *p, dtd_error_id e, ...)
 | 
						|
{ va_list args;
 | 
						|
  wchar_t buf[1024];
 | 
						|
  dtd_error error;
 | 
						|
  int dtdmode = FALSE;
 | 
						|
  void *freeme = NULL;
 | 
						|
 | 
						|
  va_start(args, e);
 | 
						|
 | 
						|
  memset(&error, 0, sizeof(error));
 | 
						|
  error.minor = e;			/* detailed error code */
 | 
						|
 | 
						|
  if ( p )
 | 
						|
  { error.location = &p->location;
 | 
						|
    if ( p->dmode == DM_DTD )
 | 
						|
      dtdmode = TRUE;
 | 
						|
  } else
 | 
						|
  { error.location = NULL;
 | 
						|
  }
 | 
						|
 | 
						|
  switch(e)
 | 
						|
  { case ERC_REPRESENTATION:
 | 
						|
    case ERC_RESOURCE:
 | 
						|
      error.severity = ERS_ERROR;
 | 
						|
      error.argv[0]  = va_arg(args, wchar_t *);
 | 
						|
      break;
 | 
						|
    case ERC_LIMIT:
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
      error.argv[0]  = va_arg(args, wchar_t *);
 | 
						|
      break;
 | 
						|
    case ERC_SYNTAX_ERROR:
 | 
						|
    case ERC_SYNTAX_WARNING:
 | 
						|
    { wchar_t *m       = va_arg(args, wchar_t *);
 | 
						|
      const wchar_t *s = va_arg(args, const wchar_t *);
 | 
						|
 | 
						|
      if ( s && *s )
 | 
						|
      { swprintf(buf, 1024, L"%ls, found \"%ls\"", m, str_summary(s, 25));
 | 
						|
	error.argv[0] = buf;
 | 
						|
      } else
 | 
						|
	error.argv[0] = m;
 | 
						|
 | 
						|
      error.severity = (e == ERC_SYNTAX_WARNING ? ERS_WARNING : ERS_ERROR);
 | 
						|
      e = ERC_SYNTAX_ERROR;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_DOMAIN:
 | 
						|
    { const wchar_t *expected = va_arg(args, const wchar_t *);
 | 
						|
      const wchar_t *found    = str_summary(va_arg(args, const wchar_t *), 25);
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"Expected type %ls, found \"%ls\"", expected, found);
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_ERROR;
 | 
						|
      e = (dtdmode ? ERC_SYNTAX_ERROR : ERC_VALIDATE);
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_REDEFINED:
 | 
						|
    { dtd_symbol *name;
 | 
						|
      error.argv[0] = va_arg(args, wchar_t *); /* type */
 | 
						|
      name = va_arg(args, dtd_symbol *); /* name */
 | 
						|
      error.argv[1]  = (ichar*)name->name;
 | 
						|
      error.severity = ERS_STYLE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_EXISTENCE:
 | 
						|
    { error.argv[0] = va_arg(args, wchar_t *); /* type */
 | 
						|
      error.argv[1] = va_arg(args, wchar_t *); /* name */
 | 
						|
      error.severity = ERS_ERROR;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_VALIDATE:
 | 
						|
    { error.argv[0] = va_arg(args, wchar_t *); /* message */
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_OMITTED_CLOSE:
 | 
						|
    { const wchar_t *element = va_arg(args, const wchar_t *);
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"Inserted omitted end-tag for \"%ls\"", element);
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
      e = ERC_VALIDATE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_OMITTED_OPEN:
 | 
						|
    { const wchar_t *element = va_arg(args, const wchar_t *);
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"Inserted omitted start-tag for \"%ls\"", element);
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
      e = ERC_VALIDATE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_NOT_OPEN:
 | 
						|
    { const wchar_t *element = va_arg(args, const wchar_t *);
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"Ignored end-tag for \"%ls\" which is not open",
 | 
						|
	       element);
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
      e = ERC_VALIDATE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_NOT_ALLOWED:
 | 
						|
    { const wchar_t *element = va_arg(args, const wchar_t *);
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"Element \"%ls\" not allowed here", element);
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
      e = ERC_VALIDATE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_NOT_ALLOWED_PCDATA:
 | 
						|
    { const ocharbuf *cdata = va_arg(args, const ocharbuf *);
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"#PCDATA (\"%ls\") not allowed here",
 | 
						|
	       str_summary(cdata->data.w, 25));
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
      e = ERC_VALIDATE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_NO_ATTRIBUTE:
 | 
						|
    { const wchar_t *elem = va_arg(args, wchar_t *); /* element */
 | 
						|
      const wchar_t *attr = va_arg(args, wchar_t *); /* attribute */
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"Element \"%ls\" has no attribute \"%ls\"",
 | 
						|
	       elem, attr);
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
 | 
						|
      e = ERC_VALIDATE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_NO_ATTRIBUTE_VALUE:
 | 
						|
    { const wchar_t *elem  = va_arg(args, wchar_t *); /* element */
 | 
						|
      const wchar_t *value = va_arg(args, wchar_t *); /* attribute value */
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"Element \"%ls\" has no attribute with value \"%ls\"",
 | 
						|
	       elem, value);
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
 | 
						|
      e = ERC_VALIDATE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_NO_VALUE:
 | 
						|
    { error.argv[0] = L"entity value";
 | 
						|
      error.argv[1] = va_arg(args, wchar_t *); /* entity */
 | 
						|
 | 
						|
      error.severity = ERS_ERROR;
 | 
						|
      e = ERC_EXISTENCE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_NO_DOCTYPE:
 | 
						|
    { const wchar_t *doctype = va_arg(args, wchar_t *); /* element */
 | 
						|
      const wchar_t *file    = va_arg(args, wchar_t *); /* DTD file */
 | 
						|
 | 
						|
      swprintf(buf, 1024, L"No <!DOCTYPE ...>, assuming \"%ls\" from DTD file \"%s\"",
 | 
						|
	      doctype, file);
 | 
						|
      error.argv[0] = buf;
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
 | 
						|
      e = ERC_VALIDATE;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case ERC_NO_CATALOGUE:
 | 
						|
    { char *file = va_arg(args, char *); /* catalogue file */
 | 
						|
 | 
						|
      error.argv[0] = L"catalogue file";
 | 
						|
      freeme = error.argv[1] = utf8towcs(file);
 | 
						|
      error.severity = ERS_WARNING;
 | 
						|
      e = ERC_EXISTENCE;
 | 
						|
 | 
						|
      break;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  error.id      = e;
 | 
						|
  format_message(&error);
 | 
						|
 | 
						|
  if ( p && p->on_error )
 | 
						|
    (*p->on_error)(p, &error);
 | 
						|
  else
 | 
						|
    fwprintf(stderr, L"SGML: %ls\n", error.message);
 | 
						|
 | 
						|
  if ( freeme )
 | 
						|
    sgml_free(freeme);
 | 
						|
 | 
						|
  va_end(args);
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 |