d6a06fe092
as possible.
5603 lines
126 KiB
C
5603 lines
126 KiB
C
/* $Id$
|
|
|
|
Part of SWI-Prolog
|
|
|
|
Author: Jan Wielemaker
|
|
E-mail: wielemak@science.uva.nl
|
|
WWW: http://www.swi-prolog.org
|
|
Copyright (C): 1985-2006, University of Amsterdam
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
#define _ISOC99_SOURCE 1 /* fwprintf(), etc prototypes */
|
|
|
|
#define DTD_IMPLEMENTATION 1
|
|
#include <stdio.h>
|
|
#include <wchar.h>
|
|
#include "dtd.h"
|
|
#include "model.h"
|
|
#include "util.h"
|
|
#include "catalog.h"
|
|
#include "parser.h"
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <stdarg.h>
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
#include "utf8.h"
|
|
#include <errno.h>
|
|
#include <wctype.h>
|
|
#include "xml_unicode.h"
|
|
|
|
#define DEBUG(g) ((void)0)
|
|
#define ZERO_TERM_LEN (-1) /* terminated by nul */
|
|
|
|
#ifdef __WINDOWS__
|
|
#define inline __inline
|
|
#define swprintf _snwprintf
|
|
#endif
|
|
|
|
|
|
/*******************************
|
|
* LOCAL TYPES *
|
|
*******************************/
|
|
|
|
typedef struct locbuf
|
|
{ dtd_srcloc start; /* p->startloc */
|
|
dtd_srcloc here; /* p->location */
|
|
} locbuf;
|
|
|
|
|
|
/*******************************
|
|
* PROTOTYPES *
|
|
*******************************/
|
|
|
|
static const ichar * itake_name(dtd *dtd, const ichar *in, dtd_symbol **id);
|
|
static const ichar * itake_entity_name(dtd *dtd, const ichar *in,
|
|
dtd_symbol **id);
|
|
static const ichar * itake_namegroup(dtd *dtd, const ichar *decl,
|
|
dtd_symbol **names, int *n);
|
|
static const ichar * iskip_layout(dtd *dtd, const ichar *in);
|
|
static dtd_parser * clone_dtd_parser(dtd_parser *p);
|
|
static void free_model(dtd_model *m);
|
|
static int process_entity_declaration(dtd_parser *p,
|
|
const ichar *decl);
|
|
static void free_notations(dtd_notation *n);
|
|
static void free_shortrefs(dtd_shortref *sr);
|
|
static int process_cdata(dtd_parser *p, int last);
|
|
static int process_entity(dtd_parser *p, const ichar *name);
|
|
static int emit_cdata(dtd_parser *p, int last);
|
|
static dtd_space_mode istr_to_space_mode(const ichar *val);
|
|
static void update_space_mode(dtd_parser *p, dtd_element *e,
|
|
int natts, sgml_attribute *atts);
|
|
static dtd_model * make_model(dtd *dtd, const ichar *decl,
|
|
const ichar **end);
|
|
static void for_elements_in_model(dtd_model *m,
|
|
void (*f)(dtd_element *e,
|
|
void *closure),
|
|
void *closure);
|
|
void putchar_dtd_parser(dtd_parser *p, int chr);
|
|
void free_dtd_parser(dtd_parser *p);
|
|
static const ichar * isee_character_entity(dtd *dtd, const ichar *in,
|
|
int *chr);
|
|
static int add_default_attributes(dtd_parser *p, dtd_element *e,
|
|
int natts,
|
|
sgml_attribute *atts);
|
|
static int prepare_cdata(dtd_parser *p);
|
|
|
|
|
|
/*******************************
|
|
* MACROS *
|
|
*******************************/
|
|
|
|
/* WITH_CLASS(p, c, g): run statement(s) g with p->event_class
   temporarily set to c; the previous class is put back afterwards.
   The parameters p and c are parenthesized so that arbitrary
   expressions can be passed safely.  Note that p is evaluated more
   than once and that g must not leave the block through return/goto
   if the old class is to be restored.
*/
#define WITH_CLASS(p, c, g) \
        { sgml_event_class _oc = (p)->event_class; \
          (p)->event_class = (c); \
          g; \
          (p)->event_class = _oc; \
        }
|
|
|
|
/* WITH_PARSER(p, g): run statement(s) g with current_parser set to p
   and restore the previous value of current_parser afterwards.  The
   saved value is taken from current_parser itself; saving p instead
   would leave current_parser pointing at p after the block.
*/
#define WITH_PARSER(p, g) \
        { dtd_parser *_old = current_parser; \
          current_parser = (p); \
          g; \
          current_parser = _old; \
        }
|
|
|
|
/*******************************
|
|
* STATISTICS *
|
|
*******************************/
|
|
|
|
#ifdef O_STATISTICS

/* Counters that are updated through STAT() and printed by
   sgml_statistics().
*/
int edefs_created  = 0;
int edefs_freed    = 0;
int edefs_implicit = 0;
int edefs_atts     = 0;
int edefs_decl     = 0;
int dtd_created    = 0;
int dtd_freed      = 0;

/* Print all counters to stderr */

void
sgml_statistics(void)
{ FILE *out = stderr;

  fprintf(out, "EDEFS: created %d; freed %d\n", edefs_created, edefs_freed);
  fprintf(out, "EDEFS: implicit %d; atts %d; decl %d\n",
          edefs_implicit, edefs_atts, edefs_decl);
  fprintf(out, "DTDs: created: %d; freed: %d\n", dtd_created, dtd_freed);
}

#define STAT(g) g                       /* execute the bookkeeping */

#else /*O_STATISTICS*/

#define STAT(g) ((void)0)               /* bookkeeping compiled out */

#endif /*O_STATISTICS*/
|
|
|
|
|
|
/*******************************
|
|
* SRC LOCATION *
|
|
*******************************/
|
|
|
|
|
|
static void /* TBD: also handle startloc */
|
|
push_location(dtd_parser *p, locbuf *save)
|
|
{ save->here = p->location;
|
|
save->start = p->startloc;
|
|
|
|
p->location.parent = &save->here;
|
|
p->startloc.parent = &save->start;
|
|
}
|
|
|
|
|
|
static void
|
|
pop_location(dtd_parser *p, locbuf *saved)
|
|
{ p->location = saved->here;
|
|
p->startloc = saved->start;
|
|
}
|
|
|
|
|
|
static inline void
|
|
_sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc)
|
|
{ d->type = loc->type;
|
|
d->name.file = loc->name.file;
|
|
d->line = loc->line;
|
|
d->linepos = loc->linepos;
|
|
d->charpos = loc->charpos;
|
|
/* but not the parent! */
|
|
}
|
|
|
|
void
|
|
sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc)
|
|
{ _sgml_cplocation(d, loc);
|
|
}
|
|
|
|
#define sgml_cplocation(d,s) _sgml_cplocation(d, s)
|
|
|
|
static void
|
|
inc_location(dtd_srcloc *l, int chr)
|
|
{ if ( chr == '\n' )
|
|
{ l->linepos = 0;
|
|
l->line++;
|
|
}
|
|
|
|
l->linepos++;
|
|
l->charpos++;
|
|
}
|
|
|
|
|
|
static void
|
|
dec_location(dtd_srcloc *l, int chr)
|
|
{ if ( chr == '\n' )
|
|
{ l->linepos = 2; /* not good! */
|
|
l->line--;
|
|
}
|
|
l->linepos--;
|
|
l->charpos--;
|
|
}
|
|
|
|
/*******************************
|
|
* CLASSIFICATION PRIMITIVES *
|
|
*******************************/
|
|
|
|
static inline int
|
|
HasClass(dtd *dtd, wint_t chr, int mask)
|
|
{ if ( chr <= 0xff )
|
|
return (dtd->charclass->class[(chr)] & (mask));
|
|
else
|
|
{ switch(mask)
|
|
{ case CH_NAME:
|
|
return ( xml_basechar(chr) ||
|
|
xml_digit(chr) ||
|
|
xml_ideographic(chr) ||
|
|
xml_combining_char(chr) ||
|
|
xml_extender(chr)
|
|
);
|
|
case CH_NMSTART:
|
|
return ( xml_basechar(chr) ||
|
|
xml_ideographic(chr) );
|
|
case CH_WHITE:
|
|
return FALSE; /* only ' ' and '\t' */
|
|
case CH_BLANK:
|
|
return iswspace(chr);
|
|
case CH_DIGIT:
|
|
return xml_digit(chr);
|
|
case CH_RS:
|
|
case CH_RE:
|
|
return FALSE;
|
|
default:
|
|
assert(0);
|
|
return FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
isee_func(dtd *dtd, const ichar *in, charfunc func)
|
|
{ if ( dtd->charfunc->func[func] == *in )
|
|
return ++in;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*******************************
|
|
* SYMBOLS *
|
|
*******************************/
|
|
|
|
static dtd_symbol_table *
|
|
new_symbol_table(void)
|
|
{ dtd_symbol_table *t = sgml_calloc(1, sizeof(*t));
|
|
t->size = SYMBOLHASHSIZE;
|
|
t->entries = sgml_calloc(t->size, sizeof(dtd_symbol*));
|
|
|
|
return t;
|
|
}
|
|
|
|
|
|
static void
|
|
free_symbol_table(dtd_symbol_table *t)
|
|
{ int i;
|
|
|
|
for(i=0; i<t->size; i++)
|
|
{ dtd_symbol *s, *next;
|
|
|
|
for(s=t->entries[i]; s; s=next)
|
|
{ next = s->next;
|
|
|
|
sgml_free((ichar*)s->name);
|
|
sgml_free(s);
|
|
}
|
|
}
|
|
|
|
sgml_free(t->entries);
|
|
sgml_free(t);
|
|
}
|
|
|
|
|
|
dtd_symbol *
|
|
dtd_find_symbol(dtd *dtd, const ichar *name)
|
|
{ dtd_symbol_table *t = dtd->symbols;
|
|
|
|
if ( dtd->case_sensitive )
|
|
{ int k = istrhash(name, t->size);
|
|
dtd_symbol *s;
|
|
|
|
for(s=t->entries[k]; s; s = s->next)
|
|
{ if ( istreq(s->name, name) )
|
|
return s;
|
|
}
|
|
} else
|
|
{ int k = istrcasehash(name, t->size);
|
|
dtd_symbol *s;
|
|
|
|
for(s=t->entries[k]; s; s = s->next)
|
|
{ if ( istrcaseeq(s->name, name) )
|
|
return s;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static dtd_symbol *
|
|
dtd_find_entity_symbol(dtd *dtd, const ichar *name)
|
|
{ dtd_symbol_table *t = dtd->symbols;
|
|
|
|
if ( dtd->ent_case_sensitive )
|
|
{ int k = istrhash(name, t->size);
|
|
dtd_symbol *s;
|
|
|
|
for(s=t->entries[k]; s; s = s->next)
|
|
{ if ( istreq(s->name, name) )
|
|
return s;
|
|
}
|
|
} else
|
|
{ int k = istrcasehash(name, t->size);
|
|
dtd_symbol *s;
|
|
|
|
for(s=t->entries[k]; s; s = s->next)
|
|
{ if ( istrcaseeq(s->name, name) )
|
|
return s;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
dtd_symbol *
|
|
dtd_add_symbol(dtd *dtd, const ichar *name)
|
|
{ dtd_symbol_table *t = dtd->symbols;
|
|
int k = istrhash(name, t->size);
|
|
dtd_symbol *s;
|
|
|
|
for(s=t->entries[k]; s; s = s->next)
|
|
{ if ( istreq(s->name, name) )
|
|
return s;
|
|
}
|
|
|
|
s = sgml_calloc(1, sizeof(*s));
|
|
s->name = istrdup(name);
|
|
s->next = t->entries[k];
|
|
t->entries[k] = s;
|
|
|
|
return s;
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* ENTITIES *
|
|
*******************************/
|
|
|
|
static void
|
|
free_entity_list(dtd_entity *e)
|
|
{ dtd_entity *next;
|
|
|
|
for( ; e; e=next)
|
|
{ next = e->next;
|
|
|
|
if ( e->value ) sgml_free(e->value);
|
|
if ( e->extid ) sgml_free(e->extid);
|
|
if ( e->exturl ) sgml_free(e->exturl);
|
|
if ( e->baseurl ) sgml_free(e->baseurl);
|
|
|
|
sgml_free(e);
|
|
}
|
|
}
|
|
|
|
|
|
static dtd_entity *
|
|
find_pentity(dtd *dtd, dtd_symbol *id)
|
|
{ dtd_entity *e;
|
|
|
|
for(e = dtd->pentities; e; e=e->next)
|
|
{ if ( e->name == id )
|
|
return e;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* returned path must be freed when done */
|
|
|
|
static ichar *
|
|
entity_file(dtd *dtd, dtd_entity *e)
|
|
{ switch(e->type)
|
|
{ case ET_SYSTEM:
|
|
case ET_PUBLIC:
|
|
{ const ichar *f;
|
|
|
|
f = find_in_catalogue(e->catalog_location,
|
|
e->name->name,
|
|
e->extid,
|
|
e->exturl,
|
|
dtd->dialect != DL_SGML);
|
|
|
|
if ( f ) /* owned by catalog */
|
|
{ ichar *file;
|
|
|
|
if ( is_absolute_path(f) || !e->baseurl )
|
|
file = istrdup(f);
|
|
else
|
|
file = localpath(e->baseurl, f);
|
|
|
|
return file;
|
|
}
|
|
}
|
|
default:
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
entity_value(dtd_parser *p, dtd_entity *e, int *len)
|
|
{ ichar *file;
|
|
|
|
if ( !e->value && (file=entity_file(p->dtd, e)) )
|
|
{ int normalise = (e->content == EC_SGML || e->content == EC_CDATA);
|
|
size_t l;
|
|
|
|
e->value = load_sgml_file_to_charp(file, normalise, &l);
|
|
e->length = (long)l;
|
|
sgml_free(file);
|
|
}
|
|
|
|
if ( len )
|
|
*len = e->length;
|
|
|
|
return e->value;
|
|
}
|
|
|
|
|
|
static int
|
|
expand_pentities(dtd_parser *p, const ichar *in, int ilen, ichar *out, int len)
|
|
{ dtd *dtd = p->dtd;
|
|
int pero = dtd->charfunc->func[CF_PERO]; /* % */
|
|
int ero = dtd->charfunc->func[CF_ERO]; /* & */
|
|
const ichar *s;
|
|
const ichar *end;
|
|
|
|
if ( ilen == ZERO_TERM_LEN )
|
|
{ end = in + wcslen(in);
|
|
} else
|
|
{ end = &in[ilen];
|
|
}
|
|
|
|
while(in < end)
|
|
{ if ( *in == pero )
|
|
{ dtd_symbol *id;
|
|
|
|
if ( (s = itake_entity_name(dtd, in+1, &id)) )
|
|
{ dtd_entity *e = find_pentity(dtd, id);
|
|
const ichar *eval;
|
|
int l;
|
|
|
|
in = s;
|
|
if ( (s=isee_func(dtd, s, CF_ERC)) ) /* ; is not obligatory? */
|
|
in = s;
|
|
|
|
if ( !e )
|
|
return gripe(ERC_EXISTENCE, L"parameter entity", id->name);
|
|
|
|
if ( !(eval = entity_value(p, e, NULL)) )
|
|
return FALSE;
|
|
|
|
if ( !expand_pentities(p, eval, ZERO_TERM_LEN, out, len) )
|
|
return FALSE;
|
|
l = (int)istrlen(out); /* could be better */
|
|
out += l;
|
|
len -= l;
|
|
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if ( --len <= 0 )
|
|
{ gripe(ERC_REPRESENTATION, L"Declaration too long");
|
|
return FALSE;
|
|
}
|
|
|
|
if ( *in == ero && in[1] == '#' ) /* &# */
|
|
{ int chr;
|
|
|
|
if ( (s=isee_character_entity(dtd, in, &chr)) )
|
|
{ if ( chr == 0 )
|
|
{ gripe(ERC_SYNTAX_ERROR, L"Illegal character entity", in);
|
|
} else
|
|
{ *out++ = chr;
|
|
in = s;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
*out++ = *in++;
|
|
}
|
|
|
|
*out = '\0';
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
char_entity_value(const ichar *decl)
|
|
{ if ( *decl == '#' )
|
|
{ const ichar *s = decl+1;
|
|
ichar *end;
|
|
long v;
|
|
|
|
/* do octal too? */
|
|
if ( s[0] == 'x' || s[0] == 'X' )
|
|
v = wcstoul(s+1, &end, 16);
|
|
else
|
|
v = wcstoul(s, &end, 10);
|
|
|
|
if ( *end == '\0' )
|
|
{ return (int)v;
|
|
} else if ( istreq(s, L"RS") )
|
|
{ return '\n';
|
|
} else if ( istreq(s, L"RE") )
|
|
{ return '\r';
|
|
} else if ( istreq(s, L"TAB") )
|
|
{ return '\t';
|
|
} else if ( istreq(s, L"SPACE") )
|
|
{ return ' ';
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
isee_character_entity(dtd *dtd, const ichar *in, int *chr)
|
|
{ const ichar *s;
|
|
|
|
if ( (s=isee_func(dtd, in, CF_ERO)) && *s == '#' )
|
|
{ ichar e[32];
|
|
ichar *o = e;
|
|
int v;
|
|
|
|
*o++ = *s++;
|
|
while(o < e+sizeof(e)/sizeof(ichar)-1 && HasClass(dtd, *s, CH_NAME))
|
|
*o++ = *s++;
|
|
if ( isee_func(dtd, s, CF_ERC)) /* skip ; */
|
|
s++;
|
|
|
|
*o = '\0';
|
|
if ( (v=char_entity_value(e)) >= 0 )
|
|
{ *chr = v;
|
|
return s;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Expand entities in a string. Used to expand CDATA attribute values.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static int
|
|
expand_entities(dtd_parser *p, const ichar *in, int len, ocharbuf *out)
|
|
{ const ichar *s;
|
|
const ichar *end = &in[len];
|
|
dtd *dtd = p->dtd;
|
|
int ero = dtd->charfunc->func[CF_ERO]; /* & */
|
|
|
|
while(in < end)
|
|
{ if ( *in == ero )
|
|
{ const ichar *estart = in; /* for recovery */
|
|
int chr;
|
|
|
|
if ( (s=isee_character_entity(dtd, in, &chr)) )
|
|
{ if ( chr == 0 )
|
|
gripe(ERC_SYNTAX_ERROR, L"Illegal character entity", in);
|
|
|
|
add_ocharbuf(out, chr);
|
|
in = s;
|
|
continue;
|
|
}
|
|
|
|
if ( HasClass(dtd, in[1], CH_NMSTART) )
|
|
{ dtd_symbol *id;
|
|
dtd_entity *e;
|
|
const ichar *eval;
|
|
|
|
if ( !(in = itake_name(dtd, in+1, &id)) )
|
|
{ in = estart;
|
|
goto recover;
|
|
}
|
|
if ( isee_func(dtd, in, CF_ERC) || *in == '\n' )
|
|
in++;
|
|
|
|
if ( !(e = id->entity) && !(e=dtd->default_entity) )
|
|
{ gripe(ERC_EXISTENCE, L"entity", id->name);
|
|
in = estart;
|
|
goto recover;
|
|
}
|
|
|
|
if ( !(eval = entity_value(p, e, NULL)) )
|
|
{ gripe(ERC_NO_VALUE, e->name->name);
|
|
in = estart;
|
|
goto recover;
|
|
}
|
|
|
|
if ( e->content == EC_SGML )
|
|
{ if ( !expand_entities(p, eval, (int)istrlen(eval), out) )
|
|
return FALSE;
|
|
} else
|
|
{ const ichar *s;
|
|
|
|
for(s=eval; *s; s++)
|
|
add_ocharbuf(out, *s);
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
if ( dtd->dialect != DL_SGML )
|
|
gripe(ERC_SYNTAX_ERROR, L"Illegal entity", estart);
|
|
}
|
|
|
|
recover:
|
|
|
|
if ( *in == CR && in[1] == LF )
|
|
in++;
|
|
|
|
if ( HasClass(dtd, *in, CH_BLANK) )
|
|
{ add_ocharbuf(out, ' ');
|
|
in++;
|
|
} else
|
|
{ add_ocharbuf(out, *in++);
|
|
}
|
|
}
|
|
|
|
terminate_ocharbuf(out);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
|
|
/*******************************
|
|
* ELEMENTS *
|
|
*******************************/
|
|
|
|
static dtd_element *
|
|
find_element(dtd *dtd, dtd_symbol *id)
|
|
{ dtd_element *e;
|
|
|
|
if ( id->element )
|
|
return id->element; /* must check */
|
|
|
|
e = sgml_calloc(1, sizeof(*e));
|
|
e->space_mode = SP_INHERIT;
|
|
e->undefined = TRUE;
|
|
e->name = id;
|
|
id->element = e;
|
|
|
|
e->next = dtd->elements;
|
|
dtd->elements = e;
|
|
|
|
return e;
|
|
}
|
|
|
|
|
|
static dtd_edef *
|
|
new_element_definition(dtd *dtd)
|
|
{ dtd_edef *def = sgml_calloc(1, sizeof(*def));
|
|
|
|
STAT(edefs_created++);
|
|
|
|
return def;
|
|
}
|
|
|
|
|
|
static dtd_element *
|
|
def_element(dtd *dtd, dtd_symbol *id)
|
|
{ dtd_element *e = find_element(dtd, id);
|
|
|
|
if ( !e->structure )
|
|
{ e->structure = new_element_definition(dtd);
|
|
e->structure->references = 1;
|
|
e->structure->type = C_EMPTY;
|
|
}
|
|
|
|
return e;
|
|
}
|
|
|
|
|
|
static void
|
|
free_name_list(dtd_name_list *nl)
|
|
{ dtd_name_list *next;
|
|
|
|
for( ; nl; nl=next)
|
|
{ next = nl->next;
|
|
|
|
sgml_free(nl);
|
|
}
|
|
}
|
|
|
|
|
|
#define REFS_VIRGIN (-42)
|
|
|
|
static void
|
|
free_attribute(dtd_attr *a)
|
|
{ if ( a->references == REFS_VIRGIN || --a->references == 0 )
|
|
{ switch(a->type)
|
|
{ case AT_NAMEOF:
|
|
case AT_NOTATION:
|
|
free_name_list(a->typeex.nameof);
|
|
default:
|
|
;
|
|
}
|
|
switch(a->def)
|
|
{ case AT_DEFAULT:
|
|
case AT_FIXED:
|
|
{ if ( a->islist )
|
|
sgml_free(a->att_def.list);
|
|
else if ( a->type == AT_CDATA && a->att_def.cdata )
|
|
sgml_free(a->att_def.cdata);
|
|
}
|
|
default:
|
|
;
|
|
}
|
|
|
|
sgml_free(a);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
free_attribute_list(dtd_attr_list *l)
|
|
{ dtd_attr_list *next;
|
|
|
|
for(; l; l=next)
|
|
{ next = l->next;
|
|
|
|
free_attribute(l->attribute);
|
|
sgml_free(l);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
free_element_list(dtd_element_list *l)
|
|
{ dtd_element_list *next;
|
|
|
|
for( ; l; l=next)
|
|
{ next = l->next;
|
|
|
|
sgml_free(l);
|
|
}
|
|
}
|
|
|
|
static void
|
|
free_element_definition(dtd_edef *def)
|
|
{ if ( --def->references == 0 )
|
|
{ STAT(edefs_freed++);
|
|
if ( def->content )
|
|
free_model(def->content);
|
|
free_element_list(def->included);
|
|
free_element_list(def->excluded);
|
|
free_state_engine(def->initial_state);
|
|
|
|
sgml_free(def);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
free_elements(dtd_element *e)
|
|
{ dtd_element *next;
|
|
|
|
for( ; e; e=next)
|
|
{ next = e->next;
|
|
|
|
if ( e->structure )
|
|
free_element_definition(e->structure);
|
|
free_attribute_list(e->attributes);
|
|
|
|
sgml_free(e);
|
|
}
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* ATTRIBUTES *
|
|
*******************************/
|
|
|
|
static dtd_attr *
|
|
find_attribute(dtd_element *e, dtd_symbol *name)
|
|
{ dtd_attr_list *a;
|
|
|
|
for(a=e->attributes; a; a=a->next)
|
|
{ if ( a->attribute->name == name )
|
|
return a->attribute;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* PARSE PRIMITIVES *
|
|
*******************************/
|
|
|
|
static const ichar *
|
|
iskip_layout(dtd *dtd, const ichar *in)
|
|
{ ichar cmt = dtd->charfunc->func[CF_CMT]; /* also skips comment */
|
|
|
|
for( ; *in; in++ )
|
|
{ if ( HasClass(dtd, *in, CH_BLANK) )
|
|
continue;
|
|
|
|
if ( in[0] == cmt && in[1] == cmt )
|
|
{ in += 2;
|
|
|
|
for( ; *in; in++ )
|
|
{ if ( in[0] == cmt && in[1] == cmt )
|
|
break;
|
|
}
|
|
in++;
|
|
continue;
|
|
}
|
|
|
|
return in;
|
|
}
|
|
|
|
return in;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
See whether we are looking at identifier "id". "id" must be lowercase!
|
|
This is only used for reserved words, and parsed case-insensitive in both
|
|
XML and SGML modes.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static const ichar *
|
|
isee_identifier(dtd *dtd, const ichar *in, char *id)
|
|
{ in = iskip_layout(dtd, in);
|
|
|
|
/* match */
|
|
while (*id && (wint_t)*id == towlower(*in) )
|
|
id++, in++;
|
|
if ( *id == 0 && !HasClass(dtd, *in, CH_NAME) )
|
|
return iskip_layout(dtd, in);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
itake_name(dtd *dtd, const ichar *in, dtd_symbol **id)
|
|
{ ichar buf[MAXNMLEN];
|
|
ichar *o = buf;
|
|
ichar *e = &buf[MAXNMLEN]-1;
|
|
|
|
in = iskip_layout(dtd, in);
|
|
if ( !HasClass(dtd, *in, CH_NMSTART) )
|
|
return NULL;
|
|
|
|
if ( dtd->case_sensitive )
|
|
{ while( HasClass(dtd, *in, CH_NAME) && o < e )
|
|
*o++ = *in++;
|
|
} else
|
|
{ while( HasClass(dtd, *in, CH_NAME) && o < e )
|
|
*o++ = towlower(*in++);
|
|
}
|
|
|
|
if ( o == e )
|
|
{ gripe(ERC_REPRESENTATION, L"NAME too long");
|
|
return NULL;
|
|
}
|
|
|
|
*o++ = '\0';
|
|
|
|
*id = dtd_add_symbol(dtd, buf);
|
|
|
|
return iskip_layout(dtd, in);
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
itake_entity_name(dtd *dtd, const ichar *in, dtd_symbol **id)
|
|
{ ichar buf[MAXNMLEN];
|
|
ichar *o = buf;
|
|
ichar *e = &buf[MAXNMLEN]-1;
|
|
|
|
in = iskip_layout(dtd, in);
|
|
if ( !HasClass(dtd, *in, CH_NMSTART) )
|
|
return NULL;
|
|
|
|
if ( dtd->ent_case_sensitive )
|
|
{ while( HasClass(dtd, *in, CH_NAME) && o < e )
|
|
*o++ = *in++;
|
|
} else
|
|
{ while( HasClass(dtd, *in, CH_NAME) && o < e )
|
|
*o++ = towlower(*in++);
|
|
}
|
|
if ( o == e )
|
|
{ gripe(ERC_REPRESENTATION, L"Entity NAME too long");
|
|
return NULL;
|
|
}
|
|
|
|
*o++ = '\0';
|
|
|
|
*id = dtd_add_symbol(dtd, buf);
|
|
|
|
return in;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
itake_nmtoken(dtd *dtd, const ichar *in, dtd_symbol **id)
|
|
{ ichar buf[MAXNMLEN];
|
|
ichar *o = buf;
|
|
ichar *e = &buf[MAXNMLEN]-1;
|
|
|
|
in = iskip_layout(dtd, in);
|
|
if ( !HasClass(dtd, *in, CH_NAME) )
|
|
return NULL;
|
|
if ( dtd->case_sensitive )
|
|
{ while( HasClass(dtd, *in, CH_NAME) && o < e )
|
|
*o++ = *in++;
|
|
} else
|
|
{ while( HasClass(dtd, *in, CH_NAME) && o < e )
|
|
*o++ = towlower(*in++);
|
|
}
|
|
if ( o == e )
|
|
{ gripe(ERC_REPRESENTATION, L"NMTOKEN too long");
|
|
return NULL;
|
|
}
|
|
|
|
*o = '\0';
|
|
|
|
*id = dtd_add_symbol(dtd, buf);
|
|
|
|
return iskip_layout(dtd, in);
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
itake_nutoken(dtd *dtd, const ichar *in, dtd_symbol **id)
|
|
{ ichar buf[MAXNMLEN];
|
|
ichar *o = buf;
|
|
ichar *e = &buf[MAXNMLEN]-1;
|
|
|
|
in = iskip_layout(dtd, in);
|
|
if ( !HasClass(dtd, *in, CH_DIGIT) )
|
|
return NULL;
|
|
|
|
if ( dtd->case_sensitive )
|
|
{ while( HasClass(dtd, *in, CH_NAME) && o < e )
|
|
*o++ = *in++;
|
|
} else
|
|
{ while( HasClass(dtd, *in, CH_NAME) && o < e )
|
|
*o++ = towlower(*in++);
|
|
}
|
|
|
|
if ( o == e )
|
|
{ gripe(ERC_REPRESENTATION, L"NUTOKEN too long");
|
|
return NULL;
|
|
}
|
|
|
|
*o = '\0';
|
|
if ( o - buf > 8 )
|
|
gripe(ERC_LIMIT, L"nutoken length");
|
|
|
|
*id = dtd_add_symbol(dtd, buf);
|
|
|
|
return iskip_layout(dtd, in);
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
itake_number(dtd *dtd, const ichar *in, dtd_attr *at)
|
|
{ in = iskip_layout(dtd, in);
|
|
|
|
switch(dtd->number_mode)
|
|
{ case NU_TOKEN:
|
|
{ ichar buf[MAXNMLEN];
|
|
ichar *o = buf;
|
|
|
|
while( HasClass(dtd, *in, CH_DIGIT) )
|
|
*o++ = *in++;
|
|
if ( o == buf )
|
|
return NULL; /* empty */
|
|
*o = '\0';
|
|
at->att_def.name = dtd_add_symbol(dtd, buf);
|
|
|
|
return iskip_layout(dtd, (const ichar *)in);
|
|
}
|
|
case NU_INTEGER:
|
|
{ ichar *end;
|
|
|
|
at->att_def.number = wcstol(in, &end, 10);
|
|
if ( end > in && errno != ERANGE )
|
|
return iskip_layout(dtd, end);
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Get a quoted value. After successful return, *start points to the start
|
|
of the string in the input and *len to the length. The data is *not*
|
|
nul terminated.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static const ichar *
|
|
itake_string(dtd *dtd, const ichar *in, ichar **start, int *len)
|
|
{ in = iskip_layout(dtd, in);
|
|
|
|
if ( isee_func(dtd, in, CF_LIT) ||
|
|
isee_func(dtd, in, CF_LITA) )
|
|
{ ichar q = *in++;
|
|
|
|
*start = (ichar *)in;
|
|
while( *in && *in != q )
|
|
in++;
|
|
if ( *in )
|
|
{ *len = (int)(in - (*start));
|
|
|
|
return iskip_layout(dtd, ++in);
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
itake_dubbed_string(dtd *dtd, const ichar *in, ichar **out)
|
|
{ ichar *start;
|
|
int len;
|
|
const ichar *end;
|
|
|
|
if ( (end=itake_string(dtd, in, &start, &len)) )
|
|
*out = istrndup(start, len);
|
|
|
|
return end;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
itake_url() is used to get the argument of a SYSTEM or 2nd argument of a
|
|
PUBLIC reference. Once upon a time it tried to tag the argument as
|
|
file:<path>, but this job cannot be done before lookup in the catalogue. It
|
|
is now the same as itake_dubbed_string(), so we simply call this one.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static const ichar *
|
|
itake_url(dtd *dtd, const ichar *in, ichar **out)
|
|
{ return itake_dubbed_string(dtd, in, out);
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
itake_nmtoken_chars(dtd *dtd, const ichar *in, ichar *out, int len)
|
|
{ in = iskip_layout(dtd, in);
|
|
if ( !HasClass(dtd, *in, CH_NAME) )
|
|
return NULL;
|
|
while( HasClass(dtd, *in, CH_NAME) )
|
|
{ if ( --len <= 0 )
|
|
gripe(ERC_REPRESENTATION, L"Name token too long");
|
|
*out++ = (dtd->case_sensitive ? *in++ : (ichar)towlower(*in++));
|
|
}
|
|
*out++ = '\0';
|
|
|
|
return iskip_layout(dtd, in);
|
|
}
|
|
|
|
|
|
/* There used to be a function
|
|
|
|
itake_nonblank_chars(dtd, in, out, len) -> new end
|
|
|
|
which
|
|
- skipped layout,
|
|
- copied characters from in[] to out[] until layout or \0 was found,
|
|
- added a terminating \0 to out[],
|
|
- skipped any following layout, and
|
|
- returned the new position.
|
|
|
|
That function was only called by get_attribute_value(), which used
|
|
it to parse an unquoted attribute value. According to SGML, that's
|
|
not right: unquoted attribute values must look like NMTOKENs (but
|
|
have a different length bound). In particular, elements like
|
|
<foo a=bar>zoo</foo>
|
|
<foo a=ugh/zip/
|
|
are perfectly legal, so scanning an unquoted attribute value MUST
|
|
stop at a '/' or '>'. According to HTML practice, pretty much any
|
|
old junk will be accepted, and some HTML parsers will allow bare
|
|
slashes in such an attribute.
|
|
|
|
Typical HTML is *so* bad that it doesn't agree with *any* part of
|
|
the HTML specifications (e.g., <FONT> is commonly wrapped around
|
|
block-level elements, which has never been legal). It's not clear
|
|
that there is much point in trying to accommodate bad HTML; if you
|
|
really need to do that, use the free program HTML Tidy (from the
|
|
http://www.w3c.org/ site) to clean up, and parse its output instead.
|
|
|
|
However, in order to break as little as possible, the new (sgml-1.0.14)
|
|
function accepts anything except > / \0 and blanks.
|
|
|
|
JW: I decided to accept / as part of an unquoted in SGML-mode if
|
|
shorttag is disabled as well as in XML mode if it is not the
|
|
end of the begin-element
|
|
*/
|
|
|
|
static ichar const *
|
|
itake_unquoted(dtd *dtd, ichar const *in, ichar *out, int len)
|
|
{ ichar const end2 = dtd->charfunc->func[CF_ETAGO2]; /* / */
|
|
ichar c;
|
|
|
|
/* skip leading layout. Do NOT skip comments! --x-- is a value! */
|
|
while (c = *in, HasClass(dtd, c, CH_BLANK))
|
|
in++;
|
|
|
|
/* copy the attribute to out[] */
|
|
while ( !HasClass(dtd, c, CH_BLANK) &&
|
|
c != '\0' )
|
|
{ if ( c == end2 && (dtd->shorttag ||
|
|
(in[1] == '\0' && dtd->dialect != DL_SGML)) )
|
|
break;
|
|
|
|
if ( --len > 0 )
|
|
*out++ = c;
|
|
else if ( len == 0 )
|
|
gripe(ERC_REPRESENTATION, L"Attribute too long");
|
|
c = *++in;
|
|
}
|
|
*out = '\0';
|
|
|
|
/* skip trailing layout. While it is kind to skip comments here,
|
|
it is technically wrong to do so. Tags may not contain comments.
|
|
*/
|
|
|
|
return iskip_layout(dtd, in);
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* DTD *
|
|
*******************************/
|
|
|
|
dtd *
|
|
new_dtd(const ichar *doctype)
|
|
{ dtd *dtd = sgml_calloc(1, sizeof(*dtd));
|
|
|
|
STAT(dtd_created++);
|
|
dtd->magic = SGML_DTD_MAGIC;
|
|
dtd->implicit = TRUE;
|
|
dtd->dialect = DL_SGML;
|
|
if ( doctype )
|
|
dtd->doctype = istrdup(doctype);
|
|
dtd->symbols = new_symbol_table();
|
|
dtd->charclass = new_charclass();
|
|
dtd->charfunc = new_charfunc();
|
|
dtd->space_mode = SP_SGML;
|
|
dtd->ent_case_sensitive = TRUE; /* case-sensitive entities */
|
|
dtd->shorttag = TRUE; /* allow for <tag/value/ */
|
|
dtd->number_mode = NU_TOKEN;
|
|
|
|
return dtd;
|
|
}
|
|
|
|
|
|
void
|
|
free_dtd(dtd *dtd)
|
|
{ if ( --dtd->references == 0 )
|
|
{ STAT(dtd_freed++);
|
|
|
|
if ( dtd->doctype )
|
|
sgml_free(dtd->doctype);
|
|
|
|
free_entity_list(dtd->entities);
|
|
free_entity_list(dtd->pentities);
|
|
free_notations(dtd->notations);
|
|
free_shortrefs(dtd->shortrefs);
|
|
free_elements(dtd->elements);
|
|
free_symbol_table(dtd->symbols);
|
|
sgml_free(dtd->charfunc);
|
|
sgml_free(dtd->charclass);
|
|
dtd->magic = 0;
|
|
|
|
sgml_free(dtd);
|
|
}
|
|
}
|
|
|
|
|
|
/* Declarations of the predefined XML entities, in the syntax of the
   body of an <!ENTITY ...> declaration.  The table ends with NULL.

   The values are written as numeric character references.  This is
   required for `quot': a literal double quote cannot appear inside
   the double-quoted value (the previous text `\""\""' was not even a
   valid C string literal).
*/

static const wchar_t *xml_entities[] =
{ L"lt CDATA \"&#60;\"",                /* < */
  L"gt CDATA \"&#62;\"",                /* > */
  L"amp CDATA \"&#38;\"",               /* & */
  L"apos CDATA \"&#39;\"",              /* ' */
  L"quot CDATA \"&#34;\"",              /* " */
  NULL
};
|
|
|
|
|
|
int
|
|
set_dialect_dtd(dtd *dtd, dtd_dialect dialect)
|
|
{ if ( dtd->dialect != dialect )
|
|
{ dtd->dialect = dialect;
|
|
|
|
switch(dialect)
|
|
{ case DL_SGML:
|
|
{ dtd->case_sensitive = FALSE;
|
|
dtd->space_mode = SP_SGML;
|
|
dtd->shorttag = TRUE;
|
|
break;
|
|
}
|
|
case DL_XML:
|
|
case DL_XMLNS:
|
|
{ const ichar **el;
|
|
dtd_parser p;
|
|
|
|
dtd->case_sensitive = TRUE;
|
|
dtd->encoding = SGML_ENC_UTF8;
|
|
dtd->space_mode = SP_PRESERVE;
|
|
dtd->shorttag = FALSE;
|
|
|
|
memset(&p, 0, sizeof(p));
|
|
p.dtd = dtd;
|
|
for(el = xml_entities; *el; el++)
|
|
process_entity_declaration(&p, *el);
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
int
|
|
set_option_dtd(dtd *dtd, dtd_option option, int set)
|
|
{ switch(option)
|
|
{ case OPT_SHORTTAG:
|
|
dtd->shorttag = set;
|
|
break;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
baseurl(dtd_parser *p)
|
|
{ if ( p->location.type == IN_FILE && p->location.name.file )
|
|
{ return p->location.name.file;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
process_entity_value_declaration(dtd_parser *p,
|
|
const ichar *decl, dtd_entity *e)
|
|
{ dtd *dtd = p->dtd;
|
|
const ichar *s;
|
|
|
|
if ( e->type == ET_SYSTEM )
|
|
{ if ( (s=itake_url(dtd, decl, &e->exturl)) )
|
|
{ e->baseurl = istrdup(baseurl(p));
|
|
return s;
|
|
}
|
|
|
|
goto string_expected;
|
|
} else
|
|
{ ichar *start; int len;
|
|
ichar val[MAXSTRINGLEN];
|
|
|
|
if ( !(s = itake_string(dtd, decl, &start, &len)) )
|
|
goto string_expected;
|
|
decl = s;
|
|
|
|
expand_pentities(p, start, len, val, sizeof(val)/sizeof(ichar));
|
|
|
|
switch ( e->type )
|
|
{ case ET_PUBLIC:
|
|
{ e->extid = istrdup(val);
|
|
if ( isee_func(dtd, decl, CF_LIT) ||
|
|
isee_func(dtd, decl, CF_LITA) )
|
|
{ if ( (s=itake_url(dtd, decl, &e->exturl)) )
|
|
{ e->baseurl = istrdup(baseurl(p));
|
|
decl = s;
|
|
}
|
|
}
|
|
return decl;
|
|
}
|
|
case ET_LITERAL:
|
|
{ e->value = istrdup(val);
|
|
e->length = (int)wcslen(e->value);
|
|
return decl;
|
|
}
|
|
default:
|
|
assert(0);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
string_expected:
|
|
gripe(ERC_SYNTAX_ERROR, L"String expected", decl);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
The sgml-standard tells us to accept the first definition of an entity,
|
|
silently suppressing any further attempt to redefine the entity.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static int
|
|
process_entity_declaration(dtd_parser *p, const ichar *decl)
|
|
{ dtd *dtd = p->dtd;
|
|
const ichar *s;
|
|
dtd_symbol *id;
|
|
dtd_entity *e;
|
|
int isparam;
|
|
int isdef = FALSE;
|
|
/* parameter entity */
|
|
if ( (s=isee_func(dtd, decl, CF_PERO)) )
|
|
{ isparam = TRUE;
|
|
decl = s;
|
|
} else
|
|
isparam = FALSE;
|
|
|
|
if ( !(s = itake_entity_name(dtd, decl, &id)) )
|
|
{ if ( !(s = isee_identifier(dtd, decl, "#default")) )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl);
|
|
id = dtd_add_symbol(dtd, (ichar*)"#DEFAULT");
|
|
isdef = TRUE;
|
|
}
|
|
|
|
if ( isparam && find_pentity(dtd, id) )
|
|
{ gripe(ERC_REDEFINED, L"parameter entity", id);
|
|
return TRUE; /* already defined parameter entity */
|
|
}
|
|
if ( id->entity )
|
|
{ gripe(ERC_REDEFINED, L"entity", id);
|
|
return TRUE; /* already defined normal entity */
|
|
}
|
|
|
|
decl = iskip_layout(dtd, s);
|
|
e = sgml_calloc(1, sizeof(*e));
|
|
e->name = id;
|
|
e->catalog_location = (isparam ? CAT_PENTITY : CAT_ENTITY);
|
|
|
|
if ( (s = isee_identifier(dtd, decl, "system")) )
|
|
{ e->type = ET_SYSTEM;
|
|
e->content = EC_SGML;
|
|
decl = s;
|
|
} else if ( (s = isee_identifier(dtd, decl, "public")) )
|
|
{ e->type = ET_PUBLIC;
|
|
e->content = EC_SGML;
|
|
decl = s;
|
|
} else
|
|
{ e->type = ET_LITERAL;
|
|
|
|
if ( !isparam )
|
|
{ if ( (s=isee_identifier(dtd, decl, "cdata")) )
|
|
{ decl = s;
|
|
e->content = EC_CDATA;
|
|
} else if ( (s=isee_identifier(dtd, decl, "sdata")) )
|
|
{ decl = s;
|
|
e->content = EC_SDATA;
|
|
} else if ( (s=isee_identifier(dtd, decl, "pi")) )
|
|
{ decl = s;
|
|
e->content = EC_PI;
|
|
} else if ( (s=isee_identifier(dtd, decl, "starttag")) )
|
|
{ decl = s;
|
|
e->content = EC_STARTTAG;
|
|
} else if ( (s=isee_identifier(dtd, decl, "endtag")) )
|
|
{ decl = s;
|
|
e->content = EC_ENDTAG;
|
|
} else
|
|
e->content = EC_SGML;
|
|
}
|
|
}
|
|
|
|
if ( (decl=process_entity_value_declaration(p, decl, e)) )
|
|
{ if ( e->type == ET_LITERAL )
|
|
{ switch(e->content)
|
|
{ case EC_STARTTAG:
|
|
{ ichar *buf = sgml_malloc((e->length + 3)*sizeof(ichar));
|
|
|
|
buf[0] = dtd->charfunc->func[CF_STAGO];
|
|
istrcpy(&buf[1], e->value);
|
|
buf[++e->length] = dtd->charfunc->func[CF_STAGC];
|
|
buf[++e->length] = 0;
|
|
|
|
sgml_free(e->value);
|
|
e->value = buf;
|
|
e->content = EC_SGML;
|
|
|
|
break;
|
|
}
|
|
case EC_ENDTAG:
|
|
{ ichar *buf = sgml_malloc((e->length + 4)*sizeof(ichar));
|
|
|
|
buf[0] = dtd->charfunc->func[CF_ETAGO1];
|
|
buf[1] = dtd->charfunc->func[CF_ETAGO2];
|
|
istrcpy(&buf[2], e->value);
|
|
e->length++;
|
|
buf[++e->length] = dtd->charfunc->func[CF_STAGC];
|
|
buf[++e->length] = 0;
|
|
|
|
sgml_free(e->value);
|
|
e->value = buf;
|
|
e->content = EC_SGML;
|
|
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
} else
|
|
{ if ( *decl )
|
|
{ dtd_symbol *nname;
|
|
|
|
if ( (s=isee_identifier(dtd, decl, "cdata")) )
|
|
{ decl = s;
|
|
e->content = EC_CDATA;
|
|
} else if ( (s=isee_identifier(dtd, decl, "sdata")) )
|
|
{ decl = s;
|
|
e->content = EC_SDATA;
|
|
} else if ( (s=isee_identifier(dtd, decl, "ndata")) )
|
|
{ decl = s;
|
|
e->content = EC_NDATA;
|
|
} else
|
|
return gripe(ERC_SYNTAX_ERROR, L"Bad datatype declaration", decl);
|
|
|
|
if ( (s=itake_name(dtd, decl, &nname)) ) /* what is this? */
|
|
{ decl = s;
|
|
} else
|
|
return gripe(ERC_SYNTAX_ERROR, L"Bad notation declaration", decl);
|
|
}
|
|
}
|
|
|
|
if ( *decl )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl);
|
|
}
|
|
|
|
if ( isparam )
|
|
{ e->next = dtd->pentities;
|
|
dtd->pentities = e;
|
|
} else
|
|
{ e->name->entity = e;
|
|
e->next = dtd->entities;
|
|
dtd->entities = e;
|
|
}
|
|
|
|
if ( isdef )
|
|
dtd->default_entity = e;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* NOTATIONS *
|
|
*******************************/
|
|
|
|
static dtd_notation *
|
|
find_notation(dtd *dtd, dtd_symbol *name)
|
|
{ dtd_notation *n;
|
|
|
|
for(n=dtd->notations; n; n = n->next)
|
|
{ if ( n->name == name )
|
|
return n;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static void
|
|
add_notation(dtd *dtd, dtd_notation *not)
|
|
{ dtd_notation **n = &dtd->notations;
|
|
|
|
for( ; *n; n = &(*n)->next)
|
|
;
|
|
*n = not;
|
|
}
|
|
|
|
static int
|
|
process_notation_declaration(dtd_parser *p, const ichar *decl)
|
|
{ dtd *dtd = p->dtd;
|
|
dtd_symbol *nname;
|
|
const ichar *s;
|
|
ichar *system = NULL, *public = NULL;
|
|
dtd_notation *not;
|
|
|
|
if ( !(s=itake_name(dtd, decl, &nname)) )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Notation name expected", decl);
|
|
decl = s;
|
|
|
|
if ( find_notation(dtd, nname) )
|
|
{ gripe(ERC_REDEFINED, L"notation", nname);
|
|
return TRUE;
|
|
}
|
|
|
|
if ( (s=isee_identifier(dtd, decl, "system")) )
|
|
{ ;
|
|
} else if ( (s=isee_identifier(dtd, decl, "public")) )
|
|
{ decl = s;
|
|
if ( !(s=itake_dubbed_string(dtd, decl, &public)) )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Public identifier expected", decl);
|
|
} else
|
|
return gripe(ERC_SYNTAX_ERROR, L"SYSTEM or PUBLIC expected", decl);
|
|
|
|
decl = s;
|
|
if ( (s=itake_dubbed_string(dtd, decl, &system)) )
|
|
decl = s;
|
|
|
|
if ( *decl )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl);
|
|
|
|
not = sgml_calloc(1, sizeof(*not));
|
|
not->name = nname;
|
|
not->system = system;
|
|
not->public = public;
|
|
not->next = NULL;
|
|
add_notation(dtd, not);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static void
|
|
free_notations(dtd_notation *n)
|
|
{ dtd_notation *next;
|
|
|
|
for( ; n; n=next)
|
|
{ next = n->next;
|
|
|
|
sgml_free(n->system);
|
|
sgml_free(n->public);
|
|
|
|
sgml_free(n);
|
|
}
|
|
}
|
|
|
|
/*******************************
|
|
* SHORTREF *
|
|
*******************************/
|
|
|
|
static void
|
|
free_maps(dtd_map *map)
|
|
{ dtd_map *next;
|
|
|
|
for( ; map; map=next)
|
|
{ next = map->next;
|
|
if ( map->from )
|
|
sgml_free(map->from);
|
|
sgml_free(map);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
free_shortrefs(dtd_shortref *sr)
|
|
{ dtd_shortref *next;
|
|
|
|
for( ; sr; sr=next)
|
|
{ next = sr->next;
|
|
free_maps(sr->map);
|
|
sgml_free(sr);
|
|
}
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
shortref_add_map(dtd *dtd, const ichar *decl, dtd_shortref *sr)
|
|
{ ichar *start; int len;
|
|
ichar from[MAXMAPLEN];
|
|
ichar *f = from;
|
|
dtd_symbol *to;
|
|
const ichar *s;
|
|
const ichar *end;
|
|
dtd_map **p;
|
|
dtd_map *m;
|
|
|
|
if ( !(s=itake_string(dtd, decl, &start, &len)) )
|
|
{ gripe(ERC_SYNTAX_ERROR, L"map-string expected", decl);
|
|
return NULL;
|
|
}
|
|
decl = s;
|
|
if ( !(s=itake_entity_name(dtd, decl, &to)) )
|
|
{ gripe(ERC_SYNTAX_ERROR, L"map-to name expected", decl);
|
|
return NULL;
|
|
}
|
|
end = s;
|
|
|
|
for(decl=start; len > 0;)
|
|
{ if ( *decl == 'B' ) /* blank */
|
|
{ if ( decl[1] == 'B' )
|
|
{ *f++ = CHR_DBLANK;
|
|
decl += 2;
|
|
len -= 2;
|
|
continue;
|
|
}
|
|
*f++ = CHR_BLANK;
|
|
decl++;
|
|
len--;
|
|
} else
|
|
{ *f++ = *decl++; /* any other character */
|
|
len--;
|
|
}
|
|
}
|
|
*f = 0;
|
|
|
|
for(p=&sr->map; *p; p = &(*p)->next)
|
|
;
|
|
|
|
m = sgml_calloc(1, sizeof(*m));
|
|
m->from = istrdup(from);
|
|
m->len = (int)istrlen(from);
|
|
m->to = to;
|
|
|
|
*p = m;
|
|
|
|
return end;
|
|
}
|
|
|
|
|
|
static dtd_shortref *
|
|
def_shortref(dtd_parser *p, dtd_symbol *name)
|
|
{ dtd *dtd = p->dtd;
|
|
dtd_shortref *sr, **pr;
|
|
|
|
for(pr=&dtd->shortrefs; *pr; pr = &(*pr)->next)
|
|
{ dtd_shortref *r = *pr;
|
|
|
|
if ( r->name == name )
|
|
return r;
|
|
}
|
|
|
|
sr = sgml_calloc(1, sizeof(*sr));
|
|
sr->name = name;
|
|
*pr = sr;
|
|
|
|
return sr;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Create an array with TRUE in any character that can be the last of the
|
|
shortref map.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static void
|
|
compile_map(dtd *dtd, dtd_shortref *sr)
|
|
{ dtd_map *map;
|
|
|
|
for(map = sr->map; map; map = map->next)
|
|
{ ichar last = map->from[map->len-1];
|
|
|
|
switch( last )
|
|
{ case CHR_BLANK:
|
|
case CHR_DBLANK:
|
|
{ wint_t i;
|
|
|
|
for( i=0; i< SHORTMAP_SIZE; i++)
|
|
{ if ( HasClass(dtd, i, CH_BLANK) )
|
|
sr->ends[i] = TRUE;
|
|
}
|
|
}
|
|
|
|
default:
|
|
sr->ends[last] = TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
process_shortref_declaration(dtd_parser *p, const ichar *decl)
|
|
{ dtd *dtd = p->dtd;
|
|
ichar buf[MAXDECL];
|
|
dtd_shortref *sr;
|
|
dtd_symbol *name;
|
|
const ichar *s;
|
|
|
|
if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
|
|
return FALSE;
|
|
decl = buf;
|
|
|
|
if ( !(s=itake_name(dtd, decl, &name)) )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl);
|
|
decl = s;
|
|
|
|
sr = def_shortref(p, name);
|
|
if ( sr->defined )
|
|
{ gripe(ERC_REDEFINED, L"shortref", name);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
sr->defined = TRUE;
|
|
|
|
while( *(decl = iskip_layout(dtd, decl)) != '\0'
|
|
&& (s=shortref_add_map(dtd, decl, sr)) )
|
|
decl = s;
|
|
compile_map(dtd, sr);
|
|
|
|
if ( *decl )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Map expected", decl);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Find named name. The name NULL stands for the #empty map
|
|
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static dtd_shortref *
|
|
find_map(dtd *dtd, dtd_symbol *name)
|
|
{ dtd_shortref *sr;
|
|
|
|
if ( !name )
|
|
{ static dtd_shortref *empty;
|
|
|
|
if ( !empty )
|
|
{ empty = sgml_calloc(1, sizeof(*empty));
|
|
empty->name = dtd_add_symbol(dtd, (ichar*)"#EMPTY");
|
|
empty->defined = TRUE;
|
|
}
|
|
|
|
return empty;
|
|
}
|
|
|
|
for( sr = dtd->shortrefs; sr; sr = sr->next )
|
|
{ if ( sr->name == name )
|
|
{ if ( !sr->defined )
|
|
break;
|
|
|
|
return sr;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static void
|
|
set_map_element(dtd_element *e, void *closure)
|
|
{ e->map = closure;
|
|
}
|
|
|
|
|
|
static int
|
|
process_usemap_declaration(dtd_parser *p, const ichar *decl)
|
|
{ dtd *dtd = p->dtd;
|
|
ichar buf[MAXDECL];
|
|
dtd_symbol *name;
|
|
const ichar *s;
|
|
dtd_symbol *ename;
|
|
dtd_element *e;
|
|
dtd_shortref *map;
|
|
|
|
if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
|
|
return FALSE;
|
|
decl = buf;
|
|
|
|
if ( !(s=itake_name(dtd, decl, &name)) )
|
|
{ if ( (s=isee_identifier(dtd, decl, "#empty")) )
|
|
name = NULL;
|
|
else
|
|
return gripe(ERC_SYNTAX_ERROR, L"map-name expected", decl);
|
|
}
|
|
|
|
decl = s;
|
|
if ( !(map = find_map(dtd, name)) )
|
|
map = def_shortref(p, name); /* make undefined map */
|
|
|
|
if ( isee_func(dtd, decl, CF_GRPO) ) /* ( */
|
|
{ dtd_model *model;
|
|
|
|
if ( (model = make_model(dtd, decl, &s)) )
|
|
{ for_elements_in_model(model, set_map_element, map);
|
|
free_model(model);
|
|
decl = s;
|
|
} else
|
|
return FALSE;
|
|
} else if ( (s=itake_name(dtd, decl, &ename)) )
|
|
{ e = find_element(dtd, ename);
|
|
e->map = map;
|
|
decl = s;
|
|
} else if ( p->environments )
|
|
{ if ( !map->defined )
|
|
gripe(ERC_EXISTENCE, L"map", name->name);
|
|
|
|
p->environments->map = map;
|
|
p->map = p->environments->map;
|
|
} else
|
|
return gripe(ERC_SYNTAX_ERROR, L"element-name expected", decl);
|
|
|
|
if ( *decl )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Unparsed", decl);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
match_map(dtd *dtd, dtd_map *map, ocharbuf *buf)
|
|
{ wchar_t *data = buf->data.w;
|
|
wchar_t *e = data+buf->size-1;
|
|
ichar *m = map->from+map->len-1;
|
|
|
|
while( m >= map->from )
|
|
{ if ( e < data )
|
|
return 0;
|
|
|
|
if ( *m == *e )
|
|
{ m--;
|
|
e--;
|
|
continue;
|
|
}
|
|
if ( *m == CHR_DBLANK )
|
|
{ if ( e>data && HasClass(dtd, *e, CH_WHITE) )
|
|
e--;
|
|
else
|
|
return FALSE;
|
|
goto wblank;
|
|
}
|
|
if ( *m == CHR_BLANK )
|
|
{ wblank:
|
|
while( e>data && HasClass(dtd, *e, CH_WHITE) )
|
|
e--;
|
|
m--;
|
|
continue;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
return (int)(data+buf->size-1-e);
|
|
}
|
|
|
|
|
|
static int
|
|
match_shortref(dtd_parser *p)
|
|
{ dtd_map *map;
|
|
|
|
for(map = p->map->map; map; map = map->next)
|
|
{ int len;
|
|
|
|
if ( (len=match_map(p->dtd, map, p->cdata)) )
|
|
{ p->cdata->size -= len;
|
|
|
|
if ( p->cdata_must_be_empty )
|
|
{ int blank = TRUE;
|
|
const wchar_t *s;
|
|
int i;
|
|
|
|
for(s = p->cdata->data.w, i=0; i++ < p->cdata->size; s++)
|
|
{ if ( !iswspace(*s) )
|
|
{ blank = FALSE;
|
|
break;
|
|
}
|
|
}
|
|
|
|
p->blank_cdata = blank;
|
|
}
|
|
|
|
WITH_CLASS(p, EV_SHORTREF,
|
|
{ sgml_cplocation(&p->startloc, &p->location);
|
|
p->startloc.charpos -= len;
|
|
p->startloc.linepos -= len;
|
|
if ( p->startloc.linepos < 0 )
|
|
{ p->startloc.line--;
|
|
p->startloc.linepos = 0; /* not correct! */
|
|
}
|
|
DEBUG(printf("%d-%d: Matched map '%s' --> %s, len = %d\n",
|
|
p->startloc.charpos,
|
|
p->location.charpos,
|
|
map->from, map->to->name, len));
|
|
|
|
process_entity(p, map->to->name);
|
|
}) /* TBD: optimise */
|
|
return TRUE;
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* ELEMENTS *
|
|
*******************************/
|
|
|
|
static void
|
|
add_submodel(dtd_model *m, dtd_model *sub)
|
|
{ dtd_model **d;
|
|
|
|
for( d = &m->content.group; *d; d = &(*d)->next )
|
|
;
|
|
*d = sub;
|
|
}
|
|
|
|
|
|
/* for_elements_in_model()
|
|
Walk along the model, calling f(e, closure) for any element found
|
|
in the model. Used for <!SHORTREF name model>
|
|
*/
|
|
|
|
static void
|
|
for_elements_in_model(dtd_model *m,
|
|
void (*f)(dtd_element *e, void *closure),
|
|
void *closure)
|
|
{ switch(m->type)
|
|
{ case MT_SEQ:
|
|
case MT_AND:
|
|
case MT_OR:
|
|
{ dtd_model *sub = m->content.group;
|
|
|
|
for(; sub; sub = sub->next)
|
|
for_elements_in_model(sub, f, closure);
|
|
break;
|
|
}
|
|
case MT_ELEMENT:
|
|
(*f)(m->content.element, closure);
|
|
break;
|
|
default:
|
|
;
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
free_model(dtd_model *m)
|
|
{ switch(m->type)
|
|
{ case MT_SEQ:
|
|
case MT_AND:
|
|
case MT_OR:
|
|
{ dtd_model *sub = m->content.group;
|
|
dtd_model *next;
|
|
|
|
for(; sub; sub = next)
|
|
{ next = sub->next;
|
|
|
|
free_model(sub);
|
|
}
|
|
}
|
|
default:
|
|
;
|
|
}
|
|
|
|
sgml_free(m);
|
|
}
|
|
|
|
|
|
static dtd_model *
|
|
make_model(dtd *dtd, const ichar *decl, const ichar **end)
|
|
{ const ichar *s;
|
|
dtd_model *m = sgml_calloc(1, sizeof(*m));
|
|
dtd_symbol *id;
|
|
|
|
decl = iskip_layout(dtd, decl);
|
|
|
|
if ( (s=isee_identifier(dtd, decl, "#pcdata")) )
|
|
{ m->type = MT_PCDATA;
|
|
m->cardinality = MC_ONE; /* actually don't care */
|
|
*end = s;
|
|
return m;
|
|
}
|
|
|
|
if ( (s=itake_name(dtd, decl, &id)) )
|
|
{ m->type = MT_ELEMENT;
|
|
m->content.element = find_element(dtd, id);
|
|
decl = s;
|
|
} else
|
|
{ if ( !(s=isee_func(dtd, decl, CF_GRPO)) )
|
|
{ gripe(ERC_SYNTAX_ERROR, L"Name group expected", decl);
|
|
free_model(m);
|
|
return NULL;
|
|
}
|
|
decl = s;
|
|
|
|
for(;;)
|
|
{ dtd_model *sub;
|
|
modeltype mt;
|
|
|
|
if ( !(sub = make_model(dtd, decl, &s)) )
|
|
{ free_model(sub);
|
|
return NULL;
|
|
}
|
|
decl = s;
|
|
add_submodel(m, sub);
|
|
|
|
if ( (s = isee_func(dtd, decl, CF_OR)) )
|
|
{ decl = s;
|
|
mt = MT_OR;
|
|
} else if ( (s = isee_func(dtd, decl, CF_SEQ)) )
|
|
{ decl = s;
|
|
mt = MT_SEQ;
|
|
} else if ( (s = isee_func(dtd, decl, CF_AND)) )
|
|
{ decl = s;
|
|
mt = MT_AND;
|
|
} else if ( (s = isee_func(dtd, decl, CF_GRPC)) )
|
|
{ decl = s;
|
|
break;
|
|
} else
|
|
{ gripe(ERC_SYNTAX_ERROR, L"Connector ('|', ',' or '&') expected", decl);
|
|
free_model(m);
|
|
return NULL;
|
|
}
|
|
decl = iskip_layout(dtd, decl);
|
|
|
|
if ( m->type != mt )
|
|
{ if ( !m->type )
|
|
m->type = mt;
|
|
else
|
|
{ gripe(ERC_SYNTAX_ERROR, L"Different connector types in model", decl);
|
|
free_model(m);
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( (s = isee_func(dtd, decl, CF_OPT)) )
|
|
{ decl = s;
|
|
m->cardinality = MC_OPT;
|
|
} else if ( (s=isee_func(dtd, decl, CF_REP)) )
|
|
{ decl = s;
|
|
m->cardinality = MC_REP;
|
|
} else if ( (s=isee_func(dtd, decl, CF_PLUS)) )
|
|
{ /* ROK: watch out for (x) +(y) */
|
|
if ( isee_func(dtd, iskip_layout(dtd, s), CF_GRPO) == NULL )
|
|
{ decl = s;
|
|
m->cardinality = MC_PLUS;
|
|
}
|
|
} else
|
|
m->cardinality = MC_ONE;
|
|
|
|
if ( m->type == MT_UNDEF ) /* simplify (e+), etc. */
|
|
{ dtd_model *sub = m->content.group;
|
|
modelcard card;
|
|
|
|
assert(!sub->next);
|
|
if ( sub->cardinality == MC_ONE )
|
|
card = m->cardinality;
|
|
else if ( m->cardinality == MC_ONE )
|
|
card = sub->cardinality;
|
|
else
|
|
{ m->type = MT_OR;
|
|
goto out;
|
|
}
|
|
|
|
*m = *sub;
|
|
m->cardinality = card;
|
|
sgml_free(sub);
|
|
}
|
|
|
|
out:
|
|
*end = iskip_layout(dtd, decl);
|
|
return m;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
process_model(dtd *dtd, dtd_edef *e, const ichar *decl)
|
|
{ const ichar *s;
|
|
|
|
decl = iskip_layout(dtd, decl);
|
|
if ( (s = isee_identifier(dtd, decl, "empty")) )
|
|
{ e->type = C_EMPTY;
|
|
return s;
|
|
}
|
|
if ( (s = isee_identifier(dtd, decl, "cdata")) )
|
|
{ e->type = C_CDATA;
|
|
return s;
|
|
}
|
|
if ( (s = isee_identifier(dtd, decl, "rcdata")) )
|
|
{ e->type = C_RCDATA;
|
|
return s;
|
|
}
|
|
if ( (s = isee_identifier(dtd, decl, "any")) )
|
|
{ e->type = C_ANY;
|
|
return s;
|
|
}
|
|
|
|
e->type = C_PCDATA;
|
|
if ( !(e->content = make_model(dtd, decl, &decl)) )
|
|
return FALSE;
|
|
|
|
return decl;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
See a name-group separator. As long as we haven't decided, this can be
|
|
CF_NG. If we have decided they must all be the same.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static const ichar *
|
|
isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep)
|
|
{ const ichar *s;
|
|
|
|
if ( (s=isee_func(dtd, decl, *sep)) )
|
|
return iskip_layout(dtd, s);
|
|
if ( *sep == CF_NG ) /* undecided */
|
|
{ static const charfunc ng[] = { CF_SEQ, CF_OR, CF_AND };
|
|
int n;
|
|
|
|
for(n=0; n<3; n++)
|
|
{ if ( (s=isee_func(dtd, decl, ng[n])) )
|
|
{ *sep = ng[n];
|
|
return iskip_layout(dtd, s);
|
|
}
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
|
|
static const ichar *
|
|
itake_namegroup(dtd *dtd, const ichar *decl,
|
|
dtd_symbol **names, int *n)
|
|
{ const ichar *s;
|
|
int en = 0;
|
|
|
|
if ( (s=isee_func(dtd, decl, CF_GRPO)) )
|
|
{ charfunc ngs = CF_NG;
|
|
|
|
for(;;)
|
|
{ if ( !(decl=itake_name(dtd, s, &names[en++])) )
|
|
{ gripe(ERC_SYNTAX_ERROR, L"Name expected", s);
|
|
return NULL;
|
|
}
|
|
if ( (s=isee_ngsep(dtd, decl, &ngs)) )
|
|
{ decl = iskip_layout(dtd, s);
|
|
continue;
|
|
}
|
|
if ( (s=isee_func(dtd, decl, CF_GRPC)) )
|
|
{ *n = en;
|
|
decl = s;
|
|
return iskip_layout(dtd, decl);
|
|
}
|
|
gripe(ERC_SYNTAX_ERROR, L"Bad name-group", decl);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
typedef struct
|
|
{ dtd_symbol **list;
|
|
int size;
|
|
} namelist;
|
|
|
|
|
|
static void
|
|
add_list_element(dtd_element *e, void *closure)
|
|
{ namelist *nl = closure;
|
|
|
|
nl->list[nl->size++] = e->name;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
itake_el_or_model_element_list(dtd *dtd, const ichar *decl, dtd_symbol **names, int *n)
|
|
{ const ichar *s;
|
|
|
|
if ( isee_func(dtd, decl, CF_GRPO) )
|
|
{ dtd_model *model;
|
|
|
|
if ( (model = make_model(dtd, decl, &s)) )
|
|
{ namelist nl;
|
|
|
|
nl.list = names;
|
|
nl.size = 0;
|
|
for_elements_in_model(model, add_list_element, &nl);
|
|
free_model(model);
|
|
|
|
*n = nl.size;
|
|
return s;
|
|
} else
|
|
return NULL;
|
|
} else
|
|
{ if ( !(s = itake_name(dtd, decl, &names[0])) )
|
|
{ gripe(ERC_SYNTAX_ERROR, L"Name expected", decl);
|
|
return NULL;
|
|
}
|
|
*n = 1;
|
|
return s;
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
add_element_list(dtd_element_list **l, dtd_element *e)
|
|
{ dtd_element_list *n = sgml_calloc(1, sizeof(*n));
|
|
|
|
n->value = e;
|
|
|
|
for( ; *l; l = &(*l)->next )
|
|
;
|
|
*l = n;
|
|
}
|
|
|
|
|
|
static int
|
|
process_element_declaraction(dtd_parser *p, const ichar *decl)
|
|
{ dtd *dtd = p->dtd;
|
|
ichar buf[MAXDECL];
|
|
const ichar *s;
|
|
dtd_symbol *eid[MAXATTELEM];
|
|
dtd_edef *def;
|
|
int en;
|
|
int i;
|
|
|
|
/* expand parameter entities */
|
|
if ( !expand_pentities(p, decl, ZERO_TERM_LEN,
|
|
buf, sizeof(buf)/sizeof(ichar)) )
|
|
return FALSE;
|
|
decl = buf;
|
|
|
|
if ( !(s=itake_el_or_model_element_list(dtd, decl, eid, &en)) )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Name or name-group expected", decl);
|
|
decl = s;
|
|
if ( en == 0 )
|
|
return TRUE; /* 0 elements */
|
|
|
|
STAT(edefs_decl++);
|
|
def = new_element_definition(dtd);
|
|
for(i=0; i<en; i++)
|
|
{ find_element(dtd, eid[i]);
|
|
assert(eid[i]->element->structure == NULL);
|
|
eid[i]->element->structure = def;
|
|
eid[i]->element->undefined = FALSE;
|
|
}
|
|
def->references = en; /* for GC */
|
|
|
|
/* omitted tag declarations (opt) */
|
|
if ( (s = isee_identifier(dtd, decl, "-")) )
|
|
{ def->omit_close = FALSE;
|
|
goto seeclose;
|
|
} else if ( (s = isee_identifier(dtd, decl, "o")) )
|
|
{ def->omit_open = TRUE;
|
|
|
|
seeclose:
|
|
decl = s;
|
|
if ( (s = isee_identifier(dtd, decl, "-")) )
|
|
{ def->omit_close = FALSE;
|
|
} else if ( (s = isee_identifier(dtd, decl, "o")) )
|
|
{ for(i=0; i<en; i++)
|
|
def->omit_close = TRUE;
|
|
} else
|
|
return gripe(ERC_SYNTAX_ERROR, L"Bad omit-tag declaration", decl);
|
|
|
|
decl = s;
|
|
}
|
|
|
|
/* content model */
|
|
if ( !(decl=process_model(dtd, def, decl)) )
|
|
return FALSE;
|
|
|
|
/* in/excluded elements */
|
|
if ( decl[0] == '-' || decl[0] == '+' )
|
|
{ dtd_symbol *ng[MAXNAMEGROUP];
|
|
int ns;
|
|
dtd_element_list **l;
|
|
|
|
if ( decl[0] == '-' )
|
|
l = &def->excluded;
|
|
else
|
|
l = &def->included;
|
|
|
|
decl++;
|
|
if ( (s=itake_namegroup(dtd, decl, ng, &ns)) )
|
|
{ int i;
|
|
|
|
decl = s;
|
|
|
|
for(i=0; i<ns; i++)
|
|
add_element_list(l, find_element(dtd, ng[i]));
|
|
} else
|
|
{ return gripe(ERC_SYNTAX_ERROR, L"Name group expected", decl);
|
|
}
|
|
}
|
|
|
|
if (*decl)
|
|
return gripe(ERC_SYNTAX_ERROR, L"Unexpected end of declaration", decl);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static void
|
|
add_name_list(dtd_name_list **nl, dtd_symbol *s)
|
|
{ dtd_name_list *n = sgml_calloc(1, sizeof(*n));
|
|
|
|
n->value = s;
|
|
|
|
for( ; *nl; nl = &(*nl)->next )
|
|
;
|
|
|
|
*nl = n;
|
|
}
|
|
|
|
|
|
static void
|
|
set_element_properties(dtd_element *e, dtd_attr *a)
|
|
{ if ( istreq(a->name->name, L"xml:space") )
|
|
{ switch(a->def)
|
|
{ case AT_FIXED:
|
|
case AT_DEFAULT:
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
|
|
switch (a->type )
|
|
{ case AT_NAMEOF:
|
|
case AT_NAME:
|
|
case AT_NMTOKEN:
|
|
e->space_mode = istr_to_space_mode(a->att_def.name->name);
|
|
break;
|
|
case AT_CDATA:
|
|
e->space_mode = istr_to_space_mode((ichar *)a->att_def.cdata);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
add_attribute(dtd *dtd, dtd_element *e, dtd_attr *a)
|
|
{ dtd_attr_list **l;
|
|
dtd_attr_list *n;
|
|
|
|
for(l = &e->attributes; *l; l = &(*l)->next)
|
|
{ if ( (*l)->attribute->name == a->name )
|
|
{ gripe(ERC_REDEFINED, L"attribute", a->name);
|
|
a->references++; /* attempt to redefine attribute: */
|
|
free_attribute(a); /* first wins according to standard */
|
|
|
|
return;
|
|
}
|
|
}
|
|
|
|
n = sgml_calloc(1, sizeof(*n));
|
|
|
|
n->attribute = a;
|
|
a->references++;
|
|
*l = n;
|
|
set_element_properties(e, a);
|
|
}
|
|
|
|
|
|
static int
|
|
process_attlist_declaraction(dtd_parser *p, const ichar *decl)
|
|
{ dtd *dtd = p->dtd;
|
|
dtd_symbol *eid[MAXATTELEM];
|
|
int i, en;
|
|
ichar buf[MAXDECL];
|
|
const ichar *s;
|
|
|
|
/* expand parameter entities */
|
|
if ( !expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
|
|
return FALSE;
|
|
decl = iskip_layout(dtd, buf);
|
|
DEBUG(printf("Expanded to %s\n", decl));
|
|
|
|
if ( !(decl=itake_el_or_model_element_list(dtd, decl, eid, &en)) )
|
|
return FALSE;
|
|
|
|
/* fetch attributes */
|
|
while(*decl)
|
|
{ dtd_attr *at = sgml_calloc(1, sizeof(*at));
|
|
at->references = REFS_VIRGIN;
|
|
|
|
/* name of attribute */
|
|
if ( !(s = itake_name(dtd, decl, &at->name)) )
|
|
{ free_attribute(at);
|
|
return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl);
|
|
}
|
|
decl = s;
|
|
|
|
/* (name1|name2|...) type */
|
|
if ( (s=isee_func(dtd, decl, CF_GRPO)) )
|
|
{ charfunc ngs = CF_NG;
|
|
|
|
at->type = AT_NAMEOF;
|
|
decl=s;
|
|
|
|
for(;;)
|
|
{ dtd_symbol *nm;
|
|
|
|
if ( !(s = itake_nmtoken(dtd, decl, &nm)) )
|
|
{ free_attribute(at);
|
|
return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl);
|
|
}
|
|
decl = s;
|
|
add_name_list(&at->typeex.nameof, nm);
|
|
if ( (s=isee_ngsep(dtd, decl, &ngs)) )
|
|
{ decl = s;
|
|
continue;
|
|
}
|
|
if ( (s = isee_func(dtd, decl, CF_GRPC)) )
|
|
{ decl=s;
|
|
decl = iskip_layout(dtd, decl);
|
|
break;
|
|
}
|
|
free_attribute(at);
|
|
return gripe(ERC_SYNTAX_ERROR, L"Illegal name-group", decl);
|
|
}
|
|
} else if ( (s=isee_identifier(dtd, decl, "cdata")) )
|
|
{ decl = s;
|
|
at->type = AT_CDATA;
|
|
} else if ( (s=isee_identifier(dtd, decl, "entity")) )
|
|
{ decl = s;
|
|
at->type = AT_ENTITY;
|
|
} else if ( (s=isee_identifier(dtd, decl, "entities")) )
|
|
{ decl = s;
|
|
at->type = AT_ENTITIES;
|
|
at->islist = TRUE;
|
|
} else if ( (s=isee_identifier(dtd, decl, "id")) )
|
|
{ decl = s;
|
|
at->type = AT_ID;
|
|
} else if ( (s=isee_identifier(dtd, decl, "idref")) )
|
|
{ decl = s;
|
|
at->type = AT_IDREF;
|
|
} else if ( (s=isee_identifier(dtd, decl, "idrefs")) )
|
|
{ decl = s;
|
|
at->type = AT_IDREFS;
|
|
at->islist = TRUE;
|
|
} else if ( (s=isee_identifier(dtd, decl, "name")) )
|
|
{ decl = s;
|
|
at->type = AT_NAME;
|
|
} else if ( (s=isee_identifier(dtd, decl, "names")) )
|
|
{ decl = s;
|
|
at->type = AT_NAMES;
|
|
at->islist = TRUE;
|
|
} else if ( (s=isee_identifier(dtd, decl, "nmtoken")) )
|
|
{ decl = s;
|
|
at->type = AT_NMTOKEN;
|
|
} else if ( (s=isee_identifier(dtd, decl, "nmtokens")) )
|
|
{ decl = s;
|
|
at->type = AT_NMTOKENS;
|
|
at->islist = TRUE;
|
|
} else if ( (s=isee_identifier(dtd, decl, "number")) )
|
|
{ decl = s;
|
|
at->type = AT_NUMBER;
|
|
} else if ( (s=isee_identifier(dtd, decl, "numbers")) )
|
|
{ decl = s;
|
|
at->type = AT_NUMBERS;
|
|
at->islist = TRUE;
|
|
} else if ( (s=isee_identifier(dtd, decl, "nutoken")) )
|
|
{ decl = s;
|
|
at->type = AT_NUTOKEN;
|
|
} else if ( (s=isee_identifier(dtd, decl, "nutokens")) )
|
|
{ decl = s;
|
|
at->type = AT_NUTOKENS;
|
|
at->islist = TRUE;
|
|
} else if ( (s=isee_identifier(dtd, decl, "notation")) )
|
|
{ dtd_symbol *ng[MAXNAMEGROUP];
|
|
int ns;
|
|
|
|
at->type = AT_NOTATION;
|
|
decl=s;
|
|
if ( (s=itake_namegroup(dtd, decl, ng, &ns)) )
|
|
{ decl = s;
|
|
|
|
for(i=0; i<ns; i++)
|
|
add_name_list(&at->typeex.nameof, ng[i]);
|
|
} else
|
|
{ free_attribute(at);
|
|
return gripe(ERC_SYNTAX_ERROR, L"name-group expected", decl);
|
|
}
|
|
} else
|
|
{ free_attribute(at);
|
|
return gripe(ERC_SYNTAX_ERROR, L"Attribute-type expected", decl);
|
|
}
|
|
|
|
/* Attribute Defaults */
|
|
if ( (s=isee_identifier(dtd, decl, "#fixed")) )
|
|
{ decl = s;
|
|
at->def = AT_FIXED;
|
|
} else if ( (s=isee_identifier(dtd, decl, "#required")) )
|
|
{ decl = s;
|
|
at->def = AT_REQUIRED;
|
|
} else if ( (s=isee_identifier(dtd, decl, "#current")) )
|
|
{ decl = s;
|
|
at->def = AT_CURRENT;
|
|
} else if ( (s=isee_identifier(dtd, decl, "#conref")) )
|
|
{ decl = s;
|
|
at->def = AT_CONREF;
|
|
} else if ( (s=isee_identifier(dtd, decl, "#implied")) )
|
|
{ decl = s;
|
|
at->def = AT_IMPLIED;
|
|
} else /* real default */
|
|
at->def = AT_DEFAULT;
|
|
|
|
if ( at->def == AT_DEFAULT || at->def == AT_FIXED )
|
|
{ ichar buf[MAXSTRINGLEN];
|
|
ichar *start; int len;
|
|
const ichar *end;
|
|
|
|
if ( !(end=itake_string(dtd, decl, &start, &len)) )
|
|
{ end=itake_nmtoken_chars(dtd, decl, buf, sizeof(buf)/sizeof(ichar));
|
|
start = buf;
|
|
len = (int)istrlen(buf);
|
|
}
|
|
if ( !end )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Bad attribute default", decl);
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Note: itake_name(), etc. work on nul-terminated strings. The result of
|
|
itake_string() is a pointer in a nul-terminated string and these
|
|
functions will stop scanning at the quote anyway, so we can use the
|
|
length of the parsed data to verify we parsed all of it.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
switch(at->type)
|
|
{ case AT_CDATA:
|
|
{ at->att_def.cdata = istrndup(start, len);
|
|
break;
|
|
}
|
|
case AT_ENTITY:
|
|
case AT_NOTATION:
|
|
case AT_NAME:
|
|
{ if ( !(s=itake_name(dtd, start, &at->att_def.name)) ||
|
|
(s-start) != len )
|
|
return gripe(ERC_DOMAIN, L"name", decl);
|
|
break;
|
|
}
|
|
case AT_NMTOKEN:
|
|
case AT_NAMEOF:
|
|
{ if ( !(s=itake_nmtoken(dtd, start, &at->att_def.name)) ||
|
|
(s-start) != len )
|
|
return gripe(ERC_DOMAIN, L"nmtoken", decl);
|
|
break;
|
|
}
|
|
case AT_NUTOKEN:
|
|
{ if ( !(s=itake_nutoken(dtd, start, &at->att_def.name)) ||
|
|
(s-start) != len )
|
|
return gripe(ERC_DOMAIN, L"nutoken", decl);
|
|
break;
|
|
}
|
|
case AT_NUMBER:
|
|
{ if ( !(s=itake_number(dtd, start, at)) ||
|
|
(s-start) != len )
|
|
return gripe(ERC_DOMAIN, L"number", decl);
|
|
break;
|
|
}
|
|
case AT_NAMES:
|
|
case AT_ENTITIES:
|
|
case AT_IDREFS:
|
|
case AT_NMTOKENS:
|
|
case AT_NUMBERS:
|
|
case AT_NUTOKENS:
|
|
{ at->att_def.list = istrndup(buf, len);
|
|
break;
|
|
}
|
|
default:
|
|
{ free_attribute(at);
|
|
return gripe(ERC_REPRESENTATION, L"No default for type");
|
|
}
|
|
}
|
|
|
|
decl = end;
|
|
}
|
|
|
|
/* add to list */
|
|
at->references = 0;
|
|
for(i=0; i<en; i++)
|
|
{ dtd_element *e = def_element(dtd, eid[i]);
|
|
|
|
add_attribute(dtd, e, at);
|
|
}
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
/*******************************
|
|
* GENERIC TAG PROCESSING *
|
|
*******************************/
|
|
|
|
/* Result of in_or_excluded(): how an element relates to the inclusion and
   exclusion lists of the open elements. */
typedef enum
{
  IE_NORMAL,				/* on neither list */
  IE_INCLUDED,				/* is included */
  IE_EXCLUDED				/* is excluded */
} includetype;
|
|
|
|
|
|
static includetype
|
|
in_or_excluded(sgml_environment *env, dtd_element *e)
|
|
{ for(; env; env=env->parent)
|
|
{ if ( env->element->structure )
|
|
{ dtd_edef *def = env->element->structure;
|
|
dtd_element_list *el;
|
|
|
|
for(el=def->excluded; el; el=el->next)
|
|
{ if ( el->value == e )
|
|
return IE_EXCLUDED;
|
|
}
|
|
for(el=def->included; el; el=el->next)
|
|
{ if ( el->value == e )
|
|
return IE_INCLUDED;
|
|
}
|
|
}
|
|
}
|
|
|
|
return IE_NORMAL;
|
|
}
|
|
|
|
|
|
static int
|
|
complete(sgml_environment *env)
|
|
{ if ( env->element->structure &&
|
|
!env->element->undefined &&
|
|
env->element->structure->type != C_ANY )
|
|
{ dtd_edef *def = env->element->structure;
|
|
|
|
if ( !same_state(def->final_state, env->state) )
|
|
return FALSE;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static void
|
|
validate_completeness(sgml_environment *env)
|
|
{ if ( !complete(env) )
|
|
{ wchar_t buf[MAXNMLEN+50];
|
|
|
|
swprintf(buf, MAXNMLEN+50, L"Incomplete element: <%s>",
|
|
env->element->name->name);
|
|
|
|
gripe(ERC_VALIDATE, buf); /* TBD: expected */
|
|
}
|
|
}
|
|
|
|
|
|
static sgml_environment *
|
|
push_element(dtd_parser *p, dtd_element *e, int callback)
|
|
{ if ( e != CDATA_ELEMENT )
|
|
{ sgml_environment *env = sgml_calloc(1, sizeof(*env));
|
|
|
|
emit_cdata(p, FALSE);
|
|
|
|
env->element = e;
|
|
env->state = make_state_engine(e);
|
|
env->space_mode = (p->environments ? p->environments->space_mode
|
|
: p->dtd->space_mode);
|
|
env->parent = p->environments;
|
|
p->environments = env;
|
|
|
|
if ( p->dtd->shorttag )
|
|
{ env->saved_waiting_for_net = p->waiting_for_net;
|
|
|
|
if ( p->event_class == EV_SHORTTAG )
|
|
{ p->waiting_for_net = TRUE;
|
|
env->wants_net = TRUE;
|
|
} else
|
|
{ env->wants_net = FALSE;
|
|
if ( e->structure && e->structure->omit_close == FALSE )
|
|
p->waiting_for_net = FALSE;
|
|
}
|
|
}
|
|
|
|
if ( e->map )
|
|
p->map = env->map = e->map;
|
|
else if ( env->parent )
|
|
p->map = env->map = env->parent->map;
|
|
|
|
p->first = TRUE;
|
|
if ( callback && p->on_begin_element )
|
|
{ sgml_attribute atts[MAXATTRIBUTES];
|
|
int natts = 0;
|
|
|
|
if ( !(p->flags & SGML_PARSER_NODEFS) )
|
|
natts = add_default_attributes(p, e, natts, atts);
|
|
|
|
(*p->on_begin_element)(p, e, natts, atts);
|
|
}
|
|
|
|
if ( e->structure )
|
|
{ if ( e->structure->type == C_CDATA ||
|
|
e->structure->type == C_RCDATA )
|
|
{ p->state = (e->structure->type == C_CDATA ? S_CDATA : S_RCDATA);
|
|
p->cdata_state = p->state;
|
|
p->etag = e->name->name;
|
|
p->etaglen = (int)istrlen(p->etag);
|
|
sgml_cplocation(&p->startcdata, &p->location);
|
|
} else
|
|
p->cdata_state = S_PCDATA;
|
|
}
|
|
}
|
|
|
|
return p->environments;
|
|
}
|
|
|
|
|
|
static void
|
|
free_environment(sgml_environment *env)
|
|
{
|
|
#ifdef XMLNS
|
|
if ( env->xmlns )
|
|
xmlns_free(env);
|
|
#endif
|
|
|
|
sgml_free(env);
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Pop the stack, closing all environments up to (but not including) `to'. The close was
|
|
initiated by pushing the element `e'.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static int
|
|
pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0)
|
|
{ sgml_environment *env, *parent;
|
|
|
|
for(env = p->environments; env != to; env=parent)
|
|
{ dtd_element *e = env->element;
|
|
|
|
validate_completeness(env);
|
|
parent = env->parent;
|
|
|
|
if ( e->structure && !e->structure->omit_close )
|
|
gripe(ERC_OMITTED_CLOSE, e->name->name);
|
|
|
|
if ( e0 != CDATA_ELEMENT )
|
|
emit_cdata(p, TRUE);
|
|
|
|
p->first = FALSE;
|
|
p->environments = env;
|
|
if ( p->dtd->shorttag )
|
|
p->waiting_for_net = env->saved_waiting_for_net;
|
|
|
|
WITH_CLASS(p, EV_OMITTED,
|
|
if ( p->on_end_element )
|
|
(*p->on_end_element)(p, e));
|
|
free_environment(env);
|
|
}
|
|
p->environments = to;
|
|
p->map = to->map;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static void
|
|
allow_for(dtd_element *in, dtd_element *e)
|
|
{ dtd_edef *def = in->structure;
|
|
dtd_model *g;
|
|
|
|
if ( def->type == C_EMPTY )
|
|
{ def->type = C_PCDATA;
|
|
def->content = sgml_calloc(1, sizeof(*def->content));
|
|
def->content->type = MT_OR;
|
|
def->content->cardinality = MC_REP;
|
|
}
|
|
assert(def->content->type == MT_OR);
|
|
|
|
g = def->content->content.group;
|
|
|
|
if ( e == CDATA_ELEMENT )
|
|
{ dtd_model *m;
|
|
|
|
for(; g; g = g->next)
|
|
{ if ( g->type == MT_PCDATA )
|
|
return;
|
|
}
|
|
m = sgml_calloc(1, sizeof(*m));
|
|
m->type = MT_PCDATA;
|
|
m->cardinality = MC_ONE; /* ignored */
|
|
add_submodel(def->content, m);
|
|
} else
|
|
{ dtd_model *m;
|
|
|
|
for(; g; g = g->next)
|
|
{ if ( g->type == MT_ELEMENT && g->content.element == e )
|
|
return;
|
|
}
|
|
m = sgml_calloc(1, sizeof(*m));
|
|
m->type = MT_ELEMENT;
|
|
m->cardinality = MC_ONE; /* ignored */
|
|
m->content.element = e;
|
|
add_submodel(def->content, m);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
open_element(dtd_parser *p, dtd_element *e, int warn)
|
|
{ if ( !p->environments && p->enforce_outer_element )
|
|
{ dtd_element *f = p->enforce_outer_element->element;
|
|
|
|
if ( f && f != e )
|
|
{ if ( !f->structure ||
|
|
!f->structure->omit_open )
|
|
gripe(ERC_OMITTED_OPEN, f->name->name);
|
|
|
|
WITH_CLASS(p, EV_OMITTED,
|
|
{ open_element(p, f, TRUE);
|
|
if ( p->on_begin_element )
|
|
{ sgml_attribute atts[MAXATTRIBUTES];
|
|
int natts = 0;
|
|
|
|
if ( !(p->flags & SGML_PARSER_NODEFS) )
|
|
natts = add_default_attributes(p, f, natts, atts);
|
|
|
|
(*p->on_begin_element)(p, f, natts, atts);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
/* no DTD available yet */
|
|
if ( !p->environments && !p->dtd->doctype && e != CDATA_ELEMENT )
|
|
{ const ichar *file;
|
|
|
|
file = find_in_catalogue(CAT_DOCTYPE, e->name->name, NULL, NULL,
|
|
p->dtd->dialect != DL_SGML);
|
|
if ( file )
|
|
{ dtd_parser *clone = clone_dtd_parser(p);
|
|
|
|
gripe(ERC_NO_DOCTYPE, e->name->name, file);
|
|
|
|
if ( load_dtd_from_file(clone, file) )
|
|
p->dtd->doctype = istrdup(e->name->name);
|
|
else
|
|
gripe(ERC_EXISTENCE, L"file", file);
|
|
|
|
free_dtd_parser(clone);
|
|
}
|
|
}
|
|
|
|
if ( p->environments )
|
|
{ sgml_environment *env = p->environments;
|
|
|
|
if ( env->element->undefined )
|
|
{ allow_for(env->element, e); /* <!ELEMENT x - - (model) +(y)> */
|
|
push_element(p, e, FALSE);
|
|
return TRUE;
|
|
}
|
|
|
|
if ( env->element->structure &&
|
|
env->element->structure->type == C_ANY )
|
|
{ if ( e != CDATA_ELEMENT && e->undefined )
|
|
gripe(ERC_EXISTENCE, L"Element", e->name->name);
|
|
push_element(p, e, FALSE);
|
|
return TRUE;
|
|
}
|
|
|
|
switch(in_or_excluded(env, e))
|
|
{ case IE_INCLUDED:
|
|
push_element(p, e, FALSE);
|
|
return TRUE;
|
|
case IE_EXCLUDED:
|
|
if ( warn )
|
|
gripe(ERC_NOT_ALLOWED, e->name->name);
|
|
/*FALLTHROUGH*/
|
|
case IE_NORMAL:
|
|
for(; env; env=env->parent)
|
|
{ dtd_state *new;
|
|
|
|
if ( (new = make_dtd_transition(env->state, e)) )
|
|
{ env->state = new;
|
|
pop_to(p, env, e);
|
|
push_element(p, e, FALSE);
|
|
return TRUE;
|
|
} else
|
|
{ dtd_element *oe[MAXOMITTED]; /* omitted open */
|
|
int olen;
|
|
int i;
|
|
|
|
if ( (olen=find_omitted_path(env->state, e, oe)) > 0 )
|
|
{ pop_to(p, env, e);
|
|
WITH_CLASS(p, EV_OMITTED,
|
|
for(i=0; i<olen; i++)
|
|
{ env->state = make_dtd_transition(env->state, oe[i]);
|
|
env = push_element(p, oe[i], TRUE);
|
|
})
|
|
env->state = make_dtd_transition(env->state, e);
|
|
push_element(p, e, FALSE);
|
|
return TRUE;
|
|
}
|
|
}
|
|
|
|
if ( !env->element->structure ||
|
|
!env->element->structure->omit_close )
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( warn )
|
|
{ if ( e == CDATA_ELEMENT )
|
|
gripe(ERC_VALIDATE, L"#PCDATA not allowed here");
|
|
else if ( e->undefined )
|
|
gripe(ERC_EXISTENCE, L"Element", e->name->name);
|
|
else
|
|
gripe(ERC_NOT_ALLOWED, e->name->name);
|
|
}
|
|
}
|
|
|
|
if ( warn )
|
|
{ push_element(p, e, FALSE);
|
|
return TRUE;
|
|
} else
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static int
|
|
close_element(dtd_parser *p, dtd_element *e, int conref)
|
|
{ sgml_environment *env;
|
|
|
|
for(env = p->environments; env; env=env->parent)
|
|
{ if ( env->element == e ) /* element is open */
|
|
{ sgml_environment *parent;
|
|
|
|
for(env = p->environments; ; env=parent)
|
|
{ dtd_element *ce = env->element;
|
|
|
|
if ( !(conref && env == p->environments) )
|
|
validate_completeness(env);
|
|
parent = env->parent;
|
|
|
|
p->first = FALSE;
|
|
if ( p->on_end_element )
|
|
(*p->on_end_element)(p, env->element);
|
|
free_environment(env);
|
|
p->environments = parent;
|
|
|
|
if ( ce == e ) /* closing current element */
|
|
{ p->map = (parent ? parent->map : NULL);
|
|
return TRUE;
|
|
} else /* omited close */
|
|
{ if ( ce->structure && !ce->structure->omit_close )
|
|
gripe(ERC_OMITTED_CLOSE, ce->name->name);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return gripe(ERC_NOT_OPEN, e->name->name);
|
|
}
|
|
|
|
|
|
static int
|
|
close_current_element(dtd_parser *p)
|
|
{ if ( p->environments )
|
|
{ dtd_element *e = p->environments->element;
|
|
|
|
emit_cdata(p, TRUE);
|
|
return close_element(p, e, FALSE);
|
|
}
|
|
|
|
return gripe(ERC_SYNTAX_ERROR, L"No element to close", "");
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
get_attribute_value()
|
|
|
|
Get the value for an attribute. Once I thought this was simple, but
|
|
Richard O'Keefe pointed to the complex handling of white-space in SGML
|
|
attributes. Basically, if the attribute is quoted, we need:
|
|
|
|
* If CDATA, map all blank to space characters, then expand
|
|
entities
|
|
|
|
* If !CDATA expand all entities, canonise white space by
|
|
deleting leading and trailing space and squishing multiple
|
|
space characters to a single (lower for us) case.
|
|
|
|
This almost, but not completely matches the XML definition. This however
|
|
is so complex we will ignore it for now.
|
|
|
|
[Rewritten by Richard O'Keefe with these additional comments]
|
|
Reads a value, the attribute name and value indicator having been
|
|
processed already. It calls itake_string() to read quoted values, and
|
|
itake_unquoted() to read unquoted values.
|
|
|
|
itake_string(dtd, in, buf, size)
|
|
- skips layout INCLUDING comments,
|
|
- returns NULL if the next character is not ' or ",
|
|
- copies characters from in to buf until a matching ' or " is found,
|
|
- adds a terminating \0,
|
|
- skips more layout INCLUDING comments, and
|
|
- returns the new input position.
|
|
It is quite wrong to skip leading comments here. In the tag
|
|
|
|
<foo bar = --ugh-- zoo>
|
|
|
|
the characters "--ugh--" *are the value*. They are not a comment.
|
|
Comments are not in fact allowed inside tags, unfortunately.
|
|
This tag is equivalent to
|
|
|
|
<foo bar="--ugh--" something="zoo">
|
|
|
|
where something is an attribute that has zoo as one of its enumerals.
|
|
|
|
Because itake_string() is called in many other places, this bug has
|
|
not yet been fixed.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static ichar const *
|
|
get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att)
|
|
{ ichar tmp[MAXSTRINGLEN];
|
|
ichar *buf = tmp;
|
|
ichar const *s;
|
|
ichar c;
|
|
dtd *dtd = p->dtd;
|
|
ichar const *end;
|
|
ichar *start; int len;
|
|
|
|
enum
|
|
{ DIG_FIRST = 8, /* any token start with digit? */
|
|
NAM_FIRST = 4, /* any token start with non-digit name char? */
|
|
NAM_LATER = 2, /* any token have non-digit name char later? */
|
|
ANY_OTHER = 1, /* any token have illegal character? */
|
|
YET_EMPTY = 0
|
|
}
|
|
token = YET_EMPTY;
|
|
|
|
att->value.textW = NULL; /* UCS text */
|
|
att->value.number = 0;
|
|
att->flags = 0;
|
|
|
|
end = itake_string(dtd, decl, &start, &len);
|
|
|
|
if ( end != NULL )
|
|
{ ocharbuf out;
|
|
|
|
init_ocharbuf(&out);
|
|
expand_entities(p, start, len, &out);
|
|
|
|
if ( att->definition->type == AT_CDATA )
|
|
{ malloc_ocharbuf(&out);
|
|
|
|
att->value.number = out.size;
|
|
att->value.textW = out.data.w;
|
|
|
|
return end;
|
|
} else
|
|
{ ichar *d;
|
|
|
|
buf = out.data.w;
|
|
|
|
/* canonicalise blanks */
|
|
s = buf;
|
|
while ((c = *s++) != '\0' && HasClass(dtd, c, CH_BLANK))
|
|
;
|
|
d = buf;
|
|
while ( c != '\0' )
|
|
{ token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST
|
|
: HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER;
|
|
if ( d != buf )
|
|
*d++ = ' ';
|
|
if ( dtd->case_sensitive )
|
|
{ *d++ = c;
|
|
while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
|
|
{ token |= HasClass(dtd, c, CH_DIGIT) ? 0
|
|
: HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
|
|
*d++ = c;
|
|
}
|
|
} else
|
|
{ *d++ = towlower(c);
|
|
while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
|
|
{ token |= HasClass(dtd, c, CH_DIGIT) ? 0
|
|
: HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
|
|
*d++ = towlower(c);
|
|
}
|
|
}
|
|
while (c != '\0' && HasClass(dtd, c, CH_BLANK))
|
|
c = *s++;
|
|
}
|
|
*d = '\0';
|
|
}
|
|
} else
|
|
{ end = itake_unquoted(dtd, decl, tmp, sizeof(tmp)/sizeof(ichar));
|
|
if (end == NULL)
|
|
return NULL;
|
|
|
|
s = buf;
|
|
c = *s++;
|
|
if (c != '\0')
|
|
{ token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST
|
|
: HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER;
|
|
while ((c = *s++) != 0)
|
|
{ token |= HasClass(dtd, c, CH_DIGIT) ? 0
|
|
: HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
|
|
}
|
|
}
|
|
if ( token == YET_EMPTY || (token & ANY_OTHER) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"Attribute value requires quotes", buf);
|
|
|
|
if (!dtd->case_sensitive && att->definition->type != AT_CDATA)
|
|
istrlower(buf);
|
|
}
|
|
|
|
switch (att->definition->type)
|
|
{ case AT_NUMBER: /* number */
|
|
if (token != DIG_FIRST)
|
|
{ gripe(ERC_SYNTAX_WARNING, L"NUMBER expected", decl);
|
|
} else if (dtd->number_mode == NU_INTEGER)
|
|
{ (void) istrtol(buf, &att->value.number);
|
|
} else
|
|
{ att->value.textW = istrdup(buf);
|
|
att->value.number = (long)istrlen(buf);
|
|
}
|
|
return end;
|
|
case AT_CDATA: /* CDATA attribute */
|
|
att->value.textW = istrdup(buf);
|
|
att->value.number = (long)istrlen(buf);
|
|
return end;
|
|
case AT_ID: /* identifier */
|
|
case AT_IDREF: /* identifier reference */
|
|
case AT_NAME: /* name token */
|
|
case AT_NOTATION: /* notation-name */
|
|
if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"NAME expected", decl);
|
|
break;
|
|
case AT_NAMEOF: /* one of these names */
|
|
case AT_NMTOKEN: /* name-token */
|
|
if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"NMTOKEN expected", decl);
|
|
if ( att->definition->type == AT_NAMEOF )
|
|
{ dtd_name_list *nl;
|
|
|
|
for(nl=att->definition->typeex.nameof; nl; nl = nl->next)
|
|
{ if ( istreq(nl->value->name, buf) )
|
|
goto passed;
|
|
}
|
|
gripe(ERC_SYNTAX_WARNING, L"unexpected value", decl);
|
|
}
|
|
break;
|
|
case AT_NUTOKEN: /* number token */
|
|
if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"NUTOKEN expected", decl);
|
|
break;
|
|
case AT_ENTITY: /* entity-name */
|
|
if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"entity NAME expected", decl);
|
|
break;
|
|
case AT_NAMES: /* list of names */
|
|
case AT_IDREFS: /* list of identifier references */
|
|
if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"NAMES expected", decl);
|
|
break;
|
|
case AT_ENTITIES: /* entity-name list */
|
|
if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"entity NAMES expected", decl);
|
|
break;
|
|
case AT_NMTOKENS: /* name-token list */
|
|
if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"NMTOKENS expected", decl);
|
|
break;
|
|
case AT_NUMBERS: /* number list */
|
|
if (token != DIG_FIRST)
|
|
gripe(ERC_SYNTAX_WARNING, L"NUMBERS expected", decl);
|
|
break;
|
|
case AT_NUTOKENS:
|
|
if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
|
|
gripe(ERC_SYNTAX_WARNING, L"NUTOKENS expected", decl);
|
|
break;
|
|
default:
|
|
assert(0);
|
|
return NULL;
|
|
}
|
|
|
|
passed:
|
|
att->value.textW = istrdup(buf); /* TBD: more validation */
|
|
att->value.number = (long)istrlen(buf);
|
|
return end;
|
|
}
|
|
|
|
|
|
static const ichar *
|
|
process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl,
|
|
sgml_attribute *atts, int *argc)
|
|
{ int attn = 0;
|
|
dtd *dtd = p->dtd;
|
|
|
|
decl = iskip_layout(dtd, decl);
|
|
while(decl && *decl)
|
|
{ dtd_symbol *nm;
|
|
const ichar *s;
|
|
|
|
if ( (s=itake_nmtoken(dtd, decl, &nm)) )
|
|
{ decl = s;
|
|
|
|
if ( (s=isee_func(dtd, decl, CF_VI)) ) /* name= */
|
|
{ dtd_attr *a;
|
|
|
|
if ( !HasClass(dtd, nm->name[0], CH_NMSTART) )
|
|
gripe(ERC_SYNTAX_WARNING,
|
|
"Illegal start of attribute-name", decl);
|
|
|
|
decl = s;
|
|
if ( !(a=find_attribute(e, nm)) )
|
|
{ a = sgml_calloc(1, sizeof(*a));
|
|
|
|
a->name = nm;
|
|
a->type = AT_CDATA;
|
|
a->def = AT_IMPLIED;
|
|
add_attribute(dtd, e, a);
|
|
|
|
if ( !e->undefined &&
|
|
!(dtd->dialect != DL_SGML &&
|
|
(istreq(L"xmlns", nm->name) ||
|
|
istrprefix(L"xmlns:", nm->name))) )
|
|
gripe(ERC_NO_ATTRIBUTE, e->name->name, nm->name);
|
|
}
|
|
atts[attn].definition = a;
|
|
if ( (decl=get_attribute_value(p, decl, atts+attn)) )
|
|
{ attn++;
|
|
continue;
|
|
}
|
|
} else if ( e->structure )
|
|
{ dtd_attr_list *al; /* value shorthand */
|
|
|
|
for(al=e->attributes; al; al=al->next)
|
|
{ dtd_attr *a = al->attribute;
|
|
|
|
if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
|
|
{ dtd_name_list *nl;
|
|
|
|
for(nl=a->typeex.nameof; nl; nl = nl->next)
|
|
{ if ( nl->value == nm )
|
|
{ if ( dtd->dialect != DL_SGML )
|
|
gripe(ERC_SYNTAX_WARNING,
|
|
"Value short-hand in XML mode", decl);
|
|
atts[attn].flags = 0;
|
|
atts[attn].definition = a;
|
|
atts[attn].value.textW = istrdup(nm->name);
|
|
atts[attn].value.number = (long)istrlen(nm->name);
|
|
attn++;
|
|
goto next;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
gripe(ERC_NO_ATTRIBUTE_VALUE, e->name->name, nm->name);
|
|
decl = s;
|
|
} else
|
|
{ gripe(ERC_SYNTAX_ERROR, L"Bad attribute", decl);
|
|
decl = s;
|
|
}
|
|
} else
|
|
{ *argc = attn;
|
|
return decl;
|
|
}
|
|
|
|
next:
|
|
;
|
|
}
|
|
|
|
*argc = attn;
|
|
return decl;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
add_default_attributes()
|
|
|
|
This function adds attributes for omitted default and fixed attributes.
|
|
These attributes are added to the end of the attribute list. This
|
|
function returns the new number of attributes. The `atts' array is
|
|
assumed to be MAXATTRIBUTES long, normally passed from
|
|
process_begin_element.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static int
|
|
add_default_attributes(dtd_parser *p, dtd_element *e,
|
|
int natts, sgml_attribute *atts)
|
|
{ dtd_attr_list *al;
|
|
|
|
if ( e == CDATA_ELEMENT )
|
|
return natts;
|
|
|
|
for(al=e->attributes; al; al=al->next)
|
|
{ dtd_attr *a = al->attribute;
|
|
|
|
switch(a->def)
|
|
{ case AT_REQUIRED: /* TBD: check if present */
|
|
case AT_CURRENT: /* TBD: register in DTD and reuse */
|
|
case AT_CONREF:
|
|
case AT_IMPLIED:
|
|
goto next;
|
|
case AT_FIXED:
|
|
case AT_DEFAULT:
|
|
{ int i;
|
|
sgml_attribute *ap;
|
|
|
|
for(i=0, ap=atts; i<natts; i++, ap++)
|
|
{ if ( ap->definition == a )
|
|
goto next;
|
|
}
|
|
|
|
ap->definition = a;
|
|
ap->value.textW = NULL;
|
|
ap->value.number = 0;
|
|
ap->flags = SGML_AT_DEFAULT;
|
|
|
|
switch(a->type)
|
|
{ case AT_CDATA:
|
|
ap->value.textW = a->att_def.cdata;
|
|
ap->value.number = (long)istrlen(ap->value.textW);
|
|
break;
|
|
case AT_NUMBER:
|
|
if ( p->dtd->number_mode == NU_TOKEN )
|
|
{ ap->value.textW = (ichar*)a->att_def.name->name;
|
|
ap->value.number = (long)istrlen(ap->value.textW);
|
|
} else
|
|
{ ap->value.number = a->att_def.number;
|
|
}
|
|
break;
|
|
default:
|
|
if ( a->islist )
|
|
{ ap->value.textW = a->att_def.list;
|
|
} else
|
|
{ ap->value.textW = (ichar*)a->att_def.name->name;
|
|
}
|
|
ap->value.number = (long)istrlen(ap->value.textW);
|
|
}
|
|
|
|
natts++;
|
|
}
|
|
}
|
|
next:;
|
|
}
|
|
|
|
return natts;
|
|
}
|
|
|
|
|
|
static void
|
|
free_attribute_values(int argc, sgml_attribute *argv)
|
|
{ int i;
|
|
|
|
for(i=0; i<argc; i++, argv++)
|
|
{ if ( (argv->flags & SGML_AT_DEFAULT) )
|
|
continue; /* shared with the DTD */
|
|
|
|
if ( argv->value.textW )
|
|
sgml_free(argv->value.textW);
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
process_begin_element(dtd_parser *p, const ichar *decl)
|
|
{ dtd *dtd = p->dtd;
|
|
dtd_symbol *id;
|
|
const ichar *s;
|
|
|
|
if ( (s=itake_name(dtd, decl, &id)) )
|
|
{ sgml_attribute atts[MAXATTRIBUTES];
|
|
int natts;
|
|
dtd_element *e = find_element(dtd, id);
|
|
int empty = FALSE;
|
|
int conref = FALSE;
|
|
|
|
if ( !e->structure )
|
|
{ dtd_edef *def;
|
|
e->undefined = TRUE;
|
|
STAT(edefs_implicit++);
|
|
def_element(dtd, id);
|
|
def = e->structure;
|
|
def->type = C_EMPTY;
|
|
}
|
|
|
|
open_element(p, e, TRUE);
|
|
|
|
decl=s;
|
|
if ( (s=process_attributes(p, e, decl, atts, &natts)) )
|
|
decl=s;
|
|
|
|
if ( dtd->dialect != DL_SGML )
|
|
{ if ( (s=isee_func(dtd, decl, CF_ETAGO2)) )
|
|
{ empty = TRUE; /* XML <tag/> */
|
|
decl = s;
|
|
}
|
|
#ifdef XMLNS
|
|
if ( dtd->dialect == DL_XMLNS )
|
|
update_xmlns(p, e, natts, atts);
|
|
#endif
|
|
if ( dtd->dialect != DL_SGML )
|
|
update_space_mode(p, e, natts, atts);
|
|
} else
|
|
{ int i;
|
|
|
|
for(i=0; i<natts; i++)
|
|
{ if ( atts[i].definition->def == AT_CONREF )
|
|
{ empty = TRUE;
|
|
conref = TRUE;
|
|
}
|
|
}
|
|
}
|
|
if ( *decl )
|
|
gripe(ERC_SYNTAX_ERROR, L"Bad attribute list", decl);
|
|
|
|
if ( !(p->flags & SGML_PARSER_NODEFS) )
|
|
natts = add_default_attributes(p, e, natts, atts);
|
|
|
|
if ( empty ||
|
|
(dtd->dialect == DL_SGML &&
|
|
e->structure &&
|
|
e->structure->type == C_EMPTY &&
|
|
!e->undefined) )
|
|
p->empty_element = e;
|
|
else
|
|
p->empty_element = NULL;
|
|
|
|
if ( p->on_begin_element )
|
|
(*p->on_begin_element)(p, e, natts, atts);
|
|
|
|
free_attribute_values(natts, atts);
|
|
|
|
if ( p->empty_element )
|
|
{ p->empty_element = NULL;
|
|
close_element(p, e, conref);
|
|
if ( conref ) /* might be S_CDATA due to declared content */
|
|
p->cdata_state = p->state = S_PCDATA;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
return gripe(ERC_SYNTAX_ERROR, L"Bad open-element tag", decl);
|
|
}
|
|
|
|
|
|
static int
|
|
process_end_element(dtd_parser *p, const ichar *decl)
|
|
{ dtd *dtd = p->dtd;
|
|
dtd_symbol *id;
|
|
const ichar *s;
|
|
|
|
emit_cdata(p, TRUE);
|
|
if ( (s=itake_name(dtd, decl, &id)) && *s == '\0' )
|
|
return close_element(p, find_element(dtd, id), FALSE);
|
|
|
|
if ( p->dtd->shorttag && *decl == '\0' ) /* </>: close current element */
|
|
return close_current_element(p);
|
|
|
|
return gripe(ERC_SYNTAX_ERROR, L"Bad close-element tag", decl);
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
process_net(dtd_parser *p)
|
|
We've seen a / of a shorttag element. Close this one.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static int
|
|
process_net(dtd_parser *p)
|
|
{ sgml_environment *env;
|
|
|
|
prepare_cdata(p);
|
|
for(env = p->environments; env; env=env->parent)
|
|
{ if ( env->wants_net )
|
|
{ sgml_environment *parent;
|
|
|
|
pop_to(p, env, NULL); /* close parents */
|
|
validate_completeness(env);
|
|
parent = env->parent;
|
|
|
|
emit_cdata(p, TRUE);
|
|
p->first = FALSE;
|
|
|
|
if ( p->on_end_element )
|
|
{ WITH_CLASS(p, EV_SHORTTAG,
|
|
(*p->on_end_element)(p, env->element));
|
|
}
|
|
|
|
free_environment(env);
|
|
p->environments = parent;
|
|
p->map = (parent ? parent->map : NULL);
|
|
|
|
return TRUE;
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static int /* <!DOCTYPE ...> */
|
|
process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0)
|
|
{ dtd *dtd = p->dtd;
|
|
dtd_symbol *id;
|
|
const ichar *s;
|
|
dtd_entity *et = NULL;
|
|
|
|
if ( !(s=itake_name(dtd, decl, &id)) )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl);
|
|
decl = s;
|
|
|
|
if ( (s=isee_identifier(dtd, decl, "system")) )
|
|
{ et = sgml_calloc(1, sizeof(*et));
|
|
et->type = ET_SYSTEM;
|
|
decl = s;
|
|
} else if ( (s=isee_identifier(dtd, decl, "public")) )
|
|
{ et = sgml_calloc(1, sizeof(*et));
|
|
et->type = ET_PUBLIC;
|
|
decl = s;
|
|
} else if ( isee_func(dtd, decl, CF_DSO) )
|
|
goto local;
|
|
|
|
if ( et )
|
|
{ et->name = id;
|
|
et->catalog_location = CAT_DOCTYPE;
|
|
if ( !(s=process_entity_value_declaration(p, decl, et)) )
|
|
return FALSE;
|
|
decl = s;
|
|
}
|
|
|
|
if ( !dtd->doctype ) /* i.e. anonymous DTD */
|
|
{ ichar *file;
|
|
dtd_parser *clone;
|
|
|
|
dtd->doctype = istrdup(id->name); /* Fill it */
|
|
if ( et )
|
|
file = entity_file(dtd, et);
|
|
else
|
|
file = istrdup(find_in_catalogue(CAT_DOCTYPE,
|
|
dtd->doctype, NULL, NULL,
|
|
dtd->dialect != DL_SGML));
|
|
|
|
if ( !file )
|
|
{ gripe(ERC_EXISTENCE, L"DTD", dtd->doctype);
|
|
} else
|
|
{ clone = clone_dtd_parser(p);
|
|
if ( !load_dtd_from_file(clone, file) )
|
|
gripe(ERC_EXISTENCE, L"file", file);
|
|
free_dtd_parser(clone);
|
|
sgml_free(file);
|
|
}
|
|
}
|
|
|
|
if ( et )
|
|
free_entity_list(et);
|
|
|
|
local:
|
|
if ( (s=isee_func(dtd, decl, CF_DSO)) ) /* [...] */
|
|
{ int grouplevel = 1;
|
|
data_mode oldmode = p->dmode;
|
|
dtdstate oldstate = p->state;
|
|
locbuf oldloc;
|
|
const ichar *q;
|
|
icharbuf *saved_ibuf = p->buffer;
|
|
|
|
push_location(p, &oldloc);
|
|
/* try to find start-location. */
|
|
/* fails if there is comment before */
|
|
/* the []! */
|
|
sgml_cplocation(&p->location, &p->startloc);
|
|
inc_location(&p->location, '<');
|
|
for(q=decl0; q < s; q++)
|
|
inc_location(&p->location, *q);
|
|
p->dmode = DM_DTD;
|
|
p->state = S_PCDATA;
|
|
p->buffer = new_icharbuf();
|
|
|
|
for( ; *s; s++ )
|
|
{ if ( isee_func(dtd, s, CF_LIT) || /* skip quoted strings */
|
|
isee_func(dtd, s, CF_LITA) )
|
|
{ ichar q = *s;
|
|
|
|
putchar_dtd_parser(p, *s++); /* pass open quote */
|
|
|
|
for( ; *s && *s != q; s++ )
|
|
putchar_dtd_parser(p, *s);
|
|
|
|
if ( *s == q ) /* pass closing quote */
|
|
putchar_dtd_parser(p, *s);
|
|
continue;
|
|
}
|
|
|
|
if ( isee_func(dtd, s, CF_DSO) )
|
|
grouplevel++;
|
|
else if ( isee_func(dtd, s, CF_DSC) && --grouplevel == 0 )
|
|
break;
|
|
putchar_dtd_parser(p, *s);
|
|
}
|
|
p->dtd->implicit = FALSE;
|
|
|
|
p->state = oldstate;
|
|
p->dmode = oldmode;
|
|
free_icharbuf(p->buffer);
|
|
p->buffer = saved_ibuf;
|
|
pop_location(p, &oldloc);
|
|
}
|
|
|
|
p->enforce_outer_element = id; /* make this the outer element */
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static void
|
|
init_decoding(dtd_parser *p)
|
|
{
|
|
#ifdef UTF8
|
|
int decode;
|
|
dtd *dtd = p->dtd;
|
|
|
|
if ( dtd->encoding == SGML_ENC_UTF8 &&
|
|
p->encoded == TRUE )
|
|
decode = TRUE;
|
|
else
|
|
decode = FALSE;
|
|
|
|
if ( p->utf8_decode != decode )
|
|
{ DEBUG(fprintf(stderr, "%s UTF-8 decoding on %p\n",
|
|
decode ? "Enable" : "Disable",
|
|
p));
|
|
|
|
p->utf8_decode = decode;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
xml_set_encoding() is the public interface to set the encoding for the
|
|
parser.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
/* strcasecmp() with C-locale semantics: only the ASCII letters A-Z are
   folded to lower case.  Returns 0 if the strings are equal ignoring
   ASCII case, and otherwise the difference between the first pair of
   differing (folded) bytes.  Bytes are always compared as unsigned char,
   also when one string is a prefix of the other, so the sign of the
   result does not depend on the signedness of plain char. */
static int                              /* strcasecmp() with C locale */
posix_strcasecmp(const char *s1, const char *s2)
{
  for (;; s1++, s2++) {
    int c1 = (unsigned char)*s1;
    int c2 = (unsigned char)*s2;

    if (c1 >= 'A' && c1 <= 'Z') c1 += 'a'-'A';
    if (c2 >= 'A' && c2 <= 'Z') c2 += 'a'-'A';

    if (c1 != c2 || c1 == '\0')         /* mismatch, or both strings ended */
      return c1-c2;
  }
}
|
|
|
|
|
|
int
|
|
xml_set_encoding(dtd_parser *p, const char *enc)
|
|
{ dtd *dtd = p->dtd;
|
|
|
|
if ( posix_strcasecmp(enc, "iso-8859-1") == 0 )
|
|
{ dtd->encoding = SGML_ENC_ISO_LATIN1;
|
|
} else if ( posix_strcasecmp(enc, "us-ascii") == 0 )
|
|
{ dtd->encoding = SGML_ENC_ISO_LATIN1; /* doesn't make a difference */
|
|
} else if ( posix_strcasecmp(enc, "utf-8") == 0 )
|
|
{ dtd->encoding = SGML_ENC_UTF8;
|
|
} else
|
|
return FALSE;
|
|
|
|
init_decoding(p);
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
set_encoding() sets the encoding from the encoding="..." field of the
|
|
XML header.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static void
|
|
set_encoding(dtd_parser *p, const ichar *enc)
|
|
{ char buf[32];
|
|
char *e = buf+sizeof(buf)-1;
|
|
char *o;
|
|
const ichar *i;
|
|
|
|
for(i=enc, o=buf; *i; )
|
|
{ if ( *i < 128 && o < e )
|
|
{ *o++ = (char)*i++;
|
|
} else
|
|
{ goto error;
|
|
}
|
|
}
|
|
*o = '\0';
|
|
|
|
if ( !xml_set_encoding(p, buf) )
|
|
{ error:
|
|
gripe(ERC_EXISTENCE, L"character encoding", enc);
|
|
}
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Process <? ... ?>
|
|
|
|
Should deal with character encoding for XML documents.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static int
|
|
process_pi(dtd_parser *p, const ichar *decl)
|
|
{ const ichar *s;
|
|
dtd *dtd = p->dtd;
|
|
|
|
if ( (s=isee_identifier(dtd, decl, "xml")) ) /* <?xml version="1.0"?> */
|
|
{ decl = s;
|
|
|
|
switch(dtd->dialect)
|
|
{ case DL_SGML:
|
|
set_dialect_dtd(dtd, DL_XML);
|
|
break;
|
|
case DL_XML:
|
|
case DL_XMLNS:
|
|
break;
|
|
}
|
|
|
|
while(*decl)
|
|
{ dtd_symbol *nm;
|
|
|
|
if ( (s=itake_name(dtd, decl, &nm)) &&
|
|
(s=isee_func(dtd, s, CF_VI)) ) /* = */
|
|
{ ichar *start;
|
|
int len;
|
|
ichar buf[MAXSTRINGLEN];
|
|
const ichar *end;
|
|
|
|
if ( !(end=itake_string(dtd, s, &start, &len)) )
|
|
{ end=itake_nmtoken_chars(dtd, s, buf, sizeof(buf)/sizeof(ichar));
|
|
start = buf;
|
|
len = (int)istrlen(buf);
|
|
}
|
|
|
|
if ( end )
|
|
{ decl = end;
|
|
|
|
if ( istrcaseeq(nm->name, L"encoding") )
|
|
{ ichar tmp[32];
|
|
|
|
if ( len < (int)(sizeof(tmp)/sizeof(ichar)-1) )
|
|
{ istrncpy(tmp, start, len);
|
|
tmp[len] = 0;
|
|
|
|
set_encoding(p, tmp);
|
|
} else
|
|
{ gripe(ERC_SYNTAX_ERROR, L"Unterminated encoding?", decl);
|
|
}
|
|
}
|
|
|
|
/* fprintf(stderr, "XML %s = %s\n", nm->name, buf); */
|
|
|
|
continue;
|
|
}
|
|
}
|
|
|
|
gripe(ERC_SYNTAX_ERROR, L"Illegal XML parameter", decl);
|
|
break;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
if ( p->on_pi )
|
|
(*p->on_pi)(p, decl);
|
|
|
|
return FALSE; /* Warn? */
|
|
}
|
|
|
|
|
|
static int
|
|
process_sgml_declaration(dtd_parser *p, const ichar *decl)
|
|
{ return gripe(ERC_SYNTAX_WARNING, L"Ignored <!SGML ...> declaration", NULL);
|
|
}
|
|
|
|
|
|
static int
|
|
process_declaration(dtd_parser *p, const ichar *decl)
|
|
{ const ichar *s;
|
|
dtd *dtd = p->dtd;
|
|
|
|
if ( p->dmode != DM_DTD )
|
|
{ if ( (s=isee_func(dtd, decl, CF_ETAGO2)) ) /* </ ... > */
|
|
{ return process_end_element(p, s);
|
|
} else if ( HasClass(dtd, *decl, CH_NAME) ) /* <letter */
|
|
{ return process_begin_element(p, decl);
|
|
}
|
|
}
|
|
|
|
if ( (s=isee_func(dtd, decl, CF_MDO2)) ) /* <! ... >*/
|
|
{ decl = s;
|
|
|
|
if ( p->on_decl )
|
|
(*p->on_decl)(p, decl);
|
|
|
|
if ( (s = isee_identifier(dtd, decl, "entity")) )
|
|
process_entity_declaration(p, s);
|
|
else if ( (s = isee_identifier(dtd, decl, "element")) )
|
|
process_element_declaraction(p, s);
|
|
else if ( (s = isee_identifier(dtd, decl, "attlist")) )
|
|
process_attlist_declaraction(p, s);
|
|
else if ( (s = isee_identifier(dtd, decl, "notation")) )
|
|
process_notation_declaration(p, s);
|
|
else if ( (s = isee_identifier(dtd, decl, "shortref")) )
|
|
process_shortref_declaration(p, s);
|
|
else if ( (s = isee_identifier(dtd, decl, "usemap")) )
|
|
process_usemap_declaration(p, s);
|
|
else if ( (s = isee_identifier(dtd, decl, "sgml")) )
|
|
process_sgml_declaration(p, s);
|
|
else if ( (s = isee_identifier(dtd, decl, "doctype")) )
|
|
{ if ( p->dmode != DM_DTD )
|
|
process_doctype(p, s, decl-1);
|
|
} else
|
|
{ s = iskip_layout(dtd, decl);
|
|
|
|
if ( *s )
|
|
gripe(ERC_SYNTAX_ERROR, L"Invalid declaration", s);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
return gripe(ERC_SYNTAX_ERROR, L"Invalid declaration", decl);
|
|
}
|
|
|
|
/*******************************
|
|
* STREAM BINDING *
|
|
*******************************/
|
|
|
|
/* File-scope parser pointer.  NOTE(review): it is neither assigned nor
   read in this part of the file; presumably gripe() uses it to find the
   parser that is reporting -- confirm against gripe() and the stream
   binding code. */
static dtd_parser *current_parser; /* For gripes */
|
|
|
|
void
|
|
set_file_dtd_parser(dtd_parser *p, input_type type, const ichar *name)
|
|
{ p->location.type = type;
|
|
p->location.name.file = name;
|
|
p->location.line = 1;
|
|
p->location.linepos = 0;
|
|
p->location.charpos = 0;
|
|
}
|
|
|
|
|
|
static void
|
|
set_src_dtd_parser(dtd_parser *p, input_type type, const ichar *name)
|
|
{ p->location.type = type;
|
|
p->location.name.entity = name;
|
|
p->location.line = 1;
|
|
p->location.linepos = 0;
|
|
p->location.charpos = 0;
|
|
}
|
|
|
|
|
|
void
|
|
set_mode_dtd_parser(dtd_parser *p, data_mode m)
|
|
{ p->dmode = m; /* DM_DTD or DM_DATA */
|
|
p->state = S_PCDATA;
|
|
p->blank_cdata = TRUE;
|
|
}
|
|
|
|
|
|
dtd_parser *
|
|
new_dtd_parser(dtd *dtd)
|
|
{ dtd_parser *p = sgml_calloc(1, sizeof(*p));
|
|
|
|
if ( !dtd )
|
|
dtd = new_dtd(NULL);
|
|
dtd->references++;
|
|
|
|
p->magic = SGML_PARSER_MAGIC;
|
|
p->dtd = dtd;
|
|
p->state = S_PCDATA;
|
|
p->mark_state = MS_INCLUDE;
|
|
p->dmode = DM_DTD;
|
|
p->encoded = TRUE; /* encoded octet stream */
|
|
p->buffer = new_icharbuf();
|
|
p->cdata = new_ocharbuf();
|
|
p->event_class = EV_EXPLICIT;
|
|
set_src_dtd_parser(p, IN_NONE, NULL);
|
|
|
|
return p;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
clone_dtd_parser() creates a new parser that starts as a bitwise copy of
`p' (same DTD, callbacks and settings) and then has its parsing state
reset: no environments, no marked sections, no end-tag, group level 0,
state S_PCDATA in DTD mode, and its own fresh input/output buffers.  The
shared DTD gets an extra reference.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static dtd_parser *
clone_dtd_parser(dtd_parser *p)
{ dtd_parser *copy = sgml_calloc(1, sizeof(*copy));

  *copy = *p;				/* inherit everything ... */
  copy->dtd->references++;

					/* ... except the parse state */
  copy->environments = NULL;
  copy->marked       = NULL;
  copy->etag         = NULL;
  copy->grouplevel   = 0;
  copy->state        = S_PCDATA;
  copy->mark_state   = MS_INCLUDE;
  copy->dmode        = DM_DTD;
  copy->buffer       = new_icharbuf();
  copy->cdata        = new_ocharbuf();

  return copy;
}
|
|
|
|
|
|
void
|
|
free_dtd_parser(dtd_parser *p)
|
|
{ free_icharbuf(p->buffer);
|
|
free_ocharbuf(p->cdata);
|
|
|
|
free_dtd(p->dtd);
|
|
|
|
sgml_free(p);
|
|
}
|
|
|
|
|
|
static int
|
|
process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s)
|
|
{ locbuf old;
|
|
|
|
push_location(p, &old);
|
|
set_src_dtd_parser(p, in, name);
|
|
empty_icharbuf(p->buffer); /* dubious */
|
|
for(; *s; s++)
|
|
putchar_dtd_parser(p, *s);
|
|
pop_location(p, &old);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
process_include(dtd_parser *p, const ichar *entity_name)
|
|
{ dtd_symbol *id;
|
|
dtd_entity *pe;
|
|
dtd *dtd = p->dtd;
|
|
|
|
if ( (id=dtd_find_entity_symbol(dtd, entity_name)) &&
|
|
(pe=find_pentity(p->dtd, id)) )
|
|
{ ichar *file;
|
|
|
|
if ( (file = entity_file(dtd, pe)) )
|
|
{ int rc = sgml_process_file(p, file, SGML_SUB_DOCUMENT);
|
|
sgml_free(file);
|
|
|
|
return rc;
|
|
} else
|
|
{ const ichar *text = entity_value(p, pe, NULL);
|
|
|
|
if ( !text )
|
|
return gripe(ERC_NO_VALUE, pe->name->name);
|
|
|
|
return process_chars(p, IN_ENTITY, entity_name, text);
|
|
}
|
|
}
|
|
|
|
return gripe(ERC_EXISTENCE, L"parameter entity", entity_name);
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Process <![ KEYWORD [
|
|
|
|
Switches ->mark_state according to KEYWORD. Processes the rest in normal
|
|
S_PCDATA style, which pops the mark-stack on seeing ]]>
|
|
|
|
For the purpose of <!DOCTYPE spec [additions]> we switch to S_GROUP if
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static void
|
|
process_marked_section(dtd_parser *p)
|
|
{ ichar buf[MAXDECL];
|
|
dtd *dtd = p->dtd;
|
|
const ichar *decl = p->buffer->data;
|
|
const ichar *s;
|
|
|
|
if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */
|
|
(decl=isee_func(dtd, decl, CF_DSO)) && /* [ */
|
|
expand_pentities(p, decl, ZERO_TERM_LEN, buf, sizeof(buf)/sizeof(ichar)) )
|
|
{ dtd_symbol *kwd;
|
|
|
|
decl = buf;
|
|
if ( (s=itake_name(dtd, decl, &kwd)) &&
|
|
isee_func(dtd, s, CF_DSO) ) /* [ */
|
|
{ dtd_marked *m = sgml_calloc(1, sizeof(*m));
|
|
|
|
m->keyword = kwd; /* push on the stack */
|
|
m->parent = p->marked;
|
|
p->marked = m;
|
|
|
|
if ( istrcaseeq(kwd->name, L"IGNORE") )
|
|
m->type = MS_IGNORE;
|
|
else if ( istrcaseeq(kwd->name, L"INCLUDE") )
|
|
m->type = MS_INCLUDE;
|
|
else if ( istrcaseeq(kwd->name, L"TEMP") )
|
|
m->type = MS_INCLUDE;
|
|
else if ( istrcaseeq(kwd->name, L"CDATA") )
|
|
m->type = MS_CDATA;
|
|
else if ( istrcaseeq(kwd->name, L"RCDATA") )
|
|
m->type = MS_RCDATA;
|
|
else
|
|
m->type = MS_INCLUDE; /* default */
|
|
|
|
empty_icharbuf(p->buffer);
|
|
if ( m->type == MS_CDATA )
|
|
p->state = S_MSCDATA;
|
|
else
|
|
p->state = S_PCDATA;
|
|
if ( p->mark_state != MS_IGNORE )
|
|
p->mark_state = m->type;
|
|
}
|
|
} else
|
|
{ decl = p->buffer->data;
|
|
|
|
if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */
|
|
!isee_func(dtd, decl, CF_DSO) ) /* [ */
|
|
{ p->state = S_GROUP;
|
|
p->grouplevel = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
pop_marked_section(dtd_parser *p)
|
|
{ dtd_marked *m = p->marked;
|
|
|
|
if ( m )
|
|
{ p->marked = m->parent;
|
|
sgml_free(m);
|
|
p->mark_state = (p->marked ? p->marked->type : MS_INCLUDE);
|
|
}
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Update the space-mode for the current element. The space mode defines
|
|
how spaces are handled in the CDATA output.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static dtd_space_mode
|
|
istr_to_space_mode(const ichar *val)
|
|
{ if ( istreq(val, L"default") )
|
|
return SP_DEFAULT;
|
|
if ( istreq(val, L"preserve") )
|
|
return SP_PRESERVE;
|
|
if ( istreq(val, L"sgml") )
|
|
return SP_SGML;
|
|
if ( istreq(val, L"remove") )
|
|
return SP_REMOVE;
|
|
|
|
return SP_INHERIT; /* interpret as error */
|
|
}
|
|
|
|
|
|
static void
|
|
update_space_mode(dtd_parser *p, dtd_element *e,
|
|
int natts, sgml_attribute *atts)
|
|
{ for( ; natts-- > 0; atts++ )
|
|
{ const ichar *name = atts->definition->name->name;
|
|
|
|
if ( istreq(name, L"xml:space") &&
|
|
atts->definition->type == AT_CDATA &&
|
|
atts->value.textW )
|
|
{ dtd_space_mode m = istr_to_space_mode(atts->value.textW);
|
|
|
|
if ( m != SP_INHERIT )
|
|
p->environments->space_mode = m;
|
|
else
|
|
gripe(ERC_EXISTENCE, L"xml:space-mode", atts->value.textW);
|
|
|
|
return;
|
|
}
|
|
}
|
|
|
|
if ( e->space_mode != SP_INHERIT )
|
|
p->environments->space_mode = e->space_mode;
|
|
}
|
|
|
|
|
|
static void
|
|
empty_cdata(dtd_parser *p)
|
|
{ if ( p->dmode == DM_DATA )
|
|
{ empty_ocharbuf(p->cdata);
|
|
p->blank_cdata = TRUE;
|
|
p->cdata_must_be_empty = FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
cb_cdata(dtd_parser *p, ocharbuf *buf, int offset, int size)
|
|
{ if ( p->on_data )
|
|
(*p->on_data)(p, EC_CDATA, size, buf->data.w+offset);
|
|
}
|
|
|
|
|
|
static int
|
|
emit_cdata(dtd_parser *p, int last)
|
|
{ dtd *dtd = p->dtd;
|
|
locbuf locsafe;
|
|
ocharbuf *cdata = p->cdata;
|
|
int offset = 0;
|
|
int size = cdata->size;
|
|
|
|
if ( size == 0 )
|
|
return TRUE; /* empty or done */
|
|
|
|
push_location(p, &locsafe);
|
|
sgml_cplocation(&p->location, &p->startloc); /* start of markup */
|
|
sgml_cplocation(&p->startloc, &p->startcdata); /* real start of CDATA */
|
|
|
|
if ( p->environments )
|
|
{ switch(p->environments->space_mode)
|
|
{ case SP_SGML:
|
|
case SP_DEFAULT:
|
|
if ( p->first )
|
|
{ wint_t c = fetch_ocharbuf(cdata, offset);
|
|
|
|
if ( HasClass(dtd, c, CH_RE) )
|
|
{ inc_location(&p->startloc, c);
|
|
offset++;
|
|
size--;
|
|
c = fetch_ocharbuf(cdata, offset);
|
|
}
|
|
|
|
if ( HasClass(dtd, c, CH_RS) )
|
|
{ inc_location(&p->startloc, c);
|
|
offset++;
|
|
size--;
|
|
}
|
|
}
|
|
if ( last && size > 0 )
|
|
{ wint_t c = fetch_ocharbuf(cdata, offset+size-1);
|
|
|
|
if ( HasClass(dtd, c, CH_RS) )
|
|
{ dec_location(&p->location, c);
|
|
size--;
|
|
poke_ocharbuf(cdata, offset+size, '\0');
|
|
if ( size > 0 )
|
|
c = fetch_ocharbuf(cdata, offset+size-1);
|
|
else
|
|
c = 0; /* HasClass(CH_RE) must fail */
|
|
}
|
|
if ( HasClass(dtd, c, CH_RE) )
|
|
{ dec_location(&p->location, c);
|
|
size--;
|
|
poke_ocharbuf(cdata, offset+size, '\0');
|
|
}
|
|
}
|
|
if ( p->environments->space_mode == SP_DEFAULT )
|
|
{ int o = 0;
|
|
int i;
|
|
|
|
for(i=0; i<size; i++)
|
|
{ wint_t c = fetch_ocharbuf(cdata, offset+i);
|
|
|
|
if ( HasClass(dtd, c, CH_BLANK) )
|
|
{ for(i++; i<size; i++)
|
|
{ wint_t c = fetch_ocharbuf(cdata, offset+i);
|
|
|
|
if ( !HasClass(dtd, c, CH_BLANK) )
|
|
break;
|
|
}
|
|
i--;
|
|
poke_ocharbuf(cdata, o++, ' ');
|
|
continue;
|
|
}
|
|
poke_ocharbuf(cdata, o++, c);
|
|
}
|
|
poke_ocharbuf(cdata, o, '\0');
|
|
offset = 0; /* wrote new output from offset=0 */
|
|
size = o;
|
|
}
|
|
break;
|
|
case SP_REMOVE:
|
|
{ int o = 0;
|
|
int i;
|
|
int end = 0;
|
|
|
|
for(i=0; i<size; i++)
|
|
{ wint_t c = fetch_ocharbuf(cdata, offset+i);
|
|
|
|
if ( HasClass(dtd, c, CH_BLANK) )
|
|
inc_location(&p->startloc, c);
|
|
else
|
|
break;
|
|
}
|
|
|
|
if ( i<size )
|
|
{ for(; i<size; i++)
|
|
{ wint_t c = fetch_ocharbuf(cdata, offset+i);
|
|
|
|
if ( HasClass(dtd, c, CH_BLANK) )
|
|
{ i++;
|
|
|
|
while(i<size && HasClass(dtd,
|
|
(wint_t)fetch_ocharbuf(cdata, offset+i),
|
|
CH_BLANK))
|
|
i++;
|
|
i--;
|
|
poke_ocharbuf(cdata, o++, ' ');
|
|
continue;
|
|
}
|
|
poke_ocharbuf(cdata, o++, c);
|
|
end = o;
|
|
}
|
|
}
|
|
/* TBD: adjust end */
|
|
poke_ocharbuf(cdata, end, '\0');
|
|
size = end;
|
|
break;
|
|
}
|
|
case SP_PRESERVE:
|
|
break;
|
|
case SP_INHERIT:
|
|
assert(0);
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
if ( size == 0 )
|
|
{ pop_location(p, &locsafe);
|
|
empty_cdata(p);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
assert(size > 0);
|
|
|
|
if ( !p->blank_cdata )
|
|
{ if ( p->cdata_must_be_empty )
|
|
{ gripe(ERC_NOT_ALLOWED_PCDATA, p->cdata); /* TBD: now passes buffer! */
|
|
}
|
|
cb_cdata(p, cdata, offset, size);
|
|
} else if ( p->environments )
|
|
{ sgml_environment *env = p->environments;
|
|
dtd_state *new;
|
|
|
|
/* If an element is not in the DTD we must */
|
|
/* assume mixed content and emit spaces */
|
|
|
|
if ( (new=make_dtd_transition(env->state, CDATA_ELEMENT)) )
|
|
{ env->state = new;
|
|
cb_cdata(p, cdata, offset, size);
|
|
} else if ( env->element->undefined &&
|
|
p->environments->space_mode == SP_PRESERVE )
|
|
{ cb_cdata(p, cdata, offset, size);
|
|
}
|
|
}
|
|
|
|
pop_location(p, &locsafe);
|
|
|
|
empty_cdata(p);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
prepare_cdata(dtd_parser *p)
|
|
{ if ( p->cdata->size == 0 )
|
|
return TRUE;
|
|
|
|
terminate_ocharbuf(p->cdata);
|
|
|
|
if ( p->mark_state == MS_INCLUDE )
|
|
{ dtd *dtd = p->dtd;
|
|
|
|
if ( p->environments ) /* needed for <img> <img> */
|
|
{ dtd_element *e = p->environments->element;
|
|
|
|
if ( e->structure && e->structure->type == C_EMPTY && !e->undefined )
|
|
close_element(p, e, FALSE);
|
|
}
|
|
|
|
if ( p->blank_cdata == TRUE )
|
|
{ int blank = TRUE;
|
|
int i;
|
|
|
|
for(i=0; i<p->cdata->size; i++)
|
|
{ wint_t c = fetch_ocharbuf(p->cdata, i);
|
|
|
|
if ( !HasClass(dtd, c, CH_BLANK) )
|
|
{ blank = FALSE;
|
|
break;
|
|
}
|
|
}
|
|
|
|
p->blank_cdata = blank;
|
|
if ( !blank )
|
|
{ if ( p->dmode == DM_DTD )
|
|
gripe(ERC_SYNTAX_ERROR, L"CDATA in DTD", p->cdata->data);
|
|
else
|
|
open_element(p, CDATA_ELEMENT, TRUE);
|
|
}
|
|
}
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
process_cdata(dtd_parser *p, int last)
|
|
{ int rc;
|
|
|
|
WITH_PARSER(p, (prepare_cdata(p), rc=emit_cdata(p, last)));
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
static int
|
|
process_entity(dtd_parser *p, const ichar *name)
|
|
{ if ( name[0] == '#' ) /* #charcode: character entity */
|
|
{ int v = char_entity_value(name);
|
|
|
|
if ( v <= 0 )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Bad character entity", name);
|
|
|
|
add_ocharbuf(p->cdata, v);
|
|
} else
|
|
{ dtd_symbol *id;
|
|
dtd_entity *e;
|
|
dtd *dtd = p->dtd;
|
|
int len;
|
|
const ichar *text;
|
|
const ichar *s;
|
|
int chr;
|
|
ichar *file;
|
|
|
|
if ( !(id=dtd_find_entity_symbol(dtd, name)) ||
|
|
!(e=id->entity) )
|
|
{ if ( dtd->default_entity )
|
|
e = dtd->default_entity;
|
|
else
|
|
return gripe(ERC_EXISTENCE, L"entity", name);
|
|
}
|
|
|
|
if ( !e->value &&
|
|
e->content == EC_SGML &&
|
|
(file=entity_file(p->dtd, e)) )
|
|
{ int rc;
|
|
|
|
empty_icharbuf(p->buffer); /* dubious */
|
|
rc = sgml_process_file(p, file, SGML_SUB_DOCUMENT);
|
|
sgml_free(file);
|
|
return rc;
|
|
}
|
|
|
|
if ( !(text = entity_value(p, e, &len)) )
|
|
return gripe(ERC_NO_VALUE, e->name->name);
|
|
|
|
switch ( e->content )
|
|
{ case EC_SGML:
|
|
case EC_CDATA:
|
|
if ( (s=isee_character_entity(dtd, text, &chr)) && *s == '\0' )
|
|
{ if ( chr == 0 )
|
|
return gripe(ERC_SYNTAX_ERROR, L"Illegal character entity", text);
|
|
|
|
if ( p->blank_cdata == TRUE &&
|
|
!HasClass(dtd, (wint_t)chr, CH_BLANK) )
|
|
{ p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
|
|
p->blank_cdata = FALSE;
|
|
}
|
|
|
|
add_ocharbuf(p->cdata, chr);
|
|
return TRUE;
|
|
}
|
|
if ( e->content == EC_SGML )
|
|
{ locbuf oldloc;
|
|
int decode = p->utf8_decode;
|
|
|
|
push_location(p, &oldloc);
|
|
p->utf8_decode = FALSE;
|
|
set_src_dtd_parser(p, IN_ENTITY, e->name->name);
|
|
empty_icharbuf(p->buffer); /* dubious */
|
|
for(s=text; *s; s++)
|
|
putchar_dtd_parser(p, *s);
|
|
p->utf8_decode = decode;
|
|
pop_location(p, &oldloc);
|
|
} else if ( *text )
|
|
{ const ichar *o;
|
|
|
|
if ( p->blank_cdata == TRUE )
|
|
{ p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
|
|
p->blank_cdata = FALSE;
|
|
}
|
|
|
|
for(o=text; *o; o++)
|
|
add_ocharbuf(p->cdata, *o);
|
|
}
|
|
break;
|
|
case EC_SDATA:
|
|
case EC_NDATA:
|
|
process_cdata(p, FALSE);
|
|
if ( p->on_data )
|
|
(*p->on_data)(p, e->content, len, text);
|
|
break;
|
|
case EC_PI:
|
|
process_cdata(p, FALSE);
|
|
if ( p->on_pi )
|
|
(*p->on_pi)(p, text);
|
|
case EC_STARTTAG:
|
|
#if 0
|
|
prepare_cdata(p);
|
|
process_begin_element(p, text);
|
|
#endif
|
|
break;
|
|
case EC_ENDTAG:
|
|
#if 0
|
|
prepare_cdata(p);
|
|
process_end_element(p, text);
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Deal with end of input. We should give a proper error message depending
|
|
on the state and the start-location of the error.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static int
|
|
end_document_dtd_parser_(dtd_parser *p)
|
|
{ int rval;
|
|
|
|
switch(p->state)
|
|
{ case S_RCDATA:
|
|
case S_CDATA:
|
|
case S_PCDATA:
|
|
rval = TRUE;
|
|
break;
|
|
case S_CMT:
|
|
case S_CMT1:
|
|
case S_CMTE0:
|
|
case S_CMTE1:
|
|
case S_DECLCMT0:
|
|
case S_DECLCMT:
|
|
case S_DECLCMTE0:
|
|
rval = gripe(ERC_SYNTAX_ERROR,
|
|
L"Unexpected end-of-file in comment", L"");
|
|
break;
|
|
case S_ECDATA1:
|
|
case S_ECDATA2:
|
|
case S_EMSC1:
|
|
case S_EMSC2:
|
|
case S_DECL0:
|
|
case S_DECL:
|
|
case S_MDECL0:
|
|
case S_STRING:
|
|
case S_CMTO:
|
|
case S_GROUP:
|
|
case S_PENT:
|
|
case S_ENT:
|
|
case S_ENT0:
|
|
rval = gripe(ERC_SYNTAX_ERROR,
|
|
L"Unexpected end-of-file", L"");
|
|
break;
|
|
#ifdef UTF8
|
|
case S_UTF8:
|
|
rval = gripe(ERC_SYNTAX_ERROR,
|
|
L"Unexpected end-of-file in UTF-8 sequence", L"");
|
|
break;
|
|
#endif
|
|
case S_MSCDATA:
|
|
case S_EMSCDATA1:
|
|
case S_EMSCDATA2:
|
|
rval = gripe(ERC_SYNTAX_ERROR,
|
|
L"Unexpected end-of-file in CDATA marked section", L"");
|
|
break;
|
|
case S_PI:
|
|
case S_PI2:
|
|
rval = gripe(ERC_SYNTAX_ERROR,
|
|
L"Unexpected end-of-file in processing instruction", L"");
|
|
break;
|
|
default:
|
|
rval = gripe(ERC_SYNTAX_ERROR,
|
|
L"Unexpected end-of-file in ???");
|
|
break;
|
|
}
|
|
|
|
if ( p->dmode == DM_DATA )
|
|
{ sgml_environment *env;
|
|
|
|
if ( p->cdata->size > 0 &&
|
|
fetch_ocharbuf(p->cdata, p->cdata->size-1) == CR )
|
|
del_ocharbuf(p->cdata);
|
|
|
|
process_cdata(p, TRUE);
|
|
|
|
if ( (env=p->environments) )
|
|
{ dtd_element *e;
|
|
|
|
while(env->parent)
|
|
env = env->parent;
|
|
|
|
pop_to(p, env, CDATA_ELEMENT);
|
|
e = env->element;
|
|
if ( e->structure && !e->structure->omit_close )
|
|
gripe(ERC_OMITTED_CLOSE, e->name->name);
|
|
close_element(p, e, FALSE);
|
|
}
|
|
}
|
|
|
|
return rval;
|
|
}
|
|
|
|
|
|
int
|
|
end_document_dtd_parser(dtd_parser *p)
|
|
{ int rval;
|
|
|
|
WITH_PARSER(p, rval = end_document_dtd_parser_(p));
|
|
|
|
return rval;
|
|
}
|
|
|
|
|
|
int
|
|
begin_document_dtd_parser(dtd_parser *p)
|
|
{ init_decoding(p);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
void
|
|
reset_document_dtd_parser(dtd_parser *p)
|
|
{ if ( p->environments )
|
|
{ sgml_environment *env, *parent;
|
|
|
|
for(env = p->environments; env; env=parent)
|
|
{ parent = env->parent;
|
|
|
|
free_environment(env);
|
|
}
|
|
|
|
p->environments = NULL;
|
|
}
|
|
|
|
while(p->marked)
|
|
pop_marked_section(p);
|
|
|
|
empty_icharbuf(p->buffer);
|
|
empty_ocharbuf(p->cdata);
|
|
|
|
p->mark_state = MS_INCLUDE;
|
|
p->state = S_PCDATA;
|
|
p->grouplevel = 0;
|
|
p->blank_cdata = TRUE;
|
|
p->event_class = EV_EXPLICIT;
|
|
p->dmode = DM_DATA;
|
|
|
|
begin_document_dtd_parser(p);
|
|
}
|
|
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Set the UTF-8 state
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
#ifdef UTF8
|
|
static void
|
|
process_utf8(dtd_parser *p, int chr)
|
|
{ int bytes;
|
|
int mask;
|
|
|
|
for( bytes=1, mask=0x20; chr&mask; bytes++, mask >>= 1 )
|
|
;
|
|
mask--; /* 0x20 --> 0x1f */
|
|
|
|
p->utf8_saved_state = p->state; /* state to return to */
|
|
p->state = S_UTF8;
|
|
p->utf8_char = chr & mask;
|
|
p->utf8_left = bytes;
|
|
}
|
|
#endif
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
add_cdata() adds a character to the output data. It also maps \r\n onto
|
|
a single \n for Windows newline conventions.
|
|
|
|
There is a problem here in shortref handling. We open the CDATA_ELEMENT
|
|
as soon as we find a character as this may open other elements through
|
|
omitted tags and thus install a new shortref map.
|
|
|
|
If, at a later stage, all CDATA read sofar turns out to be a shortref we
|
|
have incorrectly opened the CDATA_ELEMENT. As `undoing' the
|
|
open_element() is not an option (it may already have caused `events' on
|
|
omitted tags) we are in trouble.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static void
|
|
add_cdata(dtd_parser *p, int chr)
|
|
{ if ( p->mark_state == MS_INCLUDE )
|
|
{ ocharbuf *buf = p->cdata;
|
|
|
|
if ( p->blank_cdata == TRUE &&
|
|
!HasClass(p->dtd, (wint_t)chr, CH_BLANK) )
|
|
{ p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
|
|
p->blank_cdata = FALSE;
|
|
}
|
|
|
|
if ( chr == '\n' ) /* insert missing CR */
|
|
{ int sz;
|
|
|
|
if ( (sz=buf->size) == 0 ||
|
|
fetch_ocharbuf(buf, sz-1) != CR )
|
|
add_cdata(p, CR);
|
|
}
|
|
|
|
add_ocharbuf(buf, chr);
|
|
|
|
if ( p->map &&
|
|
chr <= 0xff && p->map->ends[chr] &&
|
|
match_shortref(p) )
|
|
return;
|
|
|
|
if ( chr == '\n' ) /* dubious. Whould we do that */
|
|
{ int sz; /* here or in space-handling? */
|
|
|
|
if ( (sz=buf->size) > 1 &&
|
|
fetch_ocharbuf(buf, sz-1) == LF &&
|
|
fetch_ocharbuf(buf, sz-2) == CR )
|
|
{ poke_ocharbuf(buf, sz-2, LF);
|
|
buf->size--;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
add_verbatim_cdata(dtd_parser *p, int chr)
|
|
{ if ( p->mark_state != MS_IGNORE )
|
|
{ ocharbuf *buf = p->cdata;
|
|
|
|
if ( p->blank_cdata == TRUE &&
|
|
!HasClass(p->dtd, (wint_t)chr, CH_BLANK) )
|
|
{ p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
|
|
p->blank_cdata = FALSE;
|
|
}
|
|
|
|
if ( chr == '\n' && buf->size > 0 &&
|
|
fetch_ocharbuf(buf, buf->size-1) == '\r' )
|
|
buf->size--;
|
|
|
|
add_ocharbuf(buf, chr);
|
|
}
|
|
}
|
|
|
|
|
|
/* We discovered illegal markup and now process it as normal CDATA
|
|
*/
|
|
|
|
static void
|
|
recover_parser(dtd_parser *p)
|
|
{ const ichar *s;
|
|
|
|
terminate_icharbuf(p->buffer);
|
|
add_cdata(p, p->saved);
|
|
for(s=p->buffer->data; *s; s++)
|
|
add_cdata(p, *s);
|
|
p->state = S_PCDATA;
|
|
}
|
|
|
|
|
|
static inline void
|
|
setlocation(dtd_srcloc *d, dtd_srcloc *loc, int line, int lpos)
|
|
{ d->line = line;
|
|
d->linepos = lpos;
|
|
d->charpos = loc->charpos - 1;
|
|
d->type = loc->type;
|
|
d->name = loc->name;
|
|
}
|
|
|
|
|
|
void
|
|
putchar_dtd_parser(dtd_parser *p, int chr)
|
|
{ dtd *dtd = p->dtd;
|
|
const ichar *f = dtd->charfunc->func;
|
|
int line = p->location.line;
|
|
int lpos = p->location.linepos;
|
|
|
|
p->location.charpos++; /* TBD: actually `bytepos' */
|
|
|
|
#ifdef UTF8
|
|
if ( p->state == S_UTF8 )
|
|
{ if ( (chr & 0xc0) != 0x80 ) /* TBD: recover */
|
|
gripe(ERC_SYNTAX_ERROR, L"Bad UTF-8 sequence", L"");
|
|
p->utf8_char <<= 6;
|
|
p->utf8_char |= (chr & ~0xc0);
|
|
if ( --p->utf8_left == 0 )
|
|
{ chr = p->utf8_char;
|
|
p->state = p->utf8_saved_state;
|
|
} else
|
|
{ return;
|
|
}
|
|
} else if ( ISUTF8_MB(chr) && p->utf8_decode )
|
|
{ process_utf8(p, chr);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
if ( f[CF_RS] == chr )
|
|
{ p->location.line++;
|
|
p->location.linepos = 0;
|
|
} else
|
|
{ if ( f[CF_RE] == chr )
|
|
p->location.linepos = 0;
|
|
else
|
|
p->location.linepos++;
|
|
}
|
|
|
|
reprocess:
|
|
switch(p->state)
|
|
{ case S_PCDATA:
|
|
{ if ( f[CF_MDO1] == chr ) /* < */
|
|
{ setlocation(&p->startloc, &p->location, line, lpos);
|
|
p->state = S_DECL0;
|
|
empty_icharbuf(p->buffer);
|
|
return;
|
|
}
|
|
if ( p->dmode == DM_DTD )
|
|
{ if ( f[CF_PERO] == chr ) /* % */
|
|
{ setlocation(&p->startloc, &p->location, line, lpos);
|
|
p->state = S_PENT;
|
|
return;
|
|
}
|
|
} else
|
|
{ if ( f[CF_ERO] == chr ) /* & */
|
|
{ setlocation(&p->startloc, &p->location, line, lpos);
|
|
p->state = S_ENT0;
|
|
return;
|
|
}
|
|
}
|
|
|
|
if ( p->marked && f[CF_DSC] == chr ) /* ] in marked section */
|
|
{ empty_icharbuf(p->buffer);
|
|
p->state = S_EMSC1;
|
|
p->saved = chr; /* for recovery */
|
|
return;
|
|
}
|
|
|
|
if ( p->waiting_for_net && f[CF_ETAGO2] == chr ) /* shorttag */
|
|
{ setlocation(&p->startloc, &p->location, line, lpos);
|
|
WITH_PARSER(p,
|
|
process_net(p));
|
|
return;
|
|
}
|
|
|
|
/* Real character data */
|
|
if ( p->cdata->size == 0 )
|
|
setlocation(&p->startcdata, &p->location, line, lpos);
|
|
|
|
add_cdata(p, chr);
|
|
return;
|
|
}
|
|
case S_ECDATA2: /* Seen </ in CDATA/RCDATA */
|
|
{ if ( f[CF_MDC] == chr &&
|
|
p->etaglen == p->buffer->size &&
|
|
istrncaseeq(p->buffer->data, p->etag, p->etaglen) )
|
|
{ p->cdata->size -= p->etaglen+2; /* 2 for </ */
|
|
terminate_ocharbuf(p->cdata);
|
|
terminate_icharbuf(p->buffer);
|
|
if ( p->mark_state == MS_INCLUDE )
|
|
{ WITH_PARSER(p,
|
|
process_cdata(p, TRUE);
|
|
process_end_element(p, p->buffer->data));
|
|
empty_cdata(p);
|
|
}
|
|
empty_icharbuf(p->buffer);
|
|
p->cdata_state = p->state = S_PCDATA;
|
|
} else
|
|
{ add_verbatim_cdata(p, chr);
|
|
if ( p->etaglen < p->buffer->size ||
|
|
!HasClass(dtd, (wint_t)chr, CH_NAME))
|
|
{ empty_icharbuf(p->buffer); /* mismatch */
|
|
p->state = p->cdata_state;
|
|
} else
|
|
add_icharbuf(p->buffer, chr);
|
|
}
|
|
return;
|
|
}
|
|
case S_ECDATA1: /* seen < in CDATA */
|
|
{ add_verbatim_cdata(p, chr);
|
|
if ( f[CF_ETAGO2] == chr ) /* / */
|
|
{ empty_icharbuf(p->buffer);
|
|
p->state = S_ECDATA2;
|
|
} else if ( f[CF_ETAGO1] != chr ) /* <: do not change state */
|
|
p->state = p->cdata_state;
|
|
return;
|
|
}
|
|
case S_RCDATA:
|
|
{ if ( f[CF_ERO] == chr ) /* & */
|
|
{ setlocation(&p->startloc, &p->location, line, lpos);
|
|
p->state = S_ENT0;
|
|
return;
|
|
}
|
|
/*FALLTHROUGH*/
|
|
}
|
|
case S_CDATA:
|
|
{ add_verbatim_cdata(p, chr);
|
|
|
|
if ( f[CF_MDO1] == chr ) /* < */
|
|
{ setlocation(&p->startloc, &p->location, line, lpos);
|
|
p->state = S_ECDATA1;
|
|
}
|
|
|
|
/* / in CDATA shorttag element */
|
|
if ( p->waiting_for_net && f[CF_ETAGO2] == chr )
|
|
{ setlocation(&p->startloc, &p->location, line, lpos);
|
|
p->cdata->size--;
|
|
terminate_ocharbuf(p->cdata);
|
|
terminate_icharbuf(p->buffer);
|
|
if ( p->mark_state == MS_INCLUDE )
|
|
{ WITH_PARSER(p,
|
|
process_cdata(p, TRUE);
|
|
process_net(p));
|
|
empty_cdata(p);
|
|
}
|
|
empty_icharbuf(p->buffer);
|
|
p->cdata_state = p->state = S_PCDATA;
|
|
}
|
|
|
|
return;
|
|
}
|
|
case S_MSCDATA:
|
|
{ add_verbatim_cdata(p, chr);
|
|
if ( f[CF_DSC] == chr ) /* ] */
|
|
p->state = S_EMSCDATA1;
|
|
return;
|
|
}
|
|
case S_EMSCDATA1:
|
|
{ add_verbatim_cdata(p, chr);
|
|
if ( f[CF_DSC] == chr ) /* ]] */
|
|
p->state = S_EMSCDATA2;
|
|
else
|
|
p->state = S_MSCDATA;
|
|
return;
|
|
}
|
|
case S_EMSCDATA2:
|
|
{ add_verbatim_cdata(p, chr);
|
|
if ( f[CF_MDC] == chr ) /* ]]> */
|
|
{ p->cdata->size -= 3; /* Delete chars for ]] */
|
|
pop_marked_section(p);
|
|
p->state = S_PCDATA;
|
|
} else if ( f[CF_DSC] != chr ) /* if ]]], stay in this state */
|
|
p->state = S_MSCDATA;
|
|
return;
|
|
}
|
|
case S_EMSC1:
|
|
{ if ( f[CF_DSC] == chr ) /* ]] in marked section */
|
|
{ p->state = S_EMSC2;
|
|
return;
|
|
} else
|
|
{ add_icharbuf(p->buffer, chr);
|
|
recover_parser(p);
|
|
return;
|
|
}
|
|
}
|
|
case S_EMSC2:
|
|
{ if ( f[CF_MDC] == chr ) /* ]]> in marked section */
|
|
{ pop_marked_section(p);
|
|
p->state = S_PCDATA;
|
|
return;
|
|
} else
|
|
{ add_icharbuf(p->buffer, chr);
|
|
recover_parser(p);
|
|
return;
|
|
}
|
|
}
|
|
case S_PENT: /* %parameter entity; */
|
|
{ if ( f[CF_ERC] == chr )
|
|
{ p->state = S_PCDATA;
|
|
terminate_icharbuf(p->buffer);
|
|
if ( p->mark_state == MS_INCLUDE )
|
|
{ WITH_PARSER(p, process_include(p, p->buffer->data));
|
|
}
|
|
empty_icharbuf(p->buffer);
|
|
return;
|
|
}
|
|
if ( HasClass(dtd, (wint_t)chr, CH_NAME) )
|
|
{ add_icharbuf(p->buffer, chr);
|
|
return;
|
|
}
|
|
|
|
terminate_icharbuf(p->buffer);
|
|
gripe(ERC_SYNTAX_ERROR, L"Illegal parameter entity", p->buffer->data);
|
|
break;
|
|
}
|
|
case S_ENT0: /* Seen & */
|
|
{ if ( chr == '#' || HasClass(dtd, (wint_t)chr, CH_NAME) )
|
|
{ empty_icharbuf(p->buffer);
|
|
add_icharbuf(p->buffer, chr);
|
|
p->state = S_ENT;
|
|
} else
|
|
{ if ( dtd->dialect != DL_SGML )
|
|
{ wchar_t buf[3];
|
|
buf[0] = '&';
|
|
buf[1] = chr;
|
|
buf[2] = '\0';
|
|
gripe(ERC_SYNTAX_ERROR, L"Illegal entity", buf);
|
|
}
|
|
|
|
add_cdata(p, f[CF_ERO]);
|
|
p->state = p->cdata_state;
|
|
goto reprocess;
|
|
}
|
|
|
|
return;
|
|
}
|
|
case S_ENT: /* &entity; */
|
|
{ if ( HasClass(dtd, (wint_t)chr, CH_NAME) )
|
|
{ add_icharbuf(p->buffer, chr);
|
|
return;
|
|
}
|
|
|
|
terminate_icharbuf(p->buffer);
|
|
p->state = p->cdata_state;
|
|
if ( p->mark_state == MS_INCLUDE )
|
|
{ WITH_PARSER(p, process_entity(p, p->buffer->data));
|
|
}
|
|
empty_icharbuf(p->buffer);
|
|
|
|
if ( chr == CR )
|
|
p->state = S_ENTCR;
|
|
else if ( f[CF_ERC] != chr && chr != '\n' )
|
|
goto reprocess;
|
|
|
|
break;
|
|
}
|
|
case S_ENTCR: /* seen &entCR, eat the LF */
|
|
{ p->state = p->cdata_state;
|
|
if ( chr != LF )
|
|
goto reprocess;
|
|
|
|
break;
|
|
}
|
|
case S_DECL0: /* Seen < */
|
|
{ if ( f[CF_ETAGO2] == chr ) /* </ */
|
|
{ add_icharbuf(p->buffer, chr);
|
|
p->state = S_DECL;
|
|
} else if ( HasClass(dtd, (wint_t)chr, CH_NAME) ) /* <letter */
|
|
{ add_icharbuf(p->buffer, chr);
|
|
p->state = S_DECL;
|
|
} else if ( f[CF_MDO2] == chr ) /* <! */
|
|
{ p->state = S_MDECL0;
|
|
} else if ( f[CF_PRO2] == chr ) /* <? */
|
|
{ p->state = S_PI;
|
|
} else /* recover */
|
|
{ add_cdata(p, f[CF_MDO1]);
|
|
add_cdata(p, chr);
|
|
p->state = S_PCDATA;
|
|
}
|
|
|
|
return;
|
|
}
|
|
case S_MDECL0: /* Seen <! */
|
|
{ if ( f[CF_CMT] == chr ) /* <!- */
|
|
{ p->state = S_CMTO;
|
|
return;
|
|
}
|
|
add_icharbuf(p->buffer, f[CF_MDO2]);
|
|
add_icharbuf(p->buffer, chr);
|
|
p->state = S_DECL;
|
|
return;
|
|
}
|
|
case S_DECL: /* <...> */
|
|
{ if ( f[CF_MDC] == chr ) /* > */
|
|
{ prepare_cdata(p);
|
|
p->state = S_PCDATA;
|
|
terminate_icharbuf(p->buffer);
|
|
if ( p->mark_state == MS_INCLUDE )
|
|
{ WITH_PARSER(p, process_declaration(p, p->buffer->data));
|
|
}
|
|
empty_icharbuf(p->buffer);
|
|
return;
|
|
}
|
|
if ( dtd->shorttag && f[CF_ETAGO2] == chr && p->buffer->size > 0 )
|
|
{ prepare_cdata(p);
|
|
p->state = S_PCDATA;
|
|
terminate_icharbuf(p->buffer);
|
|
if ( p->mark_state == MS_INCLUDE )
|
|
{ WITH_CLASS(p, EV_SHORTTAG,
|
|
WITH_PARSER(p, process_declaration(p, p->buffer->data)));
|
|
}
|
|
empty_icharbuf(p->buffer);
|
|
p->waiting_for_net = TRUE;
|
|
return;
|
|
}
|
|
|
|
add_icharbuf(p->buffer, chr);
|
|
|
|
if ( f[CF_LIT] == chr ) /* " */
|
|
{ p->state = S_STRING;
|
|
p->saved = chr;
|
|
p->lit_saved_state = S_DECL;
|
|
} else if ( f[CF_LITA] == chr ) /* ' */
|
|
{ p->state = S_STRING;
|
|
p->saved = chr;
|
|
p->lit_saved_state = S_DECL;
|
|
return;
|
|
} else if ( f[CF_CMT] == chr && /* - */
|
|
p->buffer->data[0] == f[CF_MDO2] ) /* Started <! */
|
|
{ p->state = S_DECLCMT0;
|
|
} else if ( f[CF_DSO] == chr ) /* [: marked section */
|
|
{ terminate_icharbuf(p->buffer);
|
|
|
|
process_marked_section(p);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case S_DECLCMT0: /* <...- */
|
|
{ if ( f[CF_CMT] == chr )
|
|
{ p->buffer->size--;
|
|
p->state = S_DECLCMT;
|
|
} else
|
|
{ add_icharbuf(p->buffer, chr);
|
|
p->state = S_DECL;
|
|
}
|
|
break;
|
|
}
|
|
case S_DECLCMT: /* <...--.. */
|
|
{ if ( f[CF_CMT] == chr )
|
|
p->state = S_DECLCMTE0;
|
|
break;
|
|
}
|
|
case S_DECLCMTE0: /* <...--..- */
|
|
{ if ( f[CF_CMT] == chr )
|
|
p->state = S_DECL;
|
|
else
|
|
p->state = S_DECLCMT;
|
|
break;
|
|
}
|
|
case S_PI:
|
|
{ add_icharbuf(p->buffer, chr);
|
|
if ( f[CF_PRO2] == chr ) /* <? ... ? */
|
|
p->state = S_PI2;
|
|
if ( f[CF_PRC] == chr ) /* no ? is ok too (XML/SGML) */
|
|
goto pi;
|
|
return;
|
|
}
|
|
case S_PI2:
|
|
{ if ( f[CF_PRC] == chr )
|
|
{ pi:
|
|
process_cdata(p, FALSE);
|
|
p->state = S_PCDATA;
|
|
p->buffer->size--;
|
|
terminate_icharbuf(p->buffer);
|
|
if ( p->mark_state == MS_INCLUDE )
|
|
{ WITH_PARSER(p, process_pi(p, p->buffer->data));
|
|
}
|
|
empty_icharbuf(p->buffer);
|
|
return;
|
|
}
|
|
add_icharbuf(p->buffer, chr);
|
|
p->state = S_PI;
|
|
return;
|
|
}
|
|
case S_STRING:
|
|
{ add_icharbuf(p->buffer, chr);
|
|
if ( chr == p->saved )
|
|
p->state = p->lit_saved_state;
|
|
break;
|
|
}
|
|
case S_CMTO: /* Seen <!- */
|
|
{ if ( f[CF_CMT] == chr ) /* - */
|
|
{ p->state = S_CMT1;
|
|
return;
|
|
} else
|
|
{ add_cdata(p, f[CF_MDO1]);
|
|
add_cdata(p, f[CF_MDO2]);
|
|
add_cdata(p, f[CF_CMT]);
|
|
add_cdata(p, chr);
|
|
p->state = S_PCDATA;
|
|
return;
|
|
}
|
|
}
|
|
case S_CMT1: /* <!-- */
|
|
{ if ( f[CF_CMT] == chr ) /* <!--- */
|
|
{ if ( dtd->dialect != DL_SGML )
|
|
gripe(ERC_SYNTAX_ERROR, L"Illegal comment", L"<!---");
|
|
}
|
|
p->state = S_CMT;
|
|
break;
|
|
}
|
|
case S_CMT:
|
|
{ if ( f[CF_CMT] == chr )
|
|
p->state = S_CMTE0; /* <!--...- */
|
|
break;
|
|
}
|
|
case S_CMTE0: /* <!--... -- */
|
|
{ if ( f[CF_CMT] == chr )
|
|
p->state = S_CMTE1;
|
|
else
|
|
p->state = S_CMT;
|
|
break;
|
|
}
|
|
case S_CMTE1: /* <!--...-- seen */
|
|
{ if ( f[CF_MDC] == chr ) /* > */
|
|
{ if ( p->on_decl )
|
|
(*p->on_decl)(p, (ichar*)"");
|
|
p->state = S_PCDATA;
|
|
} else
|
|
{ if ( dtd->dialect != DL_SGML )
|
|
gripe(ERC_SYNTAX_ERROR, L"Illegal comment", L"");
|
|
if ( f[CF_CMT] != chr )
|
|
p->state = S_CMT;
|
|
}
|
|
break;
|
|
}
|
|
case S_GROUP: /* [...] in declaration */
|
|
{ add_icharbuf(p->buffer, chr);
|
|
if ( f[CF_DSO] == chr )
|
|
{ p->grouplevel++;
|
|
} else if ( f[CF_DSC] == chr )
|
|
{ if ( --p->grouplevel == 0 )
|
|
p->state = S_DECL;
|
|
} else if ( f[CF_LIT] == chr ) /* " */
|
|
{ p->state = S_STRING;
|
|
p->saved = chr;
|
|
p->lit_saved_state = S_GROUP;
|
|
} else if ( f[CF_LITA] == chr ) /* ' */
|
|
{ p->state = S_STRING;
|
|
p->saved = chr;
|
|
p->lit_saved_state = S_GROUP;
|
|
return;
|
|
}
|
|
break;
|
|
}
|
|
#ifdef UTF8
|
|
case S_UTF8:
|
|
assert(0);
|
|
break;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* TOPLEVEL *
|
|
*******************************/
|
|
|
|
int
|
|
load_dtd_from_file(dtd_parser *p, const ichar *file)
|
|
{ FILE *fd;
|
|
int rval;
|
|
data_mode oldmode = p->dmode;
|
|
dtdstate oldstate = p->state;
|
|
locbuf oldloc;
|
|
|
|
push_location(p, &oldloc);
|
|
p->dmode = DM_DTD;
|
|
p->state = S_PCDATA;
|
|
empty_icharbuf(p->buffer); /* dubious */
|
|
set_file_dtd_parser(p, IN_FILE, file);
|
|
|
|
if ( (fd = wfopen(file, "rb")) )
|
|
{ int chr;
|
|
|
|
while( (chr = getc(fd)) != EOF )
|
|
putchar_dtd_parser(p, chr);
|
|
|
|
fclose(fd);
|
|
|
|
p->dtd->implicit = FALSE;
|
|
rval = TRUE;
|
|
} else
|
|
rval = FALSE;
|
|
|
|
pop_location(p, &oldloc);
|
|
p->dmode = oldmode;
|
|
p->state = oldstate;
|
|
|
|
return rval;
|
|
}
|
|
|
|
|
|
dtd *
|
|
file_to_dtd(const ichar *file, const ichar *doctype, dtd_dialect dialect)
|
|
{ dtd_parser *p = new_dtd_parser(new_dtd(doctype));
|
|
|
|
set_dialect_dtd(p->dtd, dialect);
|
|
|
|
if ( load_dtd_from_file(p, file) )
|
|
{ dtd *dtd = p->dtd;
|
|
|
|
dtd->references++; /* avoid deletion */
|
|
free_dtd_parser(p);
|
|
return dtd;
|
|
} else
|
|
{ free_dtd_parser(p);
|
|
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
SGML sees a file as
|
|
|
|
[<LF>]Line 1<CR>
|
|
<LF> Line 2<CR>
|
|
|
|
I.e. the newline appearing just before the end-of-file should be
|
|
ignored. In addition, Unix-style files are mapped to CR-LF. Thanks to
|
|
Richard O'Keefe.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
int
|
|
sgml_process_stream(dtd_parser *p, FILE *fd, unsigned flags)
|
|
{ int p0, p1;
|
|
|
|
if ( (p0 = getc(fd)) == EOF )
|
|
return TRUE;
|
|
if ( (p1 = getc(fd)) == EOF )
|
|
{ putchar_dtd_parser(p, p0);
|
|
return end_document_dtd_parser(p);
|
|
}
|
|
|
|
for(;;)
|
|
{ int p2 = getc(fd);
|
|
|
|
if ( p2 == EOF )
|
|
{ putchar_dtd_parser(p, p0);
|
|
if ( p1 != LF )
|
|
putchar_dtd_parser(p, p1);
|
|
else if ( p0 != CR )
|
|
putchar_dtd_parser(p, CR);
|
|
|
|
if ( flags & SGML_SUB_DOCUMENT )
|
|
return TRUE;
|
|
else
|
|
return end_document_dtd_parser(p);
|
|
}
|
|
|
|
putchar_dtd_parser(p, p0);
|
|
p0 = p1;
|
|
p1 = p2;
|
|
}
|
|
}
|
|
|
|
|
|
int
|
|
sgml_process_file(dtd_parser *p, const ichar *file, unsigned flags)
|
|
{ FILE *fd;
|
|
int rval;
|
|
locbuf oldloc;
|
|
|
|
push_location(p, &oldloc);
|
|
set_file_dtd_parser(p, IN_FILE, file);
|
|
if ( !(flags & SGML_SUB_DOCUMENT) )
|
|
set_mode_dtd_parser(p, DM_DATA);
|
|
|
|
if ( (fd = wfopen(file, "rb")) )
|
|
{ rval = sgml_process_stream(p, fd, flags);
|
|
fclose(fd);
|
|
} else
|
|
rval = FALSE;
|
|
|
|
pop_location(p, &oldloc);
|
|
|
|
return rval;
|
|
}
|
|
|
|
|
|
|
|
/*******************************
|
|
* ERRORS *
|
|
*******************************/
|
|
|
|
static wchar_t *
|
|
format_location(wchar_t *s, size_t len, dtd_srcloc *l)
|
|
{ int first = TRUE;
|
|
|
|
if ( !l || l->type == IN_NONE )
|
|
return s;
|
|
|
|
for( ; l && l->type != IN_NONE;
|
|
l = l->parent, first = FALSE )
|
|
{ if ( !first )
|
|
{ swprintf(s, len, L" (from ");
|
|
s += wcslen(s);
|
|
}
|
|
|
|
switch(l->type)
|
|
{ case IN_NONE:
|
|
assert(0);
|
|
case IN_FILE:
|
|
swprintf(s, len, L"%ls:%d:%d", l->name.file, l->line, l->linepos);
|
|
break;
|
|
case IN_ENTITY:
|
|
swprintf(s, len, L"&%ls;%d:%d", l->name.entity, l->line, l->linepos);
|
|
break;
|
|
}
|
|
|
|
s += wcslen(s);
|
|
if ( !first )
|
|
{ *s++ = L')';
|
|
}
|
|
}
|
|
|
|
*s++ = L':';
|
|
*s++ = L' ';
|
|
|
|
return s;
|
|
}
|
|
|
|
|
|
static void
|
|
format_message(dtd_error *e)
|
|
{ wchar_t buf[1024];
|
|
wchar_t *s;
|
|
int prefix_len;
|
|
int left;
|
|
|
|
switch(e->severity)
|
|
{ case ERS_ERROR:
|
|
wcscpy(buf, L"Error: ");
|
|
break;
|
|
case ERS_WARNING:
|
|
wcscpy(buf, L"Warning: ");
|
|
break;
|
|
default:
|
|
buf[0] = '\0';
|
|
}
|
|
s = buf+wcslen(buf);
|
|
|
|
s = format_location(s, 1024-(s-buf), e->location);
|
|
prefix_len = (int)(s-buf);
|
|
left = 1024-prefix_len;
|
|
|
|
switch(e->id)
|
|
{ case ERC_REPRESENTATION:
|
|
swprintf(s, left, L"Cannot represent due to %ls", e->argv[0]);
|
|
break;
|
|
case ERC_RESOURCE:
|
|
swprintf(s, left, L"Insufficient %ls resources", e->argv[0]);
|
|
break;
|
|
case ERC_LIMIT:
|
|
swprintf(s, left, L"%ls limit exceeded", e->argv[0]);
|
|
break;
|
|
case ERC_VALIDATE:
|
|
swprintf(s, left, L"%ls", e->argv[0]);
|
|
break;
|
|
case ERC_SYNTAX_ERROR:
|
|
swprintf(s, left, L"Syntax error: %ls", e->argv[0]);
|
|
break;
|
|
case ERC_EXISTENCE:
|
|
swprintf(s, left, L"%ls \"%ls\" does not exist", e->argv[0], e->argv[1]);
|
|
break;
|
|
case ERC_REDEFINED:
|
|
swprintf(s, left, L"Redefined %ls \"%ls\"", e->argv[0], e->argv[1]);
|
|
break;
|
|
default:
|
|
;
|
|
}
|
|
|
|
e->message = str2ring(buf);
|
|
e->plain_message = e->message + prefix_len;
|
|
}
|
|
|
|
|
|
int
|
|
gripe(dtd_error_id e, ...)
|
|
{ va_list args;
|
|
wchar_t buf[1024];
|
|
dtd_error error;
|
|
int dtdmode = FALSE;
|
|
void *freeme = NULL;
|
|
|
|
va_start(args, e);
|
|
|
|
memset(&error, 0, sizeof(error));
|
|
error.minor = e; /* detailed error code */
|
|
|
|
if ( current_parser )
|
|
{ error.location = ¤t_parser->location;
|
|
if ( current_parser->dmode == DM_DTD )
|
|
dtdmode = TRUE;
|
|
} else
|
|
{ error.location = NULL;
|
|
}
|
|
|
|
switch(e)
|
|
{ case ERC_REPRESENTATION:
|
|
case ERC_RESOURCE:
|
|
error.severity = ERS_ERROR;
|
|
error.argv[0] = va_arg(args, wchar_t *);
|
|
break;
|
|
case ERC_LIMIT:
|
|
error.severity = ERS_WARNING;
|
|
error.argv[0] = va_arg(args, wchar_t *);
|
|
break;
|
|
case ERC_SYNTAX_ERROR:
|
|
case ERC_SYNTAX_WARNING:
|
|
{ wchar_t *m = va_arg(args, wchar_t *);
|
|
const wchar_t *s = va_arg(args, const wchar_t *);
|
|
|
|
if ( s && *s )
|
|
{ swprintf(buf, 1024, L"%ls, found \"%ls\"", m, str_summary(s, 25));
|
|
error.argv[0] = buf;
|
|
} else
|
|
error.argv[0] = m;
|
|
|
|
error.severity = (e == ERC_SYNTAX_WARNING ? ERS_WARNING : ERS_ERROR);
|
|
e = ERC_SYNTAX_ERROR;
|
|
break;
|
|
}
|
|
case ERC_DOMAIN:
|
|
{ const wchar_t *expected = va_arg(args, const wchar_t *);
|
|
const wchar_t *found = str_summary(va_arg(args, const wchar_t *), 25);
|
|
|
|
swprintf(buf, 1024, L"Expected type %ls, found \"%ls\"", expected, found);
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_ERROR;
|
|
e = (dtdmode ? ERC_SYNTAX_ERROR : ERC_VALIDATE);
|
|
break;
|
|
}
|
|
case ERC_REDEFINED:
|
|
{ dtd_symbol *name;
|
|
error.argv[0] = va_arg(args, wchar_t *); /* type */
|
|
name = va_arg(args, dtd_symbol *); /* name */
|
|
error.argv[1] = (ichar*)name->name;
|
|
error.severity = ERS_STYLE;
|
|
break;
|
|
}
|
|
case ERC_EXISTENCE:
|
|
{ error.argv[0] = va_arg(args, wchar_t *); /* type */
|
|
error.argv[1] = va_arg(args, wchar_t *); /* name */
|
|
error.severity = ERS_ERROR;
|
|
break;
|
|
}
|
|
case ERC_VALIDATE:
|
|
{ error.argv[0] = va_arg(args, wchar_t *); /* message */
|
|
error.severity = ERS_WARNING;
|
|
break;
|
|
}
|
|
case ERC_OMITTED_CLOSE:
|
|
{ const wchar_t *element = va_arg(args, const wchar_t *);
|
|
|
|
swprintf(buf, 1024, L"Inserted omitted end-tag for \"%ls\"", element);
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_WARNING;
|
|
e = ERC_VALIDATE;
|
|
break;
|
|
}
|
|
case ERC_OMITTED_OPEN:
|
|
{ const wchar_t *element = va_arg(args, const wchar_t *);
|
|
|
|
swprintf(buf, 1024, L"Inserted omitted start-tag for \"%ls\"", element);
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_WARNING;
|
|
e = ERC_VALIDATE;
|
|
break;
|
|
}
|
|
case ERC_NOT_OPEN:
|
|
{ const wchar_t *element = va_arg(args, const wchar_t *);
|
|
|
|
swprintf(buf, 1024, L"Ignored end-tag for \"%ls\" which is not open",
|
|
element);
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_WARNING;
|
|
e = ERC_VALIDATE;
|
|
break;
|
|
}
|
|
case ERC_NOT_ALLOWED:
|
|
{ const wchar_t *element = va_arg(args, const wchar_t *);
|
|
|
|
swprintf(buf, 1024, L"Element \"%ls\" not allowed here", element);
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_WARNING;
|
|
e = ERC_VALIDATE;
|
|
break;
|
|
}
|
|
case ERC_NOT_ALLOWED_PCDATA:
|
|
{ const ocharbuf *cdata = va_arg(args, const ocharbuf *);
|
|
|
|
swprintf(buf, 1024, L"#PCDATA (\"%ls\") not allowed here",
|
|
str_summary(cdata->data.w, 25));
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_WARNING;
|
|
e = ERC_VALIDATE;
|
|
break;
|
|
}
|
|
case ERC_NO_ATTRIBUTE:
|
|
{ const wchar_t *elem = va_arg(args, wchar_t *); /* element */
|
|
const wchar_t *attr = va_arg(args, wchar_t *); /* attribute */
|
|
|
|
swprintf(buf, 1024, L"Element \"%ls\" has no attribute \"%ls\"",
|
|
elem, attr);
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_WARNING;
|
|
|
|
e = ERC_VALIDATE;
|
|
break;
|
|
}
|
|
case ERC_NO_ATTRIBUTE_VALUE:
|
|
{ const wchar_t *elem = va_arg(args, wchar_t *); /* element */
|
|
const wchar_t *value = va_arg(args, wchar_t *); /* attribute value */
|
|
|
|
swprintf(buf, 1024, L"Element \"%ls\" has no attribute with value \"%ls\"",
|
|
elem, value);
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_WARNING;
|
|
|
|
e = ERC_VALIDATE;
|
|
break;
|
|
}
|
|
case ERC_NO_VALUE:
|
|
{ error.argv[0] = L"entity value";
|
|
error.argv[1] = va_arg(args, wchar_t *); /* entity */
|
|
|
|
error.severity = ERS_ERROR;
|
|
e = ERC_EXISTENCE;
|
|
break;
|
|
}
|
|
case ERC_NO_DOCTYPE:
|
|
{ const wchar_t *doctype = va_arg(args, wchar_t *); /* element */
|
|
const wchar_t *file = va_arg(args, wchar_t *); /* DTD file */
|
|
|
|
swprintf(buf, 1024, L"No <!DOCTYPE ...>, assuming \"%ls\" from DTD file \"%s\"",
|
|
doctype, file);
|
|
error.argv[0] = buf;
|
|
error.severity = ERS_WARNING;
|
|
|
|
e = ERC_VALIDATE;
|
|
break;
|
|
}
|
|
case ERC_NO_CATALOGUE:
|
|
{ char *file = va_arg(args, char *); /* catalogue file */
|
|
|
|
error.argv[0] = L"catalogue file";
|
|
freeme = error.argv[1] = utf8towcs(file);
|
|
error.severity = ERS_WARNING;
|
|
e = ERC_EXISTENCE;
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
error.id = e;
|
|
format_message(&error);
|
|
|
|
if ( current_parser && current_parser->on_error )
|
|
(*current_parser->on_error)(current_parser, &error);
|
|
else
|
|
fwprintf(stderr, L"SGML: %ls\n", error.message);
|
|
|
|
if ( freeme )
|
|
sgml_free(freeme);
|
|
|
|
va_end(args);
|
|
|
|
return FALSE;
|
|
}
|