diff --git a/packages/sgml/catalog.c b/packages/sgml/catalog.c index 80e9dcebb..929bd7174 100644 --- a/packages/sgml/catalog.c +++ b/packages/sgml/catalog.c @@ -155,7 +155,7 @@ localpath(const ichar *ref, const ichar *name) } -static int +int register_catalog_file_unlocked(const ichar *file, catalog_location where) { catalog_file **f = &catalog; catalog_file *cf; @@ -205,7 +205,7 @@ wgetenv(const char *name) static void -init_catalog(void) +init_catalog() { static int done = FALSE; LOCK(); @@ -241,7 +241,7 @@ init_catalog(void) int register_catalog_file(const ichar *file, catalog_location where) { int rc; - + init_catalog(); LOCK(); @@ -310,7 +310,7 @@ cs_streql(ichar const *a, ichar const *b) static int scan_overflow(size_t buflen) -{ gripe(ERC_REPRESENTATION, L"token length"); +{ gripe(NULL, ERC_REPRESENTATION, L"token length"); return EOF; } @@ -439,7 +439,7 @@ load_one_catalogue(catalog_file * file) int override = 0; if ( !src ) - { gripe(ERC_NO_CATALOGUE, file->file); + { gripe(NULL, ERC_NO_CATALOGUE, file->file); return; } @@ -514,7 +514,7 @@ load_one_catalogue(catalog_file * file) To look up a parameter entity: f = find_in_catalogue(CAT_PENTITY, name, pubid, sysid, ci); - The name may begin with a % but need not; if it doesn't + The name may begin with a % but need not; if it doesn't a % will be prefixed for the search. If it cannot otherwise be found ${name}.pen will be returned. 
@@ -635,7 +635,7 @@ find_in_catalogue(int kind, return 0; if ( istrlen(name)+4+1 > penlen ) - { gripe(ERC_REPRESENTATION, L"entity name"); + { gripe(NULL, ERC_REPRESENTATION, L"entity name"); return NULL; } diff --git a/packages/sgml/charmap.c b/packages/sgml/charmap.c index af6392e59..eaab5db44 100644 --- a/packages/sgml/charmap.c +++ b/packages/sgml/charmap.c @@ -44,7 +44,7 @@ new_charclass() char_range(map, 'a', 'z', CH_LCLETTER); char_range(map, 'A', 'Z', CH_LCLETTER); char_range(map, '0', '9', CH_DIGIT); - + ca['.'] |= CH_CNM; ca['-'] |= CH_CNM; ca[183] |= CH_CNM; /* XML */ diff --git a/packages/sgml/dtd.h b/packages/sgml/dtd.h index 3495543cf..46ddca482 100644 --- a/packages/sgml/dtd.h +++ b/packages/sgml/dtd.h @@ -466,7 +466,7 @@ dtd * new_dtd(const ichar *doctype); int set_dialect_dtd(dtd *dtd, dtd_dialect dialect); int set_option_dtd(dtd *dtd, dtd_option option, int set); -void putchar_dtd_parser(dtd_parser *p, int chr); +int putchar_dtd_parser(dtd_parser *p, int chr); int begin_document_dtd_parser(dtd_parser *p); int end_document_dtd_parser(dtd_parser *p); void reset_document_dtd_parser(dtd_parser *p); diff --git a/packages/sgml/dtd2pl.c b/packages/sgml/dtd2pl.c index 8750c3edf..f435f95c1 100644 --- a/packages/sgml/dtd2pl.c +++ b/packages/sgml/dtd2pl.c @@ -27,6 +27,7 @@ #include #include #include "dtd.h" +#include "util.h" #include "prolog.h" #define streq(s,q) strcmp((s), (q)) == 0 @@ -42,10 +43,12 @@ int main(int argc, char **argv) { dtd_dialect dialect = DL_SGML; + init_ring(); + program = argv[0]; argv++; argc--; - + while(argc > 0 && argv[0][0] == '-') { if ( streq(argv[0], "-xml") ) { dialect = DL_XML; @@ -63,7 +66,7 @@ main(int argc, char **argv) if ( argc == 1 ) { int wl = mbstowcs(NULL, argv[0], 0); - + if ( wl > 0 ) { wchar_t *ws = malloc((wl+1)*sizeof(wchar_t)); dtd *dtd; diff --git a/packages/sgml/error.c b/packages/sgml/error.c index 799c999cb..3f61e29cf 100644 --- a/packages/sgml/error.c +++ b/packages/sgml/error.c @@ -3,9 +3,9 @@ Part of 
SWI-Prolog Author: Jan Wielemaker - E-mail: jan@swi.psy.uva.nl + E-mail: J.Wielemaker@cs.vu.nl WWW: http://www.swi-prolog.org - Copyright (C): 1985-2002, University of Amsterdam + Copyright (C): 1985-2009, University of Amsterdam This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -32,48 +32,52 @@ int sgml2pl_error(plerrorid id, ...) -{ term_t except = PL_new_term_ref(); - term_t formal = PL_new_term_ref(); - term_t swi = PL_new_term_ref(); +{ int rc; + term_t except, formal, swi; va_list args; char msgbuf[1024]; char *msg = NULL; + if ( !(except = PL_new_term_ref()) || + !(formal = PL_new_term_ref()) || + !(swi = PL_new_term_ref()) ) + return FALSE; + va_start(args, id); switch(id) { case ERR_ERRNO: { int err = va_arg(args, int); - + msg = strerror(err); switch(err) { case ENOMEM: - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "resource_error", 1, - PL_CHARS, "no_memory"); + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "resource_error", 1, + PL_CHARS, "no_memory"); break; case EACCES: { const char *file = va_arg(args, const char *); const char *action = va_arg(args, const char *); - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "permission_error", 3, - PL_CHARS, action, - PL_CHARS, "file", - PL_CHARS, file); + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "permission_error", 3, + PL_CHARS, action, + PL_CHARS, "file", + PL_CHARS, file); break; } case ENOENT: { const char *file = va_arg(args, const char *); - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "existence_error", 2, - PL_CHARS, "file", - PL_CHARS, file); + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "existence_error", 2, + PL_CHARS, "file", + PL_CHARS, file); break; } default: - PL_unify_atom_chars(formal, "system_error"); + rc = PL_unify_atom_chars(formal, "system_error"); break; } break; @@ -84,12 +88,12 @@ sgml2pl_error(plerrorid id, ...) 
if ( PL_is_variable(actual) && strcmp(expected, "variable") != 0 ) - PL_unify_atom_chars(formal, "instantiation_error"); + rc = PL_unify_atom_chars(formal, "instantiation_error"); else - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "type_error", 2, - PL_CHARS, expected, - PL_TERM, actual); + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "type_error", 2, + PL_CHARS, expected, + PL_TERM, actual); break; } case ERR_DOMAIN: @@ -97,31 +101,31 @@ sgml2pl_error(plerrorid id, ...) term_t actual = va_arg(args, term_t); if ( PL_is_variable(actual) ) - PL_unify_atom_chars(formal, "instantiation_error"); + rc = PL_unify_atom_chars(formal, "instantiation_error"); else - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "domain_error", 2, - PL_CHARS, expected, - PL_TERM, actual); + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "domain_error", 2, + PL_CHARS, expected, + PL_TERM, actual); break; } case ERR_EXISTENCE: { const char *type = va_arg(args, const char *); term_t obj = va_arg(args, term_t); - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "existence_error", 2, - PL_CHARS, type, - PL_TERM, obj); + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "existence_error", 2, + PL_CHARS, type, + PL_TERM, obj); break; } case ERR_FAIL: { term_t goal = va_arg(args, term_t); - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "goal_failed", 1, - PL_TERM, goal); + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "goal_failed", 1, + PL_TERM, goal); break; } @@ -129,10 +133,10 @@ sgml2pl_error(plerrorid id, ...) { const char *limit = va_arg(args, const char *); long maxval = va_arg(args, long); - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "limit_exceeded", 2, - PL_CHARS, limit, - PL_LONG, maxval); + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "limit_exceeded", 2, + PL_CHARS, limit, + PL_LONG, maxval); break; } @@ -142,10 +146,10 @@ sgml2pl_error(plerrorid id, ...) 
vsprintf(msgbuf, fmt, args); msg = msgbuf; - - PL_unify_term(formal, - PL_FUNCTOR_CHARS, "miscellaneous", 1, - PL_CHARS, id); + + rc = PL_unify_term(formal, + PL_FUNCTOR_CHARS, "miscellaneous", 1, + PL_CHARS, id); break; } default: @@ -153,26 +157,29 @@ sgml2pl_error(plerrorid id, ...) } va_end(args); - if ( msg ) + if ( rc && msg ) { term_t predterm = PL_new_term_ref(); term_t msgterm = PL_new_term_ref(); - if ( msg ) - { PL_put_atom_chars(msgterm, msg); - } - - PL_unify_term(swi, - PL_FUNCTOR_CHARS, "context", 2, - PL_TERM, predterm, - PL_TERM, msgterm); + if ( !(predterm = PL_new_term_ref()) || + !(msgterm = PL_new_term_ref()) || + !PL_put_atom_chars(msgterm, msg) || + !PL_unify_term(swi, + PL_FUNCTOR_CHARS, "context", 2, + PL_TERM, predterm, + PL_TERM, msgterm) ) + rc = FALSE; } - PL_unify_term(except, - PL_FUNCTOR_CHARS, "error", 2, - PL_TERM, formal, - PL_TERM, swi); + if ( rc ) + rc = PL_unify_term(except, + PL_FUNCTOR_CHARS, "error", 2, + PL_TERM, formal, + PL_TERM, swi); + if ( rc ) + return PL_raise_exception(except); - return PL_raise_exception(except); + return FALSE; } diff --git a/packages/sgml/error.h b/packages/sgml/error.h index d691f6ee0..e3c2d23f9 100644 --- a/packages/sgml/error.h +++ b/packages/sgml/error.h @@ -44,4 +44,3 @@ typedef enum int sgml2pl_error(plerrorid, ...); #endif /*H_ERROR_INCLUDED*/ - diff --git a/packages/sgml/model.c b/packages/sgml/model.c index 7c3e6fb9f..3fec59ad5 100644 --- a/packages/sgml/model.c +++ b/packages/sgml/model.c @@ -107,7 +107,7 @@ visit(dtd_state *state, visited *visited) { if ( visited->states[i] == state ) return FALSE; } - + if ( visited->size >= MAX_VISITED ) { fprintf(stderr, "Reached MAX_VISITED!\n"); return FALSE; @@ -262,7 +262,7 @@ do_find_omitted_path(dtd_state *state, dtd_element *e, } -int +int find_omitted_path(dtd_state *state, dtd_element *e, dtd_element **path) { int pl = 0; visited visited; @@ -314,13 +314,13 @@ static transition * state_transitions(dtd_state *state) { if ( 
!state->transitions && state->expander ) { expander *ex = state->expander; - + switch(ex->type) { case EX_AND: { dtd_model_list *left = ex->kind.and.set; if ( !left ) /* empty AND (should not happen) */ - { link(state, ex->target, NULL); + { link(state, ex->target, NULL); } else if ( !left->next ) /* only one left */ { translate_model(left->model, state, ex->target); } else @@ -378,7 +378,7 @@ translate_one(dtd_model *m, dtd_state *from, dtd_state *to) ex->target = to; ex->type = EX_AND; - + for( sub = m->content.group; sub; sub = sub->next ) add_model_list(&ex->kind.and.set, sub); @@ -436,7 +436,7 @@ make_state_engine(dtd_element *e) { if ( def->content ) { def->initial_state = new_dtd_state(); def->final_state = new_dtd_state(); - + translate_model(def->content, def->initial_state, def->final_state); } else if ( def->type == C_CDATA || def->type == C_RCDATA ) { def->initial_state = new_dtd_state(); @@ -450,7 +450,7 @@ make_state_engine(dtd_element *e) return def->initial_state; } - + return NULL; } @@ -492,7 +492,7 @@ free_expander(expander *e, visited *visited) static void do_free_state_engine(dtd_state *state, visited *visited) { transition *t, *next; - + for(t=state->transitions; t; t=next) { next = t->next; diff --git a/packages/sgml/parser.c b/packages/sgml/parser.c index faf49a4bb..816d5058b 100644 --- a/packages/sgml/parser.c +++ b/packages/sgml/parser.c @@ -65,10 +65,11 @@ typedef struct locbuf * PROTOYPES * *******************************/ -static const ichar * itake_name(dtd *dtd, const ichar *in, dtd_symbol **id); -static const ichar * itake_entity_name(dtd *dtd, const ichar *in, +static const ichar * itake_name(dtd_parser *p, + const ichar *in, dtd_symbol **id); +static const ichar * itake_entity_name(dtd_parser *p, const ichar *in, dtd_symbol **id); -static const ichar * itake_namegroup(dtd *dtd, const ichar *decl, +static const ichar * itake_namegroup(dtd_parser *p, const ichar *decl, dtd_symbol **names, int *n); static const ichar * 
iskip_layout(dtd *dtd, const ichar *in); static dtd_parser * clone_dtd_parser(dtd_parser *p); @@ -83,13 +84,13 @@ static int emit_cdata(dtd_parser *p, int last); static dtd_space_mode istr_to_space_mode(const ichar *val); static void update_space_mode(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts); -static dtd_model * make_model(dtd *dtd, const ichar *decl, +static dtd_model * make_model(dtd_parser *p, const ichar *decl, const ichar **end); static void for_elements_in_model(dtd_model *m, void (*f)(dtd_element *e, void *closure), void *closure); -void putchar_dtd_parser(dtd_parser *p, int chr); +int putchar_dtd_parser(dtd_parser *p, int chr); void free_dtd_parser(dtd_parser *p); static const ichar * isee_character_entity(dtd *dtd, const ichar *in, int *chr); @@ -110,13 +111,6 @@ static int prepare_cdata(dtd_parser *p); p->event_class = _oc; \ } -#define WITH_PARSER(p, g) \ - { dtd_parser *_old = p; \ - current_parser = p; \ - g; \ - current_parser = _old; \ - } - /******************************* * STATISTICS * *******************************/ @@ -259,7 +253,7 @@ isee_func(dtd *dtd, const ichar *in, charfunc func) *******************************/ static dtd_symbol_table * -new_symbol_table(void) +new_symbol_table() { dtd_symbol_table *t = sgml_calloc(1, sizeof(*t)); t->size = SYMBOLHASHSIZE; t->entries = sgml_calloc(t->size, sizeof(dtd_symbol*)); @@ -464,7 +458,7 @@ expand_pentities(dtd_parser *p, const ichar *in, int ilen, ichar *out, int len) { if ( *in == pero ) { dtd_symbol *id; - if ( (s = itake_entity_name(dtd, in+1, &id)) ) + if ( (s = itake_entity_name(p, in+1, &id)) ) { dtd_entity *e = find_pentity(dtd, id); const ichar *eval; int l; @@ -474,7 +468,7 @@ expand_pentities(dtd_parser *p, const ichar *in, int ilen, ichar *out, int len) in = s; if ( !e ) - return gripe(ERC_EXISTENCE, L"parameter entity", id->name); + return gripe(p, ERC_EXISTENCE, L"parameter entity", id->name); if ( !(eval = entity_value(p, e, NULL)) ) return FALSE; @@ -490,7 
+484,7 @@ expand_pentities(dtd_parser *p, const ichar *in, int ilen, ichar *out, int len) } if ( --len <= 0 ) - { gripe(ERC_REPRESENTATION, L"Declaration too long"); + { gripe(p, ERC_REPRESENTATION, L"Declaration too long"); return FALSE; } @@ -499,7 +493,7 @@ expand_pentities(dtd_parser *p, const ichar *in, int ilen, ichar *out, int len) if ( (s=isee_character_entity(dtd, in, &chr)) ) { if ( chr == 0 ) - { gripe(ERC_SYNTAX_ERROR, L"Illegal character entity", in); + { gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", in); } else { *out++ = chr; in = s; @@ -591,7 +585,7 @@ expand_entities(dtd_parser *p, const ichar *in, int len, ocharbuf *out) if ( (s=isee_character_entity(dtd, in, &chr)) ) { if ( chr == 0 ) - gripe(ERC_SYNTAX_ERROR, L"Illegal character entity", in); + gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", in); add_ocharbuf(out, chr); in = s; @@ -602,22 +596,22 @@ expand_entities(dtd_parser *p, const ichar *in, int len, ocharbuf *out) { dtd_symbol *id; dtd_entity *e; const ichar *eval; - - if ( !(in = itake_name(dtd, in+1, &id)) ) + + if ( !(in = itake_name(p, in+1, &id)) ) { in = estart; goto recover; } if ( isee_func(dtd, in, CF_ERC) || *in == '\n' ) in++; - + if ( !(e = id->entity) && !(e=dtd->default_entity) ) - { gripe(ERC_EXISTENCE, L"entity", id->name); + { gripe(p, ERC_EXISTENCE, L"entity", id->name); in = estart; goto recover; } - + if ( !(eval = entity_value(p, e, NULL)) ) - { gripe(ERC_NO_VALUE, e->name->name); + { gripe(p, ERC_NO_VALUE, e->name->name); in = estart; goto recover; } @@ -636,7 +630,7 @@ expand_entities(dtd_parser *p, const ichar *in, int len, ocharbuf *out) } if ( dtd->dialect != DL_SGML ) - gripe(ERC_SYNTAX_ERROR, L"Illegal entity", estart); + gripe(p, ERC_SYNTAX_ERROR, L"Illegal entity", estart); } recover: @@ -675,7 +669,7 @@ find_element(dtd *dtd, dtd_symbol *id) e->undefined = TRUE; e->name = id; id->element = e; - + e->next = dtd->elements; dtd->elements = e; @@ -686,7 +680,7 @@ find_element(dtd *dtd, 
dtd_symbol *id) static dtd_edef * new_element_definition(dtd *dtd) { dtd_edef *def = sgml_calloc(1, sizeof(*def)); - + STAT(edefs_created++); return def; @@ -757,7 +751,7 @@ free_attribute_list(dtd_attr_list *l) free_attribute(l->attribute); sgml_free(l); - } + } } @@ -871,10 +865,11 @@ isee_identifier(dtd *dtd, const ichar *in, char *id) static const ichar * -itake_name(dtd *dtd, const ichar *in, dtd_symbol **id) +itake_name(dtd_parser *p, const ichar *in, dtd_symbol **id) { ichar buf[MAXNMLEN]; ichar *o = buf; ichar *e = &buf[MAXNMLEN]-1; + dtd *dtd = p->dtd; in = iskip_layout(dtd, in); if ( !HasClass(dtd, *in, CH_NMSTART) ) @@ -889,7 +884,7 @@ itake_name(dtd *dtd, const ichar *in, dtd_symbol **id) } if ( o == e ) - { gripe(ERC_REPRESENTATION, L"NAME too long"); + { gripe(p, ERC_REPRESENTATION, L"NAME too long"); return NULL; } @@ -902,10 +897,11 @@ itake_name(dtd *dtd, const ichar *in, dtd_symbol **id) static const ichar * -itake_entity_name(dtd *dtd, const ichar *in, dtd_symbol **id) +itake_entity_name(dtd_parser *p, const ichar *in, dtd_symbol **id) { ichar buf[MAXNMLEN]; ichar *o = buf; ichar *e = &buf[MAXNMLEN]-1; + dtd *dtd = p->dtd; in = iskip_layout(dtd, in); if ( !HasClass(dtd, *in, CH_NMSTART) ) @@ -919,7 +915,7 @@ itake_entity_name(dtd *dtd, const ichar *in, dtd_symbol **id) *o++ = towlower(*in++); } if ( o == e ) - { gripe(ERC_REPRESENTATION, L"Entity NAME too long"); + { gripe(p, ERC_REPRESENTATION, L"Entity NAME too long"); return NULL; } @@ -932,10 +928,11 @@ itake_entity_name(dtd *dtd, const ichar *in, dtd_symbol **id) static const ichar * -itake_nmtoken(dtd *dtd, const ichar *in, dtd_symbol **id) +itake_nmtoken(dtd_parser *p, const ichar *in, dtd_symbol **id) { ichar buf[MAXNMLEN]; ichar *o = buf; ichar *e = &buf[MAXNMLEN]-1; + dtd *dtd = p->dtd; in = iskip_layout(dtd, in); if ( !HasClass(dtd, *in, CH_NAME) ) @@ -948,7 +945,7 @@ itake_nmtoken(dtd *dtd, const ichar *in, dtd_symbol **id) *o++ = towlower(*in++); } if ( o == e ) - { 
gripe(ERC_REPRESENTATION, L"NMTOKEN too long"); + { gripe(p, ERC_REPRESENTATION, L"NMTOKEN too long"); return NULL; } @@ -961,10 +958,11 @@ itake_nmtoken(dtd *dtd, const ichar *in, dtd_symbol **id) static const ichar * -itake_nutoken(dtd *dtd, const ichar *in, dtd_symbol **id) +itake_nutoken(dtd_parser *p, const ichar *in, dtd_symbol **id) { ichar buf[MAXNMLEN]; ichar *o = buf; ichar *e = &buf[MAXNMLEN]-1; + dtd *dtd = p->dtd; in = iskip_layout(dtd, in); if ( !HasClass(dtd, *in, CH_DIGIT) ) @@ -979,13 +977,13 @@ itake_nutoken(dtd *dtd, const ichar *in, dtd_symbol **id) } if ( o == e ) - { gripe(ERC_REPRESENTATION, L"NUTOKEN too long"); + { gripe(p, ERC_REPRESENTATION, L"NUTOKEN too long"); return NULL; } *o = '\0'; if ( o - buf > 8 ) - gripe(ERC_LIMIT, L"nutoken length"); + gripe(p, ERC_LIMIT, L"nutoken length"); *id = dtd_add_symbol(dtd, buf); @@ -994,8 +992,10 @@ itake_nutoken(dtd *dtd, const ichar *in, dtd_symbol **id) static const ichar * -itake_number(dtd *dtd, const ichar *in, dtd_attr *at) -{ in = iskip_layout(dtd, in); +itake_number(dtd_parser *p, const ichar *in, dtd_attr *at) +{ dtd *dtd = p->dtd; + + in = iskip_layout(dtd, in); switch(dtd->number_mode) { case NU_TOKEN: @@ -1079,13 +1079,15 @@ itake_url(dtd *dtd, const ichar *in, ichar **out) static const ichar * -itake_nmtoken_chars(dtd *dtd, const ichar *in, ichar *out, int len) -{ in = iskip_layout(dtd, in); +itake_nmtoken_chars(dtd_parser *p, const ichar *in, ichar *out, int len) +{ dtd *dtd = p->dtd; + + in = iskip_layout(dtd, in); if ( !HasClass(dtd, *in, CH_NAME) ) return NULL; while( HasClass(dtd, *in, CH_NAME) ) { if ( --len <= 0 ) - gripe(ERC_REPRESENTATION, L"Name token too long"); + gripe(p, ERC_REPRESENTATION, L"Name token too long"); *out++ = (dtd->case_sensitive ? 
*in++ : (ichar)towlower(*in++)); } *out++ = '\0'; @@ -1132,8 +1134,9 @@ JW: I decided to accept / as part of an unquoted in SGML-mode if */ static ichar const * -itake_unquoted(dtd *dtd, ichar const *in, ichar *out, int len) -{ ichar const end2 = dtd->charfunc->func[CF_ETAGO2]; /* / */ +itake_unquoted(dtd_parser *p, ichar const *in, ichar *out, int len) +{ dtd *dtd = p->dtd; + ichar const end2 = dtd->charfunc->func[CF_ETAGO2]; /* / */ ichar c; /* skip leading layout. Do NOT skip comments! --x-- is a value! */ @@ -1150,7 +1153,7 @@ itake_unquoted(dtd *dtd, ichar const *in, ichar *out, int len) if ( --len > 0 ) *out++ = c; else if ( len == 0 ) - gripe(ERC_REPRESENTATION, L"Attribute too long"); + gripe(p, ERC_REPRESENTATION, L"Attribute too long"); c = *++in; } *out = '\0'; @@ -1196,7 +1199,7 @@ free_dtd(dtd *dtd) if ( dtd->doctype ) sgml_free(dtd->doctype); - + free_entity_list(dtd->entities); free_entity_list(dtd->pentities); free_notations(dtd->notations); @@ -1206,7 +1209,7 @@ free_dtd(dtd *dtd) sgml_free(dtd->charfunc); sgml_free(dtd->charclass); dtd->magic = 0; - + sgml_free(dtd); } } @@ -1238,17 +1241,17 @@ set_dialect_dtd(dtd *dtd, dtd_dialect dialect) case DL_XMLNS: { const ichar **el; dtd_parser p; - + dtd->case_sensitive = TRUE; dtd->encoding = SGML_ENC_UTF8; dtd->space_mode = SP_PRESERVE; dtd->shorttag = FALSE; - + memset(&p, 0, sizeof(p)); p.dtd = dtd; for(el = xml_entities; *el; el++) process_entity_declaration(&p, *el); - + break; } } @@ -1327,7 +1330,7 @@ process_entity_value_declaration(dtd_parser *p, } string_expected: - gripe(ERC_SYNTAX_ERROR, L"String expected", decl); + gripe(p, ERC_SYNTAX_ERROR, L"String expected", decl); return NULL; } @@ -1352,19 +1355,19 @@ process_entity_declaration(dtd_parser *p, const ichar *decl) } else isparam = FALSE; - if ( !(s = itake_entity_name(dtd, decl, &id)) ) + if ( !(s = itake_entity_name(p, decl, &id)) ) { if ( !(s = isee_identifier(dtd, decl, "#default")) ) - return gripe(ERC_SYNTAX_ERROR, L"Name expected", 
decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); id = dtd_add_symbol(dtd, (ichar*)"#DEFAULT"); isdef = TRUE; } if ( isparam && find_pentity(dtd, id) ) - { gripe(ERC_REDEFINED, L"parameter entity", id); + { gripe(p, ERC_REDEFINED, L"parameter entity", id); return TRUE; /* already defined parameter entity */ } if ( id->entity ) - { gripe(ERC_REDEFINED, L"entity", id); + { gripe(p, ERC_REDEFINED, L"entity", id); return TRUE; /* already defined normal entity */ } @@ -1455,17 +1458,17 @@ process_entity_declaration(dtd_parser *p, const ichar *decl) { decl = s; e->content = EC_NDATA; } else - return gripe(ERC_SYNTAX_ERROR, L"Bad datatype declaration", decl); - - if ( (s=itake_name(dtd, decl, &nname)) ) /* what is this? */ + return gripe(p, ERC_SYNTAX_ERROR, L"Bad datatype declaration", decl); + + if ( (s=itake_name(p, decl, &nname)) ) /* what is this? */ { decl = s; } else - return gripe(ERC_SYNTAX_ERROR, L"Bad notation declaration", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Bad notation declaration", decl); } } if ( *decl ) - return gripe(ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl); } if ( isparam ) @@ -1476,7 +1479,7 @@ process_entity_declaration(dtd_parser *p, const ichar *decl) e->next = dtd->entities; dtd->entities = e; } - + if ( isdef ) dtd->default_entity = e; @@ -1518,12 +1521,12 @@ process_notation_declaration(dtd_parser *p, const ichar *decl) ichar *system = NULL, *public = NULL; dtd_notation *not; - if ( !(s=itake_name(dtd, decl, &nname)) ) - return gripe(ERC_SYNTAX_ERROR, L"Notation name expected", decl); + if ( !(s=itake_name(p, decl, &nname)) ) + return gripe(p, ERC_SYNTAX_ERROR, L"Notation name expected", decl); decl = s; if ( find_notation(dtd, nname) ) - { gripe(ERC_REDEFINED, L"notation", nname); + { gripe(p, ERC_REDEFINED, L"notation", nname); return TRUE; } @@ -1532,16 +1535,16 @@ process_notation_declaration(dtd_parser *p, const ichar 
*decl) } else if ( (s=isee_identifier(dtd, decl, "public")) ) { decl = s; if ( !(s=itake_dubbed_string(dtd, decl, &public)) ) - return gripe(ERC_SYNTAX_ERROR, L"Public identifier expected", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Public identifier expected", decl); } else - return gripe(ERC_SYNTAX_ERROR, L"SYSTEM or PUBLIC expected", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"SYSTEM or PUBLIC expected", decl); decl = s; if ( (s=itake_dubbed_string(dtd, decl, &system)) ) decl = s; if ( *decl ) - return gripe(ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end of declaraction", decl); not = sgml_calloc(1, sizeof(*not)); not->name = nname; @@ -1598,23 +1601,24 @@ free_shortrefs(dtd_shortref *sr) static const ichar * -shortref_add_map(dtd *dtd, const ichar *decl, dtd_shortref *sr) +shortref_add_map(dtd_parser *p, const ichar *decl, dtd_shortref *sr) { ichar *start; int len; ichar from[MAXMAPLEN]; ichar *f = from; dtd_symbol *to; const ichar *s; const ichar *end; - dtd_map **p; + dtd *dtd = p->dtd; + dtd_map **prev; dtd_map *m; if ( !(s=itake_string(dtd, decl, &start, &len)) ) - { gripe(ERC_SYNTAX_ERROR, L"map-string expected", decl); + { gripe(p, ERC_SYNTAX_ERROR, L"map-string expected", decl); return NULL; } decl = s; - if ( !(s=itake_entity_name(dtd, decl, &to)) ) - { gripe(ERC_SYNTAX_ERROR, L"map-to name expected", decl); + if ( !(s=itake_entity_name(p, decl, &to)) ) + { gripe(p, ERC_SYNTAX_ERROR, L"map-to name expected", decl); return NULL; } end = s; @@ -1637,15 +1641,15 @@ shortref_add_map(dtd *dtd, const ichar *decl, dtd_shortref *sr) } *f = 0; - for(p=&sr->map; *p; p = &(*p)->next) + for(prev=&sr->map; *prev; prev = &(*prev)->next) ; - + m = sgml_calloc(1, sizeof(*m)); m->from = istrdup(from); m->len = (int)istrlen(from); m->to = to; - *p = m; + *prev = m; return end; } @@ -1662,7 +1666,7 @@ def_shortref(dtd_parser *p, dtd_symbol *name) if ( r->name == name ) return r; } - + sr = sgml_calloc(1, 
sizeof(*sr)); sr->name = name; *pr = sr; @@ -1713,13 +1717,13 @@ process_shortref_declaration(dtd_parser *p, const ichar *decl) return FALSE; decl = buf; - if ( !(s=itake_name(dtd, decl, &name)) ) - return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl); + if ( !(s=itake_name(p, decl, &name)) ) + return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); decl = s; sr = def_shortref(p, name); if ( sr->defined ) - { gripe(ERC_REDEFINED, L"shortref", name); + { gripe(p, ERC_REDEFINED, L"shortref", name); return TRUE; } @@ -1727,13 +1731,13 @@ process_shortref_declaration(dtd_parser *p, const ichar *decl) sr->defined = TRUE; while( *(decl = iskip_layout(dtd, decl)) != '\0' - && (s=shortref_add_map(dtd, decl, sr)) ) + && (s=shortref_add_map(p, decl, sr)) ) decl = s; compile_map(dtd, sr); if ( *decl ) - return gripe(ERC_SYNTAX_ERROR, L"Map expected", decl); - + return gripe(p, ERC_SYNTAX_ERROR, L"Map expected", decl); + return TRUE; } @@ -1767,7 +1771,7 @@ find_map(dtd *dtd, dtd_symbol *name) return sr; } } - + return NULL; } @@ -1792,11 +1796,11 @@ process_usemap_declaration(dtd_parser *p, const ichar *decl) return FALSE; decl = buf; - if ( !(s=itake_name(dtd, decl, &name)) ) + if ( !(s=itake_name(p, decl, &name)) ) { if ( (s=isee_identifier(dtd, decl, "#empty")) ) name = NULL; else - return gripe(ERC_SYNTAX_ERROR, L"map-name expected", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"map-name expected", decl); } decl = s; @@ -1806,27 +1810,27 @@ process_usemap_declaration(dtd_parser *p, const ichar *decl) if ( isee_func(dtd, decl, CF_GRPO) ) /* ( */ { dtd_model *model; - if ( (model = make_model(dtd, decl, &s)) ) + if ( (model = make_model(p, decl, &s)) ) { for_elements_in_model(model, set_map_element, map); free_model(model); decl = s; } else return FALSE; - } else if ( (s=itake_name(dtd, decl, &ename)) ) + } else if ( (s=itake_name(p, decl, &ename)) ) { e = find_element(dtd, ename); e->map = map; decl = s; } else if ( p->environments ) { if ( !map->defined ) - 
gripe(ERC_EXISTENCE, L"map", name->name); + gripe(p, ERC_EXISTENCE, L"map", name->name); p->environments->map = map; p->map = p->environments->map; } else - return gripe(ERC_SYNTAX_ERROR, L"element-name expected", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"element-name expected", decl); if ( *decl ) - return gripe(ERC_SYNTAX_ERROR, L"Unparsed", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Unparsed", decl); return TRUE; } @@ -1982,10 +1986,11 @@ free_model(dtd_model *m) static dtd_model * -make_model(dtd *dtd, const ichar *decl, const ichar **end) +make_model(dtd_parser *p, const ichar *decl, const ichar **end) { const ichar *s; dtd_model *m = sgml_calloc(1, sizeof(*m)); dtd_symbol *id; + dtd *dtd = p->dtd; decl = iskip_layout(dtd, decl); @@ -1996,13 +2001,13 @@ make_model(dtd *dtd, const ichar *decl, const ichar **end) return m; } - if ( (s=itake_name(dtd, decl, &id)) ) + if ( (s=itake_name(p, decl, &id)) ) { m->type = MT_ELEMENT; m->content.element = find_element(dtd, id); decl = s; } else { if ( !(s=isee_func(dtd, decl, CF_GRPO)) ) - { gripe(ERC_SYNTAX_ERROR, L"Name group expected", decl); + { gripe(p, ERC_SYNTAX_ERROR, L"Name group expected", decl); free_model(m); return NULL; } @@ -2012,13 +2017,13 @@ make_model(dtd *dtd, const ichar *decl, const ichar **end) { dtd_model *sub; modeltype mt; - if ( !(sub = make_model(dtd, decl, &s)) ) + if ( !(sub = make_model(p, decl, &s)) ) { free_model(sub); return NULL; } decl = s; add_submodel(m, sub); - + if ( (s = isee_func(dtd, decl, CF_OR)) ) { decl = s; mt = MT_OR; @@ -2032,7 +2037,7 @@ make_model(dtd *dtd, const ichar *decl, const ichar **end) { decl = s; break; } else - { gripe(ERC_SYNTAX_ERROR, L"Connector ('|', ',' or '&') expected", decl); + { gripe(p, ERC_SYNTAX_ERROR, L"Connector ('|', ',' or '&') expected", decl); free_model(m); return NULL; } @@ -2042,7 +2047,7 @@ make_model(dtd *dtd, const ichar *decl, const ichar **end) { if ( !m->type ) m->type = mt; else - { gripe(ERC_SYNTAX_ERROR, L"Different connector 
types in model", decl); + { gripe(p, ERC_SYNTAX_ERROR, L"Different connector types in model", decl); free_model(m); return NULL; } @@ -2064,7 +2069,7 @@ make_model(dtd *dtd, const ichar *decl, const ichar **end) } } else m->cardinality = MC_ONE; - + if ( m->type == MT_UNDEF ) /* simplify (e+), etc. */ { dtd_model *sub = m->content.group; modelcard card; @@ -2078,7 +2083,7 @@ make_model(dtd *dtd, const ichar *decl, const ichar **end) { m->type = MT_OR; goto out; } - + *m = *sub; m->cardinality = card; sgml_free(sub); @@ -2091,8 +2096,9 @@ out: static const ichar * -process_model(dtd *dtd, dtd_edef *e, const ichar *decl) +process_model(dtd_parser *p, dtd_edef *e, const ichar *decl) { const ichar *s; + dtd *dtd = p->dtd; decl = iskip_layout(dtd, decl); if ( (s = isee_identifier(dtd, decl, "empty")) ) @@ -2111,9 +2117,9 @@ process_model(dtd *dtd, dtd_edef *e, const ichar *decl) { e->type = C_ANY; return s; } - + e->type = C_PCDATA; - if ( !(e->content = make_model(dtd, decl, &decl)) ) + if ( !(e->content = make_model(p, decl, &decl)) ) return FALSE; return decl; @@ -2149,17 +2155,18 @@ isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep) static const ichar * -itake_namegroup(dtd *dtd, const ichar *decl, +itake_namegroup(dtd_parser *p, const ichar *decl, dtd_symbol **names, int *n) { const ichar *s; int en = 0; + dtd *dtd = p->dtd; if ( (s=isee_func(dtd, decl, CF_GRPO)) ) { charfunc ngs = CF_NG; for(;;) - { if ( !(decl=itake_name(dtd, s, &names[en++])) ) - { gripe(ERC_SYNTAX_ERROR, L"Name expected", s); + { if ( !(decl=itake_name(p, s, &names[en++])) ) + { gripe(p, ERC_SYNTAX_ERROR, L"Name expected", s); return NULL; } if ( (s=isee_ngsep(dtd, decl, &ngs)) ) @@ -2171,7 +2178,7 @@ itake_namegroup(dtd *dtd, const ichar *decl, decl = s; return iskip_layout(dtd, decl); } - gripe(ERC_SYNTAX_ERROR, L"Bad name-group", decl); + gripe(p, ERC_SYNTAX_ERROR, L"Bad name-group", decl); return NULL; } } @@ -2195,15 +2202,17 @@ add_list_element(dtd_element *e, void *closure) static 
const ichar * -itake_el_or_model_element_list(dtd *dtd, const ichar *decl, dtd_symbol **names, int *n) +itake_el_or_model_element_list(dtd_parser *p, + const ichar *decl, dtd_symbol **names, int *n) { const ichar *s; + dtd *dtd = p->dtd; if ( isee_func(dtd, decl, CF_GRPO) ) { dtd_model *model; - if ( (model = make_model(dtd, decl, &s)) ) + if ( (model = make_model(p, decl, &s)) ) { namelist nl; - + nl.list = names; nl.size = 0; for_elements_in_model(model, add_list_element, &nl); @@ -2214,8 +2223,8 @@ itake_el_or_model_element_list(dtd *dtd, const ichar *decl, dtd_symbol **names, } else return NULL; } else - { if ( !(s = itake_name(dtd, decl, &names[0])) ) - { gripe(ERC_SYNTAX_ERROR, L"Name expected", decl); + { if ( !(s = itake_name(p, decl, &names[0])) ) + { gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); return NULL; } *n = 1; @@ -2252,8 +2261,8 @@ process_element_declaraction(dtd_parser *p, const ichar *decl) return FALSE; decl = buf; - if ( !(s=itake_el_or_model_element_list(dtd, decl, eid, &en)) ) - return gripe(ERC_SYNTAX_ERROR, L"Name or name-group expected", decl); + if ( !(s=itake_el_or_model_element_list(p, decl, eid, &en)) ) + return gripe(p, ERC_SYNTAX_ERROR, L"Name or name-group expected", decl); decl = s; if ( en == 0 ) return TRUE; /* 0 elements */ @@ -2283,13 +2292,13 @@ process_element_declaraction(dtd_parser *p, const ichar *decl) { for(i=0; iomit_close = TRUE; } else - return gripe(ERC_SYNTAX_ERROR, L"Bad omit-tag declaration", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Bad omit-tag declaration", decl); decl = s; } - + /* content model */ - if ( !(decl=process_model(dtd, def, decl)) ) + if ( !(decl=process_model(p, def, decl)) ) return FALSE; /* in/excluded elements */ @@ -2297,14 +2306,14 @@ process_element_declaraction(dtd_parser *p, const ichar *decl) { dtd_symbol *ng[MAXNAMEGROUP]; int ns; dtd_element_list **l; - + if ( decl[0] == '-' ) l = &def->excluded; else l = &def->included; decl++; - if ( (s=itake_namegroup(dtd, decl, ng, &ns)) 
) + if ( (s=itake_namegroup(p, decl, ng, &ns)) ) { int i; decl = s; @@ -2312,12 +2321,12 @@ process_element_declaraction(dtd_parser *p, const ichar *decl) for(i=0; ivalue = s; for( ; *nl; nl = &(*nl)->next ) @@ -2364,13 +2373,13 @@ set_element_properties(dtd_element *e, dtd_attr *a) static void -add_attribute(dtd *dtd, dtd_element *e, dtd_attr *a) +add_attribute(dtd_parser *p, dtd_element *e, dtd_attr *a) { dtd_attr_list **l; dtd_attr_list *n; for(l = &e->attributes; *l; l = &(*l)->next) { if ( (*l)->attribute->name == a->name ) - { gripe(ERC_REDEFINED, L"attribute", a->name); + { gripe(p, ERC_REDEFINED, L"attribute", a->name); a->references++; /* attempt to redefine attribute: */ free_attribute(a); /* first wins according to standard */ @@ -2401,7 +2410,7 @@ process_attlist_declaraction(dtd_parser *p, const ichar *decl) decl = iskip_layout(dtd, buf); DEBUG(printf("Expanded to %s\n", decl)); - if ( !(decl=itake_el_or_model_element_list(dtd, decl, eid, &en)) ) + if ( !(decl=itake_el_or_model_element_list(p, decl, eid, &en)) ) return FALSE; /* fetch attributes */ @@ -2410,9 +2419,9 @@ process_attlist_declaraction(dtd_parser *p, const ichar *decl) at->references = REFS_VIRGIN; /* name of attribute */ - if ( !(s = itake_name(dtd, decl, &at->name)) ) + if ( !(s = itake_name(p, decl, &at->name)) ) { free_attribute(at); - return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); } decl = s; @@ -2426,9 +2435,9 @@ process_attlist_declaraction(dtd_parser *p, const ichar *decl) for(;;) { dtd_symbol *nm; - if ( !(s = itake_nmtoken(dtd, decl, &nm)) ) + if ( !(s = itake_nmtoken(p, decl, &nm)) ) { free_attribute(at); - return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); } decl = s; add_name_list(&at->typeex.nameof, nm); @@ -2442,7 +2451,7 @@ process_attlist_declaraction(dtd_parser *p, const ichar *decl) break; } free_attribute(at); - return 
gripe(ERC_SYNTAX_ERROR, L"Illegal name-group", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Illegal name-group", decl); } } else if ( (s=isee_identifier(dtd, decl, "cdata")) ) { decl = s; @@ -2498,18 +2507,18 @@ process_attlist_declaraction(dtd_parser *p, const ichar *decl) at->type = AT_NOTATION; decl=s; - if ( (s=itake_namegroup(dtd, decl, ng, &ns)) ) + if ( (s=itake_namegroup(p, decl, ng, &ns)) ) { decl = s; for(i=0; itypeex.nameof, ng[i]); } else { free_attribute(at); - return gripe(ERC_SYNTAX_ERROR, L"name-group expected", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"name-group expected", decl); } } else { free_attribute(at); - return gripe(ERC_SYNTAX_ERROR, L"Attribute-type expected", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Attribute-type expected", decl); } /* Attribute Defaults */ @@ -2535,14 +2544,14 @@ process_attlist_declaraction(dtd_parser *p, const ichar *decl) { ichar buf[MAXSTRINGLEN]; ichar *start; int len; const ichar *end; - + if ( !(end=itake_string(dtd, decl, &start, &len)) ) - { end=itake_nmtoken_chars(dtd, decl, buf, sizeof(buf)/sizeof(ichar)); + { end=itake_nmtoken_chars(p, decl, buf, sizeof(buf)/sizeof(ichar)); start = buf; len = (int)istrlen(buf); } if ( !end ) - return gripe(ERC_SYNTAX_ERROR, L"Bad attribute default", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute default", decl); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Note: itake_name(), etc. work on nul-terminated strings. The result of @@ -2559,28 +2568,28 @@ length of the parsed data to verify we parsed all of it. 
case AT_ENTITY: case AT_NOTATION: case AT_NAME: - { if ( !(s=itake_name(dtd, start, &at->att_def.name)) || + { if ( !(s=itake_name(p, start, &at->att_def.name)) || (s-start) != len ) - return gripe(ERC_DOMAIN, L"name", decl); + return gripe(p, ERC_DOMAIN, L"name", decl); break; } case AT_NMTOKEN: case AT_NAMEOF: - { if ( !(s=itake_nmtoken(dtd, start, &at->att_def.name)) || + { if ( !(s=itake_nmtoken(p, start, &at->att_def.name)) || (s-start) != len ) - return gripe(ERC_DOMAIN, L"nmtoken", decl); + return gripe(p, ERC_DOMAIN, L"nmtoken", decl); break; } case AT_NUTOKEN: - { if ( !(s=itake_nutoken(dtd, start, &at->att_def.name)) || + { if ( !(s=itake_nutoken(p, start, &at->att_def.name)) || (s-start) != len ) - return gripe(ERC_DOMAIN, L"nutoken", decl); + return gripe(p, ERC_DOMAIN, L"nutoken", decl); break; } case AT_NUMBER: - { if ( !(s=itake_number(dtd, start, at)) || + { if ( !(s=itake_number(p, start, at)) || (s-start) != len ) - return gripe(ERC_DOMAIN, L"number", decl); + return gripe(p, ERC_DOMAIN, L"number", decl); break; } case AT_NAMES: @@ -2594,7 +2603,7 @@ length of the parsed data to verify we parsed all of it. } default: { free_attribute(at); - return gripe(ERC_REPRESENTATION, L"No default for type"); + return gripe(p, ERC_REPRESENTATION, L"No default for type"); } } @@ -2606,7 +2615,7 @@ length of the parsed data to verify we parsed all of it. for(i=0; i", env->element->name->name); - gripe(ERC_VALIDATE, buf); /* TBD: expected */ + gripe(p, ERC_VALIDATE, buf); /* TBD: expected */ } } @@ -2739,7 +2748,7 @@ free_environment(sgml_environment *env) { #ifdef XMLNS if ( env->xmlns ) - xmlns_free(env); + xmlns_free(env->xmlns); #endif sgml_free(env); @@ -2754,15 +2763,15 @@ initiated by pushing the element `e'. 
static int pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0) { sgml_environment *env, *parent; - + for(env = p->environments; env != to; env=parent) { dtd_element *e = env->element; - validate_completeness(env); + validate_completeness(p, env); parent = env->parent; - + if ( e->structure && !e->structure->omit_close ) - gripe(ERC_OMITTED_CLOSE, e->name->name); + gripe(p, ERC_OMITTED_CLOSE, e->name->name); if ( e0 != CDATA_ELEMENT ) emit_cdata(p, TRUE); @@ -2779,7 +2788,7 @@ pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0) } p->environments = to; p->map = to->map; - + return TRUE; } @@ -2835,14 +2844,14 @@ open_element(dtd_parser *p, dtd_element *e, int warn) if ( f && f != e ) { if ( !f->structure || !f->structure->omit_open ) - gripe(ERC_OMITTED_OPEN, f->name->name); + gripe(p, ERC_OMITTED_OPEN, f->name->name); WITH_CLASS(p, EV_OMITTED, { open_element(p, f, TRUE); if ( p->on_begin_element ) { sgml_attribute atts[MAXATTRIBUTES]; int natts = 0; - + if ( !(p->flags & SGML_PARSER_NODEFS) ) natts = add_default_attributes(p, f, natts, atts); @@ -2861,12 +2870,12 @@ open_element(dtd_parser *p, dtd_element *e, int warn) if ( file ) { dtd_parser *clone = clone_dtd_parser(p); - gripe(ERC_NO_DOCTYPE, e->name->name, file); + gripe(p, ERC_NO_DOCTYPE, e->name->name, file); if ( load_dtd_from_file(clone, file) ) p->dtd->doctype = istrdup(e->name->name); else - gripe(ERC_EXISTENCE, L"file", file); + gripe(p, ERC_EXISTENCE, L"file", file); free_dtd_parser(clone); } @@ -2884,7 +2893,7 @@ open_element(dtd_parser *p, dtd_element *e, int warn) if ( env->element->structure && env->element->structure->type == C_ANY ) { if ( e != CDATA_ELEMENT && e->undefined ) - gripe(ERC_EXISTENCE, L"Element", e->name->name); + gripe(p, ERC_EXISTENCE, L"Element", e->name->name); push_element(p, e, FALSE); return TRUE; } @@ -2895,12 +2904,12 @@ open_element(dtd_parser *p, dtd_element *e, int warn) return TRUE; case IE_EXCLUDED: if ( warn ) - gripe(ERC_NOT_ALLOWED, e->name->name); 
+ gripe(p, ERC_NOT_ALLOWED, e->name->name); /*FALLTHROUGH*/ case IE_NORMAL: for(; env; env=env->parent) { dtd_state *new; - + if ( (new = make_dtd_transition(env->state, e)) ) { env->state = new; pop_to(p, env, e); @@ -2910,7 +2919,7 @@ open_element(dtd_parser *p, dtd_element *e, int warn) { dtd_element *oe[MAXOMITTED]; /* omitted open */ int olen; int i; - + if ( (olen=find_omitted_path(env->state, e, oe)) > 0 ) { pop_to(p, env, e); WITH_CLASS(p, EV_OMITTED, @@ -2932,11 +2941,11 @@ open_element(dtd_parser *p, dtd_element *e, int warn) if ( warn ) { if ( e == CDATA_ELEMENT ) - gripe(ERC_VALIDATE, L"#PCDATA not allowed here"); + gripe(p, ERC_VALIDATE, L"#PCDATA not allowed here"); else if ( e->undefined ) - gripe(ERC_EXISTENCE, L"Element", e->name->name); + gripe(p, ERC_EXISTENCE, L"Element", e->name->name); else - gripe(ERC_NOT_ALLOWED, e->name->name); + gripe(p, ERC_NOT_ALLOWED, e->name->name); } } @@ -2960,9 +2969,9 @@ close_element(dtd_parser *p, dtd_element *e, int conref) { dtd_element *ce = env->element; if ( !(conref && env == p->environments) ) - validate_completeness(env); + validate_completeness(p, env); parent = env->parent; - + p->first = FALSE; if ( p->on_end_element ) (*p->on_end_element)(p, env->element); @@ -2974,13 +2983,13 @@ close_element(dtd_parser *p, dtd_element *e, int conref) return TRUE; } else /* omited close */ { if ( ce->structure && !ce->structure->omit_close ) - gripe(ERC_OMITTED_CLOSE, ce->name->name); + gripe(p, ERC_OMITTED_CLOSE, ce->name->name); } } } } - return gripe(ERC_NOT_OPEN, e->name->name); + return gripe(p, ERC_NOT_OPEN, e->name->name); } @@ -2988,12 +2997,12 @@ static int close_current_element(dtd_parser *p) { if ( p->environments ) { dtd_element *e = p->environments->element; - + emit_cdata(p, TRUE); return close_element(p, e, FALSE); } - return gripe(ERC_SYNTAX_ERROR, L"No element to close", ""); + return gripe(p, ERC_SYNTAX_ERROR, L"No element to close", ""); } @@ -3116,7 +3125,7 @@ get_attribute_value(dtd_parser *p, 
ichar const *decl, sgml_attribute *att) *d = '\0'; } } else - { end = itake_unquoted(dtd, decl, tmp, sizeof(tmp)/sizeof(ichar)); + { end = itake_unquoted(p, decl, tmp, sizeof(tmp)/sizeof(ichar)); if (end == NULL) return NULL; @@ -3131,7 +3140,7 @@ get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att) } } if ( token == YET_EMPTY || (token & ANY_OTHER) != 0) - gripe(ERC_SYNTAX_WARNING, L"Attribute value requires quotes", buf); + gripe(p, ERC_SYNTAX_WARNING, L"Attribute value requires quotes", buf); if (!dtd->case_sensitive && att->definition->type != AT_CDATA) istrlower(buf); @@ -3140,7 +3149,7 @@ get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att) switch (att->definition->type) { case AT_NUMBER: /* number */ if (token != DIG_FIRST) - { gripe(ERC_SYNTAX_WARNING, L"NUMBER expected", decl); + { gripe(p, ERC_SYNTAX_WARNING, L"NUMBER expected", decl); } else if (dtd->number_mode == NU_INTEGER) { (void) istrtol(buf, &att->value.number); } else @@ -3157,12 +3166,12 @@ get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att) case AT_NAME: /* name token */ case AT_NOTATION: /* notation-name */ if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0) - gripe(ERC_SYNTAX_WARNING, L"NAME expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"NAME expected", decl); break; case AT_NAMEOF: /* one of these names */ case AT_NMTOKEN: /* name-token */ if (token == YET_EMPTY || (token & ANY_OTHER) != 0) - gripe(ERC_SYNTAX_WARNING, L"NMTOKEN expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"NMTOKEN expected", decl); if ( att->definition->type == AT_NAMEOF ) { dtd_name_list *nl; @@ -3170,37 +3179,37 @@ get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att) { if ( istreq(nl->value->name, buf) ) goto passed; } - gripe(ERC_SYNTAX_WARNING, L"unexpected value", decl); + gripe(p, ERC_SYNTAX_WARNING, L"unexpected value", decl); } break; case AT_NUTOKEN: /* number token */ if ((token & (NAM_FIRST | ANY_OTHER)) 
!= 0) - gripe(ERC_SYNTAX_WARNING, L"NUTOKEN expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"NUTOKEN expected", decl); break; case AT_ENTITY: /* entity-name */ if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0) - gripe(ERC_SYNTAX_WARNING, L"entity NAME expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"entity NAME expected", decl); break; case AT_NAMES: /* list of names */ case AT_IDREFS: /* list of identifier references */ if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0) - gripe(ERC_SYNTAX_WARNING, L"NAMES expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"NAMES expected", decl); break; case AT_ENTITIES: /* entity-name list */ if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0) - gripe(ERC_SYNTAX_WARNING, L"entity NAMES expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"entity NAMES expected", decl); break; case AT_NMTOKENS: /* name-token list */ if (token == YET_EMPTY || (token & ANY_OTHER) != 0) - gripe(ERC_SYNTAX_WARNING, L"NMTOKENS expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"NMTOKENS expected", decl); break; case AT_NUMBERS: /* number list */ if (token != DIG_FIRST) - gripe(ERC_SYNTAX_WARNING, L"NUMBERS expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"NUMBERS expected", decl); break; case AT_NUTOKENS: if ((token & (NAM_FIRST | ANY_OTHER)) != 0) - gripe(ERC_SYNTAX_WARNING, L"NUTOKENS expected", decl); + gripe(p, ERC_SYNTAX_WARNING, L"NUTOKENS expected", decl); break; default: assert(0); @@ -3209,7 +3218,7 @@ get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att) passed: att->value.textW = istrdup(buf); /* TBD: more validation */ - att->value.number = (long)istrlen(buf); + att->value.number = (long)istrlen(buf); return end; } @@ -3225,14 +3234,14 @@ process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl, { dtd_symbol *nm; const ichar *s; - if ( (s=itake_nmtoken(dtd, decl, &nm)) ) + if ( (s=itake_nmtoken(p, decl, &nm)) ) { decl = s; if ( (s=isee_func(dtd, decl, CF_VI)) ) /* 
name= */ { dtd_attr *a; if ( !HasClass(dtd, nm->name[0], CH_NMSTART) ) - gripe(ERC_SYNTAX_WARNING, + gripe(p, ERC_SYNTAX_WARNING, "Illegal start of attribute-name", decl); decl = s; @@ -3242,13 +3251,13 @@ process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl, a->name = nm; a->type = AT_CDATA; a->def = AT_IMPLIED; - add_attribute(dtd, e, a); + add_attribute(p, e, a); if ( !e->undefined && !(dtd->dialect != DL_SGML && (istreq(L"xmlns", nm->name) || istrprefix(L"xmlns:", nm->name))) ) - gripe(ERC_NO_ATTRIBUTE, e->name->name, nm->name); + gripe(p, ERC_NO_ATTRIBUTE, e->name->name, nm->name); } atts[attn].definition = a; if ( (decl=get_attribute_value(p, decl, atts+attn)) ) @@ -3267,7 +3276,7 @@ process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl, for(nl=a->typeex.nameof; nl; nl = nl->next) { if ( nl->value == nm ) { if ( dtd->dialect != DL_SGML ) - gripe(ERC_SYNTAX_WARNING, + gripe(p, ERC_SYNTAX_WARNING, "Value short-hand in XML mode", decl); atts[attn].flags = 0; atts[attn].definition = a; @@ -3279,17 +3288,17 @@ process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl, } } } - gripe(ERC_NO_ATTRIBUTE_VALUE, e->name->name, nm->name); + gripe(p, ERC_NO_ATTRIBUTE_VALUE, e->name->name, nm->name); decl = s; } else - { gripe(ERC_SYNTAX_ERROR, L"Bad attribute", decl); + { gripe(p, ERC_SYNTAX_ERROR, L"Bad attribute", decl); decl = s; } } else { *argc = attn; return decl; } - + next: ; } @@ -3393,12 +3402,13 @@ process_begin_element(dtd_parser *p, const ichar *decl) dtd_symbol *id; const ichar *s; - if ( (s=itake_name(dtd, decl, &id)) ) + if ( (s=itake_name(p, decl, &id)) ) { sgml_attribute atts[MAXATTRIBUTES]; int natts; dtd_element *e = find_element(dtd, id); int empty = FALSE; int conref = FALSE; + int rc = TRUE; if ( !e->structure ) { dtd_edef *def; @@ -3437,7 +3447,7 @@ process_begin_element(dtd_parser *p, const ichar *decl) } } if ( *decl ) - gripe(ERC_SYNTAX_ERROR, L"Bad attribute list", decl); + gripe(p, ERC_SYNTAX_ERROR, L"Bad 
attribute list", decl); if ( !(p->flags & SGML_PARSER_NODEFS) ) natts = add_default_attributes(p, e, natts, atts); @@ -3452,7 +3462,7 @@ process_begin_element(dtd_parser *p, const ichar *decl) p->empty_element = NULL; if ( p->on_begin_element ) - (*p->on_begin_element)(p, e, natts, atts); + rc = (*p->on_begin_element)(p, e, natts, atts); free_attribute_values(natts, atts); @@ -3463,10 +3473,10 @@ process_begin_element(dtd_parser *p, const ichar *decl) p->cdata_state = p->state = S_PCDATA; } - return TRUE; + return rc; } - return gripe(ERC_SYNTAX_ERROR, L"Bad open-element tag", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Bad open-element tag", decl); } @@ -3475,15 +3485,15 @@ process_end_element(dtd_parser *p, const ichar *decl) { dtd *dtd = p->dtd; dtd_symbol *id; const ichar *s; - + emit_cdata(p, TRUE); - if ( (s=itake_name(dtd, decl, &id)) && *s == '\0' ) + if ( (s=itake_name(p, decl, &id)) && *s == '\0' ) return close_element(p, find_element(dtd, id), FALSE); if ( p->dtd->shorttag && *decl == '\0' ) /* : close current element */ return close_current_element(p); - return gripe(ERC_SYNTAX_ERROR, L"Bad close-element tag", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Bad close-element tag", decl); } @@ -3502,7 +3512,7 @@ process_net(dtd_parser *p) { sgml_environment *parent; pop_to(p, env, NULL); /* close parents */ - validate_completeness(env); + validate_completeness(p, env); parent = env->parent; emit_cdata(p, TRUE); @@ -3532,8 +3542,8 @@ process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0) const ichar *s; dtd_entity *et = NULL; - if ( !(s=itake_name(dtd, decl, &id)) ) - return gripe(ERC_SYNTAX_ERROR, L"Name expected", decl); + if ( !(s=itake_name(p, decl, &id)) ) + return gripe(p, ERC_SYNTAX_ERROR, L"Name expected", decl); decl = s; if ( (s=isee_identifier(dtd, decl, "system")) ) @@ -3568,11 +3578,11 @@ process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0) dtd->dialect != DL_SGML)); if ( !file ) - { gripe(ERC_EXISTENCE, L"DTD", 
dtd->doctype); + { gripe(p, ERC_EXISTENCE, L"DTD", dtd->doctype); } else { clone = clone_dtd_parser(p); if ( !load_dtd_from_file(clone, file) ) - gripe(ERC_EXISTENCE, L"file", file); + gripe(p, ERC_EXISTENCE, L"file", file); free_dtd_parser(clone); sgml_free(file); } @@ -3719,13 +3729,13 @@ set_encoding(dtd_parser *p, const ichar *enc) { *o++ = (char)*i++; } else { goto error; - } + } } *o = '\0'; if ( !xml_set_encoding(p, buf) ) { error: - gripe(ERC_EXISTENCE, L"character encoding", enc); + gripe(p, ERC_EXISTENCE, L"character encoding", enc); } } @@ -3756,7 +3766,7 @@ process_pi(dtd_parser *p, const ichar *decl) while(*decl) { dtd_symbol *nm; - if ( (s=itake_name(dtd, decl, &nm)) && + if ( (s=itake_name(p, decl, &nm)) && (s=isee_func(dtd, s, CF_VI)) ) /* = */ { ichar *start; int len; @@ -3764,7 +3774,7 @@ process_pi(dtd_parser *p, const ichar *decl) const ichar *end; if ( !(end=itake_string(dtd, s, &start, &len)) ) - { end=itake_nmtoken_chars(dtd, s, buf, sizeof(buf)/sizeof(ichar)); + { end=itake_nmtoken_chars(p, s, buf, sizeof(buf)/sizeof(ichar)); start = buf; len = (int)istrlen(buf); } @@ -3781,7 +3791,7 @@ process_pi(dtd_parser *p, const ichar *decl) set_encoding(p, tmp); } else - { gripe(ERC_SYNTAX_ERROR, L"Unterminated encoding?", decl); + { gripe(p, ERC_SYNTAX_ERROR, L"Unterminated encoding?", decl); } } @@ -3791,7 +3801,7 @@ process_pi(dtd_parser *p, const ichar *decl) } } - gripe(ERC_SYNTAX_ERROR, L"Illegal XML parameter", decl); + gripe(p, ERC_SYNTAX_ERROR, L"Illegal XML parameter", decl); break; } @@ -3807,7 +3817,7 @@ process_pi(dtd_parser *p, const ichar *decl) static int process_sgml_declaration(dtd_parser *p, const ichar *decl) -{ return gripe(ERC_SYNTAX_WARNING, L"Ignored declaration", NULL); +{ return gripe(p, ERC_SYNTAX_WARNING, L"Ignored declaration", NULL); } @@ -3849,23 +3859,21 @@ process_declaration(dtd_parser *p, const ichar *decl) process_doctype(p, s, decl-1); } else { s = iskip_layout(dtd, decl); - + if ( *s ) - gripe(ERC_SYNTAX_ERROR, 
L"Invalid declaration", s); + gripe(p, ERC_SYNTAX_ERROR, L"Invalid declaration", s); } return TRUE; } - return gripe(ERC_SYNTAX_ERROR, L"Invalid declaration", decl); + return gripe(p, ERC_SYNTAX_ERROR, L"Invalid declaration", decl); } /******************************* * STREAM BINDING * *******************************/ -static dtd_parser *current_parser; /* For gripes */ - void set_file_dtd_parser(dtd_parser *p, input_type type, const ichar *name) { p->location.type = type; @@ -3897,7 +3905,7 @@ set_mode_dtd_parser(dtd_parser *p, data_mode m) dtd_parser * new_dtd_parser(dtd *dtd) { dtd_parser *p = sgml_calloc(1, sizeof(*p)); - + if ( !dtd ) dtd = new_dtd(NULL); dtd->references++; @@ -3920,7 +3928,7 @@ new_dtd_parser(dtd *dtd) static dtd_parser * clone_dtd_parser(dtd_parser *p) { dtd_parser *clone = sgml_calloc(1, sizeof(*p)); - + *clone = *p; clone->dtd->references++; clone->environments = NULL; @@ -3941,7 +3949,9 @@ void free_dtd_parser(dtd_parser *p) { free_icharbuf(p->buffer); free_ocharbuf(p->cdata); - +#ifdef XMLNS + xmlns_free(p->xmlns); +#endif free_dtd(p->dtd); sgml_free(p); @@ -3951,7 +3961,7 @@ free_dtd_parser(dtd_parser *p) static int process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s) { locbuf old; - + push_location(p, &old); set_src_dtd_parser(p, in, name); empty_icharbuf(p->buffer); /* dubious */ @@ -3982,13 +3992,13 @@ process_include(dtd_parser *p, const ichar *entity_name) { const ichar *text = entity_value(p, pe, NULL); if ( !text ) - return gripe(ERC_NO_VALUE, pe->name->name); + return gripe(p, ERC_NO_VALUE, pe->name->name); return process_chars(p, IN_ENTITY, entity_name, text); } } - - return gripe(ERC_EXISTENCE, L"parameter entity", entity_name); + + return gripe(p, ERC_EXISTENCE, L"parameter entity", entity_name); } @@ -4014,7 +4024,7 @@ process_marked_section(dtd_parser *p) { dtd_symbol *kwd; decl = buf; - if ( (s=itake_name(dtd, decl, &kwd)) && + if ( (s=itake_name(p, decl, &kwd)) && isee_func(dtd, s, CF_DSO) ) /* [ 
*/ { dtd_marked *m = sgml_calloc(1, sizeof(*m)); @@ -4064,7 +4074,7 @@ pop_marked_section(dtd_parser *p) sgml_free(m); p->mark_state = (p->marked ? p->marked->type : MS_INCLUDE); } -} +} /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -4101,7 +4111,7 @@ update_space_mode(dtd_parser *p, dtd_element *e, if ( m != SP_INHERIT ) p->environments->space_mode = m; else - gripe(ERC_EXISTENCE, L"xml:space-mode", atts->value.textW); + gripe(p, ERC_EXISTENCE, L"xml:space-mode", atts->value.textW); return; } @@ -4136,7 +4146,7 @@ emit_cdata(dtd_parser *p, int last) ocharbuf *cdata = p->cdata; int offset = 0; int size = cdata->size; - + if ( size == 0 ) return TRUE; /* empty or done */ @@ -4157,7 +4167,7 @@ emit_cdata(dtd_parser *p, int last) size--; c = fetch_ocharbuf(cdata, offset); } - + if ( HasClass(dtd, c, CH_RS) ) { inc_location(&p->startloc, c); offset++; @@ -4185,7 +4195,7 @@ emit_cdata(dtd_parser *p, int last) if ( p->environments->space_mode == SP_DEFAULT ) { int o = 0; int i; - + for(i=0; iblank_cdata ) { if ( p->cdata_must_be_empty ) - { gripe(ERC_NOT_ALLOWED_PCDATA, p->cdata); /* TBD: now passes buffer! */ + { gripe(p, ERC_NOT_ALLOWED_PCDATA, p->cdata); /* TBD: now passes buffer! 
*/ } cb_cdata(p, cdata, offset, size); } else if ( p->environments ) { sgml_environment *env = p->environments; dtd_state *new; - + /* If an element is not in the DTD we must */ /* assume mixed content and emit spaces */ @@ -4282,7 +4292,7 @@ emit_cdata(dtd_parser *p, int last) { cb_cdata(p, cdata, offset, size); } } - + pop_location(p, &locsafe); empty_cdata(p); @@ -4324,7 +4334,7 @@ prepare_cdata(dtd_parser *p) p->blank_cdata = blank; if ( !blank ) { if ( p->dmode == DM_DTD ) - gripe(ERC_SYNTAX_ERROR, L"CDATA in DTD", p->cdata->data); + gripe(p, ERC_SYNTAX_ERROR, L"CDATA in DTD", p->cdata->data); else open_element(p, CDATA_ELEMENT, TRUE); } @@ -4337,11 +4347,9 @@ prepare_cdata(dtd_parser *p) static int process_cdata(dtd_parser *p, int last) -{ int rc; +{ prepare_cdata(p); - WITH_PARSER(p, (prepare_cdata(p), rc=emit_cdata(p, last))); - - return rc; + return emit_cdata(p, last); } @@ -4351,7 +4359,7 @@ process_entity(dtd_parser *p, const ichar *name) { int v = char_entity_value(name); if ( v <= 0 ) - return gripe(ERC_SYNTAX_ERROR, L"Bad character entity", name); + return gripe(p, ERC_SYNTAX_ERROR, L"Bad character entity", name); add_ocharbuf(p->cdata, v); } else @@ -4369,7 +4377,7 @@ process_entity(dtd_parser *p, const ichar *name) { if ( dtd->default_entity ) e = dtd->default_entity; else - return gripe(ERC_EXISTENCE, L"entity", name); + return gripe(p, ERC_EXISTENCE, L"entity", name); } if ( !e->value && @@ -4384,21 +4392,21 @@ process_entity(dtd_parser *p, const ichar *name) } if ( !(text = entity_value(p, e, &len)) ) - return gripe(ERC_NO_VALUE, e->name->name); + return gripe(p, ERC_NO_VALUE, e->name->name); switch ( e->content ) { case EC_SGML: case EC_CDATA: if ( (s=isee_character_entity(dtd, text, &chr)) && *s == '\0' ) { if ( chr == 0 ) - return gripe(ERC_SYNTAX_ERROR, L"Illegal character entity", text); + return gripe(p, ERC_SYNTAX_ERROR, L"Illegal character entity", text); if ( p->blank_cdata == TRUE && !HasClass(dtd, (wint_t)chr, CH_BLANK) ) { 
p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE); p->blank_cdata = FALSE; } - + add_ocharbuf(p->cdata, chr); return TRUE; } @@ -4462,8 +4470,8 @@ Deal with end of input. We should give a proper error message depending on the state and the start-location of the error. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ -static int -end_document_dtd_parser_(dtd_parser *p) +int +end_document_dtd_parser(dtd_parser *p) { int rval; switch(p->state) @@ -4479,7 +4487,7 @@ end_document_dtd_parser_(dtd_parser *p) case S_DECLCMT0: case S_DECLCMT: case S_DECLCMTE0: - rval = gripe(ERC_SYNTAX_ERROR, + rval = gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end-of-file in comment", L""); break; case S_ECDATA1: @@ -4495,28 +4503,28 @@ end_document_dtd_parser_(dtd_parser *p) case S_PENT: case S_ENT: case S_ENT0: - rval = gripe(ERC_SYNTAX_ERROR, + rval = gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end-of-file", L""); break; #ifdef UTF8 case S_UTF8: - rval = gripe(ERC_SYNTAX_ERROR, + rval = gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end-of-file in UTF-8 sequence", L""); break; #endif case S_MSCDATA: case S_EMSCDATA1: case S_EMSCDATA2: - rval = gripe(ERC_SYNTAX_ERROR, + rval = gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end-of-file in CDATA marked section", L""); break; case S_PI: case S_PI2: - rval = gripe(ERC_SYNTAX_ERROR, + rval = gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end-of-file in processing instruction", L""); break; default: - rval = gripe(ERC_SYNTAX_ERROR, + rval = gripe(p, ERC_SYNTAX_ERROR, L"Unexpected end-of-file in ???"); break; } @@ -4539,7 +4547,7 @@ end_document_dtd_parser_(dtd_parser *p) pop_to(p, env, CDATA_ELEMENT); e = env->element; if ( e->structure && !e->structure->omit_close ) - gripe(ERC_OMITTED_CLOSE, e->name->name); + gripe(p, ERC_OMITTED_CLOSE, e->name->name); close_element(p, e, FALSE); } } @@ -4548,16 +4556,6 @@ end_document_dtd_parser_(dtd_parser *p) } -int -end_document_dtd_parser(dtd_parser *p) -{ int rval; - - WITH_PARSER(p, rval 
= end_document_dtd_parser_(p)); - - return rval; -} - - int begin_document_dtd_parser(dtd_parser *p) { init_decoding(p); @@ -4654,7 +4652,7 @@ add_cdata(dtd_parser *p, int chr) } add_ocharbuf(buf, chr); - + if ( p->map && chr <= 0xff && p->map->ends[chr] && match_shortref(p) ) @@ -4688,7 +4686,7 @@ add_verbatim_cdata(dtd_parser *p, int chr) if ( chr == '\n' && buf->size > 0 && fetch_ocharbuf(buf, buf->size-1) == '\r' ) buf->size--; - + add_ocharbuf(buf, chr); } } @@ -4719,30 +4717,30 @@ setlocation(dtd_srcloc *d, dtd_srcloc *loc, int line, int lpos) } -void +int putchar_dtd_parser(dtd_parser *p, int chr) { dtd *dtd = p->dtd; const ichar *f = dtd->charfunc->func; int line = p->location.line; int lpos = p->location.linepos; - + p->location.charpos++; /* TBD: actually `bytepos' */ #ifdef UTF8 if ( p->state == S_UTF8 ) { if ( (chr & 0xc0) != 0x80 ) /* TBD: recover */ - gripe(ERC_SYNTAX_ERROR, L"Bad UTF-8 sequence", L""); + gripe(p, ERC_SYNTAX_ERROR, L"Bad UTF-8 sequence", L""); p->utf8_char <<= 6; p->utf8_char |= (chr & ~0xc0); if ( --p->utf8_left == 0 ) { chr = p->utf8_char; p->state = p->utf8_saved_state; } else - { return; + { return TRUE; } } else if ( ISUTF8_MB(chr) && p->utf8_decode ) { process_utf8(p, chr); - return; + return TRUE; } #endif @@ -4763,34 +4761,33 @@ reprocess: { setlocation(&p->startloc, &p->location, line, lpos); p->state = S_DECL0; empty_icharbuf(p->buffer); - return; + return TRUE; } if ( p->dmode == DM_DTD ) { if ( f[CF_PERO] == chr ) /* % */ { setlocation(&p->startloc, &p->location, line, lpos); p->state = S_PENT; - return; + return TRUE; } } else { if ( f[CF_ERO] == chr ) /* & */ { setlocation(&p->startloc, &p->location, line, lpos); p->state = S_ENT0; - return; + return TRUE; } } - + if ( p->marked && f[CF_DSC] == chr ) /* ] in marked section */ { empty_icharbuf(p->buffer); p->state = S_EMSC1; p->saved = chr; /* for recovery */ - return; + return TRUE; } if ( p->waiting_for_net && f[CF_ETAGO2] == chr ) /* shorttag */ { 
setlocation(&p->startloc, &p->location, line, lpos); - WITH_PARSER(p, - process_net(p)); - return; + process_net(p); + return TRUE; } /* Real character data */ @@ -4798,7 +4795,7 @@ reprocess: setlocation(&p->startcdata, &p->location, line, lpos); add_cdata(p, chr); - return; + return TRUE; } case S_ECDATA2: /* Seen cdata); terminate_icharbuf(p->buffer); if ( p->mark_state == MS_INCLUDE ) - { WITH_PARSER(p, - process_cdata(p, TRUE); - process_end_element(p, p->buffer->data)); + { process_cdata(p, TRUE); + process_end_element(p, p->buffer->data); empty_cdata(p); } empty_icharbuf(p->buffer); @@ -4824,7 +4820,7 @@ reprocess: } else add_icharbuf(p->buffer, chr); } - return; + return TRUE; } case S_ECDATA1: /* seen < in CDATA */ { add_verbatim_cdata(p, chr); @@ -4833,13 +4829,13 @@ reprocess: p->state = S_ECDATA2; } else if ( f[CF_ETAGO1] != chr ) /* <: do not change state */ p->state = p->cdata_state; - return; + return TRUE; } case S_RCDATA: { if ( f[CF_ERO] == chr ) /* & */ { setlocation(&p->startloc, &p->location, line, lpos); p->state = S_ENT0; - return; + return TRUE; } /*FALLTHROUGH*/ } @@ -4858,22 +4854,21 @@ reprocess: terminate_ocharbuf(p->cdata); terminate_icharbuf(p->buffer); if ( p->mark_state == MS_INCLUDE ) - { WITH_PARSER(p, - process_cdata(p, TRUE); - process_net(p)); + { process_cdata(p, TRUE); + process_net(p); empty_cdata(p); } empty_icharbuf(p->buffer); p->cdata_state = p->state = S_PCDATA; } - return; + return TRUE; } case S_MSCDATA: { add_verbatim_cdata(p, chr); if ( f[CF_DSC] == chr ) /* ] */ p->state = S_EMSCDATA1; - return; + return TRUE; } case S_EMSCDATA1: { add_verbatim_cdata(p, chr); @@ -4881,7 +4876,7 @@ reprocess: p->state = S_EMSCDATA2; else p->state = S_MSCDATA; - return; + return TRUE; } case S_EMSCDATA2: { add_verbatim_cdata(p, chr); @@ -4891,27 +4886,27 @@ reprocess: p->state = S_PCDATA; } else if ( f[CF_DSC] != chr ) /* if ]]], stay in this state */ p->state = S_MSCDATA; - return; + return TRUE; } case S_EMSC1: { if ( f[CF_DSC] == 
chr ) /* ]] in marked section */ { p->state = S_EMSC2; - return; + return TRUE; } else { add_icharbuf(p->buffer, chr); recover_parser(p); - return; + return TRUE; } } case S_EMSC2: { if ( f[CF_MDC] == chr ) /* ]]> in marked section */ { pop_marked_section(p); p->state = S_PCDATA; - return; + return TRUE; } else { add_icharbuf(p->buffer, chr); recover_parser(p); - return; + return TRUE; } } case S_PENT: /* %parameter entity; */ @@ -4919,19 +4914,19 @@ reprocess: { p->state = S_PCDATA; terminate_icharbuf(p->buffer); if ( p->mark_state == MS_INCLUDE ) - { WITH_PARSER(p, process_include(p, p->buffer->data)); + { process_include(p, p->buffer->data); } empty_icharbuf(p->buffer); - return; + return TRUE; } if ( HasClass(dtd, (wint_t)chr, CH_NAME) ) { add_icharbuf(p->buffer, chr); - return; + return TRUE; } terminate_icharbuf(p->buffer); - gripe(ERC_SYNTAX_ERROR, L"Illegal parameter entity", p->buffer->data); - break; + return gripe(p, ERC_SYNTAX_ERROR, + L"Illegal parameter entity", p->buffer->data); } case S_ENT0: /* Seen & */ { if ( chr == '#' || HasClass(dtd, (wint_t)chr, CH_NAME) ) @@ -4944,7 +4939,7 @@ reprocess: buf[0] = '&'; buf[1] = chr; buf[2] = '\0'; - gripe(ERC_SYNTAX_ERROR, L"Illegal entity", buf); + gripe(p, ERC_SYNTAX_ERROR, L"Illegal entity", buf); } add_cdata(p, f[CF_ERO]); @@ -4952,34 +4947,34 @@ reprocess: goto reprocess; } - return; - } + return TRUE; + } case S_ENT: /* &entity; */ { if ( HasClass(dtd, (wint_t)chr, CH_NAME) ) { add_icharbuf(p->buffer, chr); - return; + return TRUE; } terminate_icharbuf(p->buffer); p->state = p->cdata_state; if ( p->mark_state == MS_INCLUDE ) - { WITH_PARSER(p, process_entity(p, p->buffer->data)); + { process_entity(p, p->buffer->data); } empty_icharbuf(p->buffer); - + if ( chr == CR ) p->state = S_ENTCR; else if ( f[CF_ERC] != chr && chr != '\n' ) goto reprocess; - break; + return TRUE; } case S_ENTCR: /* seen &entCR, eat the LF */ { p->state = p->cdata_state; if ( chr != LF ) goto reprocess; - break; + return TRUE; } 
case S_DECL0: /* Seen < */ { if ( f[CF_ETAGO2] == chr ) /* state = S_PCDATA; } - return; + return TRUE; } case S_MDECL0: /* Seen state = S_CMTO; - return; + return TRUE; } add_icharbuf(p->buffer, f[CF_MDO2]); add_icharbuf(p->buffer, chr); p->state = S_DECL; - return; + return TRUE; } case S_DECL: /* <...> */ { if ( f[CF_MDC] == chr ) /* > */ @@ -5016,10 +5011,10 @@ reprocess: p->state = S_PCDATA; terminate_icharbuf(p->buffer); if ( p->mark_state == MS_INCLUDE ) - { WITH_PARSER(p, process_declaration(p, p->buffer->data)); + { process_declaration(p, p->buffer->data); } empty_icharbuf(p->buffer); - return; + return TRUE; } if ( dtd->shorttag && f[CF_ETAGO2] == chr && p->buffer->size > 0 ) { prepare_cdata(p); @@ -5027,11 +5022,11 @@ reprocess: terminate_icharbuf(p->buffer); if ( p->mark_state == MS_INCLUDE ) { WITH_CLASS(p, EV_SHORTTAG, - WITH_PARSER(p, process_declaration(p, p->buffer->data))); + process_declaration(p, p->buffer->data)); } empty_icharbuf(p->buffer); p->waiting_for_net = TRUE; - return; + return TRUE; } add_icharbuf(p->buffer, chr); @@ -5044,7 +5039,7 @@ reprocess: { p->state = S_STRING; p->saved = chr; p->lit_saved_state = S_DECL; - return; + return TRUE; } else if ( f[CF_CMT] == chr && /* - */ p->buffer->data[0] == f[CF_MDO2] ) /* Started state = S_DECLCMT0; @@ -5054,7 +5049,7 @@ reprocess: process_marked_section(p); } - break; + return TRUE; } case S_DECLCMT0: /* <...- */ { if ( f[CF_CMT] == chr ) @@ -5064,19 +5059,19 @@ reprocess: { add_icharbuf(p->buffer, chr); p->state = S_DECL; } - break; + return TRUE; } case S_DECLCMT: /* <...--.. */ { if ( f[CF_CMT] == chr ) p->state = S_DECLCMTE0; - break; + return TRUE; } case S_DECLCMTE0: /* <...--..- */ { if ( f[CF_CMT] == chr ) p->state = S_DECL; else p->state = S_DECLCMT; - break; + return TRUE; } case S_PI: { add_icharbuf(p->buffer, chr); @@ -5084,7 +5079,7 @@ reprocess: p->state = S_PI2; if ( f[CF_PRC] == chr ) /* no ? 
is ok too (XML/SGML) */ goto pi; - return; + return TRUE; } case S_PI2: { if ( f[CF_PRC] == chr ) @@ -5094,53 +5089,53 @@ reprocess: p->buffer->size--; terminate_icharbuf(p->buffer); if ( p->mark_state == MS_INCLUDE ) - { WITH_PARSER(p, process_pi(p, p->buffer->data)); + { process_pi(p, p->buffer->data); } empty_icharbuf(p->buffer); - return; + return TRUE; } add_icharbuf(p->buffer, chr); p->state = S_PI; - return; + return TRUE; } case S_STRING: { add_icharbuf(p->buffer, chr); if ( chr == p->saved ) p->state = p->lit_saved_state; - break; + return TRUE; } case S_CMTO: /* Seen state = S_CMT1; - return; + return TRUE; } else { add_cdata(p, f[CF_MDO1]); add_cdata(p, f[CF_MDO2]); add_cdata(p, f[CF_CMT]); add_cdata(p, chr); p->state = S_PCDATA; - return; + return TRUE; } } case S_CMT1: /* \\ */ wputc(x, f); @@ -352,7 +352,7 @@ mb2wc(const char *s) return ws; } - + perror("mbstowcs"); exit(1); } diff --git a/packages/sgml/sgml.doc b/packages/sgml/sgml.doc index 999cbec6b..bc12f8b5b 100644 --- a/packages/sgml/sgml.doc +++ b/packages/sgml/sgml.doc @@ -26,7 +26,7 @@ Markup languages are an increasingly important method for data-representation and exchange. This article documents the package \pllib{sgml}, a foreign library for SWI-Prolog to parse SGML and XML documents, returning information on both the document and the -document's DTD. The parser is designed to be small, fast and flexible. +document's DTD. The parser is designed to be small, fast and flexible. \end{abstract} \pagebreak @@ -56,17 +56,17 @@ The parser described in this document is small (less than 100 kBytes executable on a Pentium), fast (between 2 and 5 times faster than SP), provides access to the DTD, and provides flexible input handling. -The document output is equal to the output produced by \jargon{xml2pl}, +The document output is equal to the output produced by \jargon{xml2pl}, an SP interface to SWI-Prolog written by Anjo Anjewierden. 
\section{Bluffer's Guide} -This package allows you to parse SGML, XML and HTML data into a Prolog -data structure. The high-level interface defined in \pllib{sgml} +This package allows you to parse SGML, XML and HTML data into a Prolog +data structure. The high-level interface defined in \pllib{sgml} provides access at the file-level, while the low-level interface defined -in the foreign module works with Prolog streams. Please use the source -of \file{sgml.pl} as a starting point for dealing with data from +in the foreign module works with Prolog streams. Please use the source +of \file{sgml.pl} as a starting point for dealing with data from other sources than files, such as SWI-Prolog resources, network-sockets, character strings, \emph{etc.} The first example below loads an HTML file. @@ -123,9 +123,9 @@ This is called `omitted-tag' handling. ]. \end{code} -The document is represented as a list, each element being an atom to +The document is represented as a list, each element being an atom to represent \const{CDATA} or a term \term{element}{Name, Attributes, Content}. -Entities (e.g. \verb$<$) are expanded and included in the +Entities (e.g. \verb$<$) are expanded and included in the atom representing the element content or attribute value.% \footnote{Up to SWI-Prolog 5.4.x, Prolog could not represent \jargon{wide} characters and entities that did not fit in @@ -141,23 +141,24 @@ self-contained files in SGML, HTML, or XML into a structured term. They are based on load_structure/3. \begin{description} - \predicate{load_sgml_file}{2}{+File, -ListOfContent} -Same as \term{load_structure}{File, ListOfContent, [dialect(sgml)]}. + \predicate{load_sgml_file}{2}{+Source, -ListOfContent} +Same as \term{load_structure}{Source, ListOfContent, [dialect(sgml)]}. - \predicate{load_xml_file}{2}{+File, -ListOfContent} -Same as \term{load_structure(File, ListOfContent, [dialect(xml)]}. 
+ \predicate{load_xml_file}{2}{+Source, -ListOfContent} +Same as \term{load_structure}{Source, ListOfContent, [dialect(xml)]}. - \predicate{load_html_file}{2}{+File, -Content} -Load \arg{File} and parse as HTML. Implemented as below. Note that -load_html_file/2 re-uses a cached DTD object as defined by dtd/2. As DTD -objects may be corrupted while loading errornous documents sharing is -undesirable if the documents are not known to be correct. See dtd/2 for -details. + \predicate{load_html_file}{2}{+Source, -Content} +Load \arg{Source} and parse as HTML. \arg{Source} is either the +name of a file or a term \term{stream}{Handle}. Implemented as +below. Note that load_html_file/2 re-uses a cached DTD object as defined +by dtd/2. As DTD objects may be corrupted while loading erroneous +documents, sharing is undesirable if the documents are not known to be +correct. See dtd/2 for details. \begin{code} -load_html_file(File, Term) :- +load_html_file(Source, Term) :- dtd(html, DTD), - load_structure(File, Term, + load_structure(Source, Term, [ dtd(DTD), dialect(sgml), shorttag(false) @@ -171,8 +172,8 @@ load_html_file(File, Term) :- \subsection{Loading Structured Documents} SGML or XML files are loaded through the common predicate -load_structure/3. This is a predicate with many options. For -simplicity a number of commonly used shorthands are provided: +load_structure/3. This is a predicate with many options. For +simplicity a number of commonly used shorthands are provided: load_sgml_file/2, load_xml_file/2, and load_html_file/2. @@ -184,18 +185,18 @@ Parse \arg{Source} and return the resulting structure in options controlling the conversion process. A proper XML document contains only a single toplevel element whose name -matches the document type. Nevertheless, a list is returned for +matches the document type. Nevertheless, a list is returned for consistency with the representation of element content. The $), $ instruction is handled internally.
+\verb$$ instruction is handled internally. \end{description} -The \arg{Options} list controls the conversion process. Currently +The \arg{Options} list controls the conversion process. Currently defined options are: \begin{description} \termitem{dtd}{?DTD} Reference to a DTD object. If specified, the \verb$$ -declaration is ignored and the document is parsed and validated against +declaration is ignored and the document is parsed and validated against the provided DTD. If provided as a variable, the created DTD is returned. See \secref{implicitdtd}. \termitem{dialect}{+Dialect} -Specify the parsing dialect. Supported are \const{sgml} (default), \const{xml} +Specify the parsing dialect. Supported are \const{sgml} (default), \const{xml} and \const{xmlns}. See \secref{xml} for details on the differences. \termitem{shorttag}{+Bool} @@ -272,14 +273,14 @@ Defines (overwrites) an entity definition. At the moment, only entity options are allowed. \termitem{file}{+Name} -Sets the name of the file on which errors are reported. Sets the +Sets the name of the file on which errors are reported. Sets the linenumber to 1. \termitem{line}{+Line} Sets the starting line-number for reporting errors. \termitem{max_errors}{+Max} -Sets the maximum number of errors. If this number is reached, an +Sets the maximum number of errors. If this number is reached, an exception of the format below is raised. The default is 50. Using \term{max_errors}{-1} makes the parser continue, no matter how many errors it encounters. @@ -303,26 +304,26 @@ modes are: \termitem{space}{sgml} In SGML, newlines at the start and end of an element are removed.In addition, newlines at the end of lines containing only markup should be -deleted. This is not yet implemented. This is the default mode for -the SGML dialect. +deleted. This is not yet implemented. This is the default mode for +the SGML dialect. \termitem{space}{preserve} White space is passed literally to the application. 
This mode leaves all white space handling to the application. This is the default mode for -the XML dialect. +the XML dialect. \termitem{space}{default} -In addition to \const{sgml} space-mode, all consequtive white-space is -reduced to a single space-character. This mode canonises all white -space. +In addition to \const{sgml} space-mode, all consecutive white-space is +reduced to a single space-character. This mode canonises all white +space. \termitem{space}{remove} -In addition to \const{default}, all leading and trailing white-space is -removed from \const{CDATA} objects. If, as a result, the \const{CDATA} -becomes empty, nothing is passed to the application. This mode is -especially handy for processing `data-oriented' documents, such as RDF. -It is not suitable for normal text documents. Consider the HTML -fragment below. When processed in this mode, the spaces between the +In addition to \const{default}, all leading and trailing white-space is +removed from \const{CDATA} objects. If, as a result, the \const{CDATA} +becomes empty, nothing is passed to the application. This mode is +especially handy for processing `data-oriented' documents, such as RDF. +It is not suitable for normal text documents. Consider the HTML +fragment below. When processed in this mode, the spaces between the three modified words are lost. This mode is not part of any standard; XML 1.0 allows only \const{default} and \const{preserve}. @@ -333,9 +334,9 @@ Consider adjacent bold
    and
italic words. \subsection{XML documents} \label{sec:xml} -The parser can operate in two modes: \const{sgml} mode and \const{xml} mode, as -defined by the \term{dialect}{Dialect} option. Regardless of this -option, if the first line of the document reads as below, the parser is +The parser can operate in two modes: \const{sgml} mode and \const{xml} mode, as +defined by the \term{dialect}{Dialect} option. Regardless of this +option, if the first line of the document reads as below, the parser is switched automatically into XML mode. \begin{code} @@ -346,21 +347,21 @@ Currently switching to XML mode implies: \begin{itemlist} \item [XML empty elements] -The construct \verb$$ is recognised as -an empty element. +The construct \verb$$ is recognised as +an empty element. \item [Predefined entities] The following entitities are predefined: \const{lt} (\verb$<$), \const{gt} -(\verb$>$), \const{amp} (\verb$&$), \const{apos} (\verb$'$) -and \const{quot} (\verb$"$). +(\verb$>$), \const{amp} (\verb$&$), \const{apos} (\verb$'$) +and \const{quot} (\verb$"$). \item [Case sensitivity] -In XML mode, names are treated case-sensitive, except for the DTD -reserved names (i.e. \exam{ELEMENT}, \emph{etc.}). +In XML mode, names are treated case-sensitive, except for the DTD +reserved names (i.e. \exam{ELEMENT}, \emph{etc.}). \item [Character classes] In XML mode, underscores (\verb$_$) and colon (\verb$:$) are -allowed in names. +allowed in names. \item [White-space handling] White space mode is set to \const{preserve}. In addition to setting @@ -378,28 +379,28 @@ preserves space, regardless of the default processing mode. \subsubsection{XML Namespaces} \label{sec:xmlns} -Using the \jargon{dialect} \const{xmlns}, the parser will interpret XML -namespaces. In this case, the names of elements are returned as a term +Using the \jargon{dialect} \const{xmlns}, the parser will interpret XML +namespaces. 
In this case, the names of elements are returned as a term of the format \begin{quote} -\arg{URL}\const{:}\arg{LocalName} +\arg{URL}\const{:}\arg{LocalName} \end{quote} -If an identifier has no namespace and there is no default namespace it -is returned as a simple atom. If an identifier has a namespace but this -namespace is undeclared, the namespace name rather than the related URL +If an identifier has no namespace and there is no default namespace it +is returned as a simple atom. If an identifier has a namespace but this +namespace is undeclared, the namespace name rather than the related URL is returned. Attributes declaring namespaces ({\tt xmlns:=}) are reported as if \const{xmlns} were not a defined resource. -In many cases, getting attribute-names as \arg{url}:\arg{name} -is not desirable. Such terms are hard to unify and sometimes multiple -URLs may be mapped to the same identifier. This may happen due to poor -version management, poor standardisation or because the the application -doesn't care too much about versions. This package defines two -call-backs that can be set using set_sgml_parser/2 to deal +In many cases, getting attribute-names as \arg{url}:\arg{name} +is not desirable. Such terms are hard to unify and sometimes multiple +URLs may be mapped to the same identifier. This may happen due to poor +version management, poor standardisation or because the application +doesn't care too much about versions. This package defines two +call-backs that can be set using set_sgml_parser/2 to deal with this problem. The call-back \const{xmlns} is called as XML namespaces are noticed. @@ -428,6 +429,41 @@ load_rdf_xml(File, Term) :- ]). \end{code} +The library provides iri_xml_namespace/3 to break down an IRI into +its namespace and localname: + +\begin{description} + \predicate[det]{iri_xml_namespace}{3}{+IRI, -Namespace, -Localname} +Split an IRI (Unicode URI) into its \arg{Namespace} (an IRI) and +\arg{Localname} (a Unicode XML name, see xml_name/2).
The +\arg{Localname} is defined as the longest last part of the IRI that +satisfies the syntax of an XML name. With IRI schemas that are designed +to work with XML namespaces, this will typically break the IRI on the +last \chr{\#} or \chr{/}. Note however that this can produce unexpected +results. E.g., in the example below, one might expect the namespace to +be \url{http://example.com/images\#}, but an XML name cannot start with +a digit. + +\begin{code} +?- iri_xml_namespace('http://example.com/images#12345', NS, L). +NS = 'http://example.com/images#12345', +L = ''. +\end{code} + +As we see from the example above, the \arg{Localname} can be the empty +atom. Similarly, \arg{Namespace} can be the empty atom if \arg{IRI} is +an XML name. Applications will often have to check for either or both +these conditions. We decided against failing in these conditions because +the application typically wants to know which of the two conditions +(empty namespace or empty localname) holds. This predicate is often used +for generating RDF/XML from an RDF graph. + + \predicate[det]{iri_xml_namespace}{2}{+IRI, -Namespace} +Same as iri_xml_namespace/3, but avoids creating an atom for the +\arg{Localname}. +\end{description} + + \subsection{DTD-Handling} The DTD (\textbf{D}ocument \textbf{T}ype \textbf{D}efinition) is a @@ -438,7 +474,7 @@ predicates for handling the DTD. \begin{description} \predicate{new_dtd}{2}{+DocType, -DTD} -Creates an empty DTD for the named \arg{DocType}. The returned +Creates an empty DTD for the named \arg{DocType}. The returned DTD-reference is an opaque term that can be used in the other predicates of this package. @@ -468,7 +504,7 @@ Define the DTD dialect. Default is \const{sgml}. Using \const{xml} or \predicate{dtd}{2}{+DocType, -DTD} Find the DTD representing the indicated \jargon{doctype}. This predicate -uses a cache of DTD objects. If a doctype has no associated dtd, it +uses a cache of DTD objects. 
If a doctype has no associated dtd, it searches for a file using the file search path \exam{dtd} using the call: \begin{code} @@ -488,15 +524,15 @@ parse multiple documents should be restricted to situations where the documents processed are known to be error-free. \predicate{dtd_property}{2}{+DTD, ?Property} -This predicate is used to examine the content of a DTD. Property is one +This predicate is used to examine the content of a DTD. Property is one of: \begin{description} \termitem{doctype}{DocType} -An atom representing the document-type defined by this DTD. +An atom representing the document-type defined by this DTD. \termitem{elements}{ListOfElements} -A list of atoms representing the names of the elements in this DTD. +A list of atoms representing the names of the elements in this DTD. \termitem{element}{Name, Omit, Content} The DTD contains an element with the given name. \arg{Omit} is a term of @@ -508,7 +544,7 @@ form: \begin{description} \termitem{empty}{} -The element has no content. +The element has no content. \termitem{cdata}{} The element contains non-parsed character data. All data up to the @@ -524,30 +560,30 @@ any order. \termitem{\#pcdata}{} The element contains parsed character data . - \termitem{\arg{element}} An element with this name. + \termitem{\arg{element}} An element with this name. \termitem{*}{SubModel} -0 or more appearances. +0 or more appearances. \termitem{?}{SubModel} -0 or one appearance. +0 or one appearance. \termitem{+}{SubModel} -1 or more appearances. +1 or more appearances. \termitem{,}{SubModel1, SubModel2} -\arg{SubModel1} followed by \arg{SubModel2}. +\arg{SubModel1} followed by \arg{SubModel2}. \termitem{\&}{SubModel1, SubModel2} -\arg{SubModel1} and \arg{SubModel2} in any order. +\arg{SubModel1} and \arg{SubModel2} in any order. \termitem{\chr{|}}{SubModel1, SubModel2} -\arg{SubModel1} or \arg{SubModel2}. +\arg{SubModel1} or \arg{SubModel2}. 
\end{description} \termitem{attributes}{Element, ListOfAttributes} -\arg{ListOfAttributes} is a list of atoms representing the attributes -of the element \arg{Element}. +\arg{ListOfAttributes} is a list of atoms representing the attributes +of the element \arg{Element}. \termitem{attribute}{Element, Attribute, Type, Default} Query an element. \arg{Type} is one of \const{cdata}, \const{entity}, @@ -555,34 +591,34 @@ Query an element. \arg{Type} is one of \const{cdata}, \const{entity}, \const{notation}, \const{number} or \const{nutoken}. For DTD types that allow for a list, the notation \term{list}{Type} is used. Finally, the DTD construct \verb$(a|b|...)$ is mapped to the term -\term{nameof}{ListOfValues}. +\term{nameof}{ListOfValues}. \arg{Default} describes the sgml default. It is one \const{required}, \const{current}, \const{conref} or \const{implied}. If a real default is -present, it is one of \term{default}{Value} or \term{fixed}{Value}. +present, it is one of \term{default}{Value} or \term{fixed}{Value}. \termitem{entities}{ListOfEntities} -\arg{ListOfEntities} is a list of atoms representing the names of the -defined entities. +\arg{ListOfEntities} is a list of atoms representing the names of the +defined entities. \termitem{entity}{Name, Value} -\arg{Name} is the name of an entity with given value. Value is one of +\arg{Name} is the name of an entity with given value. Value is one of \begin{description} \termitem{\arg{Atom}}{} -If the value is atomic, it represents the literal value of the entity. +If the value is atomic, it represents the literal value of the entity. \termitem{system}{Url} -\arg{Url} is the URL of the system external entity. +\arg{Url} is the URL of the system external entity. \termitem{public}{Id, Url} -For external public entities, \arg{Id} is the identifier. If an URL is -provided this is returned in \arg{Url}. Otherwise this argument is -unbound. +For external public entities, \arg{Id} is the identifier. 
If an URL is +provided this is returned in \arg{Url}. Otherwise this argument is +unbound. \end{description} \termitem{notations}{ListOfNotations} -Returns a list holding the names of all \const{NOTATION} declarations. +Returns a list holding the names of all \const{NOTATION} declarations. \termitem{notation}{Name, Decl} Unify \arg{Decl} with a list if \term{system}{+File} and/or @@ -592,11 +628,11 @@ Unify \arg{Decl} with a list if \term{system}{+File} and/or \subsubsection{The DOCTYPE declaration} -As this parser allows for processing partial documents and process the +As this parser allows for processing partial documents and process the DTD separately, the DOCTYPE declaration plays a special role. -If a document has no DOCTYPE declaraction, the parser returns a list -holding all elements and CDATA found. If the document has a DOCTYPE +If a document has no DOCTYPE declaraction, the parser returns a list +holding all elements and CDATA found. If the document has a DOCTYPE declaraction, the parser will open the element defined in the DOCTYPE as soon as the first real data is encountered. @@ -632,53 +668,63 @@ elements_in_xml_document(File, Elements) :- \begin{description} \predicate{new_sgml_parser}{2}{-Parser, +Options} -Creates a new parser. A parser can be used one or multiple times for -parsing documents or parts thereof. It may be bound to a DTD or the DTD -may be left implicit, in which case it is created from the document +Creates a new parser. A parser can be used one or multiple times for +parsing documents or parts thereof. It may be bound to a DTD or the DTD +may be left implicit, in which case it is created from the document prologue or parsing is performed without a DTD. Options: \begin{description} \termitem{dtd}{?DTD} -If specified with an initialised DTD, this DTD is used for parsing the -document, regardless of the document prologue. If specified using as a -variable, a reference to the created DTD is returned. 
This DTD may be -created from the document prologue or build implicitely from the -document's content. +If specified with an initialised DTD, this DTD is used for parsing the +document, regardless of the document prologue. If specified as a +variable, a reference to the created DTD is returned. This DTD may be +created from the document prologue or built implicitly from the +document's content. \end{description} \predicate{free_sgml_parser}{1}{+Parser} -Destroy all resources related to the parser. This does not destroy the +Destroy all resources related to the parser. This does not destroy the DTD if the parser was created using the \term{dtd}{DTD} option. \predicate{set_sgml_parser}{2}{+Parser, +Option} -Sets attributes to the parser. Currently defined attributes: +Sets attributes to the parser. Currently defined attributes: \begin{description} \termitem{file}{File} -Sets the file for reporting errors and warnings. Sets the line to 1. +Sets the file for reporting errors and warnings. Sets the line to 1. \termitem{line}{Line} -Sets the current line. Useful if the stream is not at the start of the -(file) object for generating proper line-numbers. +Sets the current line. Useful if the stream is not at the start of the +(file) object for generating proper line-numbers. \termitem{charpos}{Offset} Sets the current character location. See also the \term{file}{File} option. \termitem{dialect}{Dialect} -Set the markup dialect. Known dialects: +Set the markup dialect. Known dialects: \begin{description} \termitem{sgml}{} -The default dialect is to process as SGML. This implies markup is -case-insensitive and standard SGML abbreviation is allowed (abreviated -attributes and omitted tags). +The default dialect is to process as SGML. This implies markup is +case-insensitive and standard SGML abbreviation is allowed (abbreviated +attributes and omitted tags). \termitem{xml}{} This dialect is selected automatically if the processing instruction -\verb$$ is encountered.
See \secref{xml} for details. +\verb$$ is encountered. See \secref{xml} for details. \termitem{xmlns}{} Process file as XML file with namespace support. See \secref{xmlns} for details. See also the \verb$qualify_attributes$ option below. \end{description} + \termitem{xmlns}{+URI} +Set the default namespace of the outer environment. This option is +provided to process partial XML content with proper namespace +resolution. + + \termitem{xmlns}{+NS, +URI} +Specify a namespace for the outer environment. This option is +provided to process partial XML content with proper namespace +resolution. + \termitem{qualify_attributes}{Boolean} How to handle unqualified attribute (i.e. without an explicit namespace) in XML namespace (\const{xmlns}) mode. Default and standard compliant is @@ -715,20 +761,20 @@ sgml_parse/2. \end{description} \predicate{get_sgml_parser}{2}{+Parser, -Option} -Retrieve infomation on the current status of the parser. Notably useful -if the parser is used in the call-back mode. Currently defined options: +Retrieve information on the current status of the parser. Notably useful +if the parser is used in the call-back mode. Currently defined options: \begin{description} \termitem{file}{-File} -Current file-name. Note that this may be different from the provided -file if an external entity is being loaded. +Current file-name. Note that this may be different from the provided +file if an external entity is being loaded. \termitem{line}{-Line} -Line-offset from where the parser started its processing in the file-object. +Line-offset from where the parser started its processing in the file-object. \termitem{charpos}{-CharPos} -Offset from where the parser started its processing in the file-object. -See \secref{indexaccess}.
\termitem{charpos}{-Start, -End} Character offsets of the start and end of the source processed causing the @@ -736,8 +782,8 @@ current call-back. Used in \program{PceEmacs} to for colouring text in SGML and XML modes. \termitem{source}{-Stream} -Prolog stream being processed. May be used in the \const{on_begin}, \emph{etc.} -callbacks from sgml_parse/2. +Prolog stream being processed. May be used in the \const{on_begin}, \emph{etc.} +callbacks from sgml_parse/2. \termitem{dialect}{-Dialect} Return the current dialect used by the parser (\const{sgml}, \const{xml} or \const{xmlns}). @@ -822,8 +868,8 @@ Input is a stream. A full description of the option-list is below. \begin{description} \termitem{document}{+Term} -A variable that will be unified with a list describing the content of -the document (see load_structure/2). +A variable that will be unified with a list describing the content of +the document (see load_structure/2). \termitem{source}{+Stream} An input stream that is read. This option print_message/2 with severity \const{informational}. \end{description} + + \termitem{xml_no_ns}{+Mode} +Error handling if an XML namespace is not defined. Default generates +an error. If \const{quiet}, the error is suppressed. Can be used +together with \term{call}{urlns, Closure} to provide external expansion +of namespaces. See also \secref{xmlns}. + \termitem{call}{+Event, :PredicateName} -Issue call-backs on the specified events. \arg{PredicateName} is the -name of the predicate to call on this event, possibly prefixed with a +Issue call-backs on the specified events. \arg{PredicateName} is the +name of the predicate to call on this event, possibly prefixed with a module identifier. If the handler throws an exception, parsing is stopped and sgml_parse/2 re-throws the exception. The defined events are: \begin{description} \termitem{begin}{} -An open-tag has been parsed. The named handler is called with three -arguments: \term{\arg{Handler}}{+Tag, +Attributes, +Parser}. 
+An open-tag has been parsed. The named handler is called with three +arguments: \term{\arg{Handler}}{+Tag, +Attributes, +Parser}. \termitem{end}{} -A close-tag has been parsed. The named handler is called with two -arguments: \term{\arg{Handler}}{+Tag, +Parser}. +A close-tag has been parsed. The named handler is called with two +arguments: \term{\arg{Handler}}{+Tag, +Parser}. \termitem{cdata}{} CDATA has been parsed. The named handler is called with two arguments: \term{Handler}{+CDATA, +Parser}, where CDATA is an atom -representing the data. +representing the data. \termitem{pi}{} -A processing instruction has been parsed. The named handler is called +A processing instruction has been parsed. The named handler is called with two arguments: \term{\arg{Handler}}{+Text, +Parser}, where -\arg{Text} is the text of the processing instruction. +\arg{Text} is the text of the processing instruction. \termitem{decl}{} A declaration (\verb$$) has been read. The named handler is @@ -918,33 +971,33 @@ If this option is present, errors and warnings are not reported using print_message/3 \termitem{xmlns}{} -When parsing an in \const{xmlns} mode, a new namespace declaraction is -pushed on the environment. The named handler is called with three +When parsing an in \const{xmlns} mode, a new namespace declaraction is +pushed on the environment. The named handler is called with three arguments: \term{\arg{Handler}}{+NameSpace, +URL, +Parser}. -See \secref{xmlns} for details. +See \secref{xmlns} for details. \termitem{urlns}{} -When parsing an in \const{xmlns} mode, this predicate can be used to map a -url into either a canonical URL for this namespace or another internal -identifier. See \secref{xmlns} for details. +When parsing an in \const{xmlns} mode, this predicate can be used to map a +url into either a canonical URL for this namespace or another internal +identifier. See \secref{xmlns} for details. 
\end{description} \end{description} \end{description} \subsubsection{Partial Parsing} -In some cases, part of a document needs to be parsed. One option is to -use load_structure/2 or one of its variations and extract -the desired elements from the returned structure. This is a clean -solution, especially on small and medium-sized documents. It however is -unsuitable for parsing really big documents. Such documents can only be +In some cases, part of a document needs to be parsed. One option is to +use load_structure/2 or one of its variations and extract +the desired elements from the returned structure. This is a clean +solution, especially on small and medium-sized documents. It however is +unsuitable for parsing really big documents. Such documents can only be handled with the call-back output interface realised by the \term{call}{Event, Action} option of sgml_parse/2. Event-driven processing is not very natural in Prolog. The SGML2PL library allows for a mixed approach. Consider the case where we want to process all descriptions from RDF elements in a document. The -code below calls process_rdf_description(Element) on each element +code below calls process_rdf_description(Element) on each element that is directly inside an RDF element. \begin{code} @@ -994,26 +1047,28 @@ set_sgml_parser/2 or, for XML, based on the \const{encoding} attribute of the XML header. The parser reads from SWI-Prolog streams, which also provide encoding handling. Therefore, there are two modes for parsing. If the SWI-Prolog stream has encoding \const{octet} (which -is the default for binary streams), the decoder of the SGML parser will +is the default for binary streams), the decoder of the SGML parser will be used and positions reported by the parser are octet offsets in the stream. In other cases, the Prolog stream decoder is used and offsets are character code counts. 
+\input{xpath.tex} + \section{Processing Indexed Files} \label{sec:indexaccess} -In some cases applications wish to process small portions of large -SGML, XML or RDF files. For example, the \emph{OpenDirectory} project -by Netscape has produced a 90MB RDF file representing the main index. -The parser described here can process this document as a unit, but -loading takes 85 seconds on a Pentium-II 450 and the resulting term -requires about 70MB global stack. One option is to process the entire -document and output it as a Prolog fact-base of RDF triplets, but in -many cases this is undesirable. Another example is a large SGML file -containing online documentation. The application normally wishes to -provide only small portions at a time to the user. Loading the entire +In some cases applications wish to process small portions of large +SGML, XML or RDF files. For example, the \emph{OpenDirectory} project +by Netscape has produced a 90MB RDF file representing the main index. +The parser described here can process this document as a unit, but +loading takes 85 seconds on a Pentium-II 450 and the resulting term +requires about 70MB global stack. One option is to process the entire +document and output it as a Prolog fact-base of RDF triplets, but in +many cases this is undesirable. Another example is a large SGML file +containing online documentation. The application normally wishes to +provide only small portions at a time to the user. Loading the entire document into memory is then undesirable. -Using the \term{parse}{element} option, we open a file, seek +Using the \term{parse}{element} option, we open a file, seek (using seek/4) to the position of the element and read the desired element. @@ -1059,12 +1114,12 @@ rdf_element(Id, Term) :- \section{External entities} -While processing an SGML document the document may refer to external -data. This occurs in three places: external parameter entities, normal -external entities and the \const{DOCTYPE} declaration. 
The current version -of this tool deals rather primitively with external data. External -entities can only be loaded from a file and the mapping between the -entity names and the file is done using a \jargon{catalog} file in a +While processing an SGML document the document may refer to external +data. This occurs in three places: external parameter entities, normal +external entities and the \const{DOCTYPE} declaration. The current version +of this tool deals rather primitively with external data. External +entities can only be loaded from a file and the mapping between the +entity names and the file is done using a \jargon{catalog} file in a format compatible with that used by James Clark's SP Parser, based on the SGML Open (now OASIS) specification. @@ -1075,23 +1130,23 @@ sgml_register_catalog_file/2 or the environment variable \begin{description} \predicate{sgml_register_catalog_file}{2}{+File, +Location} Register the indicated \arg{File} as a catalog file. \arg{Location} is -either \const{start} or \const{end} and defines whether the catalog is +either \const{start} or \const{end} and defines whether the catalog is considered first or last. This predicate has no effect if \arg{File} is already part of the catalog. -If no files are registered using this predicate, the first query on the +If no files are registered using this predicate, the first query on the catalog examines \env{SGML_CATALOG_FILES} and fills the catalog with -all files in this path. +all files in this path. \end{description} Two types of lines are used by this package. \begin{quote} \const{DOCTYPE} \arg{doctype} \arg{file} \\ -\const{PUBLIC} \exam{"}\arg{Id}\exam{"} \arg{file} +\const{PUBLIC} \exam{"}\arg{Id}\exam{"} \arg{file} \end{quote} -The specified \arg{file} path is taken relative to the location of the +The specified \arg{file} path is taken relative to the location of the catolog file. 
For the \const{DOCTYPE} declaraction, \pllib{sgml} first makes an attempt to resolve the \const{SYSTEM} or \const{PUBLIC} identifier. If this fails it tries to resolve the \arg{doctype} using @@ -1102,10 +1157,12 @@ where system identifiers must be Universal Resource Indicators, not local file names. Simple uses of relative URIs will work correctly under UNIX and Windows. -In the future we will design a call-back mechanism for locating and -processing external entities, so Prolog-based file-location and Prolog +In the future we will design a call-back mechanism for locating and +processing external entities, so Prolog-based file-location and Prolog resources can be used to store external entities. +\input{pwp.tex} + \section{Writing markup} \subsection{Writing documents} @@ -1149,14 +1206,14 @@ elements are written using increasing indentation. This introduces (depending on the mode and defined whitespace handling) CDATA sequences with only layout between elements when read back in. If \const{false}, no layout characters are added. As this mode does not need to analyse the -document it is faster and guarantees correct output when read back. -Unfortunately the output is hardly human readable and causes problems +document it is faster and guarantees correct output when read back. +Unfortunately the output is hardly human readable and causes problems with many editors. \termitem{indent}{Integer} Set the initial element indentation. It more than zero, the indent is written before the document. \termitem{nsmap}{Map} -Set the initial namespace map. \arg{Map} is a list of +Set the initial namespace map. \arg{Map} is a list of \arg{Name} = \arg{URI}. This option, together with \const{header} and \const{ident} is added to use xml_write/3 to generate XML that is embedded in a larger XML document. @@ -1197,7 +1254,7 @@ values are \const{ascii}, \const{iso_latin_1}, \const{utf8} and \const{unicode}. 
Versions with two arguments are provided for backward compatibility, making the safe \const{ascii} encoding assumption. -\begin{description} +\begin{description} \predicate{xml_quote_attribute}{3}{+In, -Quoted, +Encoding} Map the characters that may not appear in XML attributes to entities. Currently these are \verb$<>&"$.% @@ -1222,8 +1279,8 @@ Assumes \const{ascii} encoding. Succeed if \arg{In} is an atom or string that satisfies the rules for a valid XML element or attribute name. As with the other predicates in this group, if \arg{Encoding} cannot represent one of the characters, this -function fails. It uses a hard-coded table for ASCII-range characters and -iswalpha()/iswalnum() for the first and remaining characters of the name. +function fails. Character classification is based on +\url{http://www.w3.org/TR/2006/REC-xml-20060816}. \predicate{xml_name}{1}{+In} Backward compatibility version for xml_name/2. Assumes \const{ascii} @@ -1238,8 +1295,8 @@ Known missing SGML features include \begin{itemlist} \item [NOTATION on entities] -Though notation is parsed, notation attributes on external entity -declarations are not handed to the user. +Though notation is parsed, notation attributes on external entity +declarations are not handed to the user. \item [NOTATION attributes] SGML notations may have attributes, declared using \verb$$. 
Those data attributes @@ -1261,8 +1318,8 @@ Empty start tags (\verb$<>$), unclosed start tags (\verb$) and unclosed end tags ( #include #include @@ -86,7 +87,7 @@ typedef struct _parser_data int max_warnings; /* warning limit */ errormode error_mode; /* how to handle errors */ int positions; /* report file-positions */ - int exception; /* pending exception from callback */ + term_t exception; /* pending exception from callback */ predicate_t on_begin; /* begin element */ predicate_t on_end; /* end element */ @@ -150,6 +151,7 @@ static functor_t FUNCTOR_sdata1; static functor_t FUNCTOR_ndata1; static functor_t FUNCTOR_number1; static functor_t FUNCTOR_syntax_errors1; +static functor_t FUNCTOR_xml_no_ns1; static functor_t FUNCTOR_minus2; static functor_t FUNCTOR_positions1; static functor_t FUNCTOR_event_class1; @@ -160,6 +162,8 @@ static functor_t FUNCTOR_defaults1; static functor_t FUNCTOR_shorttag1; static functor_t FUNCTOR_qualify_attributes1; static functor_t FUNCTOR_encoding1; +static functor_t FUNCTOR_xmlns1; +static functor_t FUNCTOR_xmlns2; static atom_t ATOM_true; static atom_t ATOM_false; @@ -173,7 +177,7 @@ static atom_t ATOM_position; #define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a) static void -initConstants(void) +initConstants() { FUNCTOR_sgml_parser1 = mkfunctor("sgml_parser", 1); FUNCTOR_equal2 = mkfunctor("=", 2); @@ -211,6 +215,7 @@ initConstants(void) FUNCTOR_ndata1 = mkfunctor("ndata", 1); FUNCTOR_number1 = mkfunctor("number", 1); FUNCTOR_syntax_errors1 = mkfunctor("syntax_errors", 1); + FUNCTOR_xml_no_ns1 = mkfunctor("xml_no_ns", 1); FUNCTOR_minus2 = mkfunctor("-", 2); FUNCTOR_positions1 = mkfunctor("positions", 1); FUNCTOR_event_class1 = mkfunctor("event_class", 1); @@ -221,6 +226,8 @@ initConstants(void) FUNCTOR_shorttag1 = mkfunctor("shorttag", 1); FUNCTOR_qualify_attributes1 = mkfunctor("qualify_attributes", 1); FUNCTOR_encoding1 = mkfunctor("encoding", 1); + FUNCTOR_xmlns1 = mkfunctor("xmlns", 1); + FUNCTOR_xmlns2 = 
mkfunctor("xmlns", 2); ATOM_true = PL_new_atom("true"); ATOM_false = PL_new_atom("false"); @@ -249,7 +256,7 @@ get_parser(term_t parser, dtd_parser **p) { term_t a = PL_new_term_ref(); void *ptr; - PL_get_arg(1, parser, a); + _PL_get_arg(1, parser, a); if ( PL_get_pointer(a, &ptr) ) { dtd_parser *tmp = ptr; @@ -285,7 +292,7 @@ get_dtd(term_t t, dtd **dtdp) { term_t a = PL_new_term_ref(); void *ptr; - PL_get_arg(1, t, a); + _PL_get_arg(1, t, a); if ( PL_get_pointer(a, &ptr) ) { dtd *tmp = ptr; @@ -317,7 +324,7 @@ pl_new_sgml_parser(term_t ref, term_t options) while ( PL_get_list(tail, head, tail) ) { if ( PL_is_functor(head, FUNCTOR_dtd1) ) - { PL_get_arg(1, head, tmp); + { _PL_get_arg(1, head, tmp); if ( PL_is_variable(tmp) ) /* dtd(X) */ { dtd = new_dtd(NULL); /* no known doctype */ @@ -382,10 +389,10 @@ pl_free_dtd(term_t t) * DATA EXCHANGE * *******************************/ -static void +static int put_atom_wchars(term_t t, wchar_t const *s) { PL_put_variable(t); - PL_unify_wchars(t, PL_ATOM, ENDSNUL, s); + return PL_unify_wchars(t, PL_ATOM, ENDSNUL, s); } @@ -405,7 +412,7 @@ pl_set_sgml_parser(term_t parser, term_t option) wchar_t *file; dtd_symbol *fs; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_wchars(a, NULL, &file, CVT_ATOM|CVT_EXCEPTION) ) return FALSE; fs = dtd_add_symbol(p->dtd, file); /* symbol will be freed */ @@ -413,20 +420,20 @@ pl_set_sgml_parser(term_t parser, term_t option) } else if ( PL_is_functor(option, FUNCTOR_line1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_integer(a, &p->location.line) ) return sgml2pl_error(ERR_TYPE, "integer", a); } else if ( PL_is_functor(option, FUNCTOR_charpos1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_long(a, &p->location.charpos) ) return sgml2pl_error(ERR_TYPE, "integer", a); } else if ( PL_is_functor(option, FUNCTOR_dialect1) ) { term_t a = PL_new_term_ref(); char 
*s; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_atom_chars(a, &s) ) return sgml2pl_error(ERR_TYPE, "atom", a); @@ -442,7 +449,7 @@ pl_set_sgml_parser(term_t parser, term_t option) { term_t a = PL_new_term_ref(); char *s; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_atom_chars(a, &s) ) return sgml2pl_error(ERR_TYPE, "atom", a); @@ -461,7 +468,7 @@ pl_set_sgml_parser(term_t parser, term_t option) { term_t a = PL_new_term_ref(); int val; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_bool(a, &val) ) return sgml2pl_error(ERR_TYPE, "boolean", a); @@ -473,7 +480,7 @@ pl_set_sgml_parser(term_t parser, term_t option) { term_t a = PL_new_term_ref(); int val; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_bool(a, &val) ) return sgml2pl_error(ERR_TYPE, "boolean", a); @@ -485,7 +492,7 @@ pl_set_sgml_parser(term_t parser, term_t option) { term_t a = PL_new_term_ref(); int val; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_bool(a, &val) ) return sgml2pl_error(ERR_TYPE, "boolean", a); @@ -494,7 +501,7 @@ pl_set_sgml_parser(term_t parser, term_t option) { term_t a = PL_new_term_ref(); char *s; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_atom_chars(a, &s) ) return sgml2pl_error(ERR_TYPE, "atom", a); @@ -508,7 +515,7 @@ pl_set_sgml_parser(term_t parser, term_t option) { term_t a = PL_new_term_ref(); char *val; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_atom_chars(a, &val) ) return sgml2pl_error(ERR_TYPE, "atom", a); if ( !xml_set_encoding(p, val) ) @@ -517,15 +524,37 @@ pl_set_sgml_parser(term_t parser, term_t option) { term_t a = PL_new_term_ref(); ichar *s; - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( PL_is_variable(a) ) { p->enforce_outer_element = NULL; } else { if ( !PL_get_wchars(a, NULL, &s, CVT_ATOM) ) return sgml2pl_error(ERR_TYPE, "atom_or_variable", a); - + p->enforce_outer_element = 
dtd_add_symbol(p->dtd, s); } + } else if ( PL_is_functor(option, FUNCTOR_xmlns1) ) + { term_t a = PL_new_term_ref(); + ichar ns[1] = {0}; + ichar *uri; + + _PL_get_arg(1, option, a); + if ( !PL_get_wchars(a, NULL, &uri, CVT_ATOM|CVT_EXCEPTION) ) + return FALSE; + + xmlns_push(p, ns, uri); + } else if ( PL_is_functor(option, FUNCTOR_xmlns2) ) + { term_t a = PL_new_term_ref(); + ichar *ns, *uri; + + _PL_get_arg(1, option, a); + if ( !PL_get_wchars(a, NULL, &ns, CVT_ATOM|CVT_EXCEPTION) ) + return FALSE; + _PL_get_arg(2, option, a); + if ( !PL_get_wchars(a, NULL, &uri, CVT_ATOM|CVT_EXCEPTION) ) + return FALSE; + + xmlns_push(p, ns, uri); } else return sgml2pl_error(ERR_DOMAIN, "sgml_parser_option", option); @@ -552,12 +581,12 @@ pl_get_sgml_parser(term_t parser, term_t option) if ( PL_is_functor(option, FUNCTOR_charpos1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); return PL_unify_integer(a, file_location(p, &p->startloc)->charpos); } else if ( PL_is_functor(option, FUNCTOR_line1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); return PL_unify_integer(a, file_location(p, &p->startloc)->line); } else if ( PL_is_functor(option, FUNCTOR_charpos2) ) { term_t a = PL_new_term_ref(); @@ -575,7 +604,7 @@ pl_get_sgml_parser(term_t parser, term_t option) if ( l->type == IN_FILE && l->name.file ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); return PL_unify_wchars(a, PL_ATOM, ENDSNUL, l->name.file); } } else if ( PL_is_functor(option, FUNCTOR_source1) ) @@ -584,13 +613,13 @@ pl_get_sgml_parser(term_t parser, term_t option) if ( pd && pd->magic == PD_MAGIC && pd->source ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); return PL_unify_stream(a, pd->source); } } else if ( PL_is_functor(option, FUNCTOR_dialect1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); 
switch(p->dtd->dialect) { case DL_SGML: return PL_unify_atom_chars(a, "sgml"); @@ -602,7 +631,7 @@ pl_get_sgml_parser(term_t parser, term_t option) } else if ( PL_is_functor(option, FUNCTOR_event_class1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); switch(p->event_class) { case EV_EXPLICIT: return PL_unify_atom_chars(a, "explicit"); @@ -616,45 +645,51 @@ pl_get_sgml_parser(term_t parser, term_t option) } else if ( PL_is_functor(option, FUNCTOR_dtd1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); return unify_dtd(a, p->dtd); } else if ( PL_is_functor(option, FUNCTOR_doctype1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( p->enforce_outer_element ) return PL_unify_wchars(a, PL_ATOM, ENDSNUL, p->enforce_outer_element->name); else return TRUE; /* leave variable */ } else if ( PL_is_functor(option, FUNCTOR_allowed1) ) - { term_t tail = PL_new_term_ref(); - term_t head = PL_new_term_ref(); - term_t tmp = PL_new_term_ref(); + { term_t tail, head, tmp; sgml_environment *env = p->environments; - - PL_get_arg(1, option, tail); + + if ( !(tail = PL_new_term_ref()) || + !(head = PL_new_term_ref()) || + !(tmp = PL_new_term_ref()) ) + return FALSE; + + _PL_get_arg(1, option, tail); if ( env ) { for( ; env; env = env->parent) { dtd_element *buf[256]; /* MAX_VISITED! */ int n = sizeof(buf)/sizeof(dtd_element *); /* not yet used! 
*/ int i; - + state_allows_for(env->state, buf, &n); - + for(i=0; iname->name); - - if ( !PL_unify_list(tail, head, tail) || + rc = put_atom_wchars(tmp, buf[i]->name->name); + + if ( !rc || + !PL_unify_list(tail, head, tail) || !PL_unify(head, tmp) ) return FALSE; } - + if ( !env->element->structure || !env->element->structure->omit_close ) break; @@ -673,8 +708,8 @@ pl_get_sgml_parser(term_t parser, term_t option) term_t head = PL_new_term_ref(); term_t tmp = PL_new_term_ref(); sgml_environment *env = p->environments; - - PL_get_arg(1, option, tail); + + _PL_get_arg(1, option, tail); for( ; env; env = env->parent) { put_atom_wchars(tmp, env->element->name->name); @@ -709,7 +744,7 @@ call_prolog(parser_data *pd, predicate_t pred, term_t av) static void -end_frame(fid_t fid, int ex) +end_frame(fid_t fid, term_t ex) { if ( ex ) PL_close_foreign_frame(fid); else @@ -740,7 +775,7 @@ typedef struct static url_cache cache[URL_CACHE]; static void -reset_url_cache(void) +reset_url_cache() { int i; url_cache *c = cache; @@ -753,24 +788,21 @@ reset_url_cache(void) } -static void +WUNUSED static int put_url(dtd_parser *p, term_t t, const ichar *url) { parser_data *pd = p->closure; + fid_t fid; int i; if ( !pd->on_urlns ) - { put_atom_wchars(t, url); - return; - } + return put_atom_wchars(t, url); for(i=0; ion_urlns, av) && + rc = (put_atom_wchars(av+0, url) && + unify_parser(av+2, p)); + + if ( rc && + PL_call_predicate(NULL, PL_Q_NORMAL, pd->on_urlns, av) && PL_get_atom(av+1, &a) ) { PL_register_atom(a); cache[0].canonical = a; PL_put_atom(t, a); - } else - { put_atom_wchars(t, url); + } else if ( rc ) + { rc = put_atom_wchars(t, url); } PL_discard_foreign_frame(fid); + + return rc; } + + return FALSE; } -static void +WUNUSED static int put_attribute_name(dtd_parser *p, term_t t, dtd_symbol *nm) { const ichar *url, *local; @@ -809,19 +848,20 @@ put_attribute_name(dtd_parser *p, term_t t, dtd_symbol *nm) { xmlns_resolve_attribute(p, nm, &local, &url); if ( url ) - { term_t av 
= PL_new_term_refs(2); - - put_url(p, av+0, url); - put_atom_wchars(av+1, local); - PL_cons_functor_v(t, FUNCTOR_ns2, av); + { term_t av; + + return ( (av=PL_new_term_refs(2)) && + put_url(p, av+0, url) && + put_atom_wchars(av+1, local) && + PL_cons_functor_v(t, FUNCTOR_ns2, av) ); } else - put_atom_wchars(t, local); + return put_atom_wchars(t, local); } else - put_atom_wchars(t, nm->name); + return put_atom_wchars(t, nm->name); } -static void +WUNUSED static int put_element_name(dtd_parser *p, term_t t, dtd_element *e) { const ichar *url, *local; @@ -830,15 +870,16 @@ put_element_name(dtd_parser *p, term_t t, dtd_element *e) xmlns_resolve_element(p, &local, &url); if ( url ) - { term_t av = PL_new_term_refs(2); - - put_url(p, av+0, url); - put_atom_wchars(av+1, local); - PL_cons_functor_v(t, FUNCTOR_ns2, av); + { term_t av; + + return ( (av=PL_new_term_refs(2)) && + put_url(p, av+0, url) && + put_atom_wchars(av+1, local) && + PL_cons_functor_v(t, FUNCTOR_ns2, av) ); } else - put_atom_wchars(t, local); + return put_atom_wchars(t, local); } else - put_atom_wchars(t, e->name->name); + return put_atom_wchars(t, e->name->name); } @@ -880,44 +921,47 @@ static int put_att_text(term_t t, sgml_attribute *a) { if ( a->value.textW ) { PL_put_variable(t); - PL_unify_wchars(t, PL_ATOM, a->value.number, a->value.textW); - return TRUE; + return PL_unify_wchars(t, PL_ATOM, a->value.number, a->value.textW); } else return FALSE; } -static void +static int put_attribute_value(dtd_parser *p, term_t t, sgml_attribute *a) { switch(a->definition->type) { case AT_CDATA: - put_att_text(t, a); - break; + return put_att_text(t, a); case AT_NUMBER: { if ( !put_att_text(t, a) ) - PL_put_integer(t, a->value.number); - break; + return PL_put_integer(t, a->value.number); + return TRUE; } default: /* multi-valued attribute */ { if ( a->definition->islist && a->value.textW ) - { term_t tail, head = PL_new_term_ref(); + { term_t tail, head; const ichar *val = a->value.textW; const ichar *e; 
PL_put_variable(t); - tail = PL_copy_term_ref(t); - + if ( !(head = PL_new_term_ref()) || + !(tail = PL_copy_term_ref(t)) ) + return FALSE; + for(e=istrblank(val); e; val = e+1, e=istrblank(val)) { if ( e == val ) continue; /* skip spaces */ - PL_unify_list(tail, head, tail); - unify_listval(p, head, a->definition->type, e-val, val); + if ( !PL_unify_list(tail, head, tail) || + !unify_listval(p, head, a->definition->type, e-val, val) ) + return FALSE; } - PL_unify_list(tail, head, tail); - unify_listval(p, head, a->definition->type, istrlen(val), val); - PL_unify_nil(tail); + + return ( PL_unify_list(tail, head, tail) && + unify_listval(p, head, a->definition->type, + istrlen(val), val) && + PL_unify_nil(tail) ); } else - put_att_text(t, a); + return put_att_text(t, a); } } } @@ -926,7 +970,7 @@ put_attribute_value(dtd_parser *p, term_t t, sgml_attribute *a) /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Produce a tag-location in the format - start_location=file:char-char + start_location=file:char-char - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ static int @@ -935,13 +979,12 @@ put_tag_position(dtd_parser *p, term_t pos) if ( l->type == IN_FILE && l->name.file ) { PL_put_variable(pos); - PL_unify_term(pos, - PL_FUNCTOR, FUNCTOR_ns2, - PL_NWCHARS, wcslen(l->name.file), l->name.file, - PL_FUNCTOR, FUNCTOR_minus2, - PL_LONG, l->charpos, - PL_LONG, p->location.charpos); - return TRUE; + return PL_unify_term(pos, + PL_FUNCTOR, FUNCTOR_ns2, + PL_NWCHARS, wcslen(l->name.file), l->name.file, + PL_FUNCTOR, FUNCTOR_minus2, + PL_LONG, l->charpos, + PL_LONG, p->location.charpos); } return FALSE; @@ -959,22 +1002,22 @@ unify_attribute_list(dtd_parser *p, term_t alist, parser_data *pd = p->closure; for(i=0; iname); - put_attribute_value(p, a+1, &argv[i]); - PL_cons_functor_v(a, FUNCTOR_equal2, a); - if ( !PL_unify_list(tail, h, tail) || + { if ( !put_attribute_name(p, a+0, argv[i].definition->name) || + 
!put_attribute_value(p, a+1, &argv[i]) || + !PL_cons_functor_v(a, FUNCTOR_equal2, a) || + !PL_unify_list(tail, h, tail) || !PL_unify(h, a) ) return FALSE; } if ( pd->positions && put_tag_position(p, a+1) ) { PL_put_atom(a, ATOM_position); - - PL_cons_functor_v(a, FUNCTOR_equal2, a); - if ( !PL_unify_list(tail, h, tail) || + + if ( !PL_cons_functor_v(a, FUNCTOR_equal2, a) || + !PL_unify_list(tail, h, tail) || !PL_unify(h, a) ) return FALSE; - } + } if ( PL_unify_nil(tail) ) { PL_reset_term_refs(tail); @@ -1000,12 +1043,18 @@ on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv) term_t et = PL_new_term_ref(); /* element structure */ term_t h = PL_new_term_ref(); - put_element_name(p, h, e); - unify_attribute_list(p, alist, argc, argv); - PL_unify_term(et, PL_FUNCTOR, FUNCTOR_element3, - PL_TERM, h, - PL_TERM, alist, - PL_TERM, content); + if ( !h || + !put_element_name(p, h, e) || + !unify_attribute_list(p, alist, argc, argv) || + !PL_unify_term(et, + PL_FUNCTOR, FUNCTOR_element3, + PL_TERM, h, + PL_TERM, alist, + PL_TERM, content) ) + { pd->exception = PL_exception(0); + return FALSE; + } + if ( PL_unify_list(pd->tail, h, pd->tail) && PL_unify(h, et) ) { env *env = sgml_calloc(1, sizeof(*env)); @@ -1016,21 +1065,34 @@ on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv) pd->tail = content; PL_reset_term_refs(alist); + + return TRUE; } - return TRUE; + pd->exception = PL_exception(0); + return FALSE; } if ( pd->on_begin ) - { fid_t fid = PL_open_foreign_frame(); - term_t av = PL_new_term_refs(3); + { fid_t fid; - put_element_name(p, av+0, e); - unify_attribute_list(p, av+1, argc, argv); - unify_parser(av+2, p); + if ( (fid = PL_open_foreign_frame()) ) + { int rc; + term_t av = PL_new_term_refs(3); - call_prolog(pd, pd->on_begin, av); - end_frame(fid, pd->exception); + rc = ( put_element_name(p, av+0, e) && + unify_attribute_list(p, av+1, argc, argv) && + unify_parser(av+2, p) && + call_prolog(pd, pd->on_begin, av) + ); + + 
PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; + } + + pd->exception = PL_exception(0); + return FALSE; } return TRUE; @@ -1045,18 +1107,29 @@ on_end(dtd_parser *p, dtd_element *e) return TRUE; if ( pd->on_end ) - { fid_t fid = PL_open_foreign_frame(); - term_t av = PL_new_term_refs(2); + { fid_t fid; - put_element_name(p, av+0, e); - unify_parser(av+1, p); + if ( (fid = PL_open_foreign_frame()) ) + { int rc; + term_t av = PL_new_term_refs(2); - call_prolog(pd, pd->on_end, av); - end_frame(fid, pd->exception); + rc = ( put_element_name(p, av+0, e) && + unify_parser(av+1, p) && + call_prolog(pd, pd->on_end, av) + ); + + PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; + } + + pd->exception = PL_exception(0); + return FALSE; } if ( pd->tail && !pd->stopped ) - { PL_unify_nil(pd->tail); + { if ( !PL_unify_nil(pd->tail) ) + return FALSE; PL_reset_term_refs(pd->tail); /* ? */ if ( pd->stack ) @@ -1086,36 +1159,55 @@ on_entity(dtd_parser *p, dtd_entity *e, int chr) return TRUE; if ( pd->on_entity ) - { fid_t fid = PL_open_foreign_frame(); - term_t av = PL_new_term_refs(2); + { fid_t fid; - if ( e ) - put_atom_wchars(av+0, e->name->name); - else - PL_put_integer(av+0, chr); + if ( (fid=PL_open_foreign_frame()) ) + { int rc; + term_t av = PL_new_term_refs(2); - unify_parser(av+1, p); + if ( e ) + rc = put_atom_wchars(av+0, e->name->name); + else + rc = PL_put_integer(av+0, chr); - call_prolog(pd, pd->on_end, av); - end_frame(fid, pd->exception); + if ( rc ) + rc = ( unify_parser(av+1, p) && + call_prolog(pd, pd->on_end, av) + ); + + PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; + } + + pd->exception = PL_exception(0); + return FALSE; } if ( pd->tail ) - { term_t h = PL_new_term_ref(); + { int rc; + term_t h = PL_new_term_ref(); - if ( !PL_unify_list(pd->tail, h, pd->tail) ) + if ( !h || + !PL_unify_list(pd->tail, h, pd->tail) ) + { pd->exception = PL_exception(0); return FALSE; + } if ( e ) - PL_unify_term(h, - PL_FUNCTOR, FUNCTOR_entity1, - 
PL_CHARS, e->name->name); + rc = PL_unify_term(h, + PL_FUNCTOR, FUNCTOR_entity1, + PL_CHARS, e->name->name); else - PL_unify_term(h, - PL_FUNCTOR, FUNCTOR_entity1, - PL_INT, chr); - + rc = PL_unify_term(h, + PL_FUNCTOR, FUNCTOR_entity1, + PL_INT, chr); + PL_reset_term_refs(h); + if ( !rc ) + pd->exception = PL_exception(0); + + return rc; } return TRUE; @@ -1127,15 +1219,23 @@ on_data(dtd_parser *p, data_type type, int len, const wchar_t *data) { parser_data *pd = p->closure; if ( pd->on_cdata ) - { fid_t fid = PL_open_foreign_frame(); - term_t av = PL_new_term_refs(2); + { fid_t fid; - PL_unify_wchars(av+0, PL_ATOM, len, data); + if ( (fid=PL_open_foreign_frame()) ) + { int rc; + term_t av = PL_new_term_refs(2); - unify_parser(av+1, p); + rc = ( PL_unify_wchars(av+0, PL_ATOM, len, data) && + unify_parser(av+1, p) && + call_prolog(pd, pd->on_cdata, av) ); - call_prolog(pd, pd->on_cdata, av); - end_frame(fid, pd->exception); + PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; + } + + pd->exception = PL_exception(0); + return FALSE; } if ( pd->tail && !pd->stopped ) @@ -1167,13 +1267,15 @@ on_data(dtd_parser *p, data_type type, int len, const wchar_t *data) rval = FALSE; assert(0); } - + if ( rval ) rval = PL_unify_wchars(a, PL_ATOM, len, data); if ( rval ) { PL_reset_term_refs(h); return TRUE; + } else + { pd->exception = PL_exception(0); } } } @@ -1238,46 +1340,69 @@ on_error(dtd_parser *p, dtd_error *error) } if ( pd->on_error ) /* msg, parser */ - { fid_t fid = PL_open_foreign_frame(); - term_t av = PL_new_term_refs(3); - - PL_put_atom_chars(av+0, severity); - PL_unify_wchars(av+1, PL_ATOM, - wcslen(error->plain_message), error->plain_message); - unify_parser(av+2, p); + { fid_t fid; - call_prolog(pd, pd->on_error, av); - end_frame(fid, pd->exception); + if ( (fid=PL_open_foreign_frame()) ) + { int rc; + term_t av = PL_new_term_refs(3); + + rc = ( PL_put_atom_chars(av+0, severity) && + PL_unify_wchars(av+1, PL_ATOM, + wcslen(error->plain_message), + 
error->plain_message) && + unify_parser(av+2, p) && + call_prolog(pd, pd->on_error, av) + ); + PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; + } + pd->exception = PL_exception(0); + return FALSE; } else if ( pd->error_mode != EM_QUIET ) - { fid_t fid = PL_open_foreign_frame(); - predicate_t pred = PL_predicate("print_message", 2, "user"); - term_t av = PL_new_term_refs(2); - term_t src = PL_new_term_ref(); - term_t parser = PL_new_term_ref(); - dtd_srcloc *l = file_location(p, &p->startloc); + { fid_t fid; - unify_parser(parser, p); - PL_put_atom_chars(av+0, severity); + if ( (fid=PL_open_foreign_frame()) ) + { int rc; + predicate_t pred = PL_predicate("print_message", 2, "user"); + term_t av = PL_new_term_refs(2); + term_t src = PL_new_term_ref(); + term_t parser = PL_new_term_ref(); + dtd_srcloc *l = file_location(p, &p->startloc); - if ( l->name.file ) - { if ( l->type == IN_FILE ) - put_atom_wchars(src, l->name.file); - else - put_atom_wchars(src, l->name.entity); - } else - { PL_put_nil(src); + rc = ( unify_parser(parser, p) && + PL_put_atom_chars(av+0, severity) ); + + if ( rc ) + { if ( l->name.file ) + { if ( l->type == IN_FILE ) + rc = put_atom_wchars(src, l->name.file); + else + rc = put_atom_wchars(src, l->name.entity); + } else + { PL_put_nil(src); + } + } + + if ( rc ) + rc = PL_unify_term(av+1, + PL_FUNCTOR_CHARS, "sgml", 4, + PL_TERM, parser, + PL_TERM, src, + PL_INT, l->line, + PL_NWCHARS, wcslen(error->plain_message), + error->plain_message); + + if ( rc ) + rc = PL_call_predicate(NULL, PL_Q_NODEBUG, pred, av); + + PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; } - PL_unify_term(av+1, - PL_FUNCTOR_CHARS, "sgml", 4, - PL_TERM, parser, - PL_TERM, src, - PL_INT, l->line, - PL_NWCHARS, wcslen(error->plain_message), error->plain_message); - - PL_call_predicate(NULL, PL_Q_NODEBUG, pred, av); - - PL_discard_foreign_frame(fid); + pd->exception = PL_exception(0); + return FALSE; } return TRUE; @@ -1292,18 +1417,34 @@ 
on_xmlns(dtd_parser *p, dtd_symbol *ns, dtd_symbol *url) return TRUE; if ( pd->on_xmlns ) - { fid_t fid = PL_open_foreign_frame(); - term_t av = PL_new_term_refs(3); + { fid_t fid; + term_t av; - if ( ns ) - put_atom_wchars(av+0, ns->name); - else - PL_put_nil(av+0); - put_atom_wchars(av+1, url->name); - unify_parser(av+2, p); + if ( (fid = PL_open_foreign_frame()) && + (av = PL_new_term_refs(3)) ) + { int rc; - call_prolog(pd, pd->on_xmlns, av); - end_frame(fid, pd->exception); + if ( ns ) + { rc = put_atom_wchars(av+0, ns->name); + } else + { PL_put_nil(av+0); + rc = TRUE; + } + + if ( rc ) + { rc = ( put_atom_wchars(av+1, url->name) && + unify_parser(av+2, p) && + call_prolog(pd, pd->on_xmlns, av) + ); + } + end_frame(fid, pd->exception); + PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; + } + + pd->exception = PL_exception(0); + return FALSE; } return TRUE; @@ -1318,26 +1459,42 @@ on_pi(dtd_parser *p, const ichar *pi) return TRUE; if ( pd->on_pi ) - { fid_t fid = PL_open_foreign_frame(); - term_t av = PL_new_term_refs(2); + { fid_t fid; - put_atom_wchars(av+0, pi); - unify_parser(av+1, p); + if ( (fid=PL_open_foreign_frame()) ) + { int rc; + term_t av = PL_new_term_refs(2); - call_prolog(pd, pd->on_pi, av); - end_frame(fid, pd->exception); + rc = ( put_atom_wchars(av+0, pi) && + unify_parser(av+1, p) && + call_prolog(pd, pd->on_pi, av) + ); + + PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; + } + + pd->exception = PL_exception(0); + return FALSE; } if ( pd->tail ) - { term_t h = PL_new_term_ref(); + { term_t h; - if ( !PL_unify_list(pd->tail, h, pd->tail) ) + if ( !(h = PL_new_term_ref()) || + !PL_unify_list(pd->tail, h, pd->tail) ) + { pd->exception = PL_exception(0); return FALSE; + } + + if ( !PL_unify_term(h, + PL_FUNCTOR, FUNCTOR_pi1, + PL_NWCHARS, wcslen(pi), pi) ) + { pd->exception = PL_exception(0); + return FALSE; + } - PL_unify_term(h, - PL_FUNCTOR, FUNCTOR_pi1, - PL_NWCHARS, wcslen(pi), pi); - PL_reset_term_refs(h); } @@ -1353,14 
+1510,25 @@ on_decl(dtd_parser *p, const ichar *decl) return TRUE; if ( pd->on_decl ) - { fid_t fid = PL_open_foreign_frame(); - term_t av = PL_new_term_refs(2); + { fid_t fid; + term_t av; - put_atom_wchars(av+0, decl); - unify_parser(av+1, p); + if ( (fid = PL_open_foreign_frame()) && + (av = PL_new_term_refs(2)) ) + { int rc; - call_prolog(pd, pd->on_decl, av); - end_frame(fid, pd->exception); + rc = ( put_atom_wchars(av+0, decl) && + unify_parser(av+1, p) && + call_prolog(pd, pd->on_decl, av) + ); + end_frame(fid, pd->exception); + PL_discard_foreign_frame(fid); + if ( rc ) + return TRUE; + } + + pd->exception = PL_exception(0); + return FALSE; } if ( pd->stopat == SA_DECL ) @@ -1380,14 +1548,17 @@ write_parser(void *h, char *buf, int len) { errno = EINVAL; return -1; } - + if ( (pd->errors > pd->max_errors && pd->max_errors >= 0) || pd->stopped ) { errno = EIO; return -1; } for(; sparser, *s); + { putchar_dtd_parser(pd->parser, *s); + if ( pd->exception ) + break; + } return len; } @@ -1404,7 +1575,9 @@ close_parser(void *h) } if ( pd->tail ) - PL_unify_nil(pd->tail); + { if ( !PL_unify_nil(pd->tail) ) + return -1; /* resource error */ + } if ( p->dmode == DM_DTD ) p->dtd->implicit = FALSE; /* assume we loaded a DTD */ @@ -1441,7 +1614,7 @@ new_parser_data(dtd_parser *p) pd->error_mode = EM_PRINT; pd->exception = FALSE; p->closure = pd; - + return pd; } @@ -1466,11 +1639,11 @@ pl_open_dtd(term_t ref, term_t options, term_t stream) { if ( PL_is_functor(option, FUNCTOR_dialect1) ) { term_t a = PL_new_term_ref(); char *s; - - PL_get_arg(1, option, a); + + _PL_get_arg(1, option, a); if ( !PL_get_atom_chars(a, &s) ) return sgml2pl_error(ERR_TYPE, "atom", a); - + if ( streq(s, "xml") ) set_dialect_dtd(dtd, DL_XML); else if ( streq(s, "xmlns") ) @@ -1503,14 +1676,14 @@ set_callback_predicates(parser_data *pd, term_t option) int arity; module_t m = NULL; - PL_get_arg(2, option, a); + _PL_get_arg(2, option, a); PL_strip_module(a, &m, a); if ( !PL_get_atom(a, &pname) ) 
return sgml2pl_error(ERR_TYPE, "atom", a); - PL_get_arg(1, option, a); + _PL_get_arg(1, option, a); if ( !PL_get_atom_chars(a, &fname) ) return sgml2pl_error(ERR_TYPE, "atom", a); - + if ( streq(fname, "begin") ) { pp = &pd->on_begin; /* tag, attributes, parser */ arity = 3; @@ -1570,7 +1743,7 @@ pl_sgml_parse(term_t parser, term_t options) if ( oldpd->magic != PD_MAGIC || oldpd->parser != p ) return sgml2pl_error(ERR_MISC, "sgml", "Parser associated with illegal data"); - + pd = sgml_calloc(1, sizeof(*pd)); *pd = *oldpd; p->closure = pd; @@ -1590,37 +1763,50 @@ pl_sgml_parse(term_t parser, term_t options) p->on_error = on_error; p->on_xmlns = on_xmlns; p->on_decl = on_decl; - + pd = new_parser_data(p); } while ( PL_get_list(tail, head, tail) ) - { if ( PL_is_functor(head, FUNCTOR_document1) ) + { if ( PL_is_functor(head, FUNCTOR_document1) ) { pd->list = PL_new_term_ref(); - PL_get_arg(1, head, pd->list); + _PL_get_arg(1, head, pd->list); pd->tail = PL_copy_term_ref(pd->list); pd->stack = NULL; } else if ( PL_is_functor(head, FUNCTOR_source1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, head, a); + _PL_get_arg(1, head, a); if ( !PL_get_stream_handle(a, &in) ) return FALSE; } else if ( PL_is_functor(head, FUNCTOR_content_length1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, head, a); + _PL_get_arg(1, head, a); if ( !PL_get_int64(a, &content_length) ) return sgml2pl_error(ERR_TYPE, "integer", a); has_content_length = TRUE; } else if ( PL_is_functor(head, FUNCTOR_call2) ) { if ( !set_callback_predicates(pd, head) ) return FALSE; + } else if ( PL_is_functor(head, FUNCTOR_xml_no_ns1) ) + { term_t a = PL_new_term_ref(); + char *s; + + _PL_get_arg(1, head, a); + if ( !PL_get_atom_chars(a, &s) ) + return sgml2pl_error(ERR_TYPE, "atom", a); + if ( streq(s, "error") ) + p->xml_no_ns = NONS_ERROR; + else if ( streq(s, "quiet") ) + p->xml_no_ns = NONS_QUIET; + else + return sgml2pl_error(ERR_DOMAIN, "xml_no_ns", a); } else if ( PL_is_functor(head, FUNCTOR_parse1) ) 
{ term_t a = PL_new_term_ref(); char *s; - PL_get_arg(1, head, a); + _PL_get_arg(1, head, a); if ( !PL_get_atom_chars(a, &s) ) return sgml2pl_error(ERR_TYPE, "atom", a); if ( streq(s, "element") ) @@ -1638,14 +1824,14 @@ pl_sgml_parse(term_t parser, term_t options) } else if ( PL_is_functor(head, FUNCTOR_max_errors1) ) { term_t a = PL_new_term_ref(); - PL_get_arg(1, head, a); + _PL_get_arg(1, head, a); if ( !PL_get_integer(a, &pd->max_errors) ) return sgml2pl_error(ERR_TYPE, "integer", a); } else if ( PL_is_functor(head, FUNCTOR_syntax_errors1) ) { term_t a = PL_new_term_ref(); char *s; - PL_get_arg(1, head, a); + _PL_get_arg(1, head, a); if ( !PL_get_atom_chars(a, &s) ) return sgml2pl_error(ERR_TYPE, "atom", a); @@ -1661,7 +1847,7 @@ pl_sgml_parse(term_t parser, term_t options) { term_t a = PL_new_term_ref(); char *s; - PL_get_arg(1, head, a); + _PL_get_arg(1, head, a); if ( !PL_get_atom_chars(a, &s) ) return sgml2pl_error(ERR_TYPE, "atom", a); @@ -1767,7 +1953,9 @@ pl_sgml_parse(term_t parser, term_t options) out: reset_url_cache(); if ( pd->tail ) - PL_unify_nil(pd->tail); + { if ( !PL_unify_nil(pd->tail) ) + return FALSE; + } if ( recursive ) { p->closure = oldpd; @@ -1791,7 +1979,7 @@ pl_sgml_parse(term_t parser, term_t options) * DTD PROPERTIES * *******************************/ -static void put_model(term_t t, dtd_model *m); +static int put_model(term_t t, dtd_model *m) WUNUSED; /* doctype(DocType) */ @@ -1805,31 +1993,37 @@ dtd_prop_doctype(dtd *dtd, term_t prop) /* elements(ListOfElements) */ -static void +WUNUSED static int make_model_list(term_t t, dtd_model *m, functor_t f) { if ( !m->next ) - { put_model(t, m); + { return put_model(t, m); } else - { term_t av = PL_new_term_refs(2); + { term_t av; - put_model(av+0, m); - make_model_list(av+1, m->next, f); - PL_cons_functor_v(t, f, av); - PL_reset_term_refs(av); + if ( (av=PL_new_term_refs(2)) && + put_model(av+0, m) && + make_model_list(av+1, m->next, f) && + PL_cons_functor_v(t, f, av) ) + { 
PL_reset_term_refs(av); + return TRUE; + } + + return FALSE; } } -static void +WUNUSED static int put_model(term_t t, dtd_model *m) -{ functor_t f; +{ int rc = TRUE; + functor_t f; switch(m->type) { case MT_PCDATA: - PL_put_atom(t, ATOM_pcdata); + rc = PL_put_atom(t, ATOM_pcdata); goto card; case MT_ELEMENT: - put_atom_wchars(t, m->content.element->name->name); + rc = put_atom_wchars(t, m->content.element->name->name); goto card; case MT_AND: f = FUNCTOR_and2; @@ -1847,46 +2041,51 @@ put_model(term_t t, dtd_model *m) break; } - if ( !m->content.group ) - PL_put_atom(t, ATOM_empty); - else - make_model_list(t, m->content.group, f); - + if ( rc ) + { if ( !m->content.group ) + rc = PL_put_atom(t, ATOM_empty); + else + rc = make_model_list(t, m->content.group, f); + } + card: + if ( !rc ) + return FALSE; + switch(m->cardinality) { case MC_ONE: break; case MC_OPT: - PL_cons_functor_v(t, FUNCTOR_opt1, t); + rc = PL_cons_functor_v(t, FUNCTOR_opt1, t); break; case MC_REP: - PL_cons_functor_v(t, FUNCTOR_rep1, t); + rc = PL_cons_functor_v(t, FUNCTOR_rep1, t); break; case MC_PLUS: - PL_cons_functor_v(t, FUNCTOR_plus1, t); + rc = PL_cons_functor_v(t, FUNCTOR_plus1, t); break; } + + return rc; } -static void +WUNUSED static int put_content(term_t t, dtd_edef *def) { switch(def->type) { case C_EMPTY: - PL_put_atom(t, ATOM_empty); - return; + return PL_put_atom(t, ATOM_empty); case C_CDATA: - PL_put_atom(t, ATOM_cdata); - return; + return PL_put_atom(t, ATOM_cdata); case C_RCDATA: - PL_put_atom(t, ATOM_rcdata); - return; + return PL_put_atom(t, ATOM_rcdata); case C_ANY: - PL_put_atom(t, ATOM_any); - return; + return PL_put_atom(t, ATOM_any); default: if ( def->content ) - put_model(t, def->content); + return put_model(t, def->content); + + return TRUE; } } @@ -1897,7 +2096,7 @@ dtd_prop_elements(dtd *dtd, term_t prop) term_t head = PL_new_term_ref(); term_t et = PL_new_term_ref(); dtd_element *e; - + for( e=dtd->elements; e; e=e->next ) { put_atom_wchars(et, e->name->name); if ( 
!PL_unify_list(tail, head, tail) || @@ -1937,14 +2136,14 @@ dtd_prop_element(dtd *dtd, term_t name, term_t omit, term_t content) if ( !get_element(dtd, name, &e) || !(def=e->structure) ) return FALSE; - + if ( !PL_unify_term(omit, PL_FUNCTOR, FUNCTOR_omit2, PL_ATOM, def->omit_open ? ATOM_true : ATOM_false, PL_ATOM, def->omit_close ? ATOM_true : ATOM_false) ) return FALSE; - put_content(model, def); - return PL_unify(content, model); + return ( put_content(model, def) && + PL_unify(content, model) ); } @@ -1958,7 +2157,7 @@ dtd_prop_attributes(dtd *dtd, term_t ename, term_t atts) if ( !get_element(dtd, ename, &e) ) return FALSE; - + for(al=e->attributes; al; al=al->next) { put_atom_wchars(elem, al->attribute->name->name); @@ -1978,7 +2177,7 @@ typedef struct _plattrdef atom_t atom; /* name as atom */ } plattrdef; -static plattrdef plattrs[] = +static plattrdef plattrs[] = { { AT_CDATA, "cdata", FALSE }, { AT_ENTITY, "entity", FALSE }, @@ -2020,21 +2219,22 @@ unify_attribute_type(term_t type, dtd_attr *a) if ( a->type == AT_NAMEOF || a->type == AT_NOTATION ) { dtd_name_list *nl; - term_t tail = PL_new_term_ref(); - term_t head = PL_new_term_ref(); - term_t elem = PL_new_term_ref(); + term_t tail, head, elem; - if ( !PL_unify_functor(type, + if ( !(tail = PL_new_term_ref()) || + !(head = PL_new_term_ref()) || + !(elem = PL_new_term_ref()) || + !PL_unify_functor(type, a->type == AT_NAMEOF ? 
FUNCTOR_nameof1 : FUNCTOR_notation1) ) return FALSE; - PL_get_arg(1, type, tail); + + _PL_get_arg(1, type, tail); for(nl = a->typeex.nameof; nl; nl = nl->next) - { put_atom_wchars(elem, nl->value->name); - - if ( !PL_unify_list(tail, head, tail) || + { if ( !put_atom_wchars(elem, nl->value->name) || + !PL_unify_list(tail, head, tail) || !PL_unify(head, elem) ) return FALSE; } @@ -2067,9 +2267,12 @@ unify_attribute_default(term_t defval, dtd_attr *a) v = PL_unify_functor(defval, FUNCTOR_fixed1); common: if ( v ) - { term_t tmp = PL_new_term_ref(); + { term_t tmp; - PL_get_arg(1, defval, tmp); + if ( !(tmp=PL_new_term_ref()) ) + return FALSE; + + _PL_get_arg(1, defval, tmp); switch( a->type ) { case AT_CDATA: return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.cdata); @@ -2116,8 +2319,8 @@ dtd_prop_attribute(dtd *dtd, term_t ename, term_t aname, return FALSE; } - } - + } + return FALSE; } @@ -2128,7 +2331,7 @@ dtd_prop_entities(dtd *dtd, term_t list) term_t head = PL_new_term_ref(); term_t et = PL_new_term_ref(); dtd_entity *e; - + for( e=dtd->entities; e; e=e->next ) { put_atom_wchars(et, e->name->name); if ( !PL_unify_list(tail, head, tail) || @@ -2206,7 +2409,7 @@ dtd_prop_notations(dtd *dtd, term_t list) { if ( PL_unify_list(tail, head, tail) && PL_unify_wchars(head, PL_ATOM, wcslen(n->name->name), n->name->name) ) continue; - + return FALSE; } @@ -2278,7 +2481,7 @@ static prop dtd_props[] = static void -initprops(void) +initprops() { static int done = FALSE; if ( !done ) @@ -2319,7 +2522,7 @@ pl_dtd_property(term_t ref, term_t property) int i; for(i=0; iarity; i++) - PL_get_arg(i+1, property, a+i); + _PL_get_arg(i+1, property, a+i); switch(p->arity) { case 1: @@ -2361,7 +2564,7 @@ pl_sgml_register_catalog_file(term_t file, term_t where) loc = CTL_END; else return sgml2pl_error(ERR_DOMAIN, "location", where); - + return register_catalog_file(fn, loc); } @@ -2376,9 +2579,11 @@ extern void sgml_statistics(void); #endif install_t -install(void) +install() { 
initConstants(); + init_ring(); + PL_register_foreign("new_dtd", 2, pl_new_dtd, 0); PL_register_foreign("free_dtd", 1, pl_free_dtd, 0); PL_register_foreign("new_sgml_parser", 2, pl_new_sgml_parser, 0); @@ -2390,8 +2595,8 @@ install(void) PL_FA_TRANSPARENT); PL_register_foreign("_sgml_register_catalog_file", 2, pl_sgml_register_catalog_file, 0); - PL_register_foreign("$dtd_property", 2, pl_dtd_property, - 0); + + PL_register_foreign("$dtd_property", 2, pl_dtd_property, 0); install_xml_quote(); #ifdef O_STATISTICS diff --git a/packages/sgml/utf8.c b/packages/sgml/utf8.c index a5189c3bd..68e90143e 100644 --- a/packages/sgml/utf8.c +++ b/packages/sgml/utf8.c @@ -61,7 +61,7 @@ sgml__utf8_get_char(const char *in, int *chr) } *chr = *in; - + return (char *)in+1; } diff --git a/packages/sgml/util.c b/packages/sgml/util.c index 001c2349c..66253a8ec 100644 --- a/packages/sgml/util.c +++ b/packages/sgml/util.c @@ -26,7 +26,6 @@ #define UTIL_H_IMPLEMENTATION #include "util.h" -#include #include #include #include @@ -50,7 +49,7 @@ size_t istrlen(const ichar *s) { size_t len =0; - + while(*s++) len++; @@ -67,7 +66,7 @@ istrdup(const ichar *s) while(*s) *d++ = *s++; *d = 0; - + return dup; } else { return NULL; @@ -140,10 +139,10 @@ int istreq(const ichar *s1, const ichar *s2) { while(*s1 && *s1 == *s2) s1++, s2++; - + if ( *s1 == 0 && *s2 == 0 ) return TRUE; - + return FALSE; } @@ -152,10 +151,10 @@ int istrncaseeq(const ichar *s1, const ichar *s2, int len) { while(--len >= 0 && towlower(*s1) == towlower(*s2)) s1++, s2++; - + if ( len < 0 ) return TRUE; - + return FALSE; } @@ -164,10 +163,10 @@ int istrprefix(const ichar *pref, const ichar *s) { while(*pref && *pref == *s) pref++, s++; - + if ( *pref == 0 ) return TRUE; - + return FALSE; } @@ -212,7 +211,7 @@ istrhash(const ichar *t, int tsize) while(*t) { unsigned int c = *t++; - + c -= 'a'; value ^= c << (shift & 0xf); shift ^= c; @@ -231,7 +230,7 @@ istrcasehash(const ichar *t, int tsize) while(*t) { unsigned int c = 
towlower(*t++); /* case insensitive */ - + c -= 'a'; value ^= c << (shift & 0xf); shift ^= c; @@ -301,7 +300,7 @@ __add_icharbuf(icharbuf *buf, int chr) else buf->data = sgml_malloc(buf->allocated*sizeof(ichar)); } - + buf->data[buf->size++] = chr; } @@ -349,7 +348,7 @@ init_ocharbuf(ocharbuf *buf) ocharbuf * new_ocharbuf() { ocharbuf *buf = sgml_malloc(sizeof(*buf)); - + return init_ocharbuf(buf); } @@ -436,24 +435,76 @@ empty_ocharbuf(ocharbuf *buf) *******************************/ #define RINGSIZE 16 -static void *ring[RINGSIZE]; -static int ringp; + +typedef struct ring +{ void *ring[RINGSIZE]; + int ringp; +} ring; + +#ifdef _REENTRANT +#include +static pthread_key_t ring_key; + +static void +free_ring(void *ptr) +{ ring *r = ptr; + int i; + void **bp; + + for(i=0, bp=r->ring; iring[r->ringp] ) + sgml_free(r->ring[r->ringp]); + r->ring[r->ringp++] = copy; + if ( r->ringp == RINGSIZE ) + r->ringp = 0; return copy; } @@ -461,13 +512,19 @@ str2ring(const wchar_t *in) void * ringallo(size_t size) -{ char *result = sgml_malloc(size); - - if ( ring[ringp] ) - sgml_free(ring[ringp]); - ring[ringp++] = result; - if ( ringp == RINGSIZE ) - ringp = 0; +{ ring *r; + char *result; + + if ( !(r=my_ring()) || !(result = sgml_malloc(size)) ) + { sgml_nomem(); + return NULL; + } + + if ( r->ring[r->ringp] ) + sgml_free(r->ring[r->ringp]); + r->ring[r->ringp++] = result; + if ( r->ringp == RINGSIZE ) + r->ringp = 0; return result; } @@ -529,7 +586,7 @@ wcstoutf8(const wchar_t *in) { size++; } } - + rc = sgml_malloc(size+1); for(o=rc, s=in; *s; s++) { o = utf8_put_char(o, *s); @@ -605,7 +662,7 @@ load_sgml_file_to_charp(const ichar *file, int normalise_rsre, size_t *length) if ( r ) { char *s = r; - + while(len>0) { int n; @@ -652,7 +709,7 @@ load_sgml_file_to_charp(const ichar *file, int normalise_rsre, size_t *length) if ( last_is_lf ) r2[--len] = '\0'; /* delete last LF */ - + if ( length ) *length = len; sgml_free(r); diff --git a/packages/sgml/util.h b/packages/sgml/util.h 
index db01138cb..8ae7663e9 100644 --- a/packages/sgml/util.h +++ b/packages/sgml/util.h @@ -34,16 +34,16 @@ #include #endif -typedef struct +typedef struct { int allocated; int size; ichar *data; } icharbuf; -typedef struct +typedef struct { int allocated; int size; - union + union { wchar_t *w; /* UCS */ } data; wchar_t localbuf[256]; /* Initial local store */ @@ -98,6 +98,7 @@ void empty_ocharbuf(ocharbuf *buf); { buf->data.w[at] = chr; \ } +void init_ring(void); const wchar_t * str_summary(const wchar_t *s, int len); wchar_t * str2ring(const wchar_t *in); void * ringallo(size_t); @@ -107,8 +108,6 @@ ichar * load_sgml_file_to_charp(const ichar *file, int normalise_rsre, size_t *len); FILE * wfopen(const wchar_t *name, const char *mode); -void wputs(ichar *s); - #if defined(USE_STRING_FUNCTIONS) && !defined(UTIL_H_IMPLEMENTATION) #define istrlen(s1) wcslen((s1)) diff --git a/packages/sgml/xml_unicode.c b/packages/sgml/xml_unicode.c index f6ecb19a3..1df69405c 100644 --- a/packages/sgml/xml_unicode.c +++ b/packages/sgml/xml_unicode.c @@ -29,8 +29,6 @@ the GNU General Public License. */ -#include "xml_unicode.h" - /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - XML character classification. diff --git a/packages/sgml/xmlns.c b/packages/sgml/xmlns.c index 30b31b7a2..444babd93 100644 --- a/packages/sgml/xmlns.c +++ b/packages/sgml/xmlns.c @@ -29,35 +29,36 @@ #ifdef XMLNS -static xmlns * +xmlns * xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url) { sgml_environment *env = p->environments; dtd_symbol *n = (*ns ? 
dtd_add_symbol(p->dtd, ns) : (dtd_symbol *)NULL); dtd_symbol *u = dtd_add_symbol(p->dtd, url); /* TBD: ochar/ichar */ + xmlns *x = sgml_malloc(sizeof(*x)); - if ( p->on_xmlns ) - (*p->on_xmlns)(p, n, u); + x->name = n; + x->url = u; if ( env ) - { xmlns *x = sgml_malloc(sizeof(*n)); + { if ( p->on_xmlns ) + (*p->on_xmlns)(p, n, u); - x->name = n; - x->url = u; x->next = env->xmlns; env->xmlns = x; - - return x; + } else + { x->next = p->xmlns; + p->xmlns = x; } - return NULL; + return x; } void -xmlns_free(sgml_environment *env) -{ xmlns *n, *next; +xmlns_free(xmlns *n) +{ xmlns *next; - for(n = env->xmlns; n; n = next) + for(; n; n = next) { next = n->next; sgml_free(n); @@ -66,16 +67,22 @@ xmlns_free(sgml_environment *env) xmlns * -xmlns_find(sgml_environment *env, dtd_symbol *ns) -{ for(; env; env = env->parent) - { xmlns *n; +xmlns_find(dtd_parser *p, dtd_symbol *ns) +{ sgml_environment *env = p->environments; + xmlns *n; - for(n=env->xmlns; n; n = n->next) + for(; env; env = env->parent) + { for(n=env->xmlns; n; n = n->next) { if ( n->name == ns ) return n; } } + for (n=p->xmlns; n; n = n->next) + { if ( n->name == ns ) + return n; + } + return NULL; } @@ -97,7 +104,7 @@ void update_xmlns(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts) { dtd_attr_list *al; int nschr = p->dtd->charfunc->func[CF_NS]; /* : */ - + for(al=e->attributes; al; al=al->next) { dtd_attr *a = al->attribute; const ichar *name = a->name->name; @@ -123,7 +130,7 @@ update_xmlns(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts) xmlns_resolve() Convert a symbol as returned by the XML level-1.0 parser to its namespace tuple {url}localname. This function is not used internally, but provided - for use from the call-back functions of the parser. + for use from the call-back functions of the parser. 
It exploits the stack of namespace-environments managed by the parser itself (see update_xmlns()) @@ -150,7 +157,7 @@ xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id, if ( istrprefix(L"xml", buf) ) /* XML reserved namespaces */ { *url = n->name; return TRUE; - } else if ( (ns = xmlns_find(p->environments, n)) ) + } else if ( (ns = xmlns_find(p, n)) ) { if ( ns->url->name[0] ) *url = ns->url->name; else @@ -158,7 +165,9 @@ xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id, return TRUE; } else { *url = n->name; /* undefined namespace */ - gripe(ERC_EXISTENCE, L"namespace", n->name); + if ( p->xml_no_ns == NONS_QUIET ) + return TRUE; + gripe(p, ERC_EXISTENCE, L"namespace", n->name); return FALSE; } } @@ -195,16 +204,16 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url) ichar *o = buf; const ichar *s; xmlns *ns; - + for(s=id->name; *s; s++) { if ( *s == nschr ) /* explicit namespace */ { dtd_symbol *n; - + *o = '\0'; *local = s+1; n = dtd_add_symbol(dtd, buf); - if ( (ns = xmlns_find(p->environments, n)) ) + if ( (ns = xmlns_find(p, n)) ) { if ( ns->url->name[0] ) *url = ns->url->name; else @@ -213,17 +222,19 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url) return TRUE; } else { *url = n->name; /* undefined namespace */ - gripe(ERC_EXISTENCE, "namespace", n->name); e->thisns = xmlns_push(p, n->name, n->name); /* define implicitly */ + if ( p->xml_no_ns == NONS_QUIET ) + return TRUE; + gripe(p, ERC_EXISTENCE, L"namespace", n->name); return FALSE; } } *o++ = *s; } - + *local = id->name; - - if ( (ns = xmlns_find(p->environments, NULL)) ) + + if ( (ns = xmlns_find(p, NULL)) ) { if ( ns->url->name[0] ) *url = ns->url->name; else diff --git a/packages/sgml/xmlns.h b/packages/sgml/xmlns.h index 51a1579ef..b222d4088 100644 --- a/packages/sgml/xmlns.h +++ b/packages/sgml/xmlns.h @@ -31,8 +31,9 @@ typedef struct _xmlns struct _xmlns *next; /* next name */ } xmlns; -void xmlns_free(sgml_environment *env); 
-xmlns* xmlns_find(sgml_environment *env, dtd_symbol *ns); +void xmlns_free(xmlns *list); +xmlns* xmlns_find(dtd_parser *p, dtd_symbol *ns); +xmlns * xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url); void update_xmlns(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts); int xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,