update sgml package.
This commit is contained in:
parent
0fabe2b9c6
commit
261b5163c7
@ -155,7 +155,7 @@ localpath(const ichar *ref, const ichar *name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int
|
int
|
||||||
register_catalog_file_unlocked(const ichar *file, catalog_location where)
|
register_catalog_file_unlocked(const ichar *file, catalog_location where)
|
||||||
{ catalog_file **f = &catalog;
|
{ catalog_file **f = &catalog;
|
||||||
catalog_file *cf;
|
catalog_file *cf;
|
||||||
@ -205,7 +205,7 @@ wgetenv(const char *name)
|
|||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
init_catalog(void)
|
init_catalog()
|
||||||
{ static int done = FALSE;
|
{ static int done = FALSE;
|
||||||
|
|
||||||
LOCK();
|
LOCK();
|
||||||
@ -241,7 +241,7 @@ init_catalog(void)
|
|||||||
int
|
int
|
||||||
register_catalog_file(const ichar *file, catalog_location where)
|
register_catalog_file(const ichar *file, catalog_location where)
|
||||||
{ int rc;
|
{ int rc;
|
||||||
|
|
||||||
init_catalog();
|
init_catalog();
|
||||||
|
|
||||||
LOCK();
|
LOCK();
|
||||||
@ -310,7 +310,7 @@ cs_streql(ichar const *a, ichar const *b)
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
scan_overflow(size_t buflen)
|
scan_overflow(size_t buflen)
|
||||||
{ gripe(ERC_REPRESENTATION, L"token length");
|
{ gripe(NULL, ERC_REPRESENTATION, L"token length");
|
||||||
|
|
||||||
return EOF;
|
return EOF;
|
||||||
}
|
}
|
||||||
@ -439,7 +439,7 @@ load_one_catalogue(catalog_file * file)
|
|||||||
int override = 0;
|
int override = 0;
|
||||||
|
|
||||||
if ( !src )
|
if ( !src )
|
||||||
{ gripe(ERC_NO_CATALOGUE, file->file);
|
{ gripe(NULL, ERC_NO_CATALOGUE, file->file);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -514,7 +514,7 @@ load_one_catalogue(catalog_file * file)
|
|||||||
|
|
||||||
To look up a parameter entity:
|
To look up a parameter entity:
|
||||||
f = find_in_catalogue(CAT_PENTITY, name, pubid, sysid, ci);
|
f = find_in_catalogue(CAT_PENTITY, name, pubid, sysid, ci);
|
||||||
The name may begin with a % but need not; if it doesn't
|
The name may begin with a % but need not; if it doesn't
|
||||||
a % will be prefixed for the search.
|
a % will be prefixed for the search.
|
||||||
If it cannot otherwise be found ${name}.pen will be returned.
|
If it cannot otherwise be found ${name}.pen will be returned.
|
||||||
|
|
||||||
@ -635,7 +635,7 @@ find_in_catalogue(int kind,
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if ( istrlen(name)+4+1 > penlen )
|
if ( istrlen(name)+4+1 > penlen )
|
||||||
{ gripe(ERC_REPRESENTATION, L"entity name");
|
{ gripe(NULL, ERC_REPRESENTATION, L"entity name");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@ new_charclass()
|
|||||||
char_range(map, 'a', 'z', CH_LCLETTER);
|
char_range(map, 'a', 'z', CH_LCLETTER);
|
||||||
char_range(map, 'A', 'Z', CH_LCLETTER);
|
char_range(map, 'A', 'Z', CH_LCLETTER);
|
||||||
char_range(map, '0', '9', CH_DIGIT);
|
char_range(map, '0', '9', CH_DIGIT);
|
||||||
|
|
||||||
ca['.'] |= CH_CNM;
|
ca['.'] |= CH_CNM;
|
||||||
ca['-'] |= CH_CNM;
|
ca['-'] |= CH_CNM;
|
||||||
ca[183] |= CH_CNM; /* XML */
|
ca[183] |= CH_CNM; /* XML */
|
||||||
|
@ -466,7 +466,7 @@ dtd * new_dtd(const ichar *doctype);
|
|||||||
int set_dialect_dtd(dtd *dtd, dtd_dialect dialect);
|
int set_dialect_dtd(dtd *dtd, dtd_dialect dialect);
|
||||||
int set_option_dtd(dtd *dtd, dtd_option option, int set);
|
int set_option_dtd(dtd *dtd, dtd_option option, int set);
|
||||||
|
|
||||||
void putchar_dtd_parser(dtd_parser *p, int chr);
|
int putchar_dtd_parser(dtd_parser *p, int chr);
|
||||||
int begin_document_dtd_parser(dtd_parser *p);
|
int begin_document_dtd_parser(dtd_parser *p);
|
||||||
int end_document_dtd_parser(dtd_parser *p);
|
int end_document_dtd_parser(dtd_parser *p);
|
||||||
void reset_document_dtd_parser(dtd_parser *p);
|
void reset_document_dtd_parser(dtd_parser *p);
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
#include "dtd.h"
|
#include "dtd.h"
|
||||||
|
#include "util.h"
|
||||||
#include "prolog.h"
|
#include "prolog.h"
|
||||||
|
|
||||||
#define streq(s,q) strcmp((s), (q)) == 0
|
#define streq(s,q) strcmp((s), (q)) == 0
|
||||||
@ -42,10 +43,12 @@ int
|
|||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
{ dtd_dialect dialect = DL_SGML;
|
{ dtd_dialect dialect = DL_SGML;
|
||||||
|
|
||||||
|
init_ring();
|
||||||
|
|
||||||
program = argv[0];
|
program = argv[0];
|
||||||
argv++;
|
argv++;
|
||||||
argc--;
|
argc--;
|
||||||
|
|
||||||
while(argc > 0 && argv[0][0] == '-')
|
while(argc > 0 && argv[0][0] == '-')
|
||||||
{ if ( streq(argv[0], "-xml") )
|
{ if ( streq(argv[0], "-xml") )
|
||||||
{ dialect = DL_XML;
|
{ dialect = DL_XML;
|
||||||
@ -63,7 +66,7 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
if ( argc == 1 )
|
if ( argc == 1 )
|
||||||
{ int wl = mbstowcs(NULL, argv[0], 0);
|
{ int wl = mbstowcs(NULL, argv[0], 0);
|
||||||
|
|
||||||
if ( wl > 0 )
|
if ( wl > 0 )
|
||||||
{ wchar_t *ws = malloc((wl+1)*sizeof(wchar_t));
|
{ wchar_t *ws = malloc((wl+1)*sizeof(wchar_t));
|
||||||
dtd *dtd;
|
dtd *dtd;
|
||||||
|
@ -3,9 +3,9 @@
|
|||||||
Part of SWI-Prolog
|
Part of SWI-Prolog
|
||||||
|
|
||||||
Author: Jan Wielemaker
|
Author: Jan Wielemaker
|
||||||
E-mail: jan@swi.psy.uva.nl
|
E-mail: J.Wielemaker@cs.vu.nl
|
||||||
WWW: http://www.swi-prolog.org
|
WWW: http://www.swi-prolog.org
|
||||||
Copyright (C): 1985-2002, University of Amsterdam
|
Copyright (C): 1985-2009, University of Amsterdam
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU Lesser General Public
|
modify it under the terms of the GNU Lesser General Public
|
||||||
@ -32,48 +32,52 @@
|
|||||||
|
|
||||||
int
|
int
|
||||||
sgml2pl_error(plerrorid id, ...)
|
sgml2pl_error(plerrorid id, ...)
|
||||||
{ term_t except = PL_new_term_ref();
|
{ int rc;
|
||||||
term_t formal = PL_new_term_ref();
|
term_t except, formal, swi;
|
||||||
term_t swi = PL_new_term_ref();
|
|
||||||
va_list args;
|
va_list args;
|
||||||
char msgbuf[1024];
|
char msgbuf[1024];
|
||||||
char *msg = NULL;
|
char *msg = NULL;
|
||||||
|
|
||||||
|
if ( !(except = PL_new_term_ref()) ||
|
||||||
|
!(formal = PL_new_term_ref()) ||
|
||||||
|
!(swi = PL_new_term_ref()) )
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
va_start(args, id);
|
va_start(args, id);
|
||||||
switch(id)
|
switch(id)
|
||||||
{ case ERR_ERRNO:
|
{ case ERR_ERRNO:
|
||||||
{ int err = va_arg(args, int);
|
{ int err = va_arg(args, int);
|
||||||
|
|
||||||
msg = strerror(err);
|
msg = strerror(err);
|
||||||
|
|
||||||
switch(err)
|
switch(err)
|
||||||
{ case ENOMEM:
|
{ case ENOMEM:
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "resource_error", 1,
|
PL_FUNCTOR_CHARS, "resource_error", 1,
|
||||||
PL_CHARS, "no_memory");
|
PL_CHARS, "no_memory");
|
||||||
break;
|
break;
|
||||||
case EACCES:
|
case EACCES:
|
||||||
{ const char *file = va_arg(args, const char *);
|
{ const char *file = va_arg(args, const char *);
|
||||||
const char *action = va_arg(args, const char *);
|
const char *action = va_arg(args, const char *);
|
||||||
|
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "permission_error", 3,
|
PL_FUNCTOR_CHARS, "permission_error", 3,
|
||||||
PL_CHARS, action,
|
PL_CHARS, action,
|
||||||
PL_CHARS, "file",
|
PL_CHARS, "file",
|
||||||
PL_CHARS, file);
|
PL_CHARS, file);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ENOENT:
|
case ENOENT:
|
||||||
{ const char *file = va_arg(args, const char *);
|
{ const char *file = va_arg(args, const char *);
|
||||||
|
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "existence_error", 2,
|
PL_FUNCTOR_CHARS, "existence_error", 2,
|
||||||
PL_CHARS, "file",
|
PL_CHARS, "file",
|
||||||
PL_CHARS, file);
|
PL_CHARS, file);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
PL_unify_atom_chars(formal, "system_error");
|
rc = PL_unify_atom_chars(formal, "system_error");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -84,12 +88,12 @@ sgml2pl_error(plerrorid id, ...)
|
|||||||
|
|
||||||
if ( PL_is_variable(actual) &&
|
if ( PL_is_variable(actual) &&
|
||||||
strcmp(expected, "variable") != 0 )
|
strcmp(expected, "variable") != 0 )
|
||||||
PL_unify_atom_chars(formal, "instantiation_error");
|
rc = PL_unify_atom_chars(formal, "instantiation_error");
|
||||||
else
|
else
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "type_error", 2,
|
PL_FUNCTOR_CHARS, "type_error", 2,
|
||||||
PL_CHARS, expected,
|
PL_CHARS, expected,
|
||||||
PL_TERM, actual);
|
PL_TERM, actual);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ERR_DOMAIN:
|
case ERR_DOMAIN:
|
||||||
@ -97,31 +101,31 @@ sgml2pl_error(plerrorid id, ...)
|
|||||||
term_t actual = va_arg(args, term_t);
|
term_t actual = va_arg(args, term_t);
|
||||||
|
|
||||||
if ( PL_is_variable(actual) )
|
if ( PL_is_variable(actual) )
|
||||||
PL_unify_atom_chars(formal, "instantiation_error");
|
rc = PL_unify_atom_chars(formal, "instantiation_error");
|
||||||
else
|
else
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "domain_error", 2,
|
PL_FUNCTOR_CHARS, "domain_error", 2,
|
||||||
PL_CHARS, expected,
|
PL_CHARS, expected,
|
||||||
PL_TERM, actual);
|
PL_TERM, actual);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ERR_EXISTENCE:
|
case ERR_EXISTENCE:
|
||||||
{ const char *type = va_arg(args, const char *);
|
{ const char *type = va_arg(args, const char *);
|
||||||
term_t obj = va_arg(args, term_t);
|
term_t obj = va_arg(args, term_t);
|
||||||
|
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "existence_error", 2,
|
PL_FUNCTOR_CHARS, "existence_error", 2,
|
||||||
PL_CHARS, type,
|
PL_CHARS, type,
|
||||||
PL_TERM, obj);
|
PL_TERM, obj);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ERR_FAIL:
|
case ERR_FAIL:
|
||||||
{ term_t goal = va_arg(args, term_t);
|
{ term_t goal = va_arg(args, term_t);
|
||||||
|
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "goal_failed", 1,
|
PL_FUNCTOR_CHARS, "goal_failed", 1,
|
||||||
PL_TERM, goal);
|
PL_TERM, goal);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -129,10 +133,10 @@ sgml2pl_error(plerrorid id, ...)
|
|||||||
{ const char *limit = va_arg(args, const char *);
|
{ const char *limit = va_arg(args, const char *);
|
||||||
long maxval = va_arg(args, long);
|
long maxval = va_arg(args, long);
|
||||||
|
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "limit_exceeded", 2,
|
PL_FUNCTOR_CHARS, "limit_exceeded", 2,
|
||||||
PL_CHARS, limit,
|
PL_CHARS, limit,
|
||||||
PL_LONG, maxval);
|
PL_LONG, maxval);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -142,10 +146,10 @@ sgml2pl_error(plerrorid id, ...)
|
|||||||
|
|
||||||
vsprintf(msgbuf, fmt, args);
|
vsprintf(msgbuf, fmt, args);
|
||||||
msg = msgbuf;
|
msg = msgbuf;
|
||||||
|
|
||||||
PL_unify_term(formal,
|
rc = PL_unify_term(formal,
|
||||||
PL_FUNCTOR_CHARS, "miscellaneous", 1,
|
PL_FUNCTOR_CHARS, "miscellaneous", 1,
|
||||||
PL_CHARS, id);
|
PL_CHARS, id);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -153,26 +157,29 @@ sgml2pl_error(plerrorid id, ...)
|
|||||||
}
|
}
|
||||||
va_end(args);
|
va_end(args);
|
||||||
|
|
||||||
if ( msg )
|
if ( rc && msg )
|
||||||
{ term_t predterm = PL_new_term_ref();
|
{ term_t predterm = PL_new_term_ref();
|
||||||
term_t msgterm = PL_new_term_ref();
|
term_t msgterm = PL_new_term_ref();
|
||||||
|
|
||||||
if ( msg )
|
if ( !(predterm = PL_new_term_ref()) ||
|
||||||
{ PL_put_atom_chars(msgterm, msg);
|
!(msgterm = PL_new_term_ref()) ||
|
||||||
}
|
!PL_put_atom_chars(msgterm, msg) ||
|
||||||
|
!PL_unify_term(swi,
|
||||||
PL_unify_term(swi,
|
PL_FUNCTOR_CHARS, "context", 2,
|
||||||
PL_FUNCTOR_CHARS, "context", 2,
|
PL_TERM, predterm,
|
||||||
PL_TERM, predterm,
|
PL_TERM, msgterm) )
|
||||||
PL_TERM, msgterm);
|
rc = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
PL_unify_term(except,
|
if ( rc )
|
||||||
PL_FUNCTOR_CHARS, "error", 2,
|
rc = PL_unify_term(except,
|
||||||
PL_TERM, formal,
|
PL_FUNCTOR_CHARS, "error", 2,
|
||||||
PL_TERM, swi);
|
PL_TERM, formal,
|
||||||
|
PL_TERM, swi);
|
||||||
|
|
||||||
|
if ( rc )
|
||||||
|
return PL_raise_exception(except);
|
||||||
|
|
||||||
return PL_raise_exception(except);
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,4 +44,3 @@ typedef enum
|
|||||||
int sgml2pl_error(plerrorid, ...);
|
int sgml2pl_error(plerrorid, ...);
|
||||||
|
|
||||||
#endif /*H_ERROR_INCLUDED*/
|
#endif /*H_ERROR_INCLUDED*/
|
||||||
|
|
||||||
|
@ -107,7 +107,7 @@ visit(dtd_state *state, visited *visited)
|
|||||||
{ if ( visited->states[i] == state )
|
{ if ( visited->states[i] == state )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( visited->size >= MAX_VISITED )
|
if ( visited->size >= MAX_VISITED )
|
||||||
{ fprintf(stderr, "Reached MAX_VISITED!\n");
|
{ fprintf(stderr, "Reached MAX_VISITED!\n");
|
||||||
return FALSE;
|
return FALSE;
|
||||||
@ -262,7 +262,7 @@ do_find_omitted_path(dtd_state *state, dtd_element *e,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
find_omitted_path(dtd_state *state, dtd_element *e, dtd_element **path)
|
find_omitted_path(dtd_state *state, dtd_element *e, dtd_element **path)
|
||||||
{ int pl = 0;
|
{ int pl = 0;
|
||||||
visited visited;
|
visited visited;
|
||||||
@ -314,13 +314,13 @@ static transition *
|
|||||||
state_transitions(dtd_state *state)
|
state_transitions(dtd_state *state)
|
||||||
{ if ( !state->transitions && state->expander )
|
{ if ( !state->transitions && state->expander )
|
||||||
{ expander *ex = state->expander;
|
{ expander *ex = state->expander;
|
||||||
|
|
||||||
switch(ex->type)
|
switch(ex->type)
|
||||||
{ case EX_AND:
|
{ case EX_AND:
|
||||||
{ dtd_model_list *left = ex->kind.and.set;
|
{ dtd_model_list *left = ex->kind.and.set;
|
||||||
|
|
||||||
if ( !left ) /* empty AND (should not happen) */
|
if ( !left ) /* empty AND (should not happen) */
|
||||||
{ link(state, ex->target, NULL);
|
{ link(state, ex->target, NULL);
|
||||||
} else if ( !left->next ) /* only one left */
|
} else if ( !left->next ) /* only one left */
|
||||||
{ translate_model(left->model, state, ex->target);
|
{ translate_model(left->model, state, ex->target);
|
||||||
} else
|
} else
|
||||||
@ -378,7 +378,7 @@ translate_one(dtd_model *m, dtd_state *from, dtd_state *to)
|
|||||||
|
|
||||||
ex->target = to;
|
ex->target = to;
|
||||||
ex->type = EX_AND;
|
ex->type = EX_AND;
|
||||||
|
|
||||||
for( sub = m->content.group; sub; sub = sub->next )
|
for( sub = m->content.group; sub; sub = sub->next )
|
||||||
add_model_list(&ex->kind.and.set, sub);
|
add_model_list(&ex->kind.and.set, sub);
|
||||||
|
|
||||||
@ -436,7 +436,7 @@ make_state_engine(dtd_element *e)
|
|||||||
{ if ( def->content )
|
{ if ( def->content )
|
||||||
{ def->initial_state = new_dtd_state();
|
{ def->initial_state = new_dtd_state();
|
||||||
def->final_state = new_dtd_state();
|
def->final_state = new_dtd_state();
|
||||||
|
|
||||||
translate_model(def->content, def->initial_state, def->final_state);
|
translate_model(def->content, def->initial_state, def->final_state);
|
||||||
} else if ( def->type == C_CDATA || def->type == C_RCDATA )
|
} else if ( def->type == C_CDATA || def->type == C_RCDATA )
|
||||||
{ def->initial_state = new_dtd_state();
|
{ def->initial_state = new_dtd_state();
|
||||||
@ -450,7 +450,7 @@ make_state_engine(dtd_element *e)
|
|||||||
|
|
||||||
return def->initial_state;
|
return def->initial_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -492,7 +492,7 @@ free_expander(expander *e, visited *visited)
|
|||||||
static void
|
static void
|
||||||
do_free_state_engine(dtd_state *state, visited *visited)
|
do_free_state_engine(dtd_state *state, visited *visited)
|
||||||
{ transition *t, *next;
|
{ transition *t, *next;
|
||||||
|
|
||||||
for(t=state->transitions; t; t=next)
|
for(t=state->transitions; t; t=next)
|
||||||
{ next = t->next;
|
{ next = t->next;
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -145,6 +145,12 @@ typedef enum
|
|||||||
DM_DATA /* Environment has only elements */
|
DM_DATA /* Environment has only elements */
|
||||||
} data_mode;
|
} data_mode;
|
||||||
|
|
||||||
|
#ifdef XMLNS
|
||||||
|
typedef enum
|
||||||
|
{ NONS_ERROR = 0,
|
||||||
|
NONS_QUIET
|
||||||
|
} xmlnons;
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct _sgml_environment
|
typedef struct _sgml_environment
|
||||||
{ dtd_element *element; /* element that opened the env */
|
{ dtd_element *element; /* element that opened the env */
|
||||||
@ -201,6 +207,10 @@ typedef struct _dtd_parser
|
|||||||
dtd_srcloc startcdata; /* Start of last cdata */
|
dtd_srcloc startcdata; /* Start of last cdata */
|
||||||
dtd_symbol *enforce_outer_element; /* Outer element to look for */
|
dtd_symbol *enforce_outer_element; /* Outer element to look for */
|
||||||
sgml_event_class event_class; /* EV_* */
|
sgml_event_class event_class; /* EV_* */
|
||||||
|
xmlnons xml_no_ns; /* What if namespace does not exist? */
|
||||||
|
#ifdef XMLNS
|
||||||
|
struct _xmlns *xmlns; /* Outer xmlns declaration */
|
||||||
|
#endif
|
||||||
|
|
||||||
void *closure; /* client handle */
|
void *closure; /* client handle */
|
||||||
sgml_begin_element_f on_begin_element; /* start an element */
|
sgml_begin_element_f on_begin_element; /* start an element */
|
||||||
@ -221,7 +231,7 @@ typedef struct _dtd_parser
|
|||||||
#include "xmlns.h"
|
#include "xmlns.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern int gripe(dtd_error_id e, ...);
|
extern int gripe(dtd_parser *p, dtd_error_id e, ...);
|
||||||
|
|
||||||
#define SGML_SUB_DOCUMENT 0x1
|
#define SGML_SUB_DOCUMENT 0x1
|
||||||
|
|
||||||
|
@ -342,7 +342,7 @@ prolog_print_attribute(dtd_element *e, dtd_attr *at)
|
|||||||
printf("list(nutoken)");
|
printf("list(nutoken)");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf(", "); /* print default */
|
printf(", "); /* print default */
|
||||||
switch(at->def)
|
switch(at->def)
|
||||||
{ case AT_REQUIRED:
|
{ case AT_REQUIRED:
|
||||||
@ -427,7 +427,7 @@ prolog_print_element(dtd_element *e, unsigned int flags)
|
|||||||
|
|
||||||
if ( def->excluded )
|
if ( def->excluded )
|
||||||
{ dtd_element_list *el;
|
{ dtd_element_list *el;
|
||||||
|
|
||||||
for(el = def->excluded; el; el=el->next)
|
for(el = def->excluded; el; el=el->next)
|
||||||
wprintf(L"exclude(%ls, %ls).\n",
|
wprintf(L"exclude(%ls, %ls).\n",
|
||||||
atom(e->name->name),
|
atom(e->name->name),
|
||||||
@ -435,7 +435,7 @@ prolog_print_element(dtd_element *e, unsigned int flags)
|
|||||||
}
|
}
|
||||||
if ( def->included )
|
if ( def->included )
|
||||||
{ dtd_element_list *el;
|
{ dtd_element_list *el;
|
||||||
|
|
||||||
for(el = def->included; el; el=el->next)
|
for(el = def->included; el; el=el->next)
|
||||||
wprintf(L"include(%ls, %ls).\n",
|
wprintf(L"include(%ls, %ls).\n",
|
||||||
atom(e->name->name),
|
atom(e->name->name),
|
||||||
|
@ -26,14 +26,18 @@
|
|||||||
#include <SWI-Prolog.h>
|
#include <SWI-Prolog.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#ifdef HAVE_MALLOC_H
|
#ifdef HAVE_MALLOC_H
|
||||||
#include <malloc.h>
|
#include HAVE_MALLOC_H
|
||||||
#endif
|
#endif
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <wctype.h>
|
#include <wctype.h>
|
||||||
|
#include "xml_unicode.h"
|
||||||
#include "dtd.h"
|
#include "dtd.h"
|
||||||
|
#ifdef __WINDOWS__
|
||||||
|
#define inline __inline
|
||||||
|
#endif
|
||||||
|
|
||||||
static atom_t ATOM_iso_latin_1;
|
static atom_t ATOM_iso_latin_1;
|
||||||
static atom_t ATOM_utf8;
|
static atom_t ATOM_utf8;
|
||||||
@ -86,7 +90,7 @@ room_buf(charbuf *b, size_t room)
|
|||||||
b->end = b->bufp + used;
|
b->end = b->bufp + used;
|
||||||
}
|
}
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -178,11 +182,11 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
|
|||||||
if ( inA )
|
if ( inA )
|
||||||
{ for(s = (unsigned char*)inA ; len-- > 0; s++ )
|
{ for(s = (unsigned char*)inA ; len-- > 0; s++ )
|
||||||
{ int c = *s;
|
{ int c = *s;
|
||||||
|
|
||||||
if ( map[c] )
|
if ( map[c] )
|
||||||
{ if ( !add_str_buf(&buffer, map[c]) )
|
{ if ( !add_str_buf(&buffer, map[c]) )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
changes++;
|
changes++;
|
||||||
} else if ( c > maxchr )
|
} else if ( c > maxchr )
|
||||||
{ char buf[10];
|
{ char buf[10];
|
||||||
@ -190,7 +194,7 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
|
|||||||
sprintf(buf, "&#%d;", c);
|
sprintf(buf, "&#%d;", c);
|
||||||
if ( !add_str_buf(&buffer, buf) )
|
if ( !add_str_buf(&buffer, buf) )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
changes++;
|
changes++;
|
||||||
} else
|
} else
|
||||||
{ add_char_buf(&buffer, c);
|
{ add_char_buf(&buffer, c);
|
||||||
@ -204,11 +208,11 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
|
|||||||
} else
|
} else
|
||||||
{ for( ; len-- > 0; inW++ )
|
{ for( ; len-- > 0; inW++ )
|
||||||
{ int c = *inW;
|
{ int c = *inW;
|
||||||
|
|
||||||
if ( c <= 0xff && map[c] )
|
if ( c <= 0xff && map[c] )
|
||||||
{ if ( !add_str_bufW(&buffer, map[c]) )
|
{ if ( !add_str_bufW(&buffer, map[c]) )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
changes++;
|
changes++;
|
||||||
} else if ( c > maxchr )
|
} else if ( c > maxchr )
|
||||||
{ char buf[10];
|
{ char buf[10];
|
||||||
@ -216,13 +220,13 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
|
|||||||
sprintf(buf, "&#%d;", c);
|
sprintf(buf, "&#%d;", c);
|
||||||
if ( !add_str_bufW(&buffer, buf) )
|
if ( !add_str_bufW(&buffer, buf) )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
changes++;
|
changes++;
|
||||||
}else
|
}else
|
||||||
{ add_char_bufW(&buffer, c);
|
{ add_char_bufW(&buffer, c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( changes > 0 )
|
if ( changes > 0 )
|
||||||
rc = PL_unify_wchars(quoted, PL_ATOM,
|
rc = PL_unify_wchars(quoted, PL_ATOM,
|
||||||
used_buf(&buffer)/sizeof(wchar_t),
|
used_buf(&buffer)/sizeof(wchar_t),
|
||||||
@ -230,7 +234,7 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
|
|||||||
else
|
else
|
||||||
rc = PL_unify(in, quoted);
|
rc = PL_unify(in, quoted);
|
||||||
}
|
}
|
||||||
|
|
||||||
free_buf(&buffer);
|
free_buf(&buffer);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
@ -321,12 +325,39 @@ xml_quote_cdata(term_t in, term_t out, term_t encoding)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
is_xml_nmstart(dtd_charclass *map, int c)
|
||||||
|
{ if ( c <= 0xff )
|
||||||
|
{ return (map->class[c] & CH_NMSTART);
|
||||||
|
} else
|
||||||
|
{ return ( xml_basechar(c) ||
|
||||||
|
xml_ideographic(c)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
is_xml_chname(dtd_charclass *map, int c)
|
||||||
|
{ if ( c <= 0xff )
|
||||||
|
{ return (map->class[c] & CH_NAME);
|
||||||
|
} else
|
||||||
|
{ return ( xml_basechar(c) ||
|
||||||
|
xml_digit(c) ||
|
||||||
|
xml_ideographic(c) ||
|
||||||
|
xml_combining_char(c) ||
|
||||||
|
xml_extender(c)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static dtd_charclass *map;
|
||||||
|
|
||||||
static foreign_t
|
static foreign_t
|
||||||
xml_name(term_t in, term_t encoding)
|
xml_name(term_t in, term_t encoding)
|
||||||
{ char *ins;
|
{ char *ins;
|
||||||
wchar_t *inW;
|
wchar_t *inW;
|
||||||
size_t len;
|
size_t len;
|
||||||
static dtd_charclass *map;
|
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
int maxchr;
|
int maxchr;
|
||||||
|
|
||||||
@ -345,7 +376,7 @@ xml_name(term_t in, term_t encoding)
|
|||||||
c = ins[0] & 0xff;
|
c = ins[0] & 0xff;
|
||||||
if ( c > maxchr )
|
if ( c > maxchr )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
if ( !(map->class[c] & CH_NMSTART) )
|
if ( !(map->class[c] & CH_NMSTART) )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
for(i=1; i<len; i++)
|
for(i=1; i<len; i++)
|
||||||
@ -360,22 +391,16 @@ xml_name(term_t in, term_t encoding)
|
|||||||
if ( PL_get_wchars(in, &len, &inW, CVT_ATOMIC) )
|
if ( PL_get_wchars(in, &len, &inW, CVT_ATOMIC) )
|
||||||
{ if ( len == 0 )
|
{ if ( len == 0 )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
if ( inW[0] > maxchr )
|
if ( inW[0] > maxchr ||
|
||||||
|
!is_xml_nmstart(map, inW[0]) )
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
if ( inW[0] <= 0xff &&
|
|
||||||
!(map->class[inW[0]] & CH_NMSTART) )
|
|
||||||
return FALSE;
|
|
||||||
if ( inW[0] > 0xff && !iswalpha(inW[0]) )
|
|
||||||
return FALSE;
|
|
||||||
|
|
||||||
for(i=1; i<len; i++)
|
for(i=1; i<len; i++)
|
||||||
{ int c = inW[i];
|
{ int c = inW[i];
|
||||||
|
|
||||||
if ( c <= 0xff && !(map->class[c] & CH_NAME) )
|
if ( c > maxchr ||
|
||||||
return FALSE;
|
!is_xml_chname(map, c) )
|
||||||
if ( c > 0xff && !iswalnum((wint_t)c) )
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -386,6 +411,57 @@ xml_name(term_t in, term_t encoding)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static foreign_t
|
||||||
|
iri_xml_namespace(term_t iri, term_t namespace, term_t localname)
|
||||||
|
{ char *s;
|
||||||
|
pl_wchar_t *w;
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
if ( !map )
|
||||||
|
map = new_charclass();
|
||||||
|
|
||||||
|
if ( PL_get_nchars(iri, &len, &s, CVT_ATOM|CVT_STRING) )
|
||||||
|
{ const char *e = &s[len];
|
||||||
|
const char *p = e;
|
||||||
|
|
||||||
|
while(p>s && (map->class[p[-1]&0xff] & CH_NAME))
|
||||||
|
p--;
|
||||||
|
while(p<e && !(map->class[p[0]&0xff] & CH_NMSTART))
|
||||||
|
p++;
|
||||||
|
|
||||||
|
if ( !PL_unify_atom_nchars(namespace, p-s, s) )
|
||||||
|
return FALSE;
|
||||||
|
if ( localname &&
|
||||||
|
!PL_unify_atom_nchars(localname, e-p, p) )
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
} else if ( PL_get_wchars(iri, &len, &w, CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
|
||||||
|
{ const pl_wchar_t *e = &w[len];
|
||||||
|
const pl_wchar_t *p = e;
|
||||||
|
|
||||||
|
while(p>w && is_xml_chname(map, p[-1]) )
|
||||||
|
p--;
|
||||||
|
while(p<e && !is_xml_nmstart(map, p[0]) )
|
||||||
|
p++;
|
||||||
|
|
||||||
|
if ( !PL_unify_wchars(namespace, PL_ATOM, p-w, w) )
|
||||||
|
return FALSE;
|
||||||
|
if ( localname &&
|
||||||
|
!PL_unify_wchars(localname, PL_ATOM, e-p, p) )
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static foreign_t
|
||||||
|
iri_xml_namespace2(term_t iri, term_t namespace)
|
||||||
|
{ return iri_xml_namespace(iri, namespace, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
install_t
|
install_t
|
||||||
@ -398,4 +474,6 @@ install_xml_quote()
|
|||||||
PL_register_foreign("xml_quote_attribute", 3, xml_quote_attribute, 0);
|
PL_register_foreign("xml_quote_attribute", 3, xml_quote_attribute, 0);
|
||||||
PL_register_foreign("xml_quote_cdata", 3, xml_quote_cdata, 0);
|
PL_register_foreign("xml_quote_cdata", 3, xml_quote_cdata, 0);
|
||||||
PL_register_foreign("xml_name", 2, xml_name, 0);
|
PL_register_foreign("xml_name", 2, xml_name, 0);
|
||||||
|
PL_register_foreign("iri_xml_namespace", 3, iri_xml_namespace, 0);
|
||||||
|
PL_register_foreign("iri_xml_namespace", 2, iri_xml_namespace2, 0);
|
||||||
}
|
}
|
||||||
|
@ -95,10 +95,10 @@ print_word(dtd_parser * p, char c, /* preceding character */
|
|||||||
static void
|
static void
|
||||||
wprint_escaped(FILE *f, const wchar_t *s, int len)
|
wprint_escaped(FILE *f, const wchar_t *s, int len)
|
||||||
{ const wchar_t *e = &s[len];
|
{ const wchar_t *e = &s[len];
|
||||||
|
|
||||||
while ( s < e )
|
while ( s < e )
|
||||||
{ wint_t x = *s++;
|
{ wint_t x = *s++;
|
||||||
|
|
||||||
if (x >= ' ')
|
if (x >= ' ')
|
||||||
{ if (x == '\\') /* \ --> \\ */
|
{ if (x == '\\') /* \ --> \\ */
|
||||||
wputc(x, f);
|
wputc(x, f);
|
||||||
@ -352,7 +352,7 @@ mb2wc(const char *s)
|
|||||||
|
|
||||||
return ws;
|
return ws;
|
||||||
}
|
}
|
||||||
|
|
||||||
perror("mbstowcs");
|
perror("mbstowcs");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ Markup languages are an increasingly important method for
|
|||||||
data-representation and exchange. This article documents the package
|
data-representation and exchange. This article documents the package
|
||||||
\pllib{sgml}, a foreign library for SWI-Prolog to parse SGML
|
\pllib{sgml}, a foreign library for SWI-Prolog to parse SGML
|
||||||
and XML documents, returning information on both the document and the
|
and XML documents, returning information on both the document and the
|
||||||
document's DTD. The parser is designed to be small, fast and flexible.
|
document's DTD. The parser is designed to be small, fast and flexible.
|
||||||
\end{abstract}
|
\end{abstract}
|
||||||
|
|
||||||
\pagebreak
|
\pagebreak
|
||||||
@ -56,17 +56,17 @@ The parser described in this document is small (less than 100 kBytes
|
|||||||
executable on a Pentium), fast (between 2 and 5 times faster than SP),
|
executable on a Pentium), fast (between 2 and 5 times faster than SP),
|
||||||
provides access to the DTD, and provides flexible input handling.
|
provides access to the DTD, and provides flexible input handling.
|
||||||
|
|
||||||
The document output is equal to the output produced by \jargon{xml2pl},
|
The document output is equal to the output produced by \jargon{xml2pl},
|
||||||
an SP interface to SWI-Prolog written by Anjo Anjewierden.
|
an SP interface to SWI-Prolog written by Anjo Anjewierden.
|
||||||
|
|
||||||
|
|
||||||
\section{Bluffer's Guide}
|
\section{Bluffer's Guide}
|
||||||
|
|
||||||
This package allows you to parse SGML, XML and HTML data into a Prolog
|
This package allows you to parse SGML, XML and HTML data into a Prolog
|
||||||
data structure. The high-level interface defined in \pllib{sgml}
|
data structure. The high-level interface defined in \pllib{sgml}
|
||||||
provides access at the file-level, while the low-level interface defined
|
provides access at the file-level, while the low-level interface defined
|
||||||
in the foreign module works with Prolog streams. Please use the source
|
in the foreign module works with Prolog streams. Please use the source
|
||||||
of \file{sgml.pl} as a starting point for dealing with data from
|
of \file{sgml.pl} as a starting point for dealing with data from
|
||||||
other sources than files, such as SWI-Prolog resources, network-sockets,
|
other sources than files, such as SWI-Prolog resources, network-sockets,
|
||||||
character strings, \emph{etc.} The first example below loads an HTML file.
|
character strings, \emph{etc.} The first example below loads an HTML file.
|
||||||
|
|
||||||
@ -123,9 +123,9 @@ This is called `omitted-tag' handling.
|
|||||||
].
|
].
|
||||||
\end{code}
|
\end{code}
|
||||||
|
|
||||||
The document is represented as a list, each element being an atom to
|
The document is represented as a list, each element being an atom to
|
||||||
represent \const{CDATA} or a term \term{element}{Name, Attributes, Content}.
|
represent \const{CDATA} or a term \term{element}{Name, Attributes, Content}.
|
||||||
Entities (e.g. \verb$&lt;$) are expanded and included in the
|
Entities (e.g. \verb$&lt;$) are expanded and included in the
|
||||||
atom representing the element content or attribute value.%
|
atom representing the element content or attribute value.%
|
||||||
\footnote{Up to SWI-Prolog 5.4.x, Prolog could not represent
|
\footnote{Up to SWI-Prolog 5.4.x, Prolog could not represent
|
||||||
\jargon{wide} characters and entities that did not fit in
|
\jargon{wide} characters and entities that did not fit in
|
||||||
@ -141,23 +141,24 @@ self-contained files in SGML, HTML, or XML into a structured term. They
|
|||||||
are based on load_structure/3.
|
are based on load_structure/3.
|
||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\predicate{load_sgml_file}{2}{+File, -ListOfContent}
|
\predicate{load_sgml_file}{2}{+Source, -ListOfContent}
|
||||||
Same as \term{load_structure}{File, ListOfContent, [dialect(sgml)]}.
|
Same as \term{load_structure}{Source, ListOfContent, [dialect(sgml)]}.
|
||||||
|
|
||||||
\predicate{load_xml_file}{2}{+File, -ListOfContent}
|
\predicate{load_xml_file}{2}{+Source, -ListOfContent}
|
||||||
Same as \term{load_structure}{File, ListOfContent, [dialect(xml)]}.
|
Same as \term{load_structure}{Source, ListOfContent, [dialect(xml)]}.
|
||||||
|
|
||||||
\predicate{load_html_file}{2}{+File, -Content}
|
\predicate{load_html_file}{2}{+Source, -Content}
|
||||||
Load \arg{File} and parse as HTML. Implemented as below. Note that
|
Load \arg{Source} and parse as HTML. \arg{Source} is either the
|
||||||
load_html_file/2 re-uses a cached DTD object as defined by dtd/2. As DTD
|
name of a file or term \term{stream}{Handle}. Implemented as
|
||||||
objects may be corrupted while loading erroneous documents sharing is
|
below. Note that load_html_file/2 re-uses a cached DTD object as defined
|
||||||
undesirable if the documents are not known to be correct. See dtd/2 for
|
by dtd/2. As DTD objects may be corrupted while loading erroneous
|
||||||
details.
|
documents sharing is undesirable if the documents are not known to be
|
||||||
|
correct. See dtd/2 for details.
|
||||||
|
|
||||||
\begin{code}
|
\begin{code}
|
||||||
load_html_file(File, Term) :-
|
load_html_file(Source, Term) :-
|
||||||
dtd(html, DTD),
|
dtd(html, DTD),
|
||||||
load_structure(File, Term,
|
load_structure(Source, Term,
|
||||||
[ dtd(DTD),
|
[ dtd(DTD),
|
||||||
dialect(sgml),
|
dialect(sgml),
|
||||||
shorttag(false)
|
shorttag(false)
|
||||||
@ -171,8 +172,8 @@ load_html_file(File, Term) :-
|
|||||||
\subsection{Loading Structured Documents}
|
\subsection{Loading Structured Documents}
|
||||||
|
|
||||||
SGML or XML files are loaded through the common predicate
|
SGML or XML files are loaded through the common predicate
|
||||||
load_structure/3. This is a predicate with many options. For
|
load_structure/3. This is a predicate with many options. For
|
||||||
simplicity a number of commonly used shorthands are provided:
|
simplicity a number of commonly used shorthands are provided:
|
||||||
load_sgml_file/2, load_xml_file/2, and
|
load_sgml_file/2, load_xml_file/2, and
|
||||||
load_html_file/2.
|
load_html_file/2.
|
||||||
|
|
||||||
@ -184,18 +185,18 @@ Parse \arg{Source} and return the resulting structure in
|
|||||||
options controlling the conversion process.
|
options controlling the conversion process.
|
||||||
|
|
||||||
A proper XML document contains only a single toplevel element whose name
|
A proper XML document contains only a single toplevel element whose name
|
||||||
matches the document type. Nevertheless, a list is returned for
|
matches the document type. Nevertheless, a list is returned for
|
||||||
consistency with the representation of element content. The
|
consistency with the representation of element content. The
|
||||||
\arg{ListOfContent} consists of the following types:
|
\arg{ListOfContent} consists of the following types:
|
||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{\arg{Atom}}{}
|
\termitem{\arg{Atom}}{}
|
||||||
Atoms are used to represent \const{CDATA}. Note
|
Atoms are used to represent \const{CDATA}. Note
|
||||||
this is possible in SWI-Prolog, as there is no length-limit on atoms and
|
this is possible in SWI-Prolog, as there is no length-limit on atoms and
|
||||||
atom garbage collection is provided.
|
atom garbage collection is provided.
|
||||||
|
|
||||||
\termitem{element}{Name, ListAttributes, ListOfContent}
|
\termitem{element}{Name, ListAttributes, ListOfContent}
|
||||||
\arg{Name} is the name of the element. Using SGML, which is
|
\arg{Name} is the name of the element. Using SGML, which is
|
||||||
case-insensitive, all element names are returned as lowercase atoms.
|
case-insensitive, all element names are returned as lowercase atoms.
|
||||||
|
|
||||||
\arg{ListOfAttributes} is a list of \arg{Name}=\arg{Value} pairs for
|
\arg{ListOfAttributes} is a list of \arg{Name}=\arg{Value} pairs for
|
||||||
@ -209,31 +210,31 @@ integers is supported. \arg{ListOfContent} defines the content for the
|
|||||||
element.
|
element.
|
||||||
|
|
||||||
\termitem{sdata}{Text}
|
\termitem{sdata}{Text}
|
||||||
If an entity with declared content-type \const{SDATA} is encountered, this
|
If an entity with declared content-type \const{SDATA} is encountered, this
|
||||||
term is returned holding the data in \arg{Text}.
|
term is returned holding the data in \arg{Text}.
|
||||||
|
|
||||||
\termitem{ndata}{Text}
|
\termitem{ndata}{Text}
|
||||||
If an entity with declared content-type \const{NDATA} is encountered, this
|
If an entity with declared content-type \const{NDATA} is encountered, this
|
||||||
term is returned holding the data in \arg{Text}.
|
term is returned holding the data in \arg{Text}.
|
||||||
\termitem{pi}{Text}
|
\termitem{pi}{Text}
|
||||||
If a processing instruction is encountered (\verb$<?...?>$),
|
If a processing instruction is encountered (\verb$<?...?>$),
|
||||||
\arg{Text} holds the text of the processing instruction. Please note that the
|
\arg{Text} holds the text of the processing instruction. Please note that the
|
||||||
\verb$<?xml ...?>$ instruction is handled internally.
|
\verb$<?xml ...?>$ instruction is handled internally.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
The \arg{Options} list controls the conversion process. Currently
|
The \arg{Options} list controls the conversion process. Currently
|
||||||
defined options are:
|
defined options are:
|
||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{dtd}{?DTD}
|
\termitem{dtd}{?DTD}
|
||||||
Reference to a DTD object. If specified, the \verb$<!DOCTYPE ...>$
|
Reference to a DTD object. If specified, the \verb$<!DOCTYPE ...>$
|
||||||
declaration is ignored and the document is parsed and validated against
|
declaration is ignored and the document is parsed and validated against
|
||||||
the provided DTD. If provided as a variable, the created DTD is
|
the provided DTD. If provided as a variable, the created DTD is
|
||||||
returned. See \secref{implicitdtd}.
|
returned. See \secref{implicitdtd}.
|
||||||
|
|
||||||
\termitem{dialect}{+Dialect}
|
\termitem{dialect}{+Dialect}
|
||||||
Specify the parsing dialect. Supported are \const{sgml} (default), \const{xml}
|
Specify the parsing dialect. Supported are \const{sgml} (default), \const{xml}
|
||||||
and \const{xmlns}. See \secref{xml} for details on the differences.
|
and \const{xmlns}. See \secref{xml} for details on the differences.
|
||||||
|
|
||||||
\termitem{shorttag}{+Bool}
|
\termitem{shorttag}{+Bool}
|
||||||
@ -272,14 +273,14 @@ Defines (overwrites) an entity definition. At the moment, only
|
|||||||
entity options are allowed.
|
entity options are allowed.
|
||||||
|
|
||||||
\termitem{file}{+Name}
|
\termitem{file}{+Name}
|
||||||
Sets the name of the file on which errors are reported. Sets the
|
Sets the name of the file on which errors are reported. Sets the
|
||||||
linenumber to 1.
|
linenumber to 1.
|
||||||
|
|
||||||
\termitem{line}{+Line}
|
\termitem{line}{+Line}
|
||||||
Sets the starting line-number for reporting errors.
|
Sets the starting line-number for reporting errors.
|
||||||
|
|
||||||
\termitem{max_errors}{+Max}
|
\termitem{max_errors}{+Max}
|
||||||
Sets the maximum number of errors. If this number is reached, an
|
Sets the maximum number of errors. If this number is reached, an
|
||||||
exception of the format below is raised. The default is 50. Using
|
exception of the format below is raised. The default is 50. Using
|
||||||
\term{max_errors}{-1} makes the parser continue, no matter how many
|
\term{max_errors}{-1} makes the parser continue, no matter how many
|
||||||
errors it encounters.
|
errors it encounters.
|
||||||
@ -303,26 +304,26 @@ modes are:
|
|||||||
\termitem{space}{sgml}
|
\termitem{space}{sgml}
|
||||||
In SGML, newlines at the start and end of an element are removed.\footnote{In
|
In SGML, newlines at the start and end of an element are removed.\footnote{In
|
||||||
addition, newlines at the end of lines containing only markup should be
|
addition, newlines at the end of lines containing only markup should be
|
||||||
deleted. This is not yet implemented.} This is the default mode for
|
deleted. This is not yet implemented.} This is the default mode for
|
||||||
the SGML dialect.
|
the SGML dialect.
|
||||||
|
|
||||||
\termitem{space}{preserve}
|
\termitem{space}{preserve}
|
||||||
White space is passed literally to the application. This mode leaves all
|
White space is passed literally to the application. This mode leaves all
|
||||||
white space handling to the application. This is the default mode for
|
white space handling to the application. This is the default mode for
|
||||||
the XML dialect.
|
the XML dialect.
|
||||||
|
|
||||||
\termitem{space}{default}
|
\termitem{space}{default}
|
||||||
In addition to \const{sgml} space-mode, all consecutive white-space is
|
In addition to \const{sgml} space-mode, all consecutive white-space is
|
||||||
reduced to a single space-character. This mode canonises all white
|
reduced to a single space-character. This mode canonises all white
|
||||||
space.
|
space.
|
||||||
|
|
||||||
\termitem{space}{remove}
|
\termitem{space}{remove}
|
||||||
In addition to \const{default}, all leading and trailing white-space is
|
In addition to \const{default}, all leading and trailing white-space is
|
||||||
removed from \const{CDATA} objects. If, as a result, the \const{CDATA}
|
removed from \const{CDATA} objects. If, as a result, the \const{CDATA}
|
||||||
becomes empty, nothing is passed to the application. This mode is
|
becomes empty, nothing is passed to the application. This mode is
|
||||||
especially handy for processing `data-oriented' documents, such as RDF.
|
especially handy for processing `data-oriented' documents, such as RDF.
|
||||||
It is not suitable for normal text documents. Consider the HTML
|
It is not suitable for normal text documents. Consider the HTML
|
||||||
fragment below. When processed in this mode, the spaces between the
|
fragment below. When processed in this mode, the spaces between the
|
||||||
three modified words are lost. This mode is not part of any standard;
|
three modified words are lost. This mode is not part of any standard;
|
||||||
XML 1.0 allows only \const{default} and \const{preserve}.
|
XML 1.0 allows only \const{default} and \const{preserve}.
|
||||||
|
|
||||||
@ -333,9 +334,9 @@ Consider adjacent <b>bold</b> <ul>and</ul> <it>italic</it> words.
|
|||||||
|
|
||||||
\subsection{XML documents} \label{sec:xml}
|
\subsection{XML documents} \label{sec:xml}
|
||||||
|
|
||||||
The parser can operate in two modes: \const{sgml} mode and \const{xml} mode, as
|
The parser can operate in two modes: \const{sgml} mode and \const{xml} mode, as
|
||||||
defined by the \term{dialect}{Dialect} option. Regardless of this
|
defined by the \term{dialect}{Dialect} option. Regardless of this
|
||||||
option, if the first line of the document reads as below, the parser is
|
option, if the first line of the document reads as below, the parser is
|
||||||
switched automatically into XML mode.
|
switched automatically into XML mode.
|
||||||
|
|
||||||
\begin{code}
|
\begin{code}
|
||||||
@ -346,21 +347,21 @@ Currently switching to XML mode implies:
|
|||||||
|
|
||||||
\begin{itemlist}
|
\begin{itemlist}
|
||||||
\item [XML empty elements]
|
\item [XML empty elements]
|
||||||
The construct \verb$<element [attribute...] />$ is recognised as
|
The construct \verb$<element [attribute...] />$ is recognised as
|
||||||
an empty element.
|
an empty element.
|
||||||
|
|
||||||
\item [Predefined entities]
|
\item [Predefined entities]
|
||||||
The following entities are predefined: \const{lt} (\verb$<$), \const{gt}
|
The following entities are predefined: \const{lt} (\verb$<$), \const{gt}
|
||||||
(\verb$>$), \const{amp} (\verb$&$), \const{apos} (\verb$'$)
|
(\verb$>$), \const{amp} (\verb$&$), \const{apos} (\verb$'$)
|
||||||
and \const{quot} (\verb$"$).
|
and \const{quot} (\verb$"$).
|
||||||
|
|
||||||
\item [Case sensitivity]
|
\item [Case sensitivity]
|
||||||
In XML mode, names are treated case-sensitive, except for the DTD
|
In XML mode, names are treated case-sensitive, except for the DTD
|
||||||
reserved names (i.e. \exam{ELEMENT}, \emph{etc.}).
|
reserved names (i.e. \exam{ELEMENT}, \emph{etc.}).
|
||||||
|
|
||||||
\item [Character classes]
|
\item [Character classes]
|
||||||
In XML mode, underscores (\verb$_$) and colon (\verb$:$) are
|
In XML mode, underscores (\verb$_$) and colon (\verb$:$) are
|
||||||
allowed in names.
|
allowed in names.
|
||||||
|
|
||||||
\item [White-space handling]
|
\item [White-space handling]
|
||||||
White space mode is set to \const{preserve}. In addition to setting
|
White space mode is set to \const{preserve}. In addition to setting
|
||||||
@ -378,28 +379,28 @@ preserves space, regardless of the default processing mode.
|
|||||||
|
|
||||||
\subsubsection{XML Namespaces} \label{sec:xmlns}
|
\subsubsection{XML Namespaces} \label{sec:xmlns}
|
||||||
|
|
||||||
Using the \jargon{dialect} \const{xmlns}, the parser will interpret XML
|
Using the \jargon{dialect} \const{xmlns}, the parser will interpret XML
|
||||||
namespaces. In this case, the names of elements are returned as a term
|
namespaces. In this case, the names of elements are returned as a term
|
||||||
of the format
|
of the format
|
||||||
|
|
||||||
\begin{quote}
|
\begin{quote}
|
||||||
\arg{URL}\const{:}\arg{LocalName}
|
\arg{URL}\const{:}\arg{LocalName}
|
||||||
\end{quote}
|
\end{quote}
|
||||||
|
|
||||||
If an identifier has no namespace and there is no default namespace it
|
If an identifier has no namespace and there is no default namespace it
|
||||||
is returned as a simple atom. If an identifier has a namespace but this
|
is returned as a simple atom. If an identifier has a namespace but this
|
||||||
namespace is undeclared, the namespace name rather than the related URL
|
namespace is undeclared, the namespace name rather than the related URL
|
||||||
is returned.
|
is returned.
|
||||||
|
|
||||||
Attributes declaring namespaces ({\tt xmlns:<ns>=<url>}) are reported
|
Attributes declaring namespaces ({\tt xmlns:<ns>=<url>}) are reported
|
||||||
as if \const{xmlns} were not a defined resource.
|
as if \const{xmlns} were not a defined resource.
|
||||||
|
|
||||||
In many cases, getting attribute-names as <xmp>\arg{url}:\arg{name}</xmp>
|
In many cases, getting attribute-names as \arg{url}:\arg{name}
|
||||||
is not desirable. Such terms are hard to unify and sometimes multiple
|
is not desirable. Such terms are hard to unify and sometimes multiple
|
||||||
URLs may be mapped to the same identifier. This may happen due to poor
|
URLs may be mapped to the same identifier. This may happen due to poor
|
||||||
version management, poor standardisation or because the application
|
version management, poor standardisation or because the application
|
||||||
doesn't care too much about versions. This package defines two
|
doesn't care too much about versions. This package defines two
|
||||||
call-backs that can be set using set_sgml_parser/2 to deal
|
call-backs that can be set using set_sgml_parser/2 to deal
|
||||||
with this problem.
|
with this problem.
|
||||||
|
|
||||||
The call-back \const{xmlns} is called as XML namespaces are noticed.
|
The call-back \const{xmlns} is called as XML namespaces are noticed.
|
||||||
@ -428,6 +429,41 @@ load_rdf_xml(File, Term) :-
|
|||||||
]).
|
]).
|
||||||
\end{code}
|
\end{code}
|
||||||
|
|
||||||
|
The library provides iri_xml_namespace/3 to break down an IRI into
|
||||||
|
its namespace and localname:
|
||||||
|
|
||||||
|
\begin{description}
|
||||||
|
\predicate[det]{iri_xml_namespace}{3}{+IRI, -Namespace, -Localname}
|
||||||
|
Split an IRI (Unicode URI) into its \arg{Namespace} (an IRI) and
|
||||||
|
\arg{Localname} (a Unicode XML name, see xml_name/2). The
|
||||||
|
\arg{Localname} is defined as the longest last part of the IRI that
|
||||||
|
satisfies the syntax of an XML name. With IRI schemas that are designed
|
||||||
|
to work with XML namespaces, this will typically break the IRI on the
|
||||||
|
last \chr{\#} or \chr{/}. Note however that this can produce unexpected
|
||||||
|
results. E.g., in the example below, one might expect the namespace to
|
||||||
|
be \url{http://example.com/images\#}, but an XML name cannot start with
|
||||||
|
a digit.
|
||||||
|
|
||||||
|
\begin{code}
|
||||||
|
?- iri_xml_namespace('http://example.com/images#12345', NS, L).
|
||||||
|
NS = 'http://example.com/images#12345',
|
||||||
|
L = ''.
|
||||||
|
\end{code}
|
||||||
|
|
||||||
|
As we see from the example above, the \arg{Localname} can be the empty
|
||||||
|
atom. Similarly, \arg{Namespace} can be the empty atom if \arg{IRI} is
|
||||||
|
an XML name. Applications will often have to check for either or both
|
||||||
|
these conditions. We decided against failing in these conditions because
|
||||||
|
the application typically wants to know which of the two conditions
|
||||||
|
(empty namespace or empty localname) holds. This predicate is often used
|
||||||
|
for generating RDF/XML from an RDF graph.
|
||||||
|
|
||||||
|
\predicate[det]{iri_xml_namespace}{2}{+IRI, -Namespace}
|
||||||
|
Same as iri_xml_namespace/3, but avoids creating an atom for the
|
||||||
|
\arg{Localname}.
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
\subsection{DTD-Handling}
|
\subsection{DTD-Handling}
|
||||||
|
|
||||||
The DTD (\textbf{D}ocument \textbf{T}ype \textbf{D}efinition) is a
|
The DTD (\textbf{D}ocument \textbf{T}ype \textbf{D}efinition) is a
|
||||||
@ -438,7 +474,7 @@ predicates for handling the DTD.
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\predicate{new_dtd}{2}{+DocType, -DTD}
|
\predicate{new_dtd}{2}{+DocType, -DTD}
|
||||||
Creates an empty DTD for the named \arg{DocType}. The returned
|
Creates an empty DTD for the named \arg{DocType}. The returned
|
||||||
DTD-reference is an opaque term that can be used in the other predicates
|
DTD-reference is an opaque term that can be used in the other predicates
|
||||||
of this package.
|
of this package.
|
||||||
|
|
||||||
@ -468,7 +504,7 @@ Define the DTD dialect. Default is \const{sgml}. Using \const{xml} or
|
|||||||
|
|
||||||
\predicate{dtd}{2}{+DocType, -DTD}
|
\predicate{dtd}{2}{+DocType, -DTD}
|
||||||
Find the DTD representing the indicated \jargon{doctype}. This predicate
|
Find the DTD representing the indicated \jargon{doctype}. This predicate
|
||||||
uses a cache of DTD objects. If a doctype has no associated dtd, it
|
uses a cache of DTD objects. If a doctype has no associated dtd, it
|
||||||
searches for a file using the file search path \exam{dtd} using the call:
|
searches for a file using the file search path \exam{dtd} using the call:
|
||||||
|
|
||||||
\begin{code}
|
\begin{code}
|
||||||
@ -488,15 +524,15 @@ parse multiple documents should be restricted to situations where the
|
|||||||
documents processed are known to be error-free.
|
documents processed are known to be error-free.
|
||||||
|
|
||||||
\predicate{dtd_property}{2}{+DTD, ?Property}
|
\predicate{dtd_property}{2}{+DTD, ?Property}
|
||||||
This predicate is used to examine the content of a DTD. Property is one
|
This predicate is used to examine the content of a DTD. Property is one
|
||||||
of:
|
of:
|
||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{doctype}{DocType}
|
\termitem{doctype}{DocType}
|
||||||
An atom representing the document-type defined by this DTD.
|
An atom representing the document-type defined by this DTD.
|
||||||
|
|
||||||
\termitem{elements}{ListOfElements}
|
\termitem{elements}{ListOfElements}
|
||||||
A list of atoms representing the names of the elements in this DTD.
|
A list of atoms representing the names of the elements in this DTD.
|
||||||
|
|
||||||
\termitem{element}{Name, Omit, Content}
|
\termitem{element}{Name, Omit, Content}
|
||||||
The DTD contains an element with the given name. \arg{Omit} is a term of
|
The DTD contains an element with the given name. \arg{Omit} is a term of
|
||||||
@ -508,7 +544,7 @@ form:
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{empty}{}
|
\termitem{empty}{}
|
||||||
The element has no content.
|
The element has no content.
|
||||||
|
|
||||||
\termitem{cdata}{}
|
\termitem{cdata}{}
|
||||||
The element contains non-parsed character data. All data up to the
|
The element contains non-parsed character data. All data up to the
|
||||||
@ -524,30 +560,30 @@ any order.
|
|||||||
\termitem{\#pcdata}{}
|
\termitem{\#pcdata}{}
|
||||||
The element contains parsed character data.
|
The element contains parsed character data.
|
||||||
|
|
||||||
\termitem{\arg{element}}{} An element with this name.
|
\termitem{\arg{element}}{} An element with this name.
|
||||||
|
|
||||||
\termitem{*}{SubModel}
|
\termitem{*}{SubModel}
|
||||||
0 or more appearances.
|
0 or more appearances.
|
||||||
|
|
||||||
\termitem{?}{SubModel}
|
\termitem{?}{SubModel}
|
||||||
0 or one appearance.
|
0 or one appearance.
|
||||||
|
|
||||||
\termitem{+}{SubModel}
|
\termitem{+}{SubModel}
|
||||||
1 or more appearances.
|
1 or more appearances.
|
||||||
|
|
||||||
\termitem{,}{SubModel1, SubModel2}
|
\termitem{,}{SubModel1, SubModel2}
|
||||||
\arg{SubModel1} followed by \arg{SubModel2}.
|
\arg{SubModel1} followed by \arg{SubModel2}.
|
||||||
|
|
||||||
\termitem{\&}{SubModel1, SubModel2}
|
\termitem{\&}{SubModel1, SubModel2}
|
||||||
\arg{SubModel1} and \arg{SubModel2} in any order.
|
\arg{SubModel1} and \arg{SubModel2} in any order.
|
||||||
|
|
||||||
\termitem{\chr{|}}{SubModel1, SubModel2}
|
\termitem{\chr{|}}{SubModel1, SubModel2}
|
||||||
\arg{SubModel1} or \arg{SubModel2}.
|
\arg{SubModel1} or \arg{SubModel2}.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\termitem{attributes}{Element, ListOfAttributes}
|
\termitem{attributes}{Element, ListOfAttributes}
|
||||||
\arg{ListOfAttributes} is a list of atoms representing the attributes
|
\arg{ListOfAttributes} is a list of atoms representing the attributes
|
||||||
of the element \arg{Element}.
|
of the element \arg{Element}.
|
||||||
|
|
||||||
\termitem{attribute}{Element, Attribute, Type, Default}
|
\termitem{attribute}{Element, Attribute, Type, Default}
|
||||||
Query an element. \arg{Type} is one of \const{cdata}, \const{entity},
|
Query an element. \arg{Type} is one of \const{cdata}, \const{entity},
|
||||||
@ -555,34 +591,34 @@ Query an element. \arg{Type} is one of \const{cdata}, \const{entity},
|
|||||||
\const{notation}, \const{number} or \const{nutoken}. For DTD types that
|
\const{notation}, \const{number} or \const{nutoken}. For DTD types that
|
||||||
allow for a list, the notation \term{list}{Type} is used. Finally, the
|
allow for a list, the notation \term{list}{Type} is used. Finally, the
|
||||||
DTD construct \verb$(a|b|...)$ is mapped to the term
|
DTD construct \verb$(a|b|...)$ is mapped to the term
|
||||||
\term{nameof}{ListOfValues}.
|
\term{nameof}{ListOfValues}.
|
||||||
|
|
||||||
\arg{Default} describes the sgml default. It is one of \const{required},
|
\arg{Default} describes the sgml default. It is one of \const{required},
|
||||||
\const{current}, \const{conref} or \const{implied}. If a real default is
|
\const{current}, \const{conref} or \const{implied}. If a real default is
|
||||||
present, it is one of \term{default}{Value} or \term{fixed}{Value}.
|
present, it is one of \term{default}{Value} or \term{fixed}{Value}.
|
||||||
|
|
||||||
\termitem{entities}{ListOfEntities}
|
\termitem{entities}{ListOfEntities}
|
||||||
\arg{ListOfEntities} is a list of atoms representing the names of the
|
\arg{ListOfEntities} is a list of atoms representing the names of the
|
||||||
defined entities.
|
defined entities.
|
||||||
|
|
||||||
\termitem{entity}{Name, Value}
|
\termitem{entity}{Name, Value}
|
||||||
\arg{Name} is the name of an entity with given value. Value is one of
|
\arg{Name} is the name of an entity with given value. Value is one of
|
||||||
\begin{description}
|
\begin{description}
|
||||||
|
|
||||||
\termitem{\arg{Atom}}{}
|
\termitem{\arg{Atom}}{}
|
||||||
If the value is atomic, it represents the literal value of the entity.
|
If the value is atomic, it represents the literal value of the entity.
|
||||||
|
|
||||||
\termitem{system}{Url}
|
\termitem{system}{Url}
|
||||||
\arg{Url} is the URL of the system external entity.
|
\arg{Url} is the URL of the system external entity.
|
||||||
|
|
||||||
\termitem{public}{Id, Url}
|
\termitem{public}{Id, Url}
|
||||||
For external public entities, \arg{Id} is the identifier. If an URL is
|
For external public entities, \arg{Id} is the identifier. If an URL is
|
||||||
provided this is returned in \arg{Url}. Otherwise this argument is
|
provided this is returned in \arg{Url}. Otherwise this argument is
|
||||||
unbound.
|
unbound.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\termitem{notations}{ListOfNotations}
|
\termitem{notations}{ListOfNotations}
|
||||||
Returns a list holding the names of all \const{NOTATION} declarations.
|
Returns a list holding the names of all \const{NOTATION} declarations.
|
||||||
|
|
||||||
\termitem{notation}{Name, Decl}
|
\termitem{notation}{Name, Decl}
|
||||||
Unify \arg{Decl} with a list of \term{system}{+File} and/or
|
Unify \arg{Decl} with a list of \term{system}{+File} and/or
|
||||||
@ -592,11 +628,11 @@ Unify \arg{Decl} with a list if \term{system}{+File} and/or
|
|||||||
|
|
||||||
\subsubsection{The DOCTYPE declaration}
|
\subsubsection{The DOCTYPE declaration}
|
||||||
|
|
||||||
As this parser allows for processing partial documents and processing the
|
As this parser allows for processing partial documents and processing the
|
||||||
DTD separately, the DOCTYPE declaration plays a special role.
|
DTD separately, the DOCTYPE declaration plays a special role.
|
||||||
|
|
||||||
If a document has no DOCTYPE declaration, the parser returns a list
|
If a document has no DOCTYPE declaration, the parser returns a list
|
||||||
holding all elements and CDATA found. If the document has a DOCTYPE
|
holding all elements and CDATA found. If the document has a DOCTYPE
|
||||||
declaration, the parser will open the element defined in the DOCTYPE as
|
declaration, the parser will open the element defined in the DOCTYPE as
|
||||||
soon as the first real data is encountered.
|
soon as the first real data is encountered.
|
||||||
|
|
||||||
@ -632,53 +668,63 @@ elements_in_xml_document(File, Elements) :-
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\predicate{new_sgml_parser}{2}{-Parser, +Options}
|
\predicate{new_sgml_parser}{2}{-Parser, +Options}
|
||||||
Creates a new parser. A parser can be used one or multiple times for
|
Creates a new parser. A parser can be used one or multiple times for
|
||||||
parsing documents or parts thereof. It may be bound to a DTD or the DTD
|
parsing documents or parts thereof. It may be bound to a DTD or the DTD
|
||||||
may be left implicit, in which case it is created from the document
|
may be left implicit, in which case it is created from the document
|
||||||
prologue or parsing is performed without a DTD. Options:
|
prologue or parsing is performed without a DTD. Options:
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{dtd}{?DTD}
|
\termitem{dtd}{?DTD}
|
||||||
If specified with an initialised DTD, this DTD is used for parsing the
|
If specified with an initialised DTD, this DTD is used for parsing the
|
||||||
document, regardless of the document prologue. If specified as a
|
document, regardless of the document prologue. If specified as a
|
||||||
variable, a reference to the created DTD is returned. This DTD may be
|
variable, a reference to the created DTD is returned. This DTD may be
|
||||||
created from the document prologue or built implicitly from the
|
created from the document prologue or built implicitly from the
|
||||||
document's content.
|
document's content.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\predicate{free_sgml_parser}{1}{+Parser}
|
\predicate{free_sgml_parser}{1}{+Parser}
|
||||||
Destroy all resources related to the parser. This does not destroy the
|
Destroy all resources related to the parser. This does not destroy the
|
||||||
DTD if the parser was created using the \term{dtd}{DTD} option.
|
DTD if the parser was created using the \term{dtd}{DTD} option.
|
||||||
|
|
||||||
\predicate{set_sgml_parser}{2}{+Parser, +Option}
|
\predicate{set_sgml_parser}{2}{+Parser, +Option}
|
||||||
Sets attributes to the parser. Currently defined attributes:
|
Sets attributes to the parser. Currently defined attributes:
|
||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{file}{File}
|
\termitem{file}{File}
|
||||||
Sets the file for reporting errors and warnings. Sets the line to 1.
|
Sets the file for reporting errors and warnings. Sets the line to 1.
|
||||||
\termitem{line}{Line}
|
\termitem{line}{Line}
|
||||||
Sets the current line. Useful if the stream is not at the start of the
|
Sets the current line. Useful if the stream is not at the start of the
|
||||||
(file) object for generating proper line-numbers.
|
(file) object for generating proper line-numbers.
|
||||||
\termitem{charpos}{Offset}
|
\termitem{charpos}{Offset}
|
||||||
Sets the current character location. See also the \term{file}{File}
|
Sets the current character location. See also the \term{file}{File}
|
||||||
option.
|
option.
|
||||||
\termitem{dialect}{Dialect}
|
\termitem{dialect}{Dialect}
|
||||||
Set the markup dialect. Known dialects:
|
Set the markup dialect. Known dialects:
|
||||||
\begin{description}
|
\begin{description}
|
||||||
|
|
||||||
\termitem{sgml}{}
|
\termitem{sgml}{}
|
||||||
The default dialect is to process as SGML. This implies markup is
|
The default dialect is to process as SGML. This implies markup is
|
||||||
case-insensitive and standard SGML abbreviation is allowed (abbreviated
|
case-insensitive and standard SGML abbreviation is allowed (abbreviated
|
||||||
attributes and omitted tags).
|
attributes and omitted tags).
|
||||||
|
|
||||||
\termitem{xml}{}
|
\termitem{xml}{}
|
||||||
This dialect is selected automatically if the processing instruction
|
This dialect is selected automatically if the processing instruction
|
||||||
\verb$<?xml ...?>$ is encountered. See \secref{xml} for details.
|
\verb$<?xml ...?>$ is encountered. See \secref{xml} for details.
|
||||||
|
|
||||||
\termitem{xmlns}{}
|
\termitem{xmlns}{}
|
||||||
Process file as XML file with namespace support. See \secref{xmlns} for
|
Process file as XML file with namespace support. See \secref{xmlns} for
|
||||||
details. See also the \verb$qualify_attributes$ option below.
|
details. See also the \verb$qualify_attributes$ option below.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
|
\termitem{xmlns}{+URI}
|
||||||
|
Set the default namespace of the outer environment. This option is
|
||||||
|
provided to process partial XML content with proper namespace
|
||||||
|
resolution.
|
||||||
|
|
||||||
|
\termitem{xmlns}{+NS, +URI}
|
||||||
|
Specify a namespace for the outer environment. This option is
|
||||||
|
provided to process partial XML content with proper namespace
|
||||||
|
resolution.
|
||||||
|
|
||||||
\termitem{qualify_attributes}{Boolean}
|
\termitem{qualify_attributes}{Boolean}
|
||||||
How to handle unqualified attributes (i.e. without an explicit namespace)
|
How to handle unqualified attributes (i.e. without an explicit namespace)
|
||||||
in XML namespace (\const{xmlns}) mode. Default and standard compliant is
|
in XML namespace (\const{xmlns}) mode. Default and standard compliant is
|
||||||
@ -715,20 +761,20 @@ sgml_parse/2.
|
|||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\predicate{get_sgml_parser}{2}{+Parser, -Option}
|
\predicate{get_sgml_parser}{2}{+Parser, -Option}
|
||||||
Retrieve information on the current status of the parser. Notably useful
|
Retrieve information on the current status of the parser. Notably useful
|
||||||
if the parser is used in the call-back mode. Currently defined options:
|
if the parser is used in the call-back mode. Currently defined options:
|
||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{file}{-File}
|
\termitem{file}{-File}
|
||||||
Current file-name. Note that this may be different from the provided
|
Current file-name. Note that this may be different from the provided
|
||||||
file if an external entity is being loaded.
|
file if an external entity is being loaded.
|
||||||
|
|
||||||
\termitem{line}{-Line}
|
\termitem{line}{-Line}
|
||||||
Line-offset from where the parser started its processing in the file-object.
|
Line-offset from where the parser started its processing in the file-object.
|
||||||
|
|
||||||
\termitem{charpos}{-CharPos}
|
\termitem{charpos}{-CharPos}
|
||||||
Offset from where the parser started its processing in the file-object.
|
Offset from where the parser started its processing in the file-object.
|
||||||
See \secref{indexaccess}.
|
See \secref{indexaccess}.
|
||||||
|
|
||||||
\termitem{charpos}{-Start, -End}
|
\termitem{charpos}{-Start, -End}
|
||||||
Character offsets of the start and end of the source processed causing the
|
Character offsets of the start and end of the source processed causing the
|
||||||
@ -736,8 +782,8 @@ current call-back. Used in \program{PceEmacs} to for colouring
|
|||||||
text in SGML and XML modes.
|
text in SGML and XML modes.
|
||||||
|
|
||||||
\termitem{source}{-Stream}
|
\termitem{source}{-Stream}
|
||||||
Prolog stream being processed. May be used in the \const{on_begin}, \emph{etc.}
|
Prolog stream being processed. May be used in the \const{on_begin}, \emph{etc.}
|
||||||
callbacks from sgml_parse/2.
|
callbacks from sgml_parse/2.
|
||||||
|
|
||||||
\termitem{dialect}{-Dialect}
|
\termitem{dialect}{-Dialect}
|
||||||
Return the current dialect used by the parser (\const{sgml}, \const{xml} or \const{xmlns}).
|
Return the current dialect used by the parser (\const{sgml}, \const{xml} or \const{xmlns}).
|
||||||
@ -822,8 +868,8 @@ Input is a stream. A full description of the option-list is below.
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{document}{+Term}
|
\termitem{document}{+Term}
|
||||||
A variable that will be unified with a list describing the content of
|
A variable that will be unified with a list describing the content of
|
||||||
the document (see load_structure/2).
|
the document (see load_structure/2).
|
||||||
\termitem{source}{+Stream}
|
\termitem{source}{+Stream}
|
||||||
An input stream that is read. This option \emph{must} be given.
|
An input stream that is read. This option \emph{must} be given.
|
||||||
\termitem{content_length}{+Characters}
|
\termitem{content_length}{+Characters}
|
||||||
@ -840,7 +886,7 @@ Default. Parse everything upto the end of the input.
|
|||||||
The parser stops after reading the first element. Using
|
The parser stops after reading the first element. Using
|
||||||
\term{source}{Stream}, this implies reading is stopped as soon
|
\term{source}{Stream}, this implies reading is stopped as soon
|
||||||
as the element is complete, and another call may be issued on the same
|
as the element is complete, and another call may be issued on the same
|
||||||
stream to read the next element.
|
stream to read the next element.
|
||||||
|
|
||||||
\termitem{content}{}
|
\termitem{content}{}
|
||||||
The value \const{content} is like \const{element} but assumes the
|
The value \const{content} is like \const{element} but assumes the
|
||||||
@ -860,9 +906,9 @@ all open elements.
|
|||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\termitem{max_errors}{+MaxErrors}
|
\termitem{max_errors}{+MaxErrors}
|
||||||
Set the maximum number of errors. If this number is exceeded further
|
Set the maximum number of errors. If this number is exceeded further
|
||||||
writes to the stream will yield an I/O error exception. Printing of
|
writes to the stream will yield an I/O error exception. Printing of
|
||||||
errors is suppressed after reaching this value. The default is 100.
|
errors is suppressed after reaching this value. The default is 100.
|
||||||
\termitem{syntax_errors}{+ErrorMode}
|
\termitem{syntax_errors}{+ErrorMode}
|
||||||
Defines how syntax errors are handled.
|
Defines how syntax errors are handled.
|
||||||
\begin{description}
|
\begin{description}
|
||||||
@ -875,28 +921,35 @@ Defines how syntax errors are handled.
|
|||||||
using print_message/2 with severity
|
using print_message/2 with severity
|
||||||
\const{informational}.
|
\const{informational}.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
|
\termitem{xml_no_ns}{+Mode}
|
||||||
|
Error handling if an XML namespace is not defined. Default generates
|
||||||
|
an error. If \const{quiet}, the error is suppressed. Can be used
|
||||||
|
together with \term{call}{urlns, Closure} to provide external expansion
|
||||||
|
of namespaces. See also \secref{xmlns}.
|
||||||
|
|
||||||
\termitem{call}{+Event, :PredicateName}
|
\termitem{call}{+Event, :PredicateName}
|
||||||
Issue call-backs on the specified events. \arg{PredicateName} is the
|
Issue call-backs on the specified events. \arg{PredicateName} is the
|
||||||
name of the predicate to call on this event, possibly prefixed with a
|
name of the predicate to call on this event, possibly prefixed with a
|
||||||
module identifier. If the handler throws an exception, parsing is stopped
|
module identifier. If the handler throws an exception, parsing is stopped
|
||||||
and sgml_parse/2 re-throws the exception. The defined events are:
|
and sgml_parse/2 re-throws the exception. The defined events are:
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\termitem{begin}{}
|
\termitem{begin}{}
|
||||||
An open-tag has been parsed. The named handler is called with three
|
An open-tag has been parsed. The named handler is called with three
|
||||||
arguments: \term{\arg{Handler}}{+Tag, +Attributes, +Parser}.
|
arguments: \term{\arg{Handler}}{+Tag, +Attributes, +Parser}.
|
||||||
\termitem{end}{}
|
\termitem{end}{}
|
||||||
A close-tag has been parsed. The named handler is called with two
|
A close-tag has been parsed. The named handler is called with two
|
||||||
arguments: \term{\arg{Handler}}{+Tag, +Parser}.
|
arguments: \term{\arg{Handler}}{+Tag, +Parser}.
|
||||||
|
|
||||||
\termitem{cdata}{}
|
\termitem{cdata}{}
|
||||||
CDATA has been parsed. The named handler is called with two arguments:
|
CDATA has been parsed. The named handler is called with two arguments:
|
||||||
\term{Handler}{+CDATA, +Parser}, where CDATA is an atom
|
\term{Handler}{+CDATA, +Parser}, where CDATA is an atom
|
||||||
representing the data.
|
representing the data.
|
||||||
|
|
||||||
\termitem{pi}{}
|
\termitem{pi}{}
|
||||||
A processing instruction has been parsed. The named handler is called
|
A processing instruction has been parsed. The named handler is called
|
||||||
with two arguments: \term{\arg{Handler}}{+Text, +Parser}, where
|
with two arguments: \term{\arg{Handler}}{+Text, +Parser}, where
|
||||||
\arg{Text} is the text of the processing instruction.
|
\arg{Text} is the text of the processing instruction.
|
||||||
|
|
||||||
\termitem{decl}{}
|
\termitem{decl}{}
|
||||||
A declaration (\verb$<!...>$) has been read. The named handler is
|
A declaration (\verb$<!...>$) has been read. The named handler is
|
||||||
@ -918,33 +971,33 @@ If this option is present, errors and warnings are not reported using
|
|||||||
print_message/3
|
print_message/3
|
||||||
|
|
||||||
\termitem{xmlns}{}
|
\termitem{xmlns}{}
|
||||||
When parsing in \const{xmlns} mode, a new namespace declaration is
|
When parsing in \const{xmlns} mode, a new namespace declaration is
|
||||||
pushed on the environment. The named handler is called with three
|
pushed on the environment. The named handler is called with three
|
||||||
arguments: \term{\arg{Handler}}{+NameSpace, +URL, +Parser}.
|
arguments: \term{\arg{Handler}}{+NameSpace, +URL, +Parser}.
|
||||||
See \secref{xmlns} for details.
|
See \secref{xmlns} for details.
|
||||||
|
|
||||||
\termitem{urlns}{}
|
\termitem{urlns}{}
|
||||||
When parsing in \const{xmlns} mode, this predicate can be used to map a
|
When parsing in \const{xmlns} mode, this predicate can be used to map a
|
||||||
url into either a canonical URL for this namespace or another internal
|
url into either a canonical URL for this namespace or another internal
|
||||||
identifier. See \secref{xmlns} for details.
|
identifier. See \secref{xmlns} for details.
|
||||||
\end{description}
|
\end{description}
|
||||||
\end{description}
|
\end{description}
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\subsubsection{Partial Parsing}
|
\subsubsection{Partial Parsing}
|
||||||
|
|
||||||
In some cases, part of a document needs to be parsed. One option is to
|
In some cases, part of a document needs to be parsed. One option is to
|
||||||
use load_structure/2 or one of its variations and extract
|
use load_structure/2 or one of its variations and extract
|
||||||
the desired elements from the returned structure. This is a clean
|
the desired elements from the returned structure. This is a clean
|
||||||
solution, especially on small and medium-sized documents. It however is
|
solution, especially on small and medium-sized documents. It however is
|
||||||
unsuitable for parsing really big documents. Such documents can only be
|
unsuitable for parsing really big documents. Such documents can only be
|
||||||
handled with the call-back output interface realised by the
|
handled with the call-back output interface realised by the
|
||||||
\term{call}{Event, Action} option of sgml_parse/2.
|
\term{call}{Event, Action} option of sgml_parse/2.
|
||||||
Event-driven processing is not very natural in Prolog.
|
Event-driven processing is not very natural in Prolog.
|
||||||
|
|
||||||
The SGML2PL library allows for a mixed approach. Consider the case where
|
The SGML2PL library allows for a mixed approach. Consider the case where
|
||||||
we want to process all descriptions from RDF elements in a document. The
|
we want to process all descriptions from RDF elements in a document. The
|
||||||
code below calls \term{process_rdf_description}{Element} on each element
|
code below calls \term{process_rdf_description}{Element} on each element
|
||||||
that is directly inside an RDF element.
|
that is directly inside an RDF element.
|
||||||
|
|
||||||
\begin{code}
|
\begin{code}
|
||||||
@ -994,26 +1047,28 @@ set_sgml_parser/2 or, for XML, based on the \const{encoding}
|
|||||||
attribute of the XML header. The parser reads from SWI-Prolog streams,
|
attribute of the XML header. The parser reads from SWI-Prolog streams,
|
||||||
which also provide encoding handling. Therefore, there are two modes
|
which also provide encoding handling. Therefore, there are two modes
|
||||||
for parsing. If the SWI-Prolog stream has encoding \const{octet} (which
|
for parsing. If the SWI-Prolog stream has encoding \const{octet} (which
|
||||||
is the default for binary streams), the decoder of the SGML parser will
|
is the default for binary streams), the decoder of the SGML parser will
|
||||||
be used and positions reported by the parser are octet offsets in the
|
be used and positions reported by the parser are octet offsets in the
|
||||||
stream. In other cases, the Prolog stream decoder is used and offsets
|
stream. In other cases, the Prolog stream decoder is used and offsets
|
||||||
are character code counts.
|
are character code counts.
|
||||||
|
|
||||||
|
\input{xpath.tex}
|
||||||
|
|
||||||
\section{Processing Indexed Files} \label{sec:indexaccess}
|
\section{Processing Indexed Files} \label{sec:indexaccess}
|
||||||
|
|
||||||
In some cases applications wish to process small portions of large
|
In some cases applications wish to process small portions of large
|
||||||
SGML, XML or RDF files. For example, the \emph{OpenDirectory} project
|
SGML, XML or RDF files. For example, the \emph{OpenDirectory} project
|
||||||
by Netscape has produced a 90MB RDF file representing the main index.
|
by Netscape has produced a 90MB RDF file representing the main index.
|
||||||
The parser described here can process this document as a unit, but
|
The parser described here can process this document as a unit, but
|
||||||
loading takes 85 seconds on a Pentium-II 450 and the resulting term
|
loading takes 85 seconds on a Pentium-II 450 and the resulting term
|
||||||
requires about 70MB global stack. One option is to process the entire
|
requires about 70MB global stack. One option is to process the entire
|
||||||
document and output it as a Prolog fact-base of RDF triplets, but in
|
document and output it as a Prolog fact-base of RDF triplets, but in
|
||||||
many cases this is undesirable. Another example is a large SGML file
|
many cases this is undesirable. Another example is a large SGML file
|
||||||
containing online documentation. The application normally wishes to
|
containing online documentation. The application normally wishes to
|
||||||
provide only small portions at a time to the user. Loading the entire
|
provide only small portions at a time to the user. Loading the entire
|
||||||
document into memory is then undesirable.
|
document into memory is then undesirable.
|
||||||
|
|
||||||
Using the \term{parse}{element} option, we open a file, seek
|
Using the \term{parse}{element} option, we open a file, seek
|
||||||
(using seek/4) to the position of the element and
|
(using seek/4) to the position of the element and
|
||||||
read the desired element.
|
read the desired element.
|
||||||
|
|
||||||
@ -1059,12 +1114,12 @@ rdf_element(Id, Term) :-
|
|||||||
|
|
||||||
\section{External entities}
|
\section{External entities}
|
||||||
|
|
||||||
While processing an SGML document the document may refer to external
|
While processing an SGML document the document may refer to external
|
||||||
data. This occurs in three places: external parameter entities, normal
|
data. This occurs in three places: external parameter entities, normal
|
||||||
external entities and the \const{DOCTYPE} declaration. The current version
|
external entities and the \const{DOCTYPE} declaration. The current version
|
||||||
of this tool deals rather primitively with external data. External
|
of this tool deals rather primitively with external data. External
|
||||||
entities can only be loaded from a file and the mapping between the
|
entities can only be loaded from a file and the mapping between the
|
||||||
entity names and the file is done using a \jargon{catalog} file in a
|
entity names and the file is done using a \jargon{catalog} file in a
|
||||||
format compatible with that used by James Clark's SP Parser,
|
format compatible with that used by James Clark's SP Parser,
|
||||||
based on the SGML Open (now OASIS) specification.
|
based on the SGML Open (now OASIS) specification.
|
||||||
|
|
||||||
@ -1075,23 +1130,23 @@ sgml_register_catalog_file/2 or the environment variable
|
|||||||
\begin{description}
|
\begin{description}
|
||||||
\predicate{sgml_register_catalog_file}{2}{+File, +Location}
|
\predicate{sgml_register_catalog_file}{2}{+File, +Location}
|
||||||
Register the indicated \arg{File} as a catalog file. \arg{Location} is
|
Register the indicated \arg{File} as a catalog file. \arg{Location} is
|
||||||
either \const{start} or \const{end} and defines whether the catalog is
|
either \const{start} or \const{end} and defines whether the catalog is
|
||||||
considered first or last. This predicate has no effect if \arg{File} is
|
considered first or last. This predicate has no effect if \arg{File} is
|
||||||
already part of the catalog.
|
already part of the catalog.
|
||||||
|
|
||||||
If no files are registered using this predicate, the first query on the
|
If no files are registered using this predicate, the first query on the
|
||||||
catalog examines \env{SGML_CATALOG_FILES} and fills the catalog with
|
catalog examines \env{SGML_CATALOG_FILES} and fills the catalog with
|
||||||
all files in this path.
|
all files in this path.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
Two types of lines are used by this package.
|
Two types of lines are used by this package.
|
||||||
|
|
||||||
\begin{quote}
|
\begin{quote}
|
||||||
\const{DOCTYPE} \arg{doctype} \arg{file} \\
|
\const{DOCTYPE} \arg{doctype} \arg{file} \\
|
||||||
\const{PUBLIC} \exam{"}\arg{Id}\exam{"} \arg{file}
|
\const{PUBLIC} \exam{"}\arg{Id}\exam{"} \arg{file}
|
||||||
\end{quote}
|
\end{quote}
|
||||||
|
|
||||||
The specified \arg{file} path is taken relative to the location of the
|
The specified \arg{file} path is taken relative to the location of the
|
||||||
catalog file. For the \const{DOCTYPE} declaration, \pllib{sgml} first
|
catalog file. For the \const{DOCTYPE} declaration, \pllib{sgml} first
|
||||||
makes an attempt to resolve the \const{SYSTEM} or \const{PUBLIC}
|
makes an attempt to resolve the \const{SYSTEM} or \const{PUBLIC}
|
||||||
identifier. If this fails it tries to resolve the \arg{doctype} using
|
identifier. If this fails it tries to resolve the \arg{doctype} using
|
||||||
@ -1102,10 +1157,12 @@ where system identifiers must be Universal Resource Indicators, not
|
|||||||
local file names. Simple uses of relative URIs will work correctly under
|
local file names. Simple uses of relative URIs will work correctly under
|
||||||
UNIX and Windows.
|
UNIX and Windows.
|
||||||
|
|
||||||
In the future we will design a call-back mechanism for locating and
|
In the future we will design a call-back mechanism for locating and
|
||||||
processing external entities, so Prolog-based file-location and Prolog
|
processing external entities, so Prolog-based file-location and Prolog
|
||||||
resources can be used to store external entities.
|
resources can be used to store external entities.
|
||||||
|
|
||||||
|
\input{pwp.tex}
|
||||||
|
|
||||||
\section{Writing markup}
|
\section{Writing markup}
|
||||||
|
|
||||||
\subsection{Writing documents}
|
\subsection{Writing documents}
|
||||||
@ -1149,14 +1206,14 @@ elements are written using increasing indentation. This introduces
|
|||||||
(depending on the mode and defined whitespace handling) CDATA sequences
|
(depending on the mode and defined whitespace handling) CDATA sequences
|
||||||
with only layout between elements when read back in. If \const{false}, no
|
with only layout between elements when read back in. If \const{false}, no
|
||||||
layout characters are added. As this mode does not need to analyse the
|
layout characters are added. As this mode does not need to analyse the
|
||||||
document it is faster and guarantees correct output when read back.
|
document it is faster and guarantees correct output when read back.
|
||||||
Unfortunately the output is hardly human readable and causes problems
|
Unfortunately the output is hardly human readable and causes problems
|
||||||
with many editors.
|
with many editors.
|
||||||
\termitem{indent}{Integer}
|
\termitem{indent}{Integer}
|
||||||
Set the initial element indentation. If more than zero, the indent
|
Set the initial element indentation. If more than zero, the indent
|
||||||
is written before the document.
|
is written before the document.
|
||||||
\termitem{nsmap}{Map}
|
\termitem{nsmap}{Map}
|
||||||
Set the initial namespace map. \arg{Map} is a list of
|
Set the initial namespace map. \arg{Map} is a list of
|
||||||
\arg{Name} = \arg{URI}. This option, together with \const{header} and
|
\arg{Name} = \arg{URI}. This option, together with \const{header} and
|
||||||
\const{indent} is added to use xml_write/3 to generate XML
|
\const{indent} is added to use xml_write/3 to generate XML
|
||||||
that is embedded in a larger XML document.
|
that is embedded in a larger XML document.
|
||||||
@ -1197,7 +1254,7 @@ values are \const{ascii}, \const{iso_latin_1}, \const{utf8} and
|
|||||||
\const{unicode}. Versions with two arguments are provided for backward
|
\const{unicode}. Versions with two arguments are provided for backward
|
||||||
compatibility, making the safe \const{ascii} encoding assumption.
|
compatibility, making the safe \const{ascii} encoding assumption.
|
||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\predicate{xml_quote_attribute}{3}{+In, -Quoted, +Encoding}
|
\predicate{xml_quote_attribute}{3}{+In, -Quoted, +Encoding}
|
||||||
Map the characters that may not appear in XML attributes to entities.
|
Map the characters that may not appear in XML attributes to entities.
|
||||||
Currently these are \verb$<>&"$.%
|
Currently these are \verb$<>&"$.%
|
||||||
@ -1222,8 +1279,8 @@ Assumes \const{ascii} encoding.
|
|||||||
Succeed if \arg{In} is an atom or string that satisfies the rules for
|
Succeed if \arg{In} is an atom or string that satisfies the rules for
|
||||||
a valid XML element or attribute name. As with the other predicates in
|
a valid XML element or attribute name. As with the other predicates in
|
||||||
this group, if \arg{Encoding} cannot represent one of the characters, this
|
this group, if \arg{Encoding} cannot represent one of the characters, this
|
||||||
function fails. It uses a hard-coded table for ASCII-range characters and
|
function fails. Character classification is based on
|
||||||
iswalpha()/iswalnum() for the first and remaining characters of the name.
|
\url{http://www.w3.org/TR/2006/REC-xml-20060816}.
|
||||||
|
|
||||||
\predicate{xml_name}{1}{+In}
|
\predicate{xml_name}{1}{+In}
|
||||||
Backward compatibility version for xml_name/2. Assumes \const{ascii}
|
Backward compatibility version for xml_name/2. Assumes \const{ascii}
|
||||||
@ -1238,8 +1295,8 @@ Known missing SGML features include
|
|||||||
|
|
||||||
\begin{itemlist}
|
\begin{itemlist}
|
||||||
\item [NOTATION on entities]
|
\item [NOTATION on entities]
|
||||||
Though notation is parsed, notation attributes on external entity
|
Though notation is parsed, notation attributes on external entity
|
||||||
declarations are not handed to the user.
|
declarations are not handed to the user.
|
||||||
\item [NOTATION attributes]
|
\item [NOTATION attributes]
|
||||||
SGML notations may have attributes, declared using
|
SGML notations may have attributes, declared using
|
||||||
\verb$<!ATTLIST #NOTATION name attributes>$. Those data attributes
|
\verb$<!ATTLIST #NOTATION name attributes>$. Those data attributes
|
||||||
@ -1261,8 +1318,8 @@ Empty start tags (\verb$<>$), unclosed start tags
|
|||||||
(\verb$<a<b$) and unclosed end tags (\verb$</a<b$) are not
|
(\verb$<a<b$) and unclosed end tags (\verb$</a<b$) are not
|
||||||
supported.
|
supported.
|
||||||
\item [SGML declaration]
|
\item [SGML declaration]
|
||||||
The `SGML declaration' is fixed, though most of the parameters are
|
The `SGML declaration' is fixed, though most of the parameters are
|
||||||
handled through indirections in the implementation.
|
handled through indirections in the implementation.
|
||||||
\item [The DATATAG feature]
|
\item [The DATATAG feature]
|
||||||
It is regarded as superseded by SHORTREF, which is supported.
|
It is regarded as superseded by SHORTREF, which is supported.
|
||||||
(SP does not support it either.)
|
(SP does not support it either.)
|
||||||
@ -1276,7 +1333,7 @@ one DTD at the same time. It is not supported.
|
|||||||
\end{itemlist}
|
\end{itemlist}
|
||||||
|
|
||||||
|
|
||||||
In XML mode the parser recognises SGML constructs that are not allowed
|
In XML mode the parser recognises SGML constructs that are not allowed
|
||||||
in XML. Also various extensions of XML over SGML are not yet realised.
|
in XML. Also various extensions of XML over SGML are not yet realised.
|
||||||
In particular, XInclude is not implemented because the designers of
|
In particular, XInclude is not implemented because the designers of
|
||||||
XInclude can't make up their minds whether to base it on elements or
|
XInclude can't make up their minds whether to base it on elements or
|
||||||
@ -1305,7 +1362,7 @@ refers to the SWI-Prolog `home-directory'.
|
|||||||
|
|
||||||
\section{Acknowledgements}
|
\section{Acknowledgements}
|
||||||
|
|
||||||
The Prolog representation for parsed documents is based on the
|
The Prolog representation for parsed documents is based on the
|
||||||
SWI-Prolog interface to SP by Anjo Anjewierden.
|
SWI-Prolog interface to SP by Anjo Anjewierden.
|
||||||
|
|
||||||
Richard O'Keefe has put a lot of effort testing and providing bug
|
Richard O'Keefe has put a lot of effort testing and providing bug
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -61,7 +61,7 @@ sgml__utf8_get_char(const char *in, int *chr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
*chr = *in;
|
*chr = *in;
|
||||||
|
|
||||||
return (char *)in+1;
|
return (char *)in+1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
|
|
||||||
#define UTIL_H_IMPLEMENTATION
|
#define UTIL_H_IMPLEMENTATION
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include <unistd.h>
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <wctype.h>
|
#include <wctype.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -50,7 +49,7 @@
|
|||||||
size_t
|
size_t
|
||||||
istrlen(const ichar *s)
|
istrlen(const ichar *s)
|
||||||
{ size_t len =0;
|
{ size_t len =0;
|
||||||
|
|
||||||
while(*s++)
|
while(*s++)
|
||||||
len++;
|
len++;
|
||||||
|
|
||||||
@ -67,7 +66,7 @@ istrdup(const ichar *s)
|
|||||||
while(*s)
|
while(*s)
|
||||||
*d++ = *s++;
|
*d++ = *s++;
|
||||||
*d = 0;
|
*d = 0;
|
||||||
|
|
||||||
return dup;
|
return dup;
|
||||||
} else
|
} else
|
||||||
{ return NULL;
|
{ return NULL;
|
||||||
@ -140,10 +139,10 @@ int
|
|||||||
istreq(const ichar *s1, const ichar *s2)
|
istreq(const ichar *s1, const ichar *s2)
|
||||||
{ while(*s1 && *s1 == *s2)
|
{ while(*s1 && *s1 == *s2)
|
||||||
s1++, s2++;
|
s1++, s2++;
|
||||||
|
|
||||||
if ( *s1 == 0 && *s2 == 0 )
|
if ( *s1 == 0 && *s2 == 0 )
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -152,10 +151,10 @@ int
|
|||||||
istrncaseeq(const ichar *s1, const ichar *s2, int len)
|
istrncaseeq(const ichar *s1, const ichar *s2, int len)
|
||||||
{ while(--len >= 0 && towlower(*s1) == towlower(*s2))
|
{ while(--len >= 0 && towlower(*s1) == towlower(*s2))
|
||||||
s1++, s2++;
|
s1++, s2++;
|
||||||
|
|
||||||
if ( len < 0 )
|
if ( len < 0 )
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,10 +163,10 @@ int
|
|||||||
istrprefix(const ichar *pref, const ichar *s)
|
istrprefix(const ichar *pref, const ichar *s)
|
||||||
{ while(*pref && *pref == *s)
|
{ while(*pref && *pref == *s)
|
||||||
pref++, s++;
|
pref++, s++;
|
||||||
|
|
||||||
if ( *pref == 0 )
|
if ( *pref == 0 )
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -212,7 +211,7 @@ istrhash(const ichar *t, int tsize)
|
|||||||
|
|
||||||
while(*t)
|
while(*t)
|
||||||
{ unsigned int c = *t++;
|
{ unsigned int c = *t++;
|
||||||
|
|
||||||
c -= 'a';
|
c -= 'a';
|
||||||
value ^= c << (shift & 0xf);
|
value ^= c << (shift & 0xf);
|
||||||
shift ^= c;
|
shift ^= c;
|
||||||
@ -231,7 +230,7 @@ istrcasehash(const ichar *t, int tsize)
|
|||||||
|
|
||||||
while(*t)
|
while(*t)
|
||||||
{ unsigned int c = towlower(*t++); /* case insensitive */
|
{ unsigned int c = towlower(*t++); /* case insensitive */
|
||||||
|
|
||||||
c -= 'a';
|
c -= 'a';
|
||||||
value ^= c << (shift & 0xf);
|
value ^= c << (shift & 0xf);
|
||||||
shift ^= c;
|
shift ^= c;
|
||||||
@ -301,7 +300,7 @@ __add_icharbuf(icharbuf *buf, int chr)
|
|||||||
else
|
else
|
||||||
buf->data = sgml_malloc(buf->allocated*sizeof(ichar));
|
buf->data = sgml_malloc(buf->allocated*sizeof(ichar));
|
||||||
}
|
}
|
||||||
|
|
||||||
buf->data[buf->size++] = chr;
|
buf->data[buf->size++] = chr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -349,7 +348,7 @@ init_ocharbuf(ocharbuf *buf)
|
|||||||
ocharbuf *
|
ocharbuf *
|
||||||
new_ocharbuf()
|
new_ocharbuf()
|
||||||
{ ocharbuf *buf = sgml_malloc(sizeof(*buf));
|
{ ocharbuf *buf = sgml_malloc(sizeof(*buf));
|
||||||
|
|
||||||
return init_ocharbuf(buf);
|
return init_ocharbuf(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -436,24 +435,76 @@ empty_ocharbuf(ocharbuf *buf)
|
|||||||
*******************************/
|
*******************************/
|
||||||
|
|
||||||
#define RINGSIZE 16
|
#define RINGSIZE 16
|
||||||
static void *ring[RINGSIZE];
|
|
||||||
static int ringp;
|
typedef struct ring
|
||||||
|
{ void *ring[RINGSIZE];
|
||||||
|
int ringp;
|
||||||
|
} ring;
|
||||||
|
|
||||||
|
#ifdef _REENTRANT
|
||||||
|
#include <pthread.h>
|
||||||
|
static pthread_key_t ring_key;
|
||||||
|
|
||||||
|
static void
|
||||||
|
free_ring(void *ptr)
|
||||||
|
{ ring *r = ptr;
|
||||||
|
int i;
|
||||||
|
void **bp;
|
||||||
|
|
||||||
|
for(i=0, bp=r->ring; i<RINGSIZE; i++, bp++)
|
||||||
|
{ if ( *bp )
|
||||||
|
{ sgml_free(*bp);
|
||||||
|
*bp = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sgml_free(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static ring *
|
||||||
|
my_ring()
|
||||||
|
{ ring *r;
|
||||||
|
|
||||||
|
if ( (r=pthread_getspecific(ring_key)) )
|
||||||
|
return r;
|
||||||
|
|
||||||
|
if ( (r = sgml_calloc(1, sizeof(*r))) )
|
||||||
|
pthread_setspecific(ring_key, r);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
init_ring(void)
|
||||||
|
{ pthread_key_create(&ring_key, free_ring);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
static ring ring_store;
|
||||||
|
#define my_ring() (&ring_store)
|
||||||
|
|
||||||
|
void init_ring(void) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
wchar_t *
|
wchar_t *
|
||||||
str2ring(const wchar_t *in)
|
str2ring(const wchar_t *in)
|
||||||
{ wchar_t *copy = sgml_malloc((wcslen(in)+1)*sizeof(wchar_t));
|
{ ring *r;
|
||||||
|
wchar_t *copy;
|
||||||
|
|
||||||
if ( !copy )
|
if ( !(r=my_ring()) ||
|
||||||
|
!(copy = sgml_malloc((wcslen(in)+1)*sizeof(wchar_t))) )
|
||||||
{ sgml_nomem();
|
{ sgml_nomem();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
wcscpy(copy, in);
|
wcscpy(copy, in);
|
||||||
if ( ring[ringp] )
|
if ( r->ring[r->ringp] )
|
||||||
sgml_free(ring[ringp]);
|
sgml_free(r->ring[r->ringp]);
|
||||||
ring[ringp++] = copy;
|
r->ring[r->ringp++] = copy;
|
||||||
if ( ringp == RINGSIZE )
|
if ( r->ringp == RINGSIZE )
|
||||||
ringp = 0;
|
r->ringp = 0;
|
||||||
|
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
@ -461,13 +512,19 @@ str2ring(const wchar_t *in)
|
|||||||
|
|
||||||
void *
|
void *
|
||||||
ringallo(size_t size)
|
ringallo(size_t size)
|
||||||
{ char *result = sgml_malloc(size);
|
{ ring *r;
|
||||||
|
char *result;
|
||||||
if ( ring[ringp] )
|
|
||||||
sgml_free(ring[ringp]);
|
if ( !(r=my_ring()) || !(result = sgml_malloc(size)) )
|
||||||
ring[ringp++] = result;
|
{ sgml_nomem();
|
||||||
if ( ringp == RINGSIZE )
|
return NULL;
|
||||||
ringp = 0;
|
}
|
||||||
|
|
||||||
|
if ( r->ring[r->ringp] )
|
||||||
|
sgml_free(r->ring[r->ringp]);
|
||||||
|
r->ring[r->ringp++] = result;
|
||||||
|
if ( r->ringp == RINGSIZE )
|
||||||
|
r->ringp = 0;
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -529,7 +586,7 @@ wcstoutf8(const wchar_t *in)
|
|||||||
{ size++;
|
{ size++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = sgml_malloc(size+1);
|
rc = sgml_malloc(size+1);
|
||||||
for(o=rc, s=in; *s; s++)
|
for(o=rc, s=in; *s; s++)
|
||||||
{ o = utf8_put_char(o, *s);
|
{ o = utf8_put_char(o, *s);
|
||||||
@ -605,7 +662,7 @@ load_sgml_file_to_charp(const ichar *file, int normalise_rsre, size_t *length)
|
|||||||
|
|
||||||
if ( r )
|
if ( r )
|
||||||
{ char *s = r;
|
{ char *s = r;
|
||||||
|
|
||||||
while(len>0)
|
while(len>0)
|
||||||
{ int n;
|
{ int n;
|
||||||
|
|
||||||
@ -652,7 +709,7 @@ load_sgml_file_to_charp(const ichar *file, int normalise_rsre, size_t *length)
|
|||||||
|
|
||||||
if ( last_is_lf )
|
if ( last_is_lf )
|
||||||
r2[--len] = '\0'; /* delete last LF */
|
r2[--len] = '\0'; /* delete last LF */
|
||||||
|
|
||||||
if ( length )
|
if ( length )
|
||||||
*length = len;
|
*length = len;
|
||||||
sgml_free(r);
|
sgml_free(r);
|
||||||
|
@ -34,16 +34,16 @@
|
|||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{ int allocated;
|
{ int allocated;
|
||||||
int size;
|
int size;
|
||||||
ichar *data;
|
ichar *data;
|
||||||
} icharbuf;
|
} icharbuf;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{ int allocated;
|
{ int allocated;
|
||||||
int size;
|
int size;
|
||||||
union
|
union
|
||||||
{ wchar_t *w; /* UCS */
|
{ wchar_t *w; /* UCS */
|
||||||
} data;
|
} data;
|
||||||
wchar_t localbuf[256]; /* Initial local store */
|
wchar_t localbuf[256]; /* Initial local store */
|
||||||
@ -98,6 +98,7 @@ void empty_ocharbuf(ocharbuf *buf);
|
|||||||
{ buf->data.w[at] = chr; \
|
{ buf->data.w[at] = chr; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void init_ring(void);
|
||||||
const wchar_t * str_summary(const wchar_t *s, int len);
|
const wchar_t * str_summary(const wchar_t *s, int len);
|
||||||
wchar_t * str2ring(const wchar_t *in);
|
wchar_t * str2ring(const wchar_t *in);
|
||||||
void * ringallo(size_t);
|
void * ringallo(size_t);
|
||||||
@ -107,8 +108,6 @@ ichar * load_sgml_file_to_charp(const ichar *file, int normalise_rsre,
|
|||||||
size_t *len);
|
size_t *len);
|
||||||
FILE * wfopen(const wchar_t *name, const char *mode);
|
FILE * wfopen(const wchar_t *name, const char *mode);
|
||||||
|
|
||||||
void wputs(ichar *s);
|
|
||||||
|
|
||||||
#if defined(USE_STRING_FUNCTIONS) && !defined(UTIL_H_IMPLEMENTATION)
|
#if defined(USE_STRING_FUNCTIONS) && !defined(UTIL_H_IMPLEMENTATION)
|
||||||
|
|
||||||
#define istrlen(s1) wcslen((s1))
|
#define istrlen(s1) wcslen((s1))
|
||||||
|
@ -29,8 +29,6 @@
|
|||||||
the GNU General Public License.
|
the GNU General Public License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "xml_unicode.h"
|
|
||||||
|
|
||||||
|
|
||||||
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
XML character classification.
|
XML character classification.
|
||||||
|
@ -29,35 +29,36 @@
|
|||||||
|
|
||||||
#ifdef XMLNS
|
#ifdef XMLNS
|
||||||
|
|
||||||
static xmlns *
|
xmlns *
|
||||||
xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url)
|
xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url)
|
||||||
{ sgml_environment *env = p->environments;
|
{ sgml_environment *env = p->environments;
|
||||||
dtd_symbol *n = (*ns ? dtd_add_symbol(p->dtd, ns) : (dtd_symbol *)NULL);
|
dtd_symbol *n = (*ns ? dtd_add_symbol(p->dtd, ns) : (dtd_symbol *)NULL);
|
||||||
dtd_symbol *u = dtd_add_symbol(p->dtd, url); /* TBD: ochar/ichar */
|
dtd_symbol *u = dtd_add_symbol(p->dtd, url); /* TBD: ochar/ichar */
|
||||||
|
xmlns *x = sgml_malloc(sizeof(*x));
|
||||||
|
|
||||||
if ( p->on_xmlns )
|
x->name = n;
|
||||||
(*p->on_xmlns)(p, n, u);
|
x->url = u;
|
||||||
|
|
||||||
if ( env )
|
if ( env )
|
||||||
{ xmlns *x = sgml_malloc(sizeof(*n));
|
{ if ( p->on_xmlns )
|
||||||
|
(*p->on_xmlns)(p, n, u);
|
||||||
|
|
||||||
x->name = n;
|
|
||||||
x->url = u;
|
|
||||||
x->next = env->xmlns;
|
x->next = env->xmlns;
|
||||||
env->xmlns = x;
|
env->xmlns = x;
|
||||||
|
} else
|
||||||
return x;
|
{ x->next = p->xmlns;
|
||||||
|
p->xmlns = x;
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
xmlns_free(sgml_environment *env)
|
xmlns_free(xmlns *n)
|
||||||
{ xmlns *n, *next;
|
{ xmlns *next;
|
||||||
|
|
||||||
for(n = env->xmlns; n; n = next)
|
for(; n; n = next)
|
||||||
{ next = n->next;
|
{ next = n->next;
|
||||||
|
|
||||||
sgml_free(n);
|
sgml_free(n);
|
||||||
@ -66,16 +67,22 @@ xmlns_free(sgml_environment *env)
|
|||||||
|
|
||||||
|
|
||||||
xmlns *
|
xmlns *
|
||||||
xmlns_find(sgml_environment *env, dtd_symbol *ns)
|
xmlns_find(dtd_parser *p, dtd_symbol *ns)
|
||||||
{ for(; env; env = env->parent)
|
{ sgml_environment *env = p->environments;
|
||||||
{ xmlns *n;
|
xmlns *n;
|
||||||
|
|
||||||
for(n=env->xmlns; n; n = n->next)
|
for(; env; env = env->parent)
|
||||||
|
{ for(n=env->xmlns; n; n = n->next)
|
||||||
{ if ( n->name == ns )
|
{ if ( n->name == ns )
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (n=p->xmlns; n; n = n->next)
|
||||||
|
{ if ( n->name == ns )
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,7 +104,7 @@ void
|
|||||||
update_xmlns(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts)
|
update_xmlns(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts)
|
||||||
{ dtd_attr_list *al;
|
{ dtd_attr_list *al;
|
||||||
int nschr = p->dtd->charfunc->func[CF_NS]; /* : */
|
int nschr = p->dtd->charfunc->func[CF_NS]; /* : */
|
||||||
|
|
||||||
for(al=e->attributes; al; al=al->next)
|
for(al=e->attributes; al; al=al->next)
|
||||||
{ dtd_attr *a = al->attribute;
|
{ dtd_attr *a = al->attribute;
|
||||||
const ichar *name = a->name->name;
|
const ichar *name = a->name->name;
|
||||||
@ -123,7 +130,7 @@ update_xmlns(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts)
|
|||||||
xmlns_resolve()
|
xmlns_resolve()
|
||||||
Convert a symbol as returned by the XML level-1.0 parser to its namespace
|
Convert a symbol as returned by the XML level-1.0 parser to its namespace
|
||||||
tuple {url}localname. This function is not used internally, but provided
|
tuple {url}localname. This function is not used internally, but provided
|
||||||
for use from the call-back functions of the parser.
|
for use from the call-back functions of the parser.
|
||||||
|
|
||||||
It exploits the stack of namespace-environments managed by the parser
|
It exploits the stack of namespace-environments managed by the parser
|
||||||
itself (see update_xmlns())
|
itself (see update_xmlns())
|
||||||
@ -150,7 +157,7 @@ xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
|
|||||||
if ( istrprefix(L"xml", buf) ) /* XML reserved namespaces */
|
if ( istrprefix(L"xml", buf) ) /* XML reserved namespaces */
|
||||||
{ *url = n->name;
|
{ *url = n->name;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
} else if ( (ns = xmlns_find(p->environments, n)) )
|
} else if ( (ns = xmlns_find(p, n)) )
|
||||||
{ if ( ns->url->name[0] )
|
{ if ( ns->url->name[0] )
|
||||||
*url = ns->url->name;
|
*url = ns->url->name;
|
||||||
else
|
else
|
||||||
@ -158,7 +165,9 @@ xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
} else
|
} else
|
||||||
{ *url = n->name; /* undefined namespace */
|
{ *url = n->name; /* undefined namespace */
|
||||||
gripe(ERC_EXISTENCE, L"namespace", n->name);
|
if ( p->xml_no_ns == NONS_QUIET )
|
||||||
|
return TRUE;
|
||||||
|
gripe(p, ERC_EXISTENCE, L"namespace", n->name);
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -195,16 +204,16 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url)
|
|||||||
ichar *o = buf;
|
ichar *o = buf;
|
||||||
const ichar *s;
|
const ichar *s;
|
||||||
xmlns *ns;
|
xmlns *ns;
|
||||||
|
|
||||||
for(s=id->name; *s; s++)
|
for(s=id->name; *s; s++)
|
||||||
{ if ( *s == nschr ) /* explicit namespace */
|
{ if ( *s == nschr ) /* explicit namespace */
|
||||||
{ dtd_symbol *n;
|
{ dtd_symbol *n;
|
||||||
|
|
||||||
*o = '\0';
|
*o = '\0';
|
||||||
*local = s+1;
|
*local = s+1;
|
||||||
n = dtd_add_symbol(dtd, buf);
|
n = dtd_add_symbol(dtd, buf);
|
||||||
|
|
||||||
if ( (ns = xmlns_find(p->environments, n)) )
|
if ( (ns = xmlns_find(p, n)) )
|
||||||
{ if ( ns->url->name[0] )
|
{ if ( ns->url->name[0] )
|
||||||
*url = ns->url->name;
|
*url = ns->url->name;
|
||||||
else
|
else
|
||||||
@ -213,17 +222,19 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url)
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
} else
|
} else
|
||||||
{ *url = n->name; /* undefined namespace */
|
{ *url = n->name; /* undefined namespace */
|
||||||
gripe(ERC_EXISTENCE, "namespace", n->name);
|
|
||||||
e->thisns = xmlns_push(p, n->name, n->name); /* define implicitly */
|
e->thisns = xmlns_push(p, n->name, n->name); /* define implicitly */
|
||||||
|
if ( p->xml_no_ns == NONS_QUIET )
|
||||||
|
return TRUE;
|
||||||
|
gripe(p, ERC_EXISTENCE, L"namespace", n->name);
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*o++ = *s;
|
*o++ = *s;
|
||||||
}
|
}
|
||||||
|
|
||||||
*local = id->name;
|
*local = id->name;
|
||||||
|
|
||||||
if ( (ns = xmlns_find(p->environments, NULL)) )
|
if ( (ns = xmlns_find(p, NULL)) )
|
||||||
{ if ( ns->url->name[0] )
|
{ if ( ns->url->name[0] )
|
||||||
*url = ns->url->name;
|
*url = ns->url->name;
|
||||||
else
|
else
|
||||||
|
@ -31,8 +31,9 @@ typedef struct _xmlns
|
|||||||
struct _xmlns *next; /* next name */
|
struct _xmlns *next; /* next name */
|
||||||
} xmlns;
|
} xmlns;
|
||||||
|
|
||||||
void xmlns_free(sgml_environment *env);
|
void xmlns_free(xmlns *list);
|
||||||
xmlns* xmlns_find(sgml_environment *env, dtd_symbol *ns);
|
xmlns* xmlns_find(dtd_parser *p, dtd_symbol *ns);
|
||||||
|
xmlns * xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url);
|
||||||
void update_xmlns(dtd_parser *p, dtd_element *e,
|
void update_xmlns(dtd_parser *p, dtd_element *e,
|
||||||
int natts, sgml_attribute *atts);
|
int natts, sgml_attribute *atts);
|
||||||
int xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
|
int xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
|
||||||
|
Reference in New Issue
Block a user