update sgml package.
This commit is contained in:
parent
0fabe2b9c6
commit
261b5163c7
@ -155,7 +155,7 @@ localpath(const ichar *ref, const ichar *name)
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
int
|
||||
register_catalog_file_unlocked(const ichar *file, catalog_location where)
|
||||
{ catalog_file **f = &catalog;
|
||||
catalog_file *cf;
|
||||
@ -205,7 +205,7 @@ wgetenv(const char *name)
|
||||
|
||||
|
||||
static void
|
||||
init_catalog(void)
|
||||
init_catalog()
|
||||
{ static int done = FALSE;
|
||||
|
||||
LOCK();
|
||||
@ -310,7 +310,7 @@ cs_streql(ichar const *a, ichar const *b)
|
||||
|
||||
static int
|
||||
scan_overflow(size_t buflen)
|
||||
{ gripe(ERC_REPRESENTATION, L"token length");
|
||||
{ gripe(NULL, ERC_REPRESENTATION, L"token length");
|
||||
|
||||
return EOF;
|
||||
}
|
||||
@ -439,7 +439,7 @@ load_one_catalogue(catalog_file * file)
|
||||
int override = 0;
|
||||
|
||||
if ( !src )
|
||||
{ gripe(ERC_NO_CATALOGUE, file->file);
|
||||
{ gripe(NULL, ERC_NO_CATALOGUE, file->file);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -635,7 +635,7 @@ find_in_catalogue(int kind,
|
||||
return 0;
|
||||
|
||||
if ( istrlen(name)+4+1 > penlen )
|
||||
{ gripe(ERC_REPRESENTATION, L"entity name");
|
||||
{ gripe(NULL, ERC_REPRESENTATION, L"entity name");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -466,7 +466,7 @@ dtd * new_dtd(const ichar *doctype);
|
||||
int set_dialect_dtd(dtd *dtd, dtd_dialect dialect);
|
||||
int set_option_dtd(dtd *dtd, dtd_option option, int set);
|
||||
|
||||
void putchar_dtd_parser(dtd_parser *p, int chr);
|
||||
int putchar_dtd_parser(dtd_parser *p, int chr);
|
||||
int begin_document_dtd_parser(dtd_parser *p);
|
||||
int end_document_dtd_parser(dtd_parser *p);
|
||||
void reset_document_dtd_parser(dtd_parser *p);
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
#include "dtd.h"
|
||||
#include "util.h"
|
||||
#include "prolog.h"
|
||||
|
||||
/* streq(s, q): non-zero iff the two NUL-terminated strings are equal.
   The whole expansion is parenthesized so the macro behaves as a single
   operand inside larger expressions (arithmetic, comparisons, ...). */
#define streq(s,q) (strcmp((s), (q)) == 0)
|
||||
@ -42,6 +43,8 @@ int
|
||||
main(int argc, char **argv)
|
||||
{ dtd_dialect dialect = DL_SGML;
|
||||
|
||||
init_ring();
|
||||
|
||||
program = argv[0];
|
||||
argv++;
|
||||
argc--;
|
||||
|
@ -3,9 +3,9 @@
|
||||
Part of SWI-Prolog
|
||||
|
||||
Author: Jan Wielemaker
|
||||
E-mail: jan@swi.psy.uva.nl
|
||||
E-mail: J.Wielemaker@cs.vu.nl
|
||||
WWW: http://www.swi-prolog.org
|
||||
Copyright (C): 1985-2002, University of Amsterdam
|
||||
Copyright (C): 1985-2009, University of Amsterdam
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
@ -32,13 +32,17 @@
|
||||
|
||||
int
|
||||
sgml2pl_error(plerrorid id, ...)
|
||||
{ term_t except = PL_new_term_ref();
|
||||
term_t formal = PL_new_term_ref();
|
||||
term_t swi = PL_new_term_ref();
|
||||
{ int rc;
|
||||
term_t except, formal, swi;
|
||||
va_list args;
|
||||
char msgbuf[1024];
|
||||
char *msg = NULL;
|
||||
|
||||
if ( !(except = PL_new_term_ref()) ||
|
||||
!(formal = PL_new_term_ref()) ||
|
||||
!(swi = PL_new_term_ref()) )
|
||||
return FALSE;
|
||||
|
||||
va_start(args, id);
|
||||
switch(id)
|
||||
{ case ERR_ERRNO:
|
||||
@ -48,32 +52,32 @@ sgml2pl_error(plerrorid id, ...)
|
||||
|
||||
switch(err)
|
||||
{ case ENOMEM:
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "resource_error", 1,
|
||||
PL_CHARS, "no_memory");
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "resource_error", 1,
|
||||
PL_CHARS, "no_memory");
|
||||
break;
|
||||
case EACCES:
|
||||
{ const char *file = va_arg(args, const char *);
|
||||
const char *action = va_arg(args, const char *);
|
||||
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "permission_error", 3,
|
||||
PL_CHARS, action,
|
||||
PL_CHARS, "file",
|
||||
PL_CHARS, file);
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "permission_error", 3,
|
||||
PL_CHARS, action,
|
||||
PL_CHARS, "file",
|
||||
PL_CHARS, file);
|
||||
break;
|
||||
}
|
||||
case ENOENT:
|
||||
{ const char *file = va_arg(args, const char *);
|
||||
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "existence_error", 2,
|
||||
PL_CHARS, "file",
|
||||
PL_CHARS, file);
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "existence_error", 2,
|
||||
PL_CHARS, "file",
|
||||
PL_CHARS, file);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PL_unify_atom_chars(formal, "system_error");
|
||||
rc = PL_unify_atom_chars(formal, "system_error");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@ -84,12 +88,12 @@ sgml2pl_error(plerrorid id, ...)
|
||||
|
||||
if ( PL_is_variable(actual) &&
|
||||
strcmp(expected, "variable") != 0 )
|
||||
PL_unify_atom_chars(formal, "instantiation_error");
|
||||
rc = PL_unify_atom_chars(formal, "instantiation_error");
|
||||
else
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "type_error", 2,
|
||||
PL_CHARS, expected,
|
||||
PL_TERM, actual);
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "type_error", 2,
|
||||
PL_CHARS, expected,
|
||||
PL_TERM, actual);
|
||||
break;
|
||||
}
|
||||
case ERR_DOMAIN:
|
||||
@ -97,31 +101,31 @@ sgml2pl_error(plerrorid id, ...)
|
||||
term_t actual = va_arg(args, term_t);
|
||||
|
||||
if ( PL_is_variable(actual) )
|
||||
PL_unify_atom_chars(formal, "instantiation_error");
|
||||
rc = PL_unify_atom_chars(formal, "instantiation_error");
|
||||
else
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "domain_error", 2,
|
||||
PL_CHARS, expected,
|
||||
PL_TERM, actual);
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "domain_error", 2,
|
||||
PL_CHARS, expected,
|
||||
PL_TERM, actual);
|
||||
break;
|
||||
}
|
||||
case ERR_EXISTENCE:
|
||||
{ const char *type = va_arg(args, const char *);
|
||||
term_t obj = va_arg(args, term_t);
|
||||
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "existence_error", 2,
|
||||
PL_CHARS, type,
|
||||
PL_TERM, obj);
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "existence_error", 2,
|
||||
PL_CHARS, type,
|
||||
PL_TERM, obj);
|
||||
|
||||
break;
|
||||
}
|
||||
case ERR_FAIL:
|
||||
{ term_t goal = va_arg(args, term_t);
|
||||
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "goal_failed", 1,
|
||||
PL_TERM, goal);
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "goal_failed", 1,
|
||||
PL_TERM, goal);
|
||||
|
||||
break;
|
||||
}
|
||||
@ -129,10 +133,10 @@ sgml2pl_error(plerrorid id, ...)
|
||||
{ const char *limit = va_arg(args, const char *);
|
||||
long maxval = va_arg(args, long);
|
||||
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "limit_exceeded", 2,
|
||||
PL_CHARS, limit,
|
||||
PL_LONG, maxval);
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "limit_exceeded", 2,
|
||||
PL_CHARS, limit,
|
||||
PL_LONG, maxval);
|
||||
|
||||
break;
|
||||
}
|
||||
@ -143,9 +147,9 @@ sgml2pl_error(plerrorid id, ...)
|
||||
vsprintf(msgbuf, fmt, args);
|
||||
msg = msgbuf;
|
||||
|
||||
PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "miscellaneous", 1,
|
||||
PL_CHARS, id);
|
||||
rc = PL_unify_term(formal,
|
||||
PL_FUNCTOR_CHARS, "miscellaneous", 1,
|
||||
PL_CHARS, id);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -153,26 +157,29 @@ sgml2pl_error(plerrorid id, ...)
|
||||
}
|
||||
va_end(args);
|
||||
|
||||
if ( msg )
|
||||
if ( rc && msg )
|
||||
{ term_t predterm = PL_new_term_ref();
|
||||
term_t msgterm = PL_new_term_ref();
|
||||
|
||||
if ( msg )
|
||||
{ PL_put_atom_chars(msgterm, msg);
|
||||
}
|
||||
|
||||
PL_unify_term(swi,
|
||||
PL_FUNCTOR_CHARS, "context", 2,
|
||||
PL_TERM, predterm,
|
||||
PL_TERM, msgterm);
|
||||
if ( !(predterm = PL_new_term_ref()) ||
|
||||
!(msgterm = PL_new_term_ref()) ||
|
||||
!PL_put_atom_chars(msgterm, msg) ||
|
||||
!PL_unify_term(swi,
|
||||
PL_FUNCTOR_CHARS, "context", 2,
|
||||
PL_TERM, predterm,
|
||||
PL_TERM, msgterm) )
|
||||
rc = FALSE;
|
||||
}
|
||||
|
||||
PL_unify_term(except,
|
||||
PL_FUNCTOR_CHARS, "error", 2,
|
||||
PL_TERM, formal,
|
||||
PL_TERM, swi);
|
||||
if ( rc )
|
||||
rc = PL_unify_term(except,
|
||||
PL_FUNCTOR_CHARS, "error", 2,
|
||||
PL_TERM, formal,
|
||||
PL_TERM, swi);
|
||||
|
||||
if ( rc )
|
||||
return PL_raise_exception(except);
|
||||
|
||||
return PL_raise_exception(except);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -44,4 +44,3 @@ typedef enum
|
||||
int sgml2pl_error(plerrorid, ...);
|
||||
|
||||
#endif /*H_ERROR_INCLUDED*/
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -145,6 +145,12 @@ typedef enum
|
||||
DM_DATA /* Environment has only elements */
|
||||
} data_mode;
|
||||
|
||||
/* Policy for an XML namespace prefix that has no declaration.
   Declared unconditionally (not under #ifdef XMLNS) because the
   xml_no_ns field of struct _dtd_parser uses this type outside any
   XMLNS guard; guarding only the typedef breaks non-XMLNS builds. */
typedef enum
{ NONS_ERROR = 0,			/* report an error (default) */
  NONS_QUIET				/* accept silently */
} xmlnons;
|
||||
|
||||
typedef struct _sgml_environment
|
||||
{ dtd_element *element; /* element that opened the env */
|
||||
@ -201,6 +207,10 @@ typedef struct _dtd_parser
|
||||
dtd_srcloc startcdata; /* Start of last cdata */
|
||||
dtd_symbol *enforce_outer_element; /* Outer element to look for */
|
||||
sgml_event_class event_class; /* EV_* */
|
||||
xmlnons xml_no_ns; /* What if namespace does not exist? */
|
||||
#ifdef XMLNS
|
||||
struct _xmlns *xmlns; /* Outer xmlns declaration */
|
||||
#endif
|
||||
|
||||
void *closure; /* client handle */
|
||||
sgml_begin_element_f on_begin_element; /* start an element */
|
||||
@ -221,7 +231,7 @@ typedef struct _dtd_parser
|
||||
#include "xmlns.h"
|
||||
#endif
|
||||
|
||||
extern int gripe(dtd_error_id e, ...);
|
||||
extern int gripe(dtd_parser *p, dtd_error_id e, ...);
|
||||
|
||||
#define SGML_SUB_DOCUMENT 0x1
|
||||
|
||||
|
@ -26,14 +26,18 @@
|
||||
#include <SWI-Prolog.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#include HAVE_MALLOC_H
|
||||
#endif
|
||||
#include "error.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <wctype.h>
|
||||
#include "xml_unicode.h"
|
||||
#include "dtd.h"
|
||||
#ifdef __WINDOWS__
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
static atom_t ATOM_iso_latin_1;
|
||||
static atom_t ATOM_utf8;
|
||||
@ -321,12 +325,39 @@ xml_quote_cdata(term_t in, term_t out, term_t encoding)
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
is_xml_nmstart(dtd_charclass *map, int c)
|
||||
{ if ( c <= 0xff )
|
||||
{ return (map->class[c] & CH_NMSTART);
|
||||
} else
|
||||
{ return ( xml_basechar(c) ||
|
||||
xml_ideographic(c)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
is_xml_chname(dtd_charclass *map, int c)
|
||||
{ if ( c <= 0xff )
|
||||
{ return (map->class[c] & CH_NAME);
|
||||
} else
|
||||
{ return ( xml_basechar(c) ||
|
||||
xml_digit(c) ||
|
||||
xml_ideographic(c) ||
|
||||
xml_combining_char(c) ||
|
||||
xml_extender(c)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
static dtd_charclass *map;
|
||||
|
||||
static foreign_t
|
||||
xml_name(term_t in, term_t encoding)
|
||||
{ char *ins;
|
||||
wchar_t *inW;
|
||||
size_t len;
|
||||
static dtd_charclass *map;
|
||||
unsigned int i;
|
||||
int maxchr;
|
||||
|
||||
@ -361,21 +392,15 @@ xml_name(term_t in, term_t encoding)
|
||||
{ if ( len == 0 )
|
||||
return FALSE;
|
||||
|
||||
if ( inW[0] > maxchr )
|
||||
return FALSE;
|
||||
|
||||
if ( inW[0] <= 0xff &&
|
||||
!(map->class[inW[0]] & CH_NMSTART) )
|
||||
return FALSE;
|
||||
if ( inW[0] > 0xff && !iswalpha(inW[0]) )
|
||||
if ( inW[0] > maxchr ||
|
||||
!is_xml_nmstart(map, inW[0]) )
|
||||
return FALSE;
|
||||
|
||||
for(i=1; i<len; i++)
|
||||
{ int c = inW[i];
|
||||
|
||||
if ( c <= 0xff && !(map->class[c] & CH_NAME) )
|
||||
return FALSE;
|
||||
if ( c > 0xff && !iswalnum((wint_t)c) )
|
||||
if ( c > maxchr ||
|
||||
!is_xml_chname(map, c) )
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
@ -386,6 +411,57 @@ xml_name(term_t in, term_t encoding)
|
||||
}
|
||||
|
||||
|
||||
static foreign_t
|
||||
iri_xml_namespace(term_t iri, term_t namespace, term_t localname)
|
||||
{ char *s;
|
||||
pl_wchar_t *w;
|
||||
size_t len;
|
||||
|
||||
if ( !map )
|
||||
map = new_charclass();
|
||||
|
||||
if ( PL_get_nchars(iri, &len, &s, CVT_ATOM|CVT_STRING) )
|
||||
{ const char *e = &s[len];
|
||||
const char *p = e;
|
||||
|
||||
while(p>s && (map->class[p[-1]&0xff] & CH_NAME))
|
||||
p--;
|
||||
while(p<e && !(map->class[p[0]&0xff] & CH_NMSTART))
|
||||
p++;
|
||||
|
||||
if ( !PL_unify_atom_nchars(namespace, p-s, s) )
|
||||
return FALSE;
|
||||
if ( localname &&
|
||||
!PL_unify_atom_nchars(localname, e-p, p) )
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
} else if ( PL_get_wchars(iri, &len, &w, CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
|
||||
{ const pl_wchar_t *e = &w[len];
|
||||
const pl_wchar_t *p = e;
|
||||
|
||||
while(p>w && is_xml_chname(map, p[-1]) )
|
||||
p--;
|
||||
while(p<e && !is_xml_nmstart(map, p[0]) )
|
||||
p++;
|
||||
|
||||
if ( !PL_unify_wchars(namespace, PL_ATOM, p-w, w) )
|
||||
return FALSE;
|
||||
if ( localname &&
|
||||
!PL_unify_wchars(localname, PL_ATOM, e-p, p) )
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static foreign_t
|
||||
iri_xml_namespace2(term_t iri, term_t namespace)
|
||||
{ return iri_xml_namespace(iri, namespace, 0);
|
||||
}
|
||||
|
||||
|
||||
install_t
|
||||
@ -398,4 +474,6 @@ install_xml_quote()
|
||||
PL_register_foreign("xml_quote_attribute", 3, xml_quote_attribute, 0);
|
||||
PL_register_foreign("xml_quote_cdata", 3, xml_quote_cdata, 0);
|
||||
PL_register_foreign("xml_name", 2, xml_name, 0);
|
||||
PL_register_foreign("iri_xml_namespace", 3, iri_xml_namespace, 0);
|
||||
PL_register_foreign("iri_xml_namespace", 2, iri_xml_namespace2, 0);
|
||||
}
|
||||
|
@ -141,23 +141,24 @@ self-contained files in SGML, HTML, or XML into a structured term. They
|
||||
are based on load_structure/3.
|
||||
|
||||
\begin{description}
|
||||
\predicate{load_sgml_file}{2}{+File, -ListOfContent}
|
||||
Same as \term{load_structure}{File, ListOfContent, [dialect(sgml)]}.
|
||||
\predicate{load_sgml_file}{2}{+Source, -ListOfContent}
|
||||
Same as \term{load_structure}{Source, ListOfContent, [dialect(sgml)]}.
|
||||
|
||||
\predicate{load_xml_file}{2}{+File, -ListOfContent}
|
||||
Same as \term{load_structure(File, ListOfContent, [dialect(xml)]}.
|
||||
\predicate{load_xml_file}{2}{+Source, -ListOfContent}
|
||||
Same as \term{load_structure}{Source, ListOfContent, [dialect(xml)]}.
|
||||
|
||||
\predicate{load_html_file}{2}{+File, -Content}
|
||||
Load \arg{File} and parse as HTML. Implemented as below. Note that
|
||||
load_html_file/2 re-uses a cached DTD object as defined by dtd/2. As DTD
|
||||
objects may be corrupted while loading errornous documents sharing is
|
||||
undesirable if the documents are not known to be correct. See dtd/2 for
|
||||
details.
|
||||
\predicate{load_html_file}{2}{+Source, -Content}
|
||||
Load \arg{Source} and parse as HTML. \arg{Source} is either the
|
||||
name of a file or term \term{stream}{Handle}. Implemented as
|
||||
below. Note that load_html_file/2 re-uses a cached DTD object as defined
|
||||
by dtd/2. As DTD objects may be corrupted while loading erroneous
|
||||
documents sharing is undesirable if the documents are not known to be
|
||||
correct. See dtd/2 for details.
|
||||
|
||||
\begin{code}
|
||||
load_html_file(File, Term) :-
|
||||
load_html_file(Source, Term) :-
|
||||
dtd(html, DTD),
|
||||
load_structure(File, Term,
|
||||
load_structure(Source, Term,
|
||||
[ dtd(DTD),
|
||||
dialect(sgml),
|
||||
shorttag(false)
|
||||
@ -394,7 +395,7 @@ is returned.
|
||||
Attributes declaring namespaces ({\tt xmlns:<ns>=<url>}) are reported
|
||||
as if \const{xmlns} were not a defined resource.
|
||||
|
||||
In many cases, getting attribute-names as <xmp>\arg{url}:\arg{name}</xmp>
|
||||
In many cases, getting attribute-names as \arg{url}:\arg{name}
|
||||
is not desirable. Such terms are hard to unify and sometimes multiple
|
||||
URLs may be mapped to the same identifier. This may happen due to poor
|
||||
version management, poor standardisation or because the application
|
||||
@ -428,6 +429,41 @@ load_rdf_xml(File, Term) :-
|
||||
]).
|
||||
\end{code}
|
||||
|
||||
The library provides iri_xml_namespace/3 to break down an IRI into
|
||||
its namespace and localname:
|
||||
|
||||
\begin{description}
|
||||
\predicate[det]{iri_xml_namespace}{3}{+IRI, -Namespace, -Localname}
|
||||
Split an IRI (Unicode URI) into its \arg{Namespace} (an IRI) and
|
||||
\arg{Localname} (a Unicode XML name, see xml_name/2). The
|
||||
\arg{Localname} is defined as the longest last part of the IRI that
|
||||
satisfies the syntax of an XML name. With IRI schemas that are designed
|
||||
to work with XML namespaces, this will typically break the IRI on the
|
||||
last \chr{\#} or \chr{/}. Note however that this can produce unexpected
|
||||
results. E.g., in the example below, one might expect the namespace to
|
||||
be \url{http://example.com/images\#}, but an XML name cannot start with
|
||||
a digit.
|
||||
|
||||
\begin{code}
|
||||
?- iri_xml_namespace('http://example.com/images#12345', NS, L).
|
||||
NS = 'http://example.com/images#12345',
|
||||
L = ''.
|
||||
\end{code}
|
||||
|
||||
As we see from the example above, the \arg{Localname} can be the empty
|
||||
atom. Similarly, \arg{Namespace} can be the empty atom if \arg{IRI} is
|
||||
an XML name. Applications will often have to check for either or both
|
||||
these conditions. We decided against failing in these conditions because
|
||||
the application typically wants to know which of the two conditions
|
||||
(empty namespace or empty localname) holds. This predicate is often used
|
||||
for generating RDF/XML from an RDF graph.
|
||||
|
||||
\predicate[det]{iri_xml_namespace}{2}{+IRI, -Namespace}
|
||||
Same as iri_xml_namespace/3, but avoids creating an atom for the
|
||||
\arg{Localname}.
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{DTD-Handling}
|
||||
|
||||
The DTD (\textbf{D}ocument \textbf{T}ype \textbf{D}efinition) is a
|
||||
@ -679,6 +715,16 @@ Process file as XML file with namespace support. See \secref{xmlns} for
|
||||
details. See also the \verb$qualify_attributes$ option below.
|
||||
\end{description}
|
||||
|
||||
\termitem{xmlns}{+URI}
|
||||
Set the default namespace of the outer environment. This option is
|
||||
provided to process partial XML content with proper namespace
|
||||
resolution.
|
||||
|
||||
\termitem{xmlns}{+NS, +URI}
|
||||
Specify a namespace for the outer environment. This option is
|
||||
provided to process partial XML content with proper namespace
|
||||
resolution.
|
||||
|
||||
\termitem{qualify_attributes}{Boolean}
|
||||
How to handle unqualified attribute (i.e. without an explicit namespace)
|
||||
in XML namespace (\const{xmlns}) mode. Default and standard compliant is
|
||||
@ -875,6 +921,13 @@ Defines how syntax errors are handled.
|
||||
using <pref builtin>print_message/2 with severity
|
||||
\const{informational}.
|
||||
\end{description}
|
||||
|
||||
\termitem{xml_no_ns}{+Mode}
|
||||
Error handling if an XML namespace is not defined. Default generates
|
||||
an error. If \const{quiet}, the error is suppressed. Can be used
|
||||
together with \term{call}{urlns, Closure} to provide external expansion
|
||||
of namespaces. See also \secref{xmlns}.
|
||||
|
||||
\termitem{call}{+Event, :PredicateName}
|
||||
Issue call-backs on the specified events. \arg{PredicateName} is the
|
||||
name of the predicate to call on this event, possibly prefixed with a
|
||||
@ -999,6 +1052,8 @@ be used and positions reported by the parser are octet offsets in the
|
||||
stream. In other cases, the Prolog stream decoder is used and offsets
|
||||
are character code counts.
|
||||
|
||||
\input{xpath.tex}
|
||||
|
||||
\section{Processing Indexed Files} \label{sec:indexaccess}
|
||||
|
||||
In some cases applications wish to process small portions of large
|
||||
@ -1106,6 +1161,8 @@ In the future we will design a call-back mechanism for locating and
|
||||
processing external entities, so Prolog-based file-location and Prolog
|
||||
resources can be used to store external entities.
|
||||
|
||||
\input{pwp.tex}
|
||||
|
||||
\section{Writing markup}
|
||||
|
||||
\subsection{Writing documents}
|
||||
@ -1222,8 +1279,8 @@ Assumes \const{ascii} encoding.
|
||||
Succeed if \arg{In} is an atom or string that satisfies the rules for
|
||||
a valid XML element or attribute name. As with the other predicates in
|
||||
this group, if \arg{Encoding} cannot represent one of the characters, this
|
||||
function fails. It uses a hard-coded table for ASCII-range characters and
|
||||
iswalpha()/iswalnum() for the first and remaining characters of the name.
|
||||
function fails. Character classification is based on
|
||||
\url{http://www.w3.org/TR/2006/REC-xml-20060816}.
|
||||
|
||||
\predicate{xml_name}{1}{+In}
|
||||
Backward compatibility version for xml_name/2. Assumes \const{ascii}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -26,7 +26,6 @@
|
||||
|
||||
#define UTIL_H_IMPLEMENTATION
|
||||
#include "util.h"
|
||||
#include <unistd.h>
|
||||
#include <ctype.h>
|
||||
#include <wctype.h>
|
||||
#include <stdlib.h>
|
||||
@ -436,24 +435,76 @@ empty_ocharbuf(ocharbuf *buf)
|
||||
*******************************/
|
||||
|
||||
#define RINGSIZE 16
|
||||
static void *ring[RINGSIZE];
|
||||
static int ringp;
|
||||
|
||||
typedef struct ring
|
||||
{ void *ring[RINGSIZE];
|
||||
int ringp;
|
||||
} ring;
|
||||
|
||||
#ifdef _REENTRANT
|
||||
#include <pthread.h>
|
||||
static pthread_key_t ring_key;
|
||||
|
||||
static void
|
||||
free_ring(void *ptr)
|
||||
{ ring *r = ptr;
|
||||
int i;
|
||||
void **bp;
|
||||
|
||||
for(i=0, bp=r->ring; i<RINGSIZE; i++, bp++)
|
||||
{ if ( *bp )
|
||||
{ sgml_free(*bp);
|
||||
*bp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
sgml_free(r);
|
||||
}
|
||||
|
||||
|
||||
static ring *
|
||||
my_ring()
|
||||
{ ring *r;
|
||||
|
||||
if ( (r=pthread_getspecific(ring_key)) )
|
||||
return r;
|
||||
|
||||
if ( (r = sgml_calloc(1, sizeof(*r))) )
|
||||
pthread_setspecific(ring_key, r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void
|
||||
init_ring(void)
|
||||
{ pthread_key_create(&ring_key, free_ring);
|
||||
}
|
||||
|
||||
#else
|
||||
static ring ring_store;
|
||||
#define my_ring() (&ring_store)
|
||||
|
||||
void init_ring(void) {}
|
||||
#endif
|
||||
|
||||
|
||||
wchar_t *
|
||||
str2ring(const wchar_t *in)
|
||||
{ wchar_t *copy = sgml_malloc((wcslen(in)+1)*sizeof(wchar_t));
|
||||
{ ring *r;
|
||||
wchar_t *copy;
|
||||
|
||||
if ( !copy )
|
||||
if ( !(r=my_ring()) ||
|
||||
!(copy = sgml_malloc((wcslen(in)+1)*sizeof(wchar_t))) )
|
||||
{ sgml_nomem();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
wcscpy(copy, in);
|
||||
if ( ring[ringp] )
|
||||
sgml_free(ring[ringp]);
|
||||
ring[ringp++] = copy;
|
||||
if ( ringp == RINGSIZE )
|
||||
ringp = 0;
|
||||
if ( r->ring[r->ringp] )
|
||||
sgml_free(r->ring[r->ringp]);
|
||||
r->ring[r->ringp++] = copy;
|
||||
if ( r->ringp == RINGSIZE )
|
||||
r->ringp = 0;
|
||||
|
||||
return copy;
|
||||
}
|
||||
@ -461,13 +512,19 @@ str2ring(const wchar_t *in)
|
||||
|
||||
void *
|
||||
ringallo(size_t size)
|
||||
{ char *result = sgml_malloc(size);
|
||||
{ ring *r;
|
||||
char *result;
|
||||
|
||||
if ( ring[ringp] )
|
||||
sgml_free(ring[ringp]);
|
||||
ring[ringp++] = result;
|
||||
if ( ringp == RINGSIZE )
|
||||
ringp = 0;
|
||||
if ( !(r=my_ring()) || !(result = sgml_malloc(size)) )
|
||||
{ sgml_nomem();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ( r->ring[r->ringp] )
|
||||
sgml_free(r->ring[r->ringp]);
|
||||
r->ring[r->ringp++] = result;
|
||||
if ( r->ringp == RINGSIZE )
|
||||
r->ringp = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -98,6 +98,7 @@ void empty_ocharbuf(ocharbuf *buf);
|
||||
{ buf->data.w[at] = chr; \
|
||||
}
|
||||
|
||||
void init_ring(void);
|
||||
const wchar_t * str_summary(const wchar_t *s, int len);
|
||||
wchar_t * str2ring(const wchar_t *in);
|
||||
void * ringallo(size_t);
|
||||
@ -107,8 +108,6 @@ ichar * load_sgml_file_to_charp(const ichar *file, int normalise_rsre,
|
||||
size_t *len);
|
||||
FILE * wfopen(const wchar_t *name, const char *mode);
|
||||
|
||||
void wputs(ichar *s);
|
||||
|
||||
#if defined(USE_STRING_FUNCTIONS) && !defined(UTIL_H_IMPLEMENTATION)
|
||||
|
||||
#define istrlen(s1) wcslen((s1))
|
||||
|
@ -29,8 +29,6 @@
|
||||
the GNU General Public License.
|
||||
*/
|
||||
|
||||
#include "xml_unicode.h"
|
||||
|
||||
|
||||
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
XML character classification.
|
||||
|
@ -29,35 +29,36 @@
|
||||
|
||||
#ifdef XMLNS
|
||||
|
||||
static xmlns *
|
||||
xmlns *
|
||||
xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url)
|
||||
{ sgml_environment *env = p->environments;
|
||||
dtd_symbol *n = (*ns ? dtd_add_symbol(p->dtd, ns) : (dtd_symbol *)NULL);
|
||||
dtd_symbol *u = dtd_add_symbol(p->dtd, url); /* TBD: ochar/ichar */
|
||||
xmlns *x = sgml_malloc(sizeof(*x));
|
||||
|
||||
if ( p->on_xmlns )
|
||||
(*p->on_xmlns)(p, n, u);
|
||||
x->name = n;
|
||||
x->url = u;
|
||||
|
||||
if ( env )
|
||||
{ xmlns *x = sgml_malloc(sizeof(*n));
|
||||
{ if ( p->on_xmlns )
|
||||
(*p->on_xmlns)(p, n, u);
|
||||
|
||||
x->name = n;
|
||||
x->url = u;
|
||||
x->next = env->xmlns;
|
||||
env->xmlns = x;
|
||||
|
||||
return x;
|
||||
} else
|
||||
{ x->next = p->xmlns;
|
||||
p->xmlns = x;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return x;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
xmlns_free(sgml_environment *env)
|
||||
{ xmlns *n, *next;
|
||||
xmlns_free(xmlns *n)
|
||||
{ xmlns *next;
|
||||
|
||||
for(n = env->xmlns; n; n = next)
|
||||
for(; n; n = next)
|
||||
{ next = n->next;
|
||||
|
||||
sgml_free(n);
|
||||
@ -66,16 +67,22 @@ xmlns_free(sgml_environment *env)
|
||||
|
||||
|
||||
xmlns *
|
||||
xmlns_find(sgml_environment *env, dtd_symbol *ns)
|
||||
{ for(; env; env = env->parent)
|
||||
{ xmlns *n;
|
||||
xmlns_find(dtd_parser *p, dtd_symbol *ns)
|
||||
{ sgml_environment *env = p->environments;
|
||||
xmlns *n;
|
||||
|
||||
for(n=env->xmlns; n; n = n->next)
|
||||
for(; env; env = env->parent)
|
||||
{ for(n=env->xmlns; n; n = n->next)
|
||||
{ if ( n->name == ns )
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
for (n=p->xmlns; n; n = n->next)
|
||||
{ if ( n->name == ns )
|
||||
return n;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -150,7 +157,7 @@ xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
|
||||
if ( istrprefix(L"xml", buf) ) /* XML reserved namespaces */
|
||||
{ *url = n->name;
|
||||
return TRUE;
|
||||
} else if ( (ns = xmlns_find(p->environments, n)) )
|
||||
} else if ( (ns = xmlns_find(p, n)) )
|
||||
{ if ( ns->url->name[0] )
|
||||
*url = ns->url->name;
|
||||
else
|
||||
@ -158,7 +165,9 @@ xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
|
||||
return TRUE;
|
||||
} else
|
||||
{ *url = n->name; /* undefined namespace */
|
||||
gripe(ERC_EXISTENCE, L"namespace", n->name);
|
||||
if ( p->xml_no_ns == NONS_QUIET )
|
||||
return TRUE;
|
||||
gripe(p, ERC_EXISTENCE, L"namespace", n->name);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
@ -204,7 +213,7 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url)
|
||||
*local = s+1;
|
||||
n = dtd_add_symbol(dtd, buf);
|
||||
|
||||
if ( (ns = xmlns_find(p->environments, n)) )
|
||||
if ( (ns = xmlns_find(p, n)) )
|
||||
{ if ( ns->url->name[0] )
|
||||
*url = ns->url->name;
|
||||
else
|
||||
@ -213,8 +222,10 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url)
|
||||
return TRUE;
|
||||
} else
|
||||
{ *url = n->name; /* undefined namespace */
|
||||
gripe(ERC_EXISTENCE, "namespace", n->name);
|
||||
e->thisns = xmlns_push(p, n->name, n->name); /* define implicitly */
|
||||
if ( p->xml_no_ns == NONS_QUIET )
|
||||
return TRUE;
|
||||
gripe(p, ERC_EXISTENCE, L"namespace", n->name);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
@ -223,7 +234,7 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url)
|
||||
|
||||
*local = id->name;
|
||||
|
||||
if ( (ns = xmlns_find(p->environments, NULL)) )
|
||||
if ( (ns = xmlns_find(p, NULL)) )
|
||||
{ if ( ns->url->name[0] )
|
||||
*url = ns->url->name;
|
||||
else
|
||||
|
@ -31,8 +31,9 @@ typedef struct _xmlns
|
||||
struct _xmlns *next; /* next name */
|
||||
} xmlns;
|
||||
|
||||
void xmlns_free(sgml_environment *env);
|
||||
xmlns* xmlns_find(sgml_environment *env, dtd_symbol *ns);
|
||||
void xmlns_free(xmlns *list);
|
||||
xmlns* xmlns_find(dtd_parser *p, dtd_symbol *ns);
|
||||
xmlns * xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url);
|
||||
void update_xmlns(dtd_parser *p, dtd_element *e,
|
||||
int natts, sgml_attribute *atts);
|
||||
int xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
|
||||
|
Reference in New Issue
Block a user