update sgml package.

2010-05-06 10:59:09 +01:00 · 2010-05-06 10:59:09 +01:00 · 261b5163c7
commit 261b5163c7
parent 0fabe2b9c6
20 changed files with 1431 additions and 1010 deletions
--- a/packages/sgml/catalog.c
+++ b/packages/sgml/catalog.c
@ -155,7 +155,7 @@ localpath(const ichar *ref, const ichar *name)
 }


-static int
+int
 register_catalog_file_unlocked(const ichar *file, catalog_location where)
 { catalog_file **f = &catalog;
  catalog_file *cf;
@ -205,7 +205,7 @@ wgetenv(const char *name)


 static void
-init_catalog(void)
+init_catalog()
 { static int done = FALSE;

  LOCK();
@ -241,7 +241,7 @@ init_catalog(void)
 int
 register_catalog_file(const ichar *file, catalog_location where)
 { int rc;
-  
+
  init_catalog();

  LOCK();
@ -310,7 +310,7 @@ cs_streql(ichar const *a, ichar const *b)

 static int
 scan_overflow(size_t buflen)
-{ gripe(ERC_REPRESENTATION, L"token length");
+{ gripe(NULL, ERC_REPRESENTATION, L"token length");

  return EOF;
 }
@ -439,7 +439,7 @@ load_one_catalogue(catalog_file * file)
  int override = 0;

  if ( !src )
-  { gripe(ERC_NO_CATALOGUE, file->file);
+  { gripe(NULL, ERC_NO_CATALOGUE, file->file);
    return;
  }

@ -514,7 +514,7 @@ load_one_catalogue(catalog_file * file)

    To look up a parameter entity:
    f = find_in_catalogue(CAT_PENTITY, name, pubid, sysid, ci);
-    The name may begin with a % but need not; if it doesn't    
+    The name may begin with a % but need not; if it doesn't
    a % will be prefixed for the search.
    If it cannot otherwise be found ${name}.pen will be returned.

@ -635,7 +635,7 @@ find_in_catalogue(int kind,
    return 0;

  if ( istrlen(name)+4+1 > penlen )
-  { gripe(ERC_REPRESENTATION, L"entity name");
+  { gripe(NULL, ERC_REPRESENTATION, L"entity name");
    return NULL;
  }

--- a/packages/sgml/charmap.c
+++ b/packages/sgml/charmap.c
@ -44,7 +44,7 @@ new_charclass()
  char_range(map, 'a', 'z', CH_LCLETTER);
  char_range(map, 'A', 'Z', CH_LCLETTER);
  char_range(map, '0', '9', CH_DIGIT);
-  
+
  ca['.'] |= CH_CNM;
  ca['-'] |= CH_CNM;
  ca[183] |= CH_CNM;			/* XML */
--- a/packages/sgml/dtd.h
+++ b/packages/sgml/dtd.h
@ -466,7 +466,7 @@ dtd *		new_dtd(const ichar *doctype);
 int		set_dialect_dtd(dtd *dtd, dtd_dialect dialect);
 int		set_option_dtd(dtd *dtd, dtd_option option, int set);

-void		putchar_dtd_parser(dtd_parser *p, int chr);
+int		putchar_dtd_parser(dtd_parser *p, int chr);
 int		begin_document_dtd_parser(dtd_parser *p);
 int		end_document_dtd_parser(dtd_parser *p);
 void		reset_document_dtd_parser(dtd_parser *p);
--- a/packages/sgml/dtd2pl.c
+++ b/packages/sgml/dtd2pl.c
@ -27,6 +27,7 @@
 #include <string.h>
 #include <wchar.h>
 #include "dtd.h"
+#include "util.h"
 #include "prolog.h"

 #define streq(s,q) strcmp((s), (q)) == 0
@ -42,10 +43,12 @@ int
 main(int argc, char **argv)
 { dtd_dialect dialect = DL_SGML;

+  init_ring();
+
  program = argv[0];
  argv++;
  argc--;
-  
+
  while(argc > 0 && argv[0][0] == '-')
  { if ( streq(argv[0], "-xml") )
    { dialect = DL_XML;
@ -63,7 +66,7 @@ main(int argc, char **argv)

  if ( argc == 1 )
  { int wl = mbstowcs(NULL, argv[0], 0);
-    
+
    if ( wl > 0 )
    { wchar_t *ws = malloc((wl+1)*sizeof(wchar_t));
      dtd *dtd;
--- a/packages/sgml/error.c
+++ b/packages/sgml/error.c
@ -3,9 +3,9 @@
    Part of SWI-Prolog

    Author:        Jan Wielemaker
-    E-mail:        jan@swi.psy.uva.nl
+    E-mail:        J.Wielemaker@cs.vu.nl
    WWW:           http://www.swi-prolog.org
-    Copyright (C): 1985-2002, University of Amsterdam
+    Copyright (C): 1985-2009, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@ -32,48 +32,52 @@

 int
 sgml2pl_error(plerrorid id, ...)
-{ term_t except = PL_new_term_ref();
-  term_t formal = PL_new_term_ref();
-  term_t swi	= PL_new_term_ref();
+{ int rc;
+  term_t except, formal, swi;
  va_list args;
  char msgbuf[1024];
  char *msg = NULL;

+  if ( !(except = PL_new_term_ref()) ||
+       !(formal = PL_new_term_ref()) ||
+       !(swi	= PL_new_term_ref()) )
+    return FALSE;
+
  va_start(args, id);
  switch(id)
  { case ERR_ERRNO:
    { int err = va_arg(args, int);
-      
+
      msg = strerror(err);

      switch(err)
      { case ENOMEM:
-	  PL_unify_term(formal,
-			PL_FUNCTOR_CHARS, "resource_error", 1,
-			  PL_CHARS, "no_memory");
+	  rc = PL_unify_term(formal,
+			     PL_FUNCTOR_CHARS, "resource_error", 1,
+			       PL_CHARS, "no_memory");
 	  break;
 	case EACCES:
 	{ const char *file = va_arg(args,   const char *);
 	  const char *action = va_arg(args, const char *);

-	  PL_unify_term(formal,
-			PL_FUNCTOR_CHARS, "permission_error", 3,
-			  PL_CHARS, action,
-			  PL_CHARS, "file",
-			  PL_CHARS, file);
+	  rc = PL_unify_term(formal,
+			     PL_FUNCTOR_CHARS, "permission_error", 3,
+			       PL_CHARS, action,
+			       PL_CHARS, "file",
+			       PL_CHARS, file);
 	  break;
 	}
 	case ENOENT:
 	{ const char *file = va_arg(args, const char *);

-	  PL_unify_term(formal,
-			PL_FUNCTOR_CHARS, "existence_error", 2,
-			  PL_CHARS, "file",
-			  PL_CHARS, file);
+	  rc = PL_unify_term(formal,
+			     PL_FUNCTOR_CHARS, "existence_error", 2,
+			       PL_CHARS, "file",
+			       PL_CHARS, file);
 	  break;
 	}
 	default:
-	  PL_unify_atom_chars(formal, "system_error");
+	  rc = PL_unify_atom_chars(formal, "system_error");
 	  break;
      }
      break;
@ -84,12 +88,12 @@ sgml2pl_error(plerrorid id, ...)

      if ( PL_is_variable(actual) &&
 	   strcmp(expected, "variable") != 0 )
-	PL_unify_atom_chars(formal, "instantiation_error");
+	rc = PL_unify_atom_chars(formal, "instantiation_error");
      else
-	PL_unify_term(formal,
-		      PL_FUNCTOR_CHARS, "type_error", 2,
-		      PL_CHARS, expected,
-		      PL_TERM, actual);
+	rc = PL_unify_term(formal,
+			   PL_FUNCTOR_CHARS, "type_error", 2,
+			     PL_CHARS, expected,
+			     PL_TERM, actual);
      break;
    }
    case ERR_DOMAIN:
@ -97,31 +101,31 @@ sgml2pl_error(plerrorid id, ...)
      term_t actual        = va_arg(args, term_t);

      if ( PL_is_variable(actual) )
-	PL_unify_atom_chars(formal, "instantiation_error");
+	rc = PL_unify_atom_chars(formal, "instantiation_error");
      else
-	PL_unify_term(formal,
-		      PL_FUNCTOR_CHARS, "domain_error", 2,
-		      PL_CHARS, expected,
-		      PL_TERM, actual);
+	rc = PL_unify_term(formal,
+			   PL_FUNCTOR_CHARS, "domain_error", 2,
+			     PL_CHARS, expected,
+			     PL_TERM, actual);
      break;
    }
    case ERR_EXISTENCE:
    { const char *type = va_arg(args, const char *);
      term_t obj  = va_arg(args, term_t);

-      PL_unify_term(formal,
-		    PL_FUNCTOR_CHARS, "existence_error", 2,
-		    PL_CHARS, type,
-		    PL_TERM, obj);
+      rc = PL_unify_term(formal,
+			 PL_FUNCTOR_CHARS, "existence_error", 2,
+			   PL_CHARS, type,
+			   PL_TERM, obj);

      break;
    }
    case ERR_FAIL:
    { term_t goal  = va_arg(args, term_t);

-      PL_unify_term(formal,
-		    PL_FUNCTOR_CHARS, "goal_failed", 1,
-		    PL_TERM, goal);
+      rc = PL_unify_term(formal,
+			 PL_FUNCTOR_CHARS, "goal_failed", 1,
+			   PL_TERM, goal);

      break;
    }
@ -129,10 +133,10 @@ sgml2pl_error(plerrorid id, ...)
    { const char *limit = va_arg(args, const char *);
      long maxval  = va_arg(args, long);

-      PL_unify_term(formal,
-		    PL_FUNCTOR_CHARS, "limit_exceeded", 2,
-		    PL_CHARS, limit,
-		    PL_LONG, maxval);
+      rc = PL_unify_term(formal,
+			 PL_FUNCTOR_CHARS, "limit_exceeded", 2,
+			   PL_CHARS, limit,
+			   PL_LONG, maxval);

      break;
    }
@ -142,10 +146,10 @@ sgml2pl_error(plerrorid id, ...)

      vsprintf(msgbuf, fmt, args);
      msg = msgbuf;
-      
-      PL_unify_term(formal,
-		    PL_FUNCTOR_CHARS, "miscellaneous", 1,
-		      PL_CHARS, id);
+
+      rc = PL_unify_term(formal,
+			 PL_FUNCTOR_CHARS, "miscellaneous", 1,
+			   PL_CHARS, id);
      break;
    }
    default:
@ -153,26 +157,29 @@ sgml2pl_error(plerrorid id, ...)
  }
  va_end(args);

-  if ( msg )
+  if ( rc && msg )
  { term_t predterm = PL_new_term_ref();
    term_t msgterm  = PL_new_term_ref();

-    if ( msg )
-    { PL_put_atom_chars(msgterm, msg);
-    }
-
-    PL_unify_term(swi,
-		  PL_FUNCTOR_CHARS, "context", 2,
-		    PL_TERM, predterm,
-		    PL_TERM, msgterm);
+    if ( !(predterm = PL_new_term_ref()) ||
+	 !(msgterm  = PL_new_term_ref()) ||
+	 !PL_put_atom_chars(msgterm, msg) ||
+	 !PL_unify_term(swi,
+			PL_FUNCTOR_CHARS, "context", 2,
+			  PL_TERM, predterm,
+			  PL_TERM, msgterm) )
+      rc = FALSE;
  }

-  PL_unify_term(except,
-		PL_FUNCTOR_CHARS, "error", 2,
-		  PL_TERM, formal,
-		  PL_TERM, swi);
+  if ( rc )
+    rc = PL_unify_term(except,
+		       PL_FUNCTOR_CHARS, "error", 2,
+		         PL_TERM, formal,
+		         PL_TERM, swi);

+  if ( rc )
+    return PL_raise_exception(except);

-  return PL_raise_exception(except);
+  return FALSE;
 }

--- a/packages/sgml/error.h
+++ b/packages/sgml/error.h
@ -44,4 +44,3 @@ typedef enum
 int		sgml2pl_error(plerrorid, ...);

 #endif /*H_ERROR_INCLUDED*/
-
--- a/packages/sgml/model.c
+++ b/packages/sgml/model.c
@ -107,7 +107,7 @@ visit(dtd_state *state, visited *visited)
  { if ( visited->states[i] == state )
      return FALSE;
  }
-      
+
  if ( visited->size >= MAX_VISITED )
  { fprintf(stderr, "Reached MAX_VISITED!\n");
    return FALSE;
@ -262,7 +262,7 @@ do_find_omitted_path(dtd_state *state, dtd_element *e,
 }


-int 
+int
 find_omitted_path(dtd_state *state, dtd_element *e, dtd_element **path)
 { int pl = 0;
  visited visited;
@ -314,13 +314,13 @@ static transition *
 state_transitions(dtd_state *state)
 { if ( !state->transitions && state->expander )
  { expander *ex = state->expander;
-    
+
    switch(ex->type)
    { case EX_AND:
      { dtd_model_list *left = ex->kind.and.set;

 	if ( !left )			/* empty AND (should not happen) */
-	{ link(state, ex->target, NULL); 
+	{ link(state, ex->target, NULL);
 	} else if ( !left->next )	/* only one left */
 	{ translate_model(left->model, state, ex->target);
 	} else
@ -378,7 +378,7 @@ translate_one(dtd_model *m, dtd_state *from, dtd_state *to)

      ex->target = to;
      ex->type   = EX_AND;
-      
+
      for( sub = m->content.group; sub; sub = sub->next )
 	add_model_list(&ex->kind.and.set, sub);

@ -436,7 +436,7 @@ make_state_engine(dtd_element *e)
    { if ( def->content )
      { def->initial_state = new_dtd_state();
 	def->final_state   = new_dtd_state();
-    
+
 	translate_model(def->content, def->initial_state, def->final_state);
      } else if ( def->type == C_CDATA || def->type == C_RCDATA )
      { def->initial_state = new_dtd_state();
@ -450,7 +450,7 @@ make_state_engine(dtd_element *e)

    return def->initial_state;
  }
-  
+
  return NULL;
 }

@ -492,7 +492,7 @@ free_expander(expander *e, visited *visited)
 static void
 do_free_state_engine(dtd_state *state, visited *visited)
 { transition *t, *next;
-  
+
  for(t=state->transitions; t; t=next)
  { next = t->next;

--- a/packages/sgml/parser.c
+++ b/packages/sgml/parser.c
--- a/packages/sgml/parser.h
+++ b/packages/sgml/parser.h
@ -145,6 +145,12 @@ typedef enum
  DM_DATA				/* Environment has only elements */
 } data_mode;

+#ifdef XMLNS
+typedef enum
+{ NONS_ERROR = 0,
+  NONS_QUIET
+} xmlnons;
+#endif

 typedef struct _sgml_environment
 { dtd_element *element;			/* element that opened the env */
@ -201,6 +207,10 @@ typedef struct _dtd_parser
  dtd_srcloc	startcdata;		/* Start of last cdata */
  dtd_symbol   *enforce_outer_element;	/* Outer element to look for */
  sgml_event_class event_class;		/* EV_* */
+  xmlnons	xml_no_ns;		/* What if namespace does not exist? */
+#ifdef XMLNS
+  struct _xmlns *xmlns;			/* Outer xmlns declaration */
+#endif

  void *closure;			/* client handle */
  sgml_begin_element_f	on_begin_element; /* start an element */
@ -221,7 +231,7 @@ typedef struct _dtd_parser
 #include "xmlns.h"
 #endif

-extern int		gripe(dtd_error_id e, ...);
+extern int		gripe(dtd_parser *p, dtd_error_id e, ...);

 #define SGML_SUB_DOCUMENT	0x1

--- a/packages/sgml/prolog.c
+++ b/packages/sgml/prolog.c
@ -342,7 +342,7 @@ prolog_print_attribute(dtd_element *e, dtd_attr *at)
      printf("list(nutoken)");
      break;
  }
-  
+
  printf(", ");				/* print default */
  switch(at->def)
  { case AT_REQUIRED:
@ -427,7 +427,7 @@ prolog_print_element(dtd_element *e, unsigned int flags)

    if ( def->excluded )
    { dtd_element_list *el;
-  
+
      for(el = def->excluded; el; el=el->next)
 	wprintf(L"exclude(%ls, %ls).\n",
 		atom(e->name->name),
@ -435,7 +435,7 @@ prolog_print_element(dtd_element *e, unsigned int flags)
    }
    if ( def->included )
    { dtd_element_list *el;
-  
+
      for(el = def->included; el; el=el->next)
 	wprintf(L"include(%ls, %ls).\n",
 		atom(e->name->name),
--- a/packages/sgml/quote.c
+++ b/packages/sgml/quote.c
@ -26,14 +26,18 @@
 #include <SWI-Prolog.h>
 #include <stdlib.h>
 #ifdef HAVE_MALLOC_H
-#include <malloc.h>
+#include HAVE_MALLOC_H
 #endif
 #include "error.h"
 #include <errno.h>
 #include <string.h>
 #include <stdio.h>
 #include <wctype.h>
+#include "xml_unicode.h"
 #include "dtd.h"
+#ifdef __WINDOWS__
+#define inline __inline
+#endif

 static atom_t ATOM_iso_latin_1;
 static atom_t ATOM_utf8;
@ -86,7 +90,7 @@ room_buf(charbuf *b, size_t room)
    b->end = b->bufp + used;
  }

-  return TRUE;    
+  return TRUE;
 }


@ -178,11 +182,11 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
  if ( inA )
  { for(s = (unsigned char*)inA ; len-- > 0; s++ )
    { int c = *s;
-      
+
      if ( map[c] )
      { if ( !add_str_buf(&buffer, map[c]) )
 	  return FALSE;
-	
+
 	changes++;
      } else if ( c > maxchr )
      { char buf[10];
@ -190,7 +194,7 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
 	sprintf(buf, "&#%d;", c);
 	if ( !add_str_buf(&buffer, buf) )
 	  return FALSE;
-	
+
 	changes++;
      } else
      { add_char_buf(&buffer, c);
@ -204,11 +208,11 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
  } else
  { for( ; len-- > 0; inW++ )
    { int c = *inW;
-      
+
      if ( c <= 0xff && map[c] )
      { if ( !add_str_bufW(&buffer, map[c]) )
 	  return FALSE;
-	
+
 	changes++;
      } else if ( c > maxchr )
      { char buf[10];
@ -216,13 +220,13 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
 	sprintf(buf, "&#%d;", c);
 	if ( !add_str_bufW(&buffer, buf) )
 	  return FALSE;
-	
+
 	changes++;
      }else
      { add_char_bufW(&buffer, c);
      }
    }
-	 
+
    if ( changes > 0 )
      rc = PL_unify_wchars(quoted, PL_ATOM,
 			   used_buf(&buffer)/sizeof(wchar_t),
@ -230,7 +234,7 @@ do_quote(term_t in, term_t quoted, char **map, int maxchr)
    else
      rc = PL_unify(in, quoted);
  }
-  
+
  free_buf(&buffer);

  return rc;
@ -321,12 +325,39 @@ xml_quote_cdata(term_t in, term_t out, term_t encoding)
 }


+static inline int
+is_xml_nmstart(dtd_charclass *map, int c)
+{ if ( c <= 0xff )
+  { return (map->class[c] & CH_NMSTART);
+  } else
+  { return ( xml_basechar(c) ||
+	     xml_ideographic(c)
+	   );
+  }
+}
+
+
+static inline int
+is_xml_chname(dtd_charclass *map, int c)
+{ if ( c <= 0xff )
+  { return (map->class[c] & CH_NAME);
+  } else
+  { return ( xml_basechar(c) ||
+	     xml_digit(c) ||
+	     xml_ideographic(c) ||
+	     xml_combining_char(c) ||
+	     xml_extender(c)
+	   );
+  }
+}
+
+static dtd_charclass *map;
+
 static foreign_t
 xml_name(term_t in, term_t encoding)
 { char *ins;
  wchar_t *inW;
  size_t len;
-  static dtd_charclass *map;
  unsigned int i;
  int maxchr;

@ -345,7 +376,7 @@ xml_name(term_t in, term_t encoding)
    c = ins[0] & 0xff;
    if ( c > maxchr )
      return FALSE;
-    
+
    if ( !(map->class[c] & CH_NMSTART) )
      return FALSE;
    for(i=1; i<len; i++)
@ -360,22 +391,16 @@ xml_name(term_t in, term_t encoding)
  if ( PL_get_wchars(in, &len, &inW, CVT_ATOMIC) )
  { if ( len == 0 )
      return FALSE;
-  
-    if ( inW[0] > maxchr )
+
+    if ( inW[0] > maxchr ||
+	 !is_xml_nmstart(map, inW[0]) )
      return FALSE;

-    if ( inW[0] <= 0xff &&
-	 !(map->class[inW[0]] & CH_NMSTART) )
-      return FALSE;
-    if ( inW[0] > 0xff && !iswalpha(inW[0]) )
-      return FALSE;
-	 
    for(i=1; i<len; i++)
    { int c = inW[i];

-      if ( c <= 0xff && !(map->class[c] & CH_NAME) )
-	return FALSE;
-      if ( c > 0xff && !iswalnum((wint_t)c) )
+      if ( c > maxchr ||
+	   !is_xml_chname(map, c) )
 	return FALSE;
    }

@ -386,6 +411,57 @@ xml_name(term_t in, term_t encoding)
 }


+static foreign_t
+iri_xml_namespace(term_t iri, term_t namespace, term_t localname)
+{ char *s;
+  pl_wchar_t *w;
+  size_t len;
+
+  if ( !map )
+    map = new_charclass();
+
+  if ( PL_get_nchars(iri, &len, &s, CVT_ATOM|CVT_STRING) )
+  { const char *e = &s[len];
+    const char *p = e;
+
+    while(p>s && (map->class[p[-1]&0xff] & CH_NAME))
+      p--;
+    while(p<e && !(map->class[p[0]&0xff] & CH_NMSTART))
+      p++;
+
+    if ( !PL_unify_atom_nchars(namespace, p-s, s) )
+      return FALSE;
+    if ( localname &&
+	 !PL_unify_atom_nchars(localname, e-p, p) )
+      return FALSE;
+
+    return TRUE;
+  } else if ( PL_get_wchars(iri, &len, &w, CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
+  { const pl_wchar_t *e = &w[len];
+    const pl_wchar_t *p = e;
+
+    while(p>w && is_xml_chname(map, p[-1]) )
+      p--;
+    while(p<e && !is_xml_nmstart(map, p[0]) )
+      p++;
+
+    if ( !PL_unify_wchars(namespace, PL_ATOM, p-w, w) )
+      return FALSE;
+    if ( localname &&
+	 !PL_unify_wchars(localname, PL_ATOM, e-p, p) )
+      return FALSE;
+
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+static foreign_t
+iri_xml_namespace2(term_t iri, term_t namespace)
+{ return iri_xml_namespace(iri, namespace, 0);
+}


 install_t
@ -398,4 +474,6 @@ install_xml_quote()
  PL_register_foreign("xml_quote_attribute", 3, xml_quote_attribute, 0);
  PL_register_foreign("xml_quote_cdata",     3, xml_quote_cdata,     0);
  PL_register_foreign("xml_name",            2, xml_name,            0);
+  PL_register_foreign("iri_xml_namespace",   3, iri_xml_namespace,   0);
+  PL_register_foreign("iri_xml_namespace",   2, iri_xml_namespace2,  0);
 }
--- a/packages/sgml/sgml.c
+++ b/packages/sgml/sgml.c
@ -95,10 +95,10 @@ print_word(dtd_parser * p, char c,     /* preceding character */
 static void
 wprint_escaped(FILE *f, const wchar_t *s, int len)
 { const wchar_t *e = &s[len];
-  
+
  while ( s < e )
  { wint_t x = *s++;
-      
+
    if (x >= ' ')
    { if (x == '\\')			/* \ --> \\ */
 	wputc(x, f);
@ -352,7 +352,7 @@ mb2wc(const char *s)

    return ws;
  }
-  
+
  perror("mbstowcs");
  exit(1);
 }
--- a/packages/sgml/sgml.doc
+++ b/packages/sgml/sgml.doc
@ -26,7 +26,7 @@ Markup languages are an increasingly important method for
 data-representation and exchange. This article documents the package
 \pllib{sgml}, a foreign library for SWI-Prolog to parse SGML
 and XML documents, returning information on both the document and the
-document's DTD. The parser is designed to be small, fast and flexible. 
+document's DTD. The parser is designed to be small, fast and flexible.
 \end{abstract}

 \pagebreak
@ -56,17 +56,17 @@ The parser described in this document is small (less than 100 kBytes
 executable on a Pentium), fast (between 2 and 5 times faster than SP),
 provides access to the DTD, and provides flexible input handling.

-The document output is equal to the output produced by \jargon{xml2pl}, 
+The document output is equal to the output produced by \jargon{xml2pl},
 an SP interface to SWI-Prolog written by Anjo Anjewierden.


 \section{Bluffer's Guide}

-This package allows you to parse SGML, XML and HTML data into a Prolog 
-data structure. The high-level interface defined in \pllib{sgml} 
+This package allows you to parse SGML, XML and HTML data into a Prolog
+data structure. The high-level interface defined in \pllib{sgml}
 provides access at the file-level, while the low-level interface defined
-in the foreign module works with Prolog streams. Please use the source 
-of \file{sgml.pl} as a starting point for dealing with data from 
+in the foreign module works with Prolog streams. Please use the source
+of \file{sgml.pl} as a starting point for dealing with data from
 other sources than files, such as SWI-Prolog resources, network-sockets,
 character strings, \emph{etc.} The first example below loads an HTML file.

@ -123,9 +123,9 @@ This is called `omitted-tag' handling.
 ].
 \end{code}

-The document is represented as a list, each element being an atom to 
+The document is represented as a list, each element being an atom to
 represent \const{CDATA} or a term \term{element}{Name, Attributes, Content}.
-Entities (e.g. \verb$&lt;$) are expanded and included in the 
+Entities (e.g. \verb$&lt;$) are expanded and included in the
 atom representing the element content or attribute value.%
    \footnote{Up to SWI-Prolog 5.4.x, Prolog could not represent
 	      \jargon{wide} characters and entities that did not fit in
@ -141,23 +141,24 @@ self-contained files in SGML, HTML, or XML into a structured term. They
 are based on load_structure/3.

 \begin{description}
-    \predicate{load_sgml_file}{2}{+File, -ListOfContent}
-Same as \term{load_structure}{File, ListOfContent, [dialect(sgml)]}.
+    \predicate{load_sgml_file}{2}{+Source, -ListOfContent}
+Same as \term{load_structure}{Source, ListOfContent, [dialect(sgml)]}.

-    \predicate{load_xml_file}{2}{+File, -ListOfContent}
-Same as \term{load_structure(File, ListOfContent, [dialect(xml)]}.
+    \predicate{load_xml_file}{2}{+Source, -ListOfContent}
+Same as \term{load_structure}{Source, ListOfContent, [dialect(xml)]}.

-    \predicate{load_html_file}{2}{+File, -Content}
-Load \arg{File} and parse as HTML. Implemented as below. Note that
-load_html_file/2 re-uses a cached DTD object as defined by dtd/2. As DTD
-objects may be corrupted while loading errornous documents sharing is
-undesirable if the documents are not known to be correct. See dtd/2 for
-details.
+    \predicate{load_html_file}{2}{+Source, -Content}
+Load \arg{Source} and parse as HTML. \arg{Source} is either the
+name of a file or term \term{stream}{Handle}. Implemented as
+below. Note that load_html_file/2 re-uses a cached DTD object as defined
+by dtd/2. As DTD objects may be corrupted while loading erroneous
+documents sharing is undesirable if the documents are not known to be
+correct. See dtd/2 for details.

 \begin{code}
-load_html_file(File, Term) :-
+load_html_file(Source, Term) :-
        dtd(html, DTD),
-        load_structure(File, Term,
+        load_structure(Source, Term,
                       [ dtd(DTD),
                         dialect(sgml),
 			 shorttag(false)
@ -171,8 +172,8 @@ load_html_file(File, Term) :-
 \subsection{Loading Structured Documents}

 SGML or XML files are loaded through the common predicate
-load_structure/3. This is a predicate with many options. For 
-simplicity a number of commonly used shorthands are provided: 
+load_structure/3. This is a predicate with many options. For
+simplicity a number of commonly used shorthands are provided:
 load_sgml_file/2, load_xml_file/2, and
 load_html_file/2.

@ -184,18 +185,18 @@ Parse \arg{Source} and return the resulting structure in
 options controlling the conversion process.

 A proper XML document contains only a single toplevel element whose name
-matches the document type. Nevertheless, a list is returned for 
+matches the document type. Nevertheless, a list is returned for
 consistency with the representation of element content. The <aref/
 ListOfContent/ consists of the following types:

 \begin{description}
    \termitem{\arg{Atom}}{}
-Atoms are used to represent \const{CDATA}. Note 
+Atoms are used to represent \const{CDATA}. Note
 this is possible in SWI-Prolog, as there is no length-limit on atoms and
 atom garbage collection is provided.

    \termitem{element}{Name, ListAttributes, ListOfContent}
-\arg{Name} is the name of the element. Using SGML, which is 
+\arg{Name} is the name of the element. Using SGML, which is
 case-insensitive, all element names are returned as lowercase atoms.

 \arg{ListOfAttributes} is a list of \arg{Name}=\arg{Value} pairs for
@ -209,31 +210,31 @@ integers is supported. \arg{ListOfContent} defines the content for the
 element.

    \termitem{sdata}{Text}
-If an entity with declared content-type \const{SDATA} is encountered, this 
+If an entity with declared content-type \const{SDATA} is encountered, this
 term is returned holding the data in \arg{Text}.

    \termitem{ndata}{Text}
-If an entity with declared content-type \const{NDATA} is encountered, this 
+If an entity with declared content-type \const{NDATA} is encountered, this
 term is returned holding the data in \arg{Text}.
    \termitem{pi}{Text}
 If a processing instruction is encountered (\verb$<?...?>$), <aref/
 Text/ holds the text of the processing instruction. Please note that the
-\verb$<?xml ...?>$ instruction is handled internally. 
+\verb$<?xml ...?>$ instruction is handled internally.
 \end{description}


-The \arg{Options} list controls the conversion process. Currently 
+The \arg{Options} list controls the conversion process. Currently
 defined options are:

 \begin{description}
    \termitem{dtd}{?DTD}
 Reference to a DTD object. If specified, the \verb$<!DOCTYPE ...>$
-declaration is ignored and the document is parsed and validated against 
+declaration is ignored and the document is parsed and validated against
 the provided DTD. If provided as a variable, the created DTD is
 returned. See \secref{implicitdtd}.

    \termitem{dialect}{+Dialect}
-Specify the parsing dialect. Supported are \const{sgml} (default), \const{xml} 
+Specify the parsing dialect. Supported are \const{sgml} (default), \const{xml}
 and \const{xmlns}. See \secref{xml} for details on the differences.

    \termitem{shorttag}{+Bool}
@ -272,14 +273,14 @@ Defines (overwrites) an entity definition.  At the moment, only
 entity options are allowed.

    \termitem{file}{+Name}
-Sets the name of the file on which errors are reported. Sets the 
+Sets the name of the file on which errors are reported. Sets the
 linenumber to 1.

    \termitem{line}{+Line}
 Sets the starting line-number for reporting errors.

    \termitem{max_errors}{+Max}
-Sets the maximum number of errors. If this number is reached, an 
+Sets the maximum number of errors. If this number is reached, an
 exception of the format below is raised. The default is 50.  Using
 \term{max_errors}{-1} makes the parser continue, no matter how many
 errors it encounters.
@ -303,26 +304,26 @@ modes are:
    \termitem{space}{sgml}
 In SGML, newlines at the start and end of an element are removed.<fn>In
 addition, newlines at the end of lines containing only markup should be
-deleted. This is not yet implemented.</fn> This is the default mode for 
-the SGML dialect. 
+deleted. This is not yet implemented.</fn> This is the default mode for
+the SGML dialect.

    \termitem{space}{preserve}
 White space is passed literally to the application. This mode leaves all
 white space handling to the application. This is the default mode for
-the XML dialect. 
+the XML dialect.

    \termitem{space}{default}
-In addition to \const{sgml} space-mode, all consequtive white-space is 
-reduced to a single space-character. This mode canonises all white 
-space. 
+In addition to \const{sgml} space-mode, all consecutive white-space is
+reduced to a single space-character. This mode canonises all white
+space.

    \termitem{space}{remove}
-In addition to \const{default}, all leading and trailing white-space is 
-removed from \const{CDATA} objects. If, as a result, the \const{CDATA} 
-becomes empty, nothing is passed to the application. This mode is 
-especially handy for processing `data-oriented' documents, such as RDF. 
-It is not suitable for normal text documents. Consider the HTML 
-fragment below. When processed in this mode, the spaces between the 
+In addition to \const{default}, all leading and trailing white-space is
+removed from \const{CDATA} objects. If, as a result, the \const{CDATA}
+becomes empty, nothing is passed to the application. This mode is
+especially handy for processing `data-oriented' documents, such as RDF.
+It is not suitable for normal text documents. Consider the HTML
+fragment below. When processed in this mode, the spaces between the
 three modified words are lost.  This mode is not part of any standard;
 XML 1.0 allows only \const{default} and \const{preserve}.

@ -333,9 +334,9 @@ Consider adjacent <b>bold</b> <ul>and</ul> <it>italic</it> words.

 \subsection{XML documents}		\label{sec:xml}

-The parser can operate in two modes: \const{sgml} mode and \const{xml} mode, as 
-defined by the \term{dialect}{Dialect} option. Regardless of this 
-option, if the first line of the document reads as below, the parser is 
+The parser can operate in two modes: \const{sgml} mode and \const{xml} mode, as
+defined by the \term{dialect}{Dialect} option. Regardless of this
+option, if the first line of the document reads as below, the parser is
 switched automatically into XML mode.

 \begin{code}
@ -346,21 +347,21 @@ Currently switching to XML mode implies:

 \begin{itemlist}
   \item [XML empty elements]
-The construct \verb$<element [attribute...] />$ is recognised as 
-an empty element. 
+The construct \verb$<element [attribute...] />$ is recognised as
+an empty element.

   \item [Predefined entities]
 The following entitities are predefined: \const{lt} (\verb$<$), \const{gt}
-(\verb$>$), \const{amp} (\verb$&$), \const{apos} (\verb$'$) 
-and \const{quot} (\verb$"$). 
+(\verb$>$), \const{amp} (\verb$&$), \const{apos} (\verb$'$)
+and \const{quot} (\verb$"$).

   \item [Case sensitivity]
-In XML mode, names are treated case-sensitive, except for the DTD 
-reserved names (i.e. \exam{ELEMENT}, \emph{etc.}). 
+In XML mode, names are treated case-sensitive, except for the DTD
+reserved names (i.e. \exam{ELEMENT}, \emph{etc.}).

   \item [Character classes]
 In XML mode, underscores (\verb$_$) and colon (\verb$:$) are
-allowed in names. 
+allowed in names.

   \item [White-space handling]
 White space mode is set to \const{preserve}. In addition to setting
@ -378,28 +379,28 @@ preserves space, regardless of the default processing mode.

 \subsubsection{XML Namespaces}		\label{sec:xmlns}

-Using the \jargon{dialect} \const{xmlns}, the parser will interpret XML 
-namespaces. In this case, the names of elements are returned as a term 
+Using the \jargon{dialect} \const{xmlns}, the parser will interpret XML
+namespaces. In this case, the names of elements are returned as a term
 of the format

 \begin{quote}
-\arg{URL}\const{:}\arg{LocalName} 
+\arg{URL}\const{:}\arg{LocalName}
 \end{quote}

-If an identifier has no namespace and there is no default namespace it 
-is returned as a simple atom. If an identifier has a namespace but this 
-namespace is undeclared, the namespace name rather than the related URL 
+If an identifier has no namespace and there is no default namespace it
+is returned as a simple atom. If an identifier has a namespace but this
+namespace is undeclared, the namespace name rather than the related URL
 is returned.

 Attributes declaring namespaces ({\tt xmlns:<ns>=<url>}) are reported
 as if \const{xmlns} were not a defined resource.

-In many cases, getting attribute-names as <xmp>\arg{url}:\arg{name}</xmp>
-is not desirable. Such terms are hard to unify and sometimes multiple 
-URLs may be mapped to the same identifier. This may happen due to poor 
-version management, poor standardisation or because the the application 
-doesn't care too much about versions. This package defines two 
-call-backs that can be set using set_sgml_parser/2 to deal 
+In many cases, getting attribute-names as \arg{url}:\arg{name}
+is not desirable. Such terms are hard to unify and sometimes multiple
+URLs may be mapped to the same identifier. This may happen due to poor
+version management, poor standardisation or because the application
+doesn't care too much about versions. This package defines two
+call-backs that can be set using set_sgml_parser/2 to deal
 with this problem.

 The call-back \const{xmlns} is called as XML namespaces are noticed.
@ -428,6 +429,41 @@ load_rdf_xml(File, Term) :-
                       ]).
 \end{code}

+The library provides iri_xml_namespace/3 to break down an IRI into
+its namespace and localname:
+
+\begin{description}
+    \predicate[det]{iri_xml_namespace}{3}{+IRI, -Namespace, -Localname}
+Split an IRI (Unicode URI) into its \arg{Namespace} (an IRI) and
+\arg{Localname} (a Unicode XML name, see xml_name/2). The
+\arg{Localname} is defined as the longest last part of the IRI that
+satisfies the syntax of an XML name. With IRI schemas that are designed
+to work with XML namespaces, this will typically break the IRI on the
+last \chr{\#} or \chr{/}. Note however that this can produce unexpected
+results. E.g., in the example below, one might expect the namespace to
+be \url{http://example.com/images\#}, but an XML name cannot start with
+a digit.
+
+\begin{code}
+?- iri_xml_namespace('http://example.com/images#12345', NS, L).
+NS = 'http://example.com/images#12345',
+L = ''.
+\end{code}
+
+As we see from the example above, the \arg{Localname} can be the empty
+atom. Similarly, \arg{Namespace} can be the empty atom if \arg{IRI} is
+an XML name. Applications will often have to check for either or both of
+these conditions. We decided against failing in these conditions because
+the application typically wants to know which of the two conditions
+(empty namespace or empty localname) holds. This predicate is often used
+for generating RDF/XML from an RDF graph.
+
+    \predicate[det]{iri_xml_namespace}{2}{+IRI, -Namespace}
+Same as iri_xml_namespace/3, but avoids creating an atom for the
+\arg{Localname}.
+\end{description}
+
+
 \subsection{DTD-Handling}

 The DTD (\textbf{D}ocument \textbf{T}ype \textbf{D}efinition) is a
@ -438,7 +474,7 @@ predicates for handling the DTD.

 \begin{description}
    \predicate{new_dtd}{2}{+DocType, -DTD}
-Creates an empty DTD for the named \arg{DocType}. The returned 
+Creates an empty DTD for the named \arg{DocType}. The returned
 DTD-reference is an opaque term that can be used in the other predicates
 of this package.

@ -468,7 +504,7 @@ Define the DTD dialect. Default is \const{sgml}. Using \const{xml} or

    \predicate{dtd}{2}{+DocType, -DTD}
 Find the DTD representing the indicated \jargon{doctype}. This predicate
-uses a cache of DTD objects. If a doctype has no associated dtd, it 
+uses a cache of DTD objects. If a doctype has no associated dtd, it
 searches for a file using the file search path \exam{dtd} using the call:

 \begin{code}
@ -488,15 +524,15 @@ parse multiple documents should be restricted to situations where the
 documents processed are known to be error-free.

    \predicate{dtd_property}{2}{+DTD, ?Property}
-This predicate is used to examine the content of a DTD. Property is one 
+This predicate is used to examine the content of a DTD. Property is one
 of:

 \begin{description}
    \termitem{doctype}{DocType}
-An atom representing the document-type defined by this DTD. 
+An atom representing the document-type defined by this DTD.

    \termitem{elements}{ListOfElements}
-A list of atoms representing the names of the elements in this DTD. 
+A list of atoms representing the names of the elements in this DTD.

    \termitem{element}{Name, Omit, Content}
 The DTD contains an element with the given name. \arg{Omit} is a term of
@ -508,7 +544,7 @@ form:

    \begin{description}
        \termitem{empty}{}
-The element has no content. 
+The element has no content.

 	\termitem{cdata}{}
 The element contains non-parsed character data.  All data up to the
@ -524,30 +560,30 @@ any order.
 	\termitem{\#pcdata}{}
 The element contains parsed character data.

-	\termitem{\arg{element}} An element with this name. 
+	\termitem{\arg{element}} An element with this name.

 	\termitem{*}{SubModel}
-0 or more appearances. 
+0 or more appearances.

 	\termitem{?}{SubModel}
-0 or one appearance. 
+0 or one appearance.

 	\termitem{+}{SubModel}
-1 or more appearances. 
+1 or more appearances.

 	\termitem{,}{SubModel1, SubModel2}
-\arg{SubModel1} followed by \arg{SubModel2}. 
+\arg{SubModel1} followed by \arg{SubModel2}.

 	\termitem{\&}{SubModel1, SubModel2}
-\arg{SubModel1} and \arg{SubModel2} in any order. 
+\arg{SubModel1} and \arg{SubModel2} in any order.

 	\termitem{\chr{|}}{SubModel1, SubModel2}
-\arg{SubModel1} or \arg{SubModel2}. 
+\arg{SubModel1} or \arg{SubModel2}.
 \end{description}

    \termitem{attributes}{Element, ListOfAttributes}
-\arg{ListOfAttributes} is a list of atoms representing the attributes 
-of the element \arg{Element}. 
+\arg{ListOfAttributes} is a list of atoms representing the attributes
+of the element \arg{Element}.

    \termitem{attribute}{Element, Attribute, Type, Default}
 Query an element. \arg{Type} is one of \const{cdata}, \const{entity},
@ -555,34 +591,34 @@ Query an element. \arg{Type} is one of \const{cdata}, \const{entity},
 \const{notation}, \const{number} or \const{nutoken}. For DTD types that
 allow for a list, the notation \term{list}{Type} is used. Finally, the
 DTD construct \verb$(a|b|...)$ is mapped to the term
-\term{nameof}{ListOfValues}. 
+\term{nameof}{ListOfValues}.

 \arg{Default} describes the sgml default. It is one of \const{required},
 \const{current}, \const{conref} or \const{implied}. If a real default is
-present, it is one of \term{default}{Value} or \term{fixed}{Value}. 
+present, it is one of \term{default}{Value} or \term{fixed}{Value}.

    \termitem{entities}{ListOfEntities}
-\arg{ListOfEntities} is a list of atoms representing the names of the 
-defined entities. 
+\arg{ListOfEntities} is a list of atoms representing the names of the
+defined entities.

    \termitem{entity}{Name, Value}
-\arg{Name} is the name of an entity with given value. Value is one of 
+\arg{Name} is the name of an entity with given value. Value is one of
 \begin{description}

    \termitem{\arg{Atom}}{}
-If the value is atomic, it represents the literal value of the entity. 
+If the value is atomic, it represents the literal value of the entity.

    \termitem{system}{Url}
-\arg{Url} is the URL of the system external entity. 
+\arg{Url} is the URL of the system external entity.

    \termitem{public}{Id, Url}
-For external public entities, \arg{Id} is the identifier. If an URL is 
-provided this is returned in \arg{Url}. Otherwise this argument is 
-unbound. 
+For external public entities, \arg{Id} is the identifier. If an URL is
+provided this is returned in \arg{Url}. Otherwise this argument is
+unbound.
 \end{description}

    \termitem{notations}{ListOfNotations}
-Returns a list holding the names of all \const{NOTATION} declarations. 
+Returns a list holding the names of all \const{NOTATION} declarations.

    \termitem{notation}{Name, Decl}
 Unify \arg{Decl} with a list of \term{system}{+File} and/or
@ -592,11 +628,11 @@ Unify \arg{Decl} with a list if \term{system}{+File} and/or

 \subsubsection{The DOCTYPE declaration}

-As this parser allows for processing partial documents and process the 
+As this parser allows for processing partial documents and processing the
 DTD separately, the DOCTYPE declaration plays a special role.

-If a document has no DOCTYPE declaraction, the parser returns a list 
-holding all elements and CDATA found. If the document has a DOCTYPE 
+If a document has no DOCTYPE declaration, the parser returns a list
+holding all elements and CDATA found. If the document has a DOCTYPE
 declaration, the parser will open the element defined in the DOCTYPE as
 soon as the first real data is encountered.

@ -632,53 +668,63 @@ elements_in_xml_document(File, Elements) :-

 \begin{description}
    \predicate{new_sgml_parser}{2}{-Parser, +Options}
-Creates a new parser. A parser can be used one or multiple times for 
-parsing documents or parts thereof. It may be bound to a DTD or the DTD 
-may be left implicit, in which case it is created from the document 
+Creates a new parser. A parser can be used one or multiple times for
+parsing documents or parts thereof. It may be bound to a DTD or the DTD
+may be left implicit, in which case it is created from the document
 prologue or parsing is performed without a DTD. Options:
 \begin{description}
    \termitem{dtd}{?DTD}
-If specified with an initialised DTD, this DTD is used for parsing the 
-document, regardless of the document prologue. If specified using as a 
-variable, a reference to the created DTD is returned. This DTD may be 
-created from the document prologue or build implicitely from the 
-document's content. 
+If specified with an initialised DTD, this DTD is used for parsing the
+document, regardless of the document prologue. If specified as a
+variable, a reference to the created DTD is returned. This DTD may be
+created from the document prologue or built implicitly from the
+document's content.
 \end{description}

    \predicate{free_sgml_parser}{1}{+Parser}
-Destroy all resources related to the parser. This does not destroy the 
+Destroy all resources related to the parser. This does not destroy the
 DTD if the parser was created using the \term{dtd}{DTD} option.

    \predicate{set_sgml_parser}{2}{+Parser, +Option}
-Sets attributes to the parser. Currently defined attributes: 
+Sets attributes to the parser. Currently defined attributes:

 \begin{description}
    \termitem{file}{File}
-Sets the file for reporting errors and warnings. Sets the line to 1. 
+Sets the file for reporting errors and warnings. Sets the line to 1.
    \termitem{line}{Line}
-Sets the current line. Useful if the stream is not at the start of the 
-(file) object for generating proper line-numbers. 
+Sets the current line. Useful if the stream is not at the start of the
+(file) object for generating proper line-numbers.
    \termitem{charpos}{Offset}
 Sets the current character location.  See also the \term{file}{File}
 option.
    \termitem{dialect}{Dialect}
-Set the markup dialect. Known dialects: 
+Set the markup dialect. Known dialects:
 \begin{description}

    \termitem{sgml}{}
-The default dialect is to process as SGML. This implies markup is 
-case-insensitive and standard SGML abbreviation is allowed (abreviated 
-attributes and omitted tags). 
+The default dialect is to process as SGML. This implies markup is
+case-insensitive and standard SGML abbreviation is allowed (abbreviated
+attributes and omitted tags).

    \termitem{xml}{}
 This dialect is selected automatically if the processing instruction
-\verb$<?xml ...>$ is encountered. See \secref{xml} for details. 
+\verb$<?xml ...>$ is encountered. See \secref{xml} for details.

    \termitem{xmlns}{}
 Process file as XML file with namespace support. See \secref{xmlns} for
 details.  See also the \verb$qualify_attributes$ option below.
 \end{description}

+    \termitem{xmlns}{+URI}
+Set the default namespace of the outer environment.  This option is
+provided to process partial XML content with proper namespace
+resolution.
+
+    \termitem{xmlns}{+NS, +URI}
+Specify a namespace for the outer environment. This option is
+provided to process partial XML content with proper namespace
+resolution.
+
    \termitem{qualify_attributes}{Boolean}
 How to handle unqualified attributes (i.e. without an explicit namespace)
 in XML namespace (\const{xmlns}) mode. Default and standard compliant is
@ -715,20 +761,20 @@ sgml_parse/2.
 \end{description}

    \predicate{get_sgml_parser}{2}{+Parser, -Option}
-Retrieve infomation on the current status of the parser. Notably useful 
-if the parser is used in the call-back mode. Currently defined options: 
+Retrieve information on the current status of the parser. Notably useful
+if the parser is used in the call-back mode. Currently defined options:

 \begin{description}
    \termitem{file}{-File}
-Current file-name. Note that this may be different from the provided 
-file if an external entity is being loaded. 
+Current file-name. Note that this may be different from the provided
+file if an external entity is being loaded.

    \termitem{line}{-Line}
-Line-offset from where the parser started its processing in the file-object. 
+Line-offset from where the parser started its processing in the file-object.

    \termitem{charpos}{-CharPos}
-Offset from where the parser started its processing in the file-object. 
-See \secref{indexaccess}. 
+Offset from where the parser started its processing in the file-object.
+See \secref{indexaccess}.

    \termitem{charpos}{-Start, -End}
 Character offsets of the start and end of the source processed causing the
@ -736,8 +782,8 @@ current call-back. Used in \program{PceEmacs} to for colouring
 text in SGML and XML modes.

    \termitem{source}{-Stream}
-Prolog stream being processed. May be used in the \const{on_begin}, \emph{etc.} 
-callbacks from sgml_parse/2. 
+Prolog stream being processed. May be used in the \const{on_begin}, \emph{etc.}
+callbacks from sgml_parse/2.

    \termitem{dialect}{-Dialect}
 Return the current dialect used by the parser (\const{sgml}, \const{xml} or \const{xmlns}).
@ -822,8 +868,8 @@ Input is a stream. A full description of the option-list is below.

 \begin{description}
    \termitem{document}{+Term}
-A variable that will be unified with a list describing the content of 
-the document (see load_structure/2). 
+A variable that will be unified with a list describing the content of
+the document (see load_structure/2).
    \termitem{source}{+Stream}
 An input stream that is read.  This option \emph{must} be given.
    \termitem{content_length}{+Characters}
@ -840,7 +886,7 @@ Default.  Parse everything upto the end of the input.
 The parser stops after reading the first element. Using
 \term{source}{Stream}, this implies reading is stopped as soon
 as the element is complete, and another call may be issued on the same
-stream to read the next element. 
+stream to read the next element.

 	\termitem{content}{}
 The value \const{content} is like \const{element} but assumes the
@ -860,9 +906,9 @@ all open elements.
    \end{description}

    \termitem{max_errors}{+MaxErrors}
-Set the maximum number of errors. If this number is exceeded further 
-writes to the stream will yield an I/O error exception. Printing of 
-errors is suppressed after reaching this value. The default is 100. 
+Set the maximum number of errors. If this number is exceeded further
+writes to the stream will yield an I/O error exception. Printing of
+errors is suppressed after reaching this value. The default is 100.
    \termitem{syntax_errors}{+ErrorMode}
 Defines how syntax errors are handled.
    \begin{description}
@ -875,28 +921,35 @@ Defines how syntax errors are handled.
 	using print_message/2 with severity
 	\const{informational}.
    \end{description}
+
+    \termitem{xml_no_ns}{+Mode}
+Error handling if an XML namespace is not defined.  Default generates
+an error.  If \const{quiet}, the error is suppressed.  Can be used
+together with \term{call}{urlns, Closure} to provide external expansion
+of namespaces.  See also \secref{xmlns}.
+
    \termitem{call}{+Event, :PredicateName}
-Issue call-backs on the specified events. \arg{PredicateName} is the 
-name of the predicate to call on this event, possibly prefixed with a 
+Issue call-backs on the specified events. \arg{PredicateName} is the
+name of the predicate to call on this event, possibly prefixed with a
 module identifier.  If the handler throws an exception, parsing is stopped
 and sgml_parse/2 re-throws the exception. The defined events are:
 \begin{description}
    \termitem{begin}{}
-An open-tag has been parsed. The named handler is called with three 
-arguments: \term{\arg{Handler}}{+Tag, +Attributes, +Parser}. 
+An open-tag has been parsed. The named handler is called with three
+arguments: \term{\arg{Handler}}{+Tag, +Attributes, +Parser}.
    \termitem{end}{}
-A close-tag has been parsed. The named handler is called with two 
-arguments: \term{\arg{Handler}}{+Tag, +Parser}. 
+A close-tag has been parsed. The named handler is called with two
+arguments: \term{\arg{Handler}}{+Tag, +Parser}.

    \termitem{cdata}{}
 CDATA has been parsed. The named handler is called with two arguments:
 \term{Handler}{+CDATA, +Parser}, where CDATA is an atom
-representing the data. 
+representing the data.

    \termitem{pi}{}
-A processing instruction has been parsed. The named handler is called 
+A processing instruction has been parsed. The named handler is called
 with two arguments: \term{\arg{Handler}}{+Text, +Parser}, where
-\arg{Text} is the text of the processing instruction. 
+\arg{Text} is the text of the processing instruction.

    \termitem{decl}{}
 A declaration (\verb$<!...>$) has been read. The named handler is
@ -918,33 +971,33 @@ If this option is present, errors and warnings are not reported using
 print_message/3

    \termitem{xmlns}{}
-When parsing an in \const{xmlns} mode, a new namespace declaraction is 
-pushed on the environment. The named handler is called with three 
+When parsing in \const{xmlns} mode, a new namespace declaration is
+pushed on the environment. The named handler is called with three
 arguments: \term{\arg{Handler}}{+NameSpace, +URL, +Parser}.
-See \secref{xmlns} for details. 
+See \secref{xmlns} for details.

    \termitem{urlns}{}
-When parsing an in \const{xmlns} mode, this predicate can be used to map a 
-url into either a canonical URL for this namespace or another internal 
-identifier. See \secref{xmlns} for details. 
+When parsing in \const{xmlns} mode, this predicate can be used to map a
+url into either a canonical URL for this namespace or another internal
+identifier. See \secref{xmlns} for details.
 \end{description}
 \end{description}
 \end{description}

 \subsubsection{Partial Parsing}

-In some cases, part of a document needs to be parsed. One option is to 
-use load_structure/2 or one of its variations and extract 
-the desired elements from the returned structure. This is a clean 
-solution, especially on small and medium-sized documents. It however is 
-unsuitable for parsing really big documents. Such documents can only be 
+In some cases, part of a document needs to be parsed. One option is to
+use load_structure/2 or one of its variations and extract
+the desired elements from the returned structure. This is a clean
+solution, especially on small and medium-sized documents. It however is
+unsuitable for parsing really big documents. Such documents can only be
 handled with the call-back output interface realised by the
 \term{call}{Event, Action} option of sgml_parse/2.
 Event-driven processing is not very natural in Prolog.

 The SGML2PL library allows for a mixed approach. Consider the case where
 we want to process all descriptions from RDF elements in a document. The
-code below calls <xmp>process_rdf_description(Element)</xmp> on each element 
+code below calls \exam{process_rdf_description(Element)} on each element
 that is directly inside an RDF element.

 \begin{code}
@ -994,26 +1047,28 @@ set_sgml_parser/2 or, for XML, based on the \const{encoding}
 attribute of the XML header.  The parser reads from SWI-Prolog streams,
 which also provide encoding handling.  Therefore, there are two modes
 for parsing.  If the SWI-Prolog stream has encoding \const{octet} (which
-is the default for binary streams), the decoder of the SGML parser will 
+is the default for binary streams), the decoder of the SGML parser will
 be used and positions reported by the parser are octet offsets in the
 stream.  In other cases, the Prolog stream decoder is used and offsets
 are character code counts.

+\input{xpath.tex}
+
 \section{Processing Indexed Files}		\label{sec:indexaccess}

-In some cases applications wish to process small portions of large 
-SGML, XML or RDF files. For example, the \emph{OpenDirectory} project 
-by Netscape has produced a 90MB RDF file representing the main index. 
-The parser described here can process this document as a unit, but 
-loading takes 85 seconds on a Pentium-II 450 and the resulting term 
-requires about 70MB global stack. One option is to process the entire 
-document and output it as a Prolog fact-base of RDF triplets, but in 
-many cases this is undesirable. Another example is a large SGML file 
-containing online documentation. The application normally wishes to 
-provide only small portions at a time to the user. Loading the entire 
+In some cases applications wish to process small portions of large
+SGML, XML or RDF files. For example, the \emph{OpenDirectory} project
+by Netscape has produced a 90MB RDF file representing the main index.
+The parser described here can process this document as a unit, but
+loading takes 85 seconds on a Pentium-II 450 and the resulting term
+requires about 70MB global stack. One option is to process the entire
+document and output it as a Prolog fact-base of RDF triplets, but in
+many cases this is undesirable. Another example is a large SGML file
+containing online documentation. The application normally wishes to
+provide only small portions at a time to the user. Loading the entire
 document into memory is then undesirable.

-Using the \term{parse}{element} option, we open a file, seek 
+Using the \term{parse}{element} option, we open a file, seek
 (using seek/4) to the position of the element and
 read the desired element.

@ -1059,12 +1114,12 @@ rdf_element(Id, Term) :-

 \section{External entities}

-While processing an SGML document the document may refer to external 
-data. This occurs in three places: external parameter entities, normal 
-external entities and the \const{DOCTYPE} declaration. The current version 
-of this tool deals rather primitively with external data. External 
-entities can only be loaded from a file and the mapping between the 
-entity names and the file is done using a \jargon{catalog} file in a 
+While processing an SGML document the document may refer to external
+data. This occurs in three places: external parameter entities, normal
+external entities and the \const{DOCTYPE} declaration. The current version
+of this tool deals rather primitively with external data. External
+entities can only be loaded from a file and the mapping between the
+entity names and the file is done using a \jargon{catalog} file in a
 format compatible with that used by James Clark's SP Parser,
 based on the SGML Open (now OASIS) specification.

@ -1075,23 +1130,23 @@ sgml_register_catalog_file/2 or the environment variable
 \begin{description}
    \predicate{sgml_register_catalog_file}{2}{+File, +Location}
 Register the indicated \arg{File} as a catalog file. \arg{Location} is
-either \const{start} or \const{end} and defines whether the catalog is 
+either \const{start} or \const{end} and defines whether the catalog is
 considered first or last. This predicate has no effect if \arg{File} is
 already part of the catalog.

-If no files are registered using this predicate, the first query on the 
+If no files are registered using this predicate, the first query on the
 catalog examines \env{SGML_CATALOG_FILES} and fills the catalog with
-all files in this path. 
+all files in this path.
 \end{description}

 Two types of lines are used by this package.

 \begin{quote}
 \const{DOCTYPE} \arg{doctype} \arg{file} \\
-\const{PUBLIC} \exam{"}\arg{Id}\exam{"} \arg{file} 
+\const{PUBLIC} \exam{"}\arg{Id}\exam{"} \arg{file}
 \end{quote}

-The specified \arg{file} path is taken relative to the location of the 
+The specified \arg{file} path is taken relative to the location of the
 catalog file. For the \const{DOCTYPE} declaration, \pllib{sgml} first
 makes an attempt to resolve the \const{SYSTEM} or \const{PUBLIC}
 identifier. If this fails it tries to resolve the \arg{doctype} using
@ -1102,10 +1157,12 @@ where system identifiers must be Universal Resource Indicators, not
 local file names. Simple uses of relative URIs will work correctly under
 UNIX and Windows.

-In the future we will design a call-back mechanism for locating and 
-processing external entities, so Prolog-based file-location and Prolog 
+In the future we will design a call-back mechanism for locating and
+processing external entities, so Prolog-based file-location and Prolog
 resources can be used to store external entities.

+\input{pwp.tex}
+
 \section{Writing markup}

 \subsection{Writing documents}
@ -1149,14 +1206,14 @@ elements are written using increasing indentation. This introduces
 (depending on the mode and defined whitespace handling) CDATA sequences
 with only layout between elements when read back in.  If \const{false}, no
 layout characters are added.  As this mode does not need to analyse the
-document it is faster and guarantees correct output when read back. 
-Unfortunately the output is hardly human readable and causes problems 
+document it is faster and guarantees correct output when read back.
+Unfortunately the output is hardly human readable and causes problems
 with many editors.
    \termitem{indent}{Integer}
 Set the initial element indentation.  If more than zero, the indent
 is written before the document.
    \termitem{nsmap}{Map}
-Set the initial namespace map.  \arg{Map} is a list of 
+Set the initial namespace map.  \arg{Map} is a list of
 \arg{Name} = \arg{URI}.  This option, together with \const{header} and
 \const{indent} is added to use xml_write/3 to generate XML
 that is embedded in a larger XML document.
@ -1197,7 +1254,7 @@ values are \const{ascii}, \const{iso_latin_1}, \const{utf8} and
 \const{unicode}. Versions with two arguments are provided for backward
 compatibility, making the safe \const{ascii} encoding assumption.

-\begin{description}  
+\begin{description}
    \predicate{xml_quote_attribute}{3}{+In, -Quoted, +Encoding}
 Map the characters that may not appear in XML attributes to entities.
 Currently these are \verb$<>&"$.%
@ -1222,8 +1279,8 @@ Assumes \const{ascii} encoding.
 Succeed if \arg{In} is an atom or string that satisfies the rules for
 a valid XML element or attribute name.  As with the other predicates in
 this group, if \arg{Encoding} cannot represent one of the characters, this
-function fails.  It uses a hard-coded table for ASCII-range characters and
-iswalpha()/iswalnum() for the first and remaining characters of the name.
+function fails. Character classification is based on
+\url{http://www.w3.org/TR/2006/REC-xml-20060816}.

    \predicate{xml_name}{1}{+In}
 Backward compatibility version for xml_name/2. Assumes \const{ascii}
@ -1238,8 +1295,8 @@ Known missing SGML features include

 \begin{itemlist}
    \item [NOTATION on entities]
-Though notation is parsed, notation attributes on external entity 
-declarations are not handed to the user. 
+Though notation is parsed, notation attributes on external entity
+declarations are not handed to the user.
    \item [NOTATION attributes]
 SGML notations may have attributes, declared using
 \verb$<!ATTLIST #NOTATION name attributes>$. Those data attributes
@ -1261,8 +1318,8 @@ Empty start tags (\verb$<>$), unclosed start tags
 (\verb$<a<b$) and unclosed end tags (\verb$</a<b$) are not
 supported.
    \item [SGML declaration]
-The `SGML declaration' is fixed, though most of the parameters are 
-handled through indirections in the implementation. 
+The `SGML declaration' is fixed, though most of the parameters are
+handled through indirections in the implementation.
    \item [The DATATAG feature]
 It is regarded as superseded by SHORTREF, which is supported.
 (SP does not support it either.)
@ -1276,7 +1333,7 @@ one DTD at the same time.  It is not supported.
 \end{itemlist}


-In XML mode the parser recognises SGML constructs that are not allowed 
+In XML mode the parser recognises SGML constructs that are not allowed
 in XML. Also various extensions of XML over SGML are not yet realised.
 In particular, XInclude is not implemented because the designers of
 XInclude can't make up their minds whether to base it on elements or
@ -1305,7 +1362,7 @@ refers to the SWI-Prolog `home-directory'.

 \section{Acknowledgements}

-The Prolog representation for parsed documents is based on the 
+The Prolog representation for parsed documents is based on the
 SWI-Prolog interface to SP by Anjo Anjewierden.

 Richard O'Keefe has put a lot of effort testing and providing bug
--- a/packages/sgml/sgml2pl.c
+++ b/packages/sgml/sgml2pl.c
--- a/packages/sgml/utf8.c
+++ b/packages/sgml/utf8.c
@ -61,7 +61,7 @@ sgml__utf8_get_char(const char *in, int *chr)
  }

  *chr = *in;
-  
+
  return (char *)in+1;
 }

--- a/packages/sgml/util.c
+++ b/packages/sgml/util.c
@ -26,7 +26,6 @@

 #define UTIL_H_IMPLEMENTATION
 #include "util.h"
-#include <unistd.h>
 #include <ctype.h>
 #include <wctype.h>
 #include <stdlib.h>
@ -50,7 +49,7 @@
 size_t
 istrlen(const ichar *s)
 { size_t len =0;
-  
+
  while(*s++)
    len++;

@ -67,7 +66,7 @@ istrdup(const ichar *s)
    while(*s)
      *d++ = *s++;
    *d = 0;
-    
+
    return dup;
  } else
  { return NULL;
@ -140,10 +139,10 @@ int
 istreq(const ichar *s1, const ichar *s2)
 { while(*s1 && *s1 == *s2)
    s1++, s2++;
-  
+
  if ( *s1 == 0 && *s2 == 0 )
    return TRUE;
-  
+
  return FALSE;
 }

@ -152,10 +151,10 @@ int
 istrncaseeq(const ichar *s1, const ichar *s2, int len)
 { while(--len >= 0 && towlower(*s1) == towlower(*s2))
    s1++, s2++;
-  
+
  if ( len < 0 )
    return TRUE;
-  
+
  return FALSE;
 }

@ -164,10 +163,10 @@ int
 istrprefix(const ichar *pref, const ichar *s)
 { while(*pref && *pref == *s)
    pref++, s++;
-  
+
  if ( *pref == 0 )
    return TRUE;
-  
+
  return FALSE;
 }

@ -212,7 +211,7 @@ istrhash(const ichar *t, int tsize)

  while(*t)
  { unsigned int c = *t++;
-    
+
    c -= 'a';
    value ^= c << (shift & 0xf);
    shift ^= c;
@ -231,7 +230,7 @@ istrcasehash(const ichar *t, int tsize)

  while(*t)
  { unsigned int c = towlower(*t++);	/* case insensitive */
-    
+
    c -= 'a';
    value ^= c << (shift & 0xf);
    shift ^= c;
@ -301,7 +300,7 @@ __add_icharbuf(icharbuf *buf, int chr)
    else
      buf->data = sgml_malloc(buf->allocated*sizeof(ichar));
  }
-  
+
  buf->data[buf->size++] = chr;
 }

@ -349,7 +348,7 @@ init_ocharbuf(ocharbuf *buf)
 ocharbuf *
 new_ocharbuf()
 { ocharbuf *buf = sgml_malloc(sizeof(*buf));
-  
+
  return init_ocharbuf(buf);
 }

@ -436,24 +435,76 @@ empty_ocharbuf(ocharbuf *buf)
 		 *******************************/

 #define RINGSIZE 16
-static void *ring[RINGSIZE];
-static int  ringp;
+
+typedef struct ring
+{ void *ring[RINGSIZE];
+  int   ringp;
+} ring;
+
+#ifdef _REENTRANT
+#include <pthread.h>
+static pthread_key_t ring_key;
+
+static void
+free_ring(void *ptr)
+{ ring *r = ptr;
+  int i;
+  void **bp;
+
+  for(i=0, bp=r->ring; i<RINGSIZE; i++, bp++)
+  { if ( *bp )
+    { sgml_free(*bp);
+      *bp = NULL;
+    }
+  }
+
+  sgml_free(r);
+}
+
+
+static ring *
+my_ring()
+{ ring *r;
+
+  if ( (r=pthread_getspecific(ring_key)) )
+    return r;
+
+  if ( (r = sgml_calloc(1, sizeof(*r))) )
+    pthread_setspecific(ring_key, r);
+
+  return r;
+}
+
+void
+init_ring(void)
+{ pthread_key_create(&ring_key, free_ring);
+}
+
+#else
+static ring ring_store;
+#define my_ring() (&ring_store)
+
+void init_ring(void) {}
+#endif
+

 wchar_t *
 str2ring(const wchar_t *in)
-{ wchar_t *copy = sgml_malloc((wcslen(in)+1)*sizeof(wchar_t));
+{ ring *r;
+  wchar_t *copy;

-  if ( !copy )
+  if ( !(r=my_ring()) ||
+       !(copy = sgml_malloc((wcslen(in)+1)*sizeof(wchar_t))) )
  { sgml_nomem();
    return NULL;
  }

  wcscpy(copy, in);
-  if ( ring[ringp] )
-    sgml_free(ring[ringp]);
-  ring[ringp++] = copy;
-  if ( ringp == RINGSIZE )
-    ringp = 0;
+  if ( r->ring[r->ringp] )
+    sgml_free(r->ring[r->ringp]);
+  r->ring[r->ringp++] = copy;
+  if ( r->ringp == RINGSIZE )
+    r->ringp = 0;

  return copy;
 }
@ -461,13 +512,19 @@ str2ring(const wchar_t *in)

 void *
 ringallo(size_t size)
-{ char *result = sgml_malloc(size);
-    
-  if ( ring[ringp] )
-    sgml_free(ring[ringp]);
-  ring[ringp++] = result;
-  if ( ringp == RINGSIZE )
-    ringp = 0;
+{ ring *r;
+  char *result;
+
+  if ( !(r=my_ring()) || !(result = sgml_malloc(size)) )
+  { sgml_nomem();
+    return NULL;
+  }
+
+  if ( r->ring[r->ringp] )
+    sgml_free(r->ring[r->ringp]);
+  r->ring[r->ringp++] = result;
+  if ( r->ringp == RINGSIZE )
+    r->ringp = 0;

  return result;
 }
@ -529,7 +586,7 @@ wcstoutf8(const wchar_t *in)
    { size++;
    }
  }
-  
+
  rc = sgml_malloc(size+1);
  for(o=rc, s=in; *s; s++)
  { o = utf8_put_char(o, *s);
@ -605,7 +662,7 @@ load_sgml_file_to_charp(const ichar *file, int normalise_rsre, size_t *length)

      if ( r )
      { char *s = r;
-	
+
 	while(len>0)
 	{ int n;

@ -652,7 +709,7 @@ load_sgml_file_to_charp(const ichar *file, int normalise_rsre, size_t *length)

 	  if ( last_is_lf )
 	    r2[--len] = '\0';		/* delete last LF */
-	  
+
 	  if ( length )
 	    *length = len;
 	  sgml_free(r);
--- a/packages/sgml/util.h
+++ b/packages/sgml/util.h
@ -34,16 +34,16 @@
 #include <malloc.h>
 #endif

-typedef struct 
+typedef struct
 { int allocated;
  int size;
  ichar *data;
 } icharbuf;

-typedef struct 
+typedef struct
 { int allocated;
  int size;
-  union 
+  union
  { wchar_t *w;				/* UCS */
  } data;
  wchar_t localbuf[256];		/* Initial local store */
@ -98,6 +98,7 @@ void		empty_ocharbuf(ocharbuf *buf);
 	{ buf->data.w[at] = chr; \
 	}

+void		init_ring(void);
 const wchar_t *	str_summary(const wchar_t *s, int len);
 wchar_t *	str2ring(const wchar_t *in);
 void *		ringallo(size_t);
@ -107,8 +108,6 @@ ichar *		load_sgml_file_to_charp(const ichar *file, int normalise_rsre,
 					size_t *len);
 FILE *		wfopen(const wchar_t *name, const char *mode);

-void wputs(ichar *s);
-
 #if defined(USE_STRING_FUNCTIONS) && !defined(UTIL_H_IMPLEMENTATION)

 #define istrlen(s1)   wcslen((s1))
--- a/packages/sgml/xml_unicode.c
+++ b/packages/sgml/xml_unicode.c
@ -29,8 +29,6 @@
    the GNU General Public License.
 */

-#include "xml_unicode.h"
-

 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 XML character classification.
--- a/packages/sgml/xmlns.c
+++ b/packages/sgml/xmlns.c
@ -29,35 +29,36 @@

 #ifdef XMLNS

-static xmlns *
+xmlns *	/* record the binding ns -> url and return the new xmlns cell */
 xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url)
 { sgml_environment *env = p->environments;
  dtd_symbol *n = (*ns ? dtd_add_symbol(p->dtd, ns) : (dtd_symbol *)NULL);
  dtd_symbol *u = dtd_add_symbol(p->dtd, url); /* TBD: ochar/ichar */
+  xmlns *x = sgml_malloc(sizeof(*x));	/* NOTE(review): x is dereferenced without a NULL check; confirm sgml_malloc() cannot return NULL */

-  if ( p->on_xmlns )
-    (*p->on_xmlns)(p, n, u);
+  x->name = n;	/* NULL when ns is the empty string */
+  x->url  = u;

  if ( env )
-  { xmlns *x = sgml_malloc(sizeof(*n));
+  { if ( p->on_xmlns )	/* callback is only invoked when an environment exists */
+      (*p->on_xmlns)(p, n, u);

-    x->name = n;
-    x->url  = u;
    x->next = env->xmlns;
    env->xmlns = x;
-
-    return x;
+  } else	/* no environment: keep the binding on the parser itself */
+  { x->next = p->xmlns;	/* prepend to the parser-level list */
+    p->xmlns = x;
  }

-  return NULL;
+  return x;	/* returned on both paths */
 }


 void
-xmlns_free(sgml_environment *env)
-{ xmlns *n, *next;
+xmlns_free(xmlns *n)
+{ xmlns *next;

-  for(n = env->xmlns; n; n = next)
+  for(; n; n = next)
  { next = n->next;

    sgml_free(n);
@ -66,16 +67,22 @@ xmlns_free(sgml_environment *env)


 xmlns *
-xmlns_find(sgml_environment *env, dtd_symbol *ns)
-{ for(; env; env = env->parent)
-  { xmlns *n;
+xmlns_find(dtd_parser *p, dtd_symbol *ns)	/* binding whose name is ns, or NULL if there is none */
+{ sgml_environment *env = p->environments;	/* start of the environment chain */
+  xmlns *n;

-    for(n=env->xmlns; n; n = n->next)
+  for(; env; env = env->parent)	/* walk the chain through ->parent */
+  { for(n=env->xmlns; n; n = n->next)	/* bindings attached to this environment */
    { if ( n->name == ns )
 	return n;
    }
  }

+  for (n=p->xmlns; n; n = n->next)	/* then the bindings kept on the parser itself */
+  { if ( n->name == ns )	/* symbols are compared by pointer; ns may be NULL */
+      return n;
+  }
+
  return NULL;
 }

@ -97,7 +104,7 @@ void
 update_xmlns(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts)
 { dtd_attr_list *al;
  int nschr = p->dtd->charfunc->func[CF_NS]; /* : */
-       
+
  for(al=e->attributes; al; al=al->next)
  { dtd_attr *a = al->attribute;
    const ichar *name = a->name->name;
@ -123,7 +130,7 @@ update_xmlns(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts)
 xmlns_resolve()
    Convert a symbol as returned by the XML level-1.0 parser to its namespace
    tuple {url}localname.  This function is not used internally, but provided
-    for use from the call-back functions of the parser.  
+    for use from the call-back functions of the parser.

    It exploits the stack of namespace-environments managed by the parser
    itself (see update_xmlns())
@ -150,7 +157,7 @@ xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
      if ( istrprefix(L"xml", buf) )	/* XML reserved namespaces */
      { *url = n->name;
        return TRUE;
-      } else if ( (ns = xmlns_find(p->environments, n)) )
+      } else if ( (ns = xmlns_find(p, n)) )
      { if ( ns->url->name[0] )
 	  *url = ns->url->name;
 	else
@ -158,7 +165,9 @@ xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,
 	return TRUE;
      } else
      { *url = n->name;			/* undefined namespace */
-	gripe(ERC_EXISTENCE, L"namespace", n->name);
+	if ( p->xml_no_ns == NONS_QUIET )
+	  return TRUE;
+	gripe(p, ERC_EXISTENCE, L"namespace", n->name);
 	return FALSE;
      }
    }
@ -195,16 +204,16 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url)
    ichar *o = buf;
    const ichar *s;
    xmlns *ns;
-  
+
    for(s=id->name; *s; s++)
    { if ( *s == nschr )		/* explicit namespace */
      { dtd_symbol *n;
-  
+
 	*o = '\0';
 	*local = s+1;
 	n = dtd_add_symbol(dtd, buf);

-	if ( (ns = xmlns_find(p->environments, n)) )
+	if ( (ns = xmlns_find(p, n)) )
 	{ if ( ns->url->name[0] )
 	    *url = ns->url->name;
 	  else
@ -213,17 +222,19 @@ xmlns_resolve_element(dtd_parser *p, const ichar **local, const ichar **url)
 	  return TRUE;
 	} else
 	{ *url = n->name;		/* undefined namespace */
-	  gripe(ERC_EXISTENCE, "namespace", n->name);
 	  e->thisns = xmlns_push(p, n->name, n->name); /* define implicitly */
+	  if ( p->xml_no_ns == NONS_QUIET )
+	    return TRUE;
+	  gripe(p, ERC_EXISTENCE, L"namespace", n->name);
 	  return FALSE;
 	}
      }
      *o++ = *s;
    }
-  
+
    *local = id->name;
-  
-    if ( (ns = xmlns_find(p->environments, NULL)) )
+
+    if ( (ns = xmlns_find(p, NULL)) )
    { if ( ns->url->name[0] )
 	*url = ns->url->name;
      else
--- a/packages/sgml/xmlns.h
+++ b/packages/sgml/xmlns.h
@ -31,8 +31,9 @@ typedef struct _xmlns
  struct _xmlns *next;			/* next name */
 } xmlns;

-void		xmlns_free(sgml_environment *env);
-xmlns*		xmlns_find(sgml_environment *env, dtd_symbol *ns);
+void		xmlns_free(xmlns *list);
+xmlns*		xmlns_find(dtd_parser *p, dtd_symbol *ns);
+xmlns *		xmlns_push(dtd_parser *p, const ichar *ns, const ichar *url);
 void		update_xmlns(dtd_parser *p, dtd_element *e,
 			     int natts, sgml_attribute *atts);
 int		xmlns_resolve_attribute(dtd_parser *p, dtd_symbol *id,