2402 lines
		
	
	
		
			57 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			2402 lines
		
	
	
		
			57 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*  $Id$
 | |
| 
 | |
|     Part of SWI-Prolog
 | |
| 
 | |
|     Author:        Jan Wielemaker
 | |
|     E-mail:        wielemak@science.uva.nl
 | |
|     WWW:           http://www.swi-prolog.org
 | |
|     Copyright (C): 1985-2008, University of Amsterdam
 | |
| 
 | |
|     This library is free software; you can redistribute it and/or
 | |
|     modify it under the terms of the GNU Lesser General Public
 | |
|     License as published by the Free Software Foundation; either
 | |
|     version 2.1 of the License, or (at your option) any later version.
 | |
| 
 | |
|     This library is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|     Lesser General Public License for more details.
 | |
| 
 | |
|     You should have received a copy of the GNU Lesser General Public
 | |
|     License along with this library; if not, write to the Free Software
 | |
|     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| */
 | |
| 
 | |
| #define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
 | |
| 
 | |
| #ifdef __WINDOWS__
 | |
| #include <windows.h>
 | |
| #endif
 | |
| 
 | |
| #define DTD_MINOR_ERRORS 1		/* get detailed errors */
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include "dtd.h"
 | |
| #include "catalog.h"
 | |
| #include "model.h"
 | |
| #include <SWI-Stream.h>
 | |
| #include <SWI-Prolog.h>
 | |
| #include <errno.h>
 | |
| #include "error.h"
 | |
| #include <stdlib.h>
 | |
| #include <assert.h>
 | |
| #include <string.h>
 | |
| #include <wctype.h>
 | |
| 
 | |
| #define streq(s1, s2) (strcmp(s1, s2) == 0)
 | |
| 
 | |
| #define MAX_ERRORS	50
 | |
| #define MAX_WARNINGS	50
 | |
| 
 | |
| #define ENDSNUL ((size_t)-1)
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *     PARSER CONTEXT DATA	*
 | |
| 		 *******************************/
 | |
| 
 | |
| #define PD_MAGIC	0x36472ba1	/* just a number */
 | |
| 
 | |
| typedef enum
 | |
| { SA_FILE = 0,				/* Stop at end-of-file */
 | |
|   SA_INPUT,				/* Do not complete input */
 | |
|   SA_ELEMENT,				/* Stop after first element */
 | |
|   SA_CONTENT,				/* Stop after close */
 | |
|   SA_DECL				/* Stop after declaration */
 | |
| } stopat;
 | |
| 
 | |
| typedef enum
 | |
| { EM_QUIET = 0,				/* Suppress messages */
 | |
|   EM_PRINT,				/* Print message */
 | |
|   EM_STYLE				/* include style-messages */
 | |
| } errormode;
 | |
| 
 | |
| typedef struct _env
 | |
| { term_t	tail;
 | |
|   struct _env *parent;
 | |
| } env;
 | |
| 
 | |
| 
 | |
| typedef struct _parser_data
 | |
| { int	      magic;			/* PD_MAGIC */
 | |
|   dtd_parser *parser;			/* parser itself */
 | |
| 
 | |
|   int	      warnings;			/* #warnings seen */
 | |
|   int	      errors;			/* #errors seen */
 | |
|   int	      max_errors;		/* error limit */
 | |
|   int	      max_warnings;		/* warning limit */
 | |
|   errormode   error_mode;		/* how to handle errors */
 | |
|   int	      positions;		/* report file-positions */
 | |
|   int         exception;		/* pending exception from callback */
 | |
| 
 | |
|   predicate_t on_begin;			/* begin element */
 | |
|   predicate_t on_end;			/* end element */
 | |
|   predicate_t on_cdata;			/* cdata */
 | |
|   predicate_t on_entity;		/* entity */
 | |
|   predicate_t on_pi;			/* processing instruction */
 | |
|   predicate_t on_xmlns;			/* xmlns */
 | |
|   predicate_t on_urlns;			/* url --> namespace */
 | |
|   predicate_t on_error;			/* errors */
 | |
|   predicate_t on_decl;			/* declarations */
 | |
| 
 | |
|   stopat      stopat;			/* Where to stop */
 | |
|   int	      stopped;			/* Environment is complete */
 | |
| 
 | |
|   IOSTREAM*   source;			/* Where we are reading from */
 | |
| 
 | |
|   term_t      list;			/* output term (if any) */
 | |
|   term_t      tail;			/* tail of the list */
 | |
|   env 	     *stack;			/* environment stack */
 | |
|   int	      free_on_close;		/* sgml_free parser on close */
 | |
| } parser_data;
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      CONSTANTS		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static functor_t FUNCTOR_and2;
 | |
| static functor_t FUNCTOR_bar2;
 | |
| static functor_t FUNCTOR_comma2;
 | |
| static functor_t FUNCTOR_default1;
 | |
| static functor_t FUNCTOR_dialect1;
 | |
| static functor_t FUNCTOR_document1;
 | |
| static functor_t FUNCTOR_dtd1;
 | |
| static functor_t FUNCTOR_dtd2;
 | |
| static functor_t FUNCTOR_element3;
 | |
| static functor_t FUNCTOR_entity1;
 | |
| static functor_t FUNCTOR_equal2;
 | |
| static functor_t FUNCTOR_file1;
 | |
| static functor_t FUNCTOR_fixed1;
 | |
| static functor_t FUNCTOR_line1;
 | |
| static functor_t FUNCTOR_list1;
 | |
| static functor_t FUNCTOR_max_errors1;
 | |
| static functor_t FUNCTOR_nameof1;
 | |
| static functor_t FUNCTOR_notation1;
 | |
| static functor_t FUNCTOR_omit2;
 | |
| static functor_t FUNCTOR_opt1;
 | |
| static functor_t FUNCTOR_plus1;
 | |
| static functor_t FUNCTOR_rep1;
 | |
| static functor_t FUNCTOR_sgml_parser1;
 | |
| static functor_t FUNCTOR_parse1;
 | |
| static functor_t FUNCTOR_source1;
 | |
| static functor_t FUNCTOR_content_length1;
 | |
| static functor_t FUNCTOR_call2;
 | |
| static functor_t FUNCTOR_charpos1;
 | |
| static functor_t FUNCTOR_charpos2;
 | |
| static functor_t FUNCTOR_ns2;		/* :/2 */
 | |
| static functor_t FUNCTOR_space1;
 | |
| static functor_t FUNCTOR_pi1;
 | |
| static functor_t FUNCTOR_sdata1;
 | |
| static functor_t FUNCTOR_ndata1;
 | |
| static functor_t FUNCTOR_number1;
 | |
| static functor_t FUNCTOR_syntax_errors1;
 | |
| static functor_t FUNCTOR_minus2;
 | |
| static functor_t FUNCTOR_positions1;
 | |
| static functor_t FUNCTOR_event_class1;
 | |
| static functor_t FUNCTOR_doctype1;
 | |
| static functor_t FUNCTOR_allowed1;
 | |
| static functor_t FUNCTOR_context1;
 | |
| static functor_t FUNCTOR_defaults1;
 | |
| static functor_t FUNCTOR_shorttag1;
 | |
| static functor_t FUNCTOR_qualify_attributes1;
 | |
| static functor_t FUNCTOR_encoding1;
 | |
| 
 | |
| static atom_t ATOM_true;
 | |
| static atom_t ATOM_false;
 | |
| static atom_t ATOM_cdata;
 | |
| static atom_t ATOM_rcdata;
 | |
| static atom_t ATOM_pcdata;
 | |
| static atom_t ATOM_empty;
 | |
| static atom_t ATOM_any;
 | |
| static atom_t ATOM_position;
 | |
| 
 | |
| #define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a)
 | |
| 
 | |
| static void
 | |
| initConstants(void)
 | |
| {
 | |
|   FUNCTOR_sgml_parser1	 = mkfunctor("sgml_parser", 1);
 | |
|   FUNCTOR_equal2	 = mkfunctor("=", 2);
 | |
|   FUNCTOR_dtd1		 = mkfunctor("dtd", 1);
 | |
|   FUNCTOR_element3	 = mkfunctor("element", 3);
 | |
|   FUNCTOR_entity1	 = mkfunctor("entity", 1);
 | |
|   FUNCTOR_document1	 = mkfunctor("document", 1);
 | |
|   FUNCTOR_dtd2		 = mkfunctor("dtd", 2);
 | |
|   FUNCTOR_omit2		 = mkfunctor("omit", 2);
 | |
|   FUNCTOR_and2		 = mkfunctor("&", 2);
 | |
|   FUNCTOR_comma2	 = mkfunctor(",", 2);
 | |
|   FUNCTOR_bar2		 = mkfunctor("|", 2);
 | |
|   FUNCTOR_opt1		 = mkfunctor("?", 1);
 | |
|   FUNCTOR_rep1		 = mkfunctor("*", 1);
 | |
|   FUNCTOR_plus1		 = mkfunctor("+", 1);
 | |
|   FUNCTOR_default1	 = mkfunctor("default", 1);
 | |
|   FUNCTOR_fixed1	 = mkfunctor("fixed", 1);
 | |
|   FUNCTOR_list1		 = mkfunctor("list", 1);
 | |
|   FUNCTOR_nameof1	 = mkfunctor("nameof", 1);
 | |
|   FUNCTOR_notation1	 = mkfunctor("notation", 1);
 | |
|   FUNCTOR_file1		 = mkfunctor("file", 1);
 | |
|   FUNCTOR_line1		 = mkfunctor("line", 1);
 | |
|   FUNCTOR_dialect1	 = mkfunctor("dialect", 1);
 | |
|   FUNCTOR_max_errors1	 = mkfunctor("max_errors", 1);
 | |
|   FUNCTOR_parse1	 = mkfunctor("parse", 1);
 | |
|   FUNCTOR_source1	 = mkfunctor("source", 1);
 | |
|   FUNCTOR_content_length1= mkfunctor("content_length", 1);
 | |
|   FUNCTOR_call2		 = mkfunctor("call", 2);
 | |
|   FUNCTOR_charpos1	 = mkfunctor("charpos", 1);
 | |
|   FUNCTOR_charpos2	 = mkfunctor("charpos", 2);
 | |
|   FUNCTOR_ns2		 = mkfunctor(":", 2);
 | |
|   FUNCTOR_space1	 = mkfunctor("space", 1);
 | |
|   FUNCTOR_pi1		 = mkfunctor("pi", 1);
 | |
|   FUNCTOR_sdata1	 = mkfunctor("sdata", 1);
 | |
|   FUNCTOR_ndata1	 = mkfunctor("ndata", 1);
 | |
|   FUNCTOR_number1	 = mkfunctor("number", 1);
 | |
|   FUNCTOR_syntax_errors1 = mkfunctor("syntax_errors", 1);
 | |
|   FUNCTOR_minus2 	 = mkfunctor("-", 2);
 | |
|   FUNCTOR_positions1 	 = mkfunctor("positions", 1);
 | |
|   FUNCTOR_event_class1 	 = mkfunctor("event_class", 1);
 | |
|   FUNCTOR_doctype1       = mkfunctor("doctype", 1);
 | |
|   FUNCTOR_allowed1       = mkfunctor("allowed", 1);
 | |
|   FUNCTOR_context1       = mkfunctor("context", 1);
 | |
|   FUNCTOR_defaults1	 = mkfunctor("defaults", 1);
 | |
|   FUNCTOR_shorttag1	 = mkfunctor("shorttag", 1);
 | |
|   FUNCTOR_qualify_attributes1 = mkfunctor("qualify_attributes", 1);
 | |
|   FUNCTOR_encoding1	 = mkfunctor("encoding", 1);
 | |
| 
 | |
|   ATOM_true = PL_new_atom("true");
 | |
|   ATOM_false = PL_new_atom("false");
 | |
|   ATOM_cdata = PL_new_atom("cdata");
 | |
|   ATOM_rcdata = PL_new_atom("rcdata");
 | |
|   ATOM_pcdata = PL_new_atom("#pcdata");
 | |
|   ATOM_empty = PL_new_atom("empty");
 | |
|   ATOM_any = PL_new_atom("any");
 | |
|   ATOM_position = PL_new_atom("#position");
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	       ACCESS		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static int
 | |
| unify_parser(term_t parser, dtd_parser *p)
 | |
| { return PL_unify_term(parser, PL_FUNCTOR, FUNCTOR_sgml_parser1,
 | |
| 		         PL_POINTER, p);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| get_parser(term_t parser, dtd_parser **p)
 | |
| { if ( PL_is_functor(parser, FUNCTOR_sgml_parser1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     void *ptr;
 | |
| 
 | |
|     PL_get_arg(1, parser, a);
 | |
|     if ( PL_get_pointer(a, &ptr) )
 | |
|     { dtd_parser *tmp = ptr;
 | |
| 
 | |
|       if ( tmp->magic == SGML_PARSER_MAGIC )
 | |
|       { *p = tmp;
 | |
| 
 | |
|         return TRUE;
 | |
|       }
 | |
|       return sgml2pl_error(ERR_EXISTENCE, "sgml_parser", parser);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return sgml2pl_error(ERR_TYPE, "sgml_parser", parser);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| unify_dtd(term_t t, dtd *dtd)
 | |
| { if ( dtd->doctype )
 | |
|     return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_dtd2,
 | |
| 			 PL_POINTER, dtd,
 | |
| 			 PL_CHARS, dtd->doctype);
 | |
|   else
 | |
|     return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_dtd2,
 | |
| 			 PL_POINTER, dtd,
 | |
| 			 PL_VARIABLE);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| get_dtd(term_t t, dtd **dtdp)
 | |
| { if ( PL_is_functor(t, FUNCTOR_dtd2) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     void *ptr;
 | |
| 
 | |
|     PL_get_arg(1, t, a);
 | |
|     if ( PL_get_pointer(a, &ptr) )
 | |
|     { dtd *tmp = ptr;
 | |
| 
 | |
|       if ( tmp->magic == SGML_DTD_MAGIC )
 | |
|       { *dtdp = tmp;
 | |
| 
 | |
|         return TRUE;
 | |
|       }
 | |
|       return sgml2pl_error(ERR_EXISTENCE, "dtd", t);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return sgml2pl_error(ERR_TYPE, "dtd", t);
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      NEW/FREE		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static foreign_t
 | |
| pl_new_sgml_parser(term_t ref, term_t options)
 | |
| { term_t head = PL_new_term_ref();
 | |
|   term_t tail = PL_copy_term_ref(options);
 | |
|   term_t tmp  = PL_new_term_ref();
 | |
| 
 | |
|   dtd *dtd = NULL;
 | |
|   dtd_parser *p;
 | |
| 
 | |
|   while ( PL_get_list(tail, head, tail) )
 | |
|   { if ( PL_is_functor(head, FUNCTOR_dtd1) )
 | |
|     { PL_get_arg(1, head, tmp);
 | |
| 
 | |
|       if ( PL_is_variable(tmp) )	/* dtd(X) */
 | |
|       { dtd = new_dtd(NULL);		/* no known doctype */
 | |
| 	dtd->references++;
 | |
| 	unify_dtd(tmp, dtd);
 | |
|       } else if ( !get_dtd(tmp, &dtd) )
 | |
| 	return FALSE;
 | |
|     }
 | |
|   }
 | |
|   if ( !PL_get_nil(tail) )
 | |
|     return sgml2pl_error(ERR_TYPE, "list", tail);
 | |
| 
 | |
|   p = new_dtd_parser(dtd);
 | |
| 
 | |
|   return unify_parser(ref, p);
 | |
| }
 | |
| 
 | |
| 
 | |
| static foreign_t
 | |
| pl_free_sgml_parser(term_t parser)
 | |
| { dtd_parser *p;
 | |
| 
 | |
|   if ( get_parser(parser, &p) )
 | |
|   { free_dtd_parser(p);
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static foreign_t
 | |
| pl_new_dtd(term_t doctype, term_t ref)
 | |
| { ichar *dt;
 | |
|   dtd *dtd;
 | |
| 
 | |
|   if ( !PL_get_wchars(doctype, NULL, &dt, CVT_ATOM|CVT_EXCEPTION) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( !(dtd=new_dtd(dt)) )
 | |
|     return FALSE;
 | |
| 
 | |
|   dtd->references++;
 | |
| 
 | |
|   return unify_dtd(ref, dtd);
 | |
| }
 | |
| 
 | |
| 
 | |
| static foreign_t
 | |
| pl_free_dtd(term_t t)
 | |
| { dtd *dtd;
 | |
| 
 | |
|   if ( get_dtd(t, &dtd) )
 | |
|   { free_dtd(dtd);
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	   DATA EXCHANGE	*
 | |
| 		 *******************************/
 | |
| 
 | |
| static void
 | |
| put_atom_wchars(term_t t, wchar_t const *s)
 | |
| { PL_put_variable(t);
 | |
|   PL_unify_wchars(t, PL_ATOM, ENDSNUL, s);
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	    PROPERTIES		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static foreign_t
 | |
| pl_set_sgml_parser(term_t parser, term_t option)
 | |
| { dtd_parser *p;
 | |
| 
 | |
|   if ( !get_parser(parser, &p) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( PL_is_functor(option, FUNCTOR_file1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     wchar_t *file;
 | |
|     dtd_symbol *fs;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_wchars(a, NULL, &file, CVT_ATOM|CVT_EXCEPTION) )
 | |
|       return FALSE;
 | |
|     fs = dtd_add_symbol(p->dtd, file);	/* symbol will be freed */
 | |
|     set_file_dtd_parser(p, IN_FILE, fs->name);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_line1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_integer(a, &p->location.line) )
 | |
|       return sgml2pl_error(ERR_TYPE, "integer", a);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_charpos1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_long(a, &p->location.charpos) )
 | |
|       return sgml2pl_error(ERR_TYPE, "integer", a);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_dialect1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     char *s;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_atom_chars(a, &s) )
 | |
|       return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
| 
 | |
|     if ( streq(s, "xml") )
 | |
|       set_dialect_dtd(p->dtd, DL_XML);
 | |
|     else if ( streq(s, "xmlns") )
 | |
|       set_dialect_dtd(p->dtd, DL_XMLNS);
 | |
|     else if ( streq(s, "sgml") )
 | |
|       set_dialect_dtd(p->dtd, DL_SGML);
 | |
|     else
 | |
|       return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", a);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_space1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     char *s;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_atom_chars(a, &s) )
 | |
|       return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
| 
 | |
|     if ( streq(s, "preserve") )
 | |
|       p->dtd->space_mode = SP_PRESERVE;
 | |
|     else if ( streq(s, "default") )
 | |
|       p->dtd->space_mode = SP_DEFAULT;
 | |
|     else if ( streq(s, "remove") )
 | |
|       p->dtd->space_mode = SP_REMOVE;
 | |
|     else if ( streq(s, "sgml") )
 | |
|       p->dtd->space_mode = SP_SGML;
 | |
| 
 | |
|     else
 | |
|       return sgml2pl_error(ERR_DOMAIN, "space", a);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_defaults1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     int val;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_bool(a, &val) )
 | |
|       return sgml2pl_error(ERR_TYPE, "boolean", a);
 | |
| 
 | |
|     if ( val )
 | |
|       p->flags &= ~SGML_PARSER_NODEFS;
 | |
|     else
 | |
|       p->flags |= SGML_PARSER_NODEFS;
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_qualify_attributes1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     int val;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_bool(a, &val) )
 | |
|       return sgml2pl_error(ERR_TYPE, "boolean", a);
 | |
| 
 | |
|     if ( val )
 | |
|       p->flags |= SGML_PARSER_QUALIFY_ATTS;
 | |
|     else
 | |
|       p->flags &= ~SGML_PARSER_QUALIFY_ATTS;
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_shorttag1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     int val;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_bool(a, &val) )
 | |
|       return sgml2pl_error(ERR_TYPE, "boolean", a);
 | |
| 
 | |
|     set_option_dtd(p->dtd, OPT_SHORTTAG, val);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_number1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     char *s;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_atom_chars(a, &s) )
 | |
|       return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
| 
 | |
|     if ( streq(s, "token") )
 | |
|       p->dtd->number_mode = NU_TOKEN;
 | |
|     else if ( streq(s, "integer") )
 | |
|       p->dtd->number_mode = NU_INTEGER;
 | |
|     else
 | |
|       return sgml2pl_error(ERR_DOMAIN, "number", a);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_encoding1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     char *val;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( !PL_get_atom_chars(a, &val) )
 | |
|       return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
|     if ( !xml_set_encoding(p, val) )
 | |
|       return sgml2pl_error(ERR_DOMAIN, "encoding", a);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_doctype1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
|     ichar *s;
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( PL_is_variable(a) )
 | |
|     { p->enforce_outer_element = NULL;
 | |
|     } else
 | |
|     { if ( !PL_get_wchars(a, NULL, &s, CVT_ATOM) )
 | |
| 	return sgml2pl_error(ERR_TYPE, "atom_or_variable", a);
 | |
|     
 | |
|       p->enforce_outer_element = dtd_add_symbol(p->dtd, s);
 | |
|     }
 | |
|   } else
 | |
|     return sgml2pl_error(ERR_DOMAIN, "sgml_parser_option", option);
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static dtd_srcloc *
 | |
| file_location(dtd_parser *p, dtd_srcloc *l)
 | |
| { while(l->parent && l->type != IN_FILE)
 | |
|     l = l->parent;
 | |
| 
 | |
|   return l;
 | |
| }
 | |
| 
 | |
| 
 | |
| static foreign_t
 | |
| pl_get_sgml_parser(term_t parser, term_t option)
 | |
| { dtd_parser *p;
 | |
| 
 | |
|   if ( !get_parser(parser, &p) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( PL_is_functor(option, FUNCTOR_charpos1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     return PL_unify_integer(a, file_location(p, &p->startloc)->charpos);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_line1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     return PL_unify_integer(a, file_location(p, &p->startloc)->line);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_charpos2) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     if ( PL_get_arg(1, option, a) &&
 | |
| 	 PL_unify_integer(a, file_location(p, &p->startloc)->charpos) &&
 | |
| 	 PL_get_arg(2, option, a) &&
 | |
| 	 PL_unify_integer(a, file_location(p, &p->location)->charpos) )
 | |
|       return TRUE;
 | |
|     else
 | |
|       return FALSE;
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_file1) )
 | |
|   { dtd_srcloc *l = file_location(p, &p->location);
 | |
| 
 | |
|     if ( l->type == IN_FILE && l->name.file )
 | |
|     { term_t a = PL_new_term_ref();
 | |
| 
 | |
|       PL_get_arg(1, option, a);
 | |
|       return PL_unify_wchars(a, PL_ATOM, ENDSNUL, l->name.file);
 | |
|     }
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_source1) )
 | |
|   { parser_data *pd = p->closure;
 | |
| 
 | |
|     if ( pd && pd->magic == PD_MAGIC && pd->source )
 | |
|     { term_t a = PL_new_term_ref();
 | |
| 
 | |
|       PL_get_arg(1, option, a);
 | |
|       return PL_unify_stream(a, pd->source);
 | |
|     }
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_dialect1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     switch(p->dtd->dialect)
 | |
|     { case DL_SGML:
 | |
| 	return PL_unify_atom_chars(a, "sgml");
 | |
|       case DL_XML:
 | |
| 	return PL_unify_atom_chars(a, "xml");
 | |
|       case DL_XMLNS:
 | |
| 	return PL_unify_atom_chars(a, "xmlns");
 | |
|     }
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_event_class1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     switch(p->event_class)
 | |
|     { case EV_EXPLICIT:
 | |
| 	return PL_unify_atom_chars(a, "explicit");
 | |
|       case EV_OMITTED:
 | |
| 	return PL_unify_atom_chars(a, "omitted");
 | |
|       case EV_SHORTTAG:
 | |
| 	return PL_unify_atom_chars(a, "shorttag");
 | |
|       case EV_SHORTREF:
 | |
| 	return PL_unify_atom_chars(a, "shortref");
 | |
|     }
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_dtd1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
| 
 | |
|     return unify_dtd(a, p->dtd);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_doctype1) )
 | |
|   { term_t a = PL_new_term_ref();
 | |
| 
 | |
|     PL_get_arg(1, option, a);
 | |
|     if ( p->enforce_outer_element )
 | |
|       return PL_unify_wchars(a, PL_ATOM, ENDSNUL,
 | |
| 			     p->enforce_outer_element->name);
 | |
|     else
 | |
|       return TRUE;			/* leave variable */
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_allowed1) )
 | |
|   { term_t tail = PL_new_term_ref();
 | |
|     term_t head = PL_new_term_ref();
 | |
|     term_t tmp = PL_new_term_ref();
 | |
|     sgml_environment *env = p->environments;
 | |
|       
 | |
|     PL_get_arg(1, option, tail);
 | |
| 
 | |
|     if ( env )
 | |
|     { for( ; env; env = env->parent)
 | |
|       { dtd_element *buf[256];		/* MAX_VISITED! */
 | |
| 	int n = sizeof(buf)/sizeof(dtd_element *); /* not yet used! */
 | |
| 	int i;
 | |
|   
 | |
| 	state_allows_for(env->state, buf, &n);
 | |
|   
 | |
| 	for(i=0; i<n; i++)
 | |
| 	{ if ( buf[i] == CDATA_ELEMENT )
 | |
| 	    PL_put_atom_chars(tmp, "#pcdata");
 | |
| 	  else
 | |
| 	    put_atom_wchars(tmp, buf[i]->name->name);
 | |
|   
 | |
| 	  if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	       !PL_unify(head, tmp) )
 | |
| 	    return FALSE;
 | |
| 	}
 | |
|   
 | |
| 	if ( !env->element->structure ||
 | |
| 	     !env->element->structure->omit_close )
 | |
| 	  break;
 | |
|       }
 | |
|     } else if ( p->enforce_outer_element )
 | |
|     { put_atom_wchars(tmp, p->enforce_outer_element->name);
 | |
| 
 | |
|       if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	   !PL_unify(head, tmp) )
 | |
| 	return FALSE;
 | |
|     }
 | |
| 
 | |
|     return PL_unify_nil(tail);
 | |
|   } else if ( PL_is_functor(option, FUNCTOR_context1) )
 | |
|   { term_t tail = PL_new_term_ref();
 | |
|     term_t head = PL_new_term_ref();
 | |
|     term_t tmp = PL_new_term_ref();
 | |
|     sgml_environment *env = p->environments;
 | |
|       
 | |
|     PL_get_arg(1, option, tail);
 | |
| 
 | |
|     for( ; env; env = env->parent)
 | |
|     { put_atom_wchars(tmp, env->element->name->name);
 | |
| 
 | |
|       if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	   !PL_unify(head, tmp) )
 | |
| 	return FALSE;
 | |
|     }
 | |
| 
 | |
|     return PL_unify_nil(tail);
 | |
|   } else
 | |
|     return sgml2pl_error(ERR_DOMAIN, "parser_option", option);
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| call_prolog(parser_data *pd, predicate_t pred, term_t av)
 | |
| { qid_t qid = PL_open_query(NULL, PL_Q_PASS_EXCEPTION, pred, av);
 | |
|   int rc = PL_next_solution(qid);
 | |
| 
 | |
|   if ( !rc && PL_exception(qid) )
 | |
|     pd->exception = TRUE;
 | |
|   else
 | |
|     pd->exception = FALSE;
 | |
| 
 | |
|   PL_close_query(qid);
 | |
| 
 | |
|   return rc;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| end_frame(fid_t fid, int ex)
 | |
| { if ( ex )
 | |
|     PL_close_foreign_frame(fid);
 | |
|   else
 | |
|     PL_discard_foreign_frame(fid);
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| put_url(dtd_parser *p, term_t t, const ichar *url)
 | |
|     Store the url-part of a name-space qualifier in term.  We call
 | |
|     xml:xmlns(-Canonical, +Full) trying to resolve the specified
 | |
|     namespace to an internal canonical namespace.
 | |
| 
 | |
|     We do a little caching as there will generally be only a very
 | |
|     small pool of urls in use.  We assume the url-pointers we get
 | |
|     life for the time of the parser.  It might be possible that
 | |
|     multiple url pointers point to the same url, but this only clobbers
 | |
|     the cache a little.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| #define URL_CACHE 4			/* # entries cached */
 | |
| 
 | |
| typedef struct
 | |
| { const ichar *url;			/* URL pointer */
 | |
|   atom_t canonical;
 | |
| } url_cache;
 | |
| 
 | |
| static url_cache cache[URL_CACHE];
 | |
| 
 | |
| static void
 | |
| reset_url_cache(void)
 | |
| { int i;
 | |
|   url_cache *c = cache;
 | |
| 
 | |
|   for(i=0; i<URL_CACHE; i++)
 | |
|   { c[i].url = NULL;
 | |
|     if ( c[i].canonical )
 | |
|       PL_unregister_atom(c[i].canonical);
 | |
|     c[i].canonical = 0;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| put_url(dtd_parser *p, term_t t, const ichar *url)
 | |
| { parser_data *pd = p->closure;
 | |
|   int i;
 | |
| 
 | |
|   if ( !pd->on_urlns )
 | |
|   { put_atom_wchars(t, url);
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   for(i=0; i<URL_CACHE; i++)
 | |
|   { if ( cache[i].url == url )		/* cache hit */
 | |
|     { if ( cache[i].canonical )		/* and a canonical value */
 | |
| 	PL_put_atom(t, cache[i].canonical);
 | |
|       else
 | |
| 	put_atom_wchars(t, url);
 | |
| 
 | |
|       return;
 | |
|     }
 | |
|   }
 | |
| 					/* shift the cache */
 | |
|   i = URL_CACHE-1;
 | |
|   if ( cache[i].canonical )
 | |
|     PL_unregister_atom(cache[i].canonical);
 | |
|   for(i=URL_CACHE-1; i>0; i--)
 | |
|     cache[i] = cache[i-1];
 | |
|   cache[0].url = url;
 | |
|   cache[0].canonical = 0;
 | |
| 
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(3);
 | |
|     atom_t a;
 | |
| 
 | |
|     put_atom_wchars(av+0, url);
 | |
|     unify_parser(av+2, p);
 | |
|     if ( PL_call_predicate(NULL, PL_Q_NORMAL, pd->on_urlns, av) &&
 | |
| 	 PL_get_atom(av+1, &a) )
 | |
|     { PL_register_atom(a);
 | |
|       cache[0].canonical = a;
 | |
|       PL_put_atom(t, a);
 | |
|     } else
 | |
|     { put_atom_wchars(t, url);
 | |
|     }
 | |
|     PL_discard_foreign_frame(fid);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| put_attribute_name(dtd_parser *p, term_t t, dtd_symbol *nm)
 | |
| { const ichar *url, *local;
 | |
| 
 | |
|   if ( p->dtd->dialect == DL_XMLNS )
 | |
|   { xmlns_resolve_attribute(p, nm, &local, &url);
 | |
| 
 | |
|     if ( url )
 | |
|     { term_t av = PL_new_term_refs(2);
 | |
|     
 | |
|       put_url(p, av+0, url);
 | |
|       put_atom_wchars(av+1, local);
 | |
|       PL_cons_functor_v(t, FUNCTOR_ns2, av);
 | |
|     } else
 | |
|       put_atom_wchars(t, local);
 | |
|   } else
 | |
|     put_atom_wchars(t, nm->name);
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| put_element_name(dtd_parser *p, term_t t, dtd_element *e)
 | |
| { const ichar *url, *local;
 | |
| 
 | |
|   if ( p->dtd->dialect == DL_XMLNS )
 | |
|   { assert(p->environments->element == e);
 | |
|     xmlns_resolve_element(p, &local, &url);
 | |
| 
 | |
|     if ( url )
 | |
|     { term_t av = PL_new_term_refs(2);
 | |
|     
 | |
|       put_url(p, av+0, url);
 | |
|       put_atom_wchars(av+1, local);
 | |
|       PL_cons_functor_v(t, FUNCTOR_ns2, av);
 | |
|     } else
 | |
|       put_atom_wchars(t, local);
 | |
|   } else
 | |
|     put_atom_wchars(t, e->name->name);
 | |
| }
 | |
| 
 | |
| 
 | |
| static ichar *
 | |
| istrblank(const ichar *s)
 | |
| { for( ; *s; s++ )
 | |
|   { if ( iswspace(*s) )
 | |
|       return (ichar *)s;
 | |
|   }
 | |
| 
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| unify_listval(dtd_parser *p,
 | |
| 	      term_t t, attrtype type, size_t len, const ichar *text)
 | |
| { if ( type == AT_NUMBERS && p->dtd->number_mode == NU_INTEGER )
 | |
|   { wchar_t *e;
 | |
| 
 | |
| #if SIZEOF_LONG == 4 && defined(HAVE_WCSTOLL)
 | |
|     int64_t v = wcstoll(text, &e, 10);
 | |
|     if ( (size_t)(e-text) == len && errno != ERANGE )
 | |
|       return PL_unify_int64(t, v);
 | |
| #else
 | |
|     long v = wcstol(text, &e, 10);
 | |
| 
 | |
|     if ( (size_t)(e-text) == len && errno != ERANGE )
 | |
|       return PL_unify_integer(t, v);
 | |
| #endif
 | |
| 					/* TBD: Error!? */
 | |
|   }
 | |
| 
 | |
|   return PL_unify_wchars(t, PL_ATOM, len, text);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| put_att_text(term_t t, sgml_attribute *a)
 | |
| { if ( a->value.textW )
 | |
|   { PL_put_variable(t);
 | |
|     PL_unify_wchars(t, PL_ATOM, a->value.number, a->value.textW);
 | |
|     return TRUE;
 | |
|   } else
 | |
|     return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| put_attribute_value(dtd_parser *p, term_t t, sgml_attribute *a)
 | |
| { switch(a->definition->type)
 | |
|   { case AT_CDATA:
 | |
|       put_att_text(t, a);
 | |
|       break;
 | |
|     case AT_NUMBER:
 | |
|     { if ( !put_att_text(t, a) )
 | |
| 	PL_put_integer(t, a->value.number);
 | |
|       break;
 | |
|     }
 | |
|     default:				/* multi-valued attribute */
 | |
|     { if ( a->definition->islist && a->value.textW )
 | |
|       { term_t tail, head = PL_new_term_ref();
 | |
| 	const ichar *val = a->value.textW;
 | |
| 	const ichar *e;
 | |
| 
 | |
| 	PL_put_variable(t);
 | |
| 	tail = PL_copy_term_ref(t);
 | |
| 	
 | |
| 	for(e=istrblank(val); e; val = e+1, e=istrblank(val))
 | |
| 	{ if ( e == val )
 | |
| 	    continue;			/* skip spaces */
 | |
| 	  PL_unify_list(tail, head, tail);
 | |
| 	  unify_listval(p, head, a->definition->type, e-val, val);
 | |
| 	}
 | |
|         PL_unify_list(tail, head, tail);
 | |
| 	unify_listval(p, head, a->definition->type, istrlen(val), val);
 | |
|         PL_unify_nil(tail);
 | |
|       } else
 | |
| 	put_att_text(t, a);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Produce a tag-location in the format
 | |
| 
 | |
| 	start_location=file:char-char	
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static int
 | |
| put_tag_position(dtd_parser *p, term_t pos)
 | |
| { dtd_srcloc *l = &p->startloc;
 | |
| 
 | |
|   if ( l->type == IN_FILE && l->name.file )
 | |
|   { PL_put_variable(pos);
 | |
|     PL_unify_term(pos,
 | |
| 		  PL_FUNCTOR, FUNCTOR_ns2,
 | |
| 		    PL_NWCHARS, wcslen(l->name.file), l->name.file,
 | |
| 		    PL_FUNCTOR, FUNCTOR_minus2,
 | |
| 		      PL_LONG, l->charpos,
 | |
| 		      PL_LONG, p->location.charpos);
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static int
 | |
| unify_attribute_list(dtd_parser *p, term_t alist,
 | |
| 		     int argc, sgml_attribute *argv)
 | |
| { int i;
 | |
|   term_t tail = PL_copy_term_ref(alist);
 | |
|   term_t h    = PL_new_term_ref();
 | |
|   term_t a    = PL_new_term_refs(2);
 | |
|   parser_data *pd = p->closure;
 | |
| 
 | |
|   for(i=0; i<argc; i++)
 | |
|   { put_attribute_name(p, a+0, argv[i].definition->name);
 | |
|     put_attribute_value(p, a+1, &argv[i]);
 | |
|     PL_cons_functor_v(a, FUNCTOR_equal2, a);
 | |
|     if ( !PL_unify_list(tail, h, tail) ||
 | |
| 	 !PL_unify(h, a) )
 | |
|       return FALSE;
 | |
|   }
 | |
| 
 | |
|   if ( pd->positions && put_tag_position(p, a+1) )
 | |
|   { PL_put_atom(a, ATOM_position);
 | |
|     
 | |
|     PL_cons_functor_v(a, FUNCTOR_equal2, a);
 | |
|     if ( !PL_unify_list(tail, h, tail) ||
 | |
| 	 !PL_unify(h, a) )
 | |
|       return FALSE;
 | |
|   } 
 | |
| 
 | |
|   if ( PL_unify_nil(tail) )
 | |
|   { PL_reset_term_refs(tail);
 | |
| 
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv)
 | |
| { parser_data *pd = p->closure;
 | |
| 
 | |
|   if ( pd->stopped )
 | |
|     return TRUE;
 | |
| 
 | |
|   if ( pd->tail )
 | |
|   { term_t content = PL_new_term_ref();	/* element content */
 | |
|     term_t alist   = PL_new_term_ref();	/* attribute list */
 | |
|     term_t et	   = PL_new_term_ref();	/* element structure */
 | |
|     term_t h       = PL_new_term_ref();
 | |
| 
 | |
|     put_element_name(p, h, e);
 | |
|     unify_attribute_list(p, alist, argc, argv);
 | |
|     PL_unify_term(et, PL_FUNCTOR, FUNCTOR_element3,
 | |
| 		    PL_TERM, h,
 | |
| 		    PL_TERM, alist,
 | |
| 		    PL_TERM, content);
 | |
|     if ( PL_unify_list(pd->tail, h, pd->tail) &&
 | |
| 	 PL_unify(h, et) )
 | |
|     { env *env = sgml_calloc(1, sizeof(*env));
 | |
| 
 | |
|       env->tail   = pd->tail;
 | |
|       env->parent = pd->stack;
 | |
|       pd->stack   = env;
 | |
| 
 | |
|       pd->tail = content;
 | |
|       PL_reset_term_refs(alist);
 | |
|     }
 | |
| 
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   if ( pd->on_begin )
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(3);
 | |
| 
 | |
|     put_element_name(p, av+0, e);
 | |
|     unify_attribute_list(p, av+1, argc, argv);
 | |
|     unify_parser(av+2, p);
 | |
| 
 | |
|     call_prolog(pd, pd->on_begin, av);
 | |
|     end_frame(fid, pd->exception);
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_end(dtd_parser *p, dtd_element *e)
 | |
| { parser_data *pd = p->closure;
 | |
| 
 | |
|   if ( pd->stopped )
 | |
|     return TRUE;
 | |
| 
 | |
|   if ( pd->on_end )
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(2);
 | |
| 
 | |
|     put_element_name(p, av+0, e);
 | |
|     unify_parser(av+1, p);
 | |
| 
 | |
|     call_prolog(pd, pd->on_end, av);
 | |
|     end_frame(fid, pd->exception);
 | |
|   }
 | |
| 
 | |
|   if ( pd->tail && !pd->stopped )
 | |
|   { PL_unify_nil(pd->tail);
 | |
|     PL_reset_term_refs(pd->tail);	/* ? */
 | |
| 
 | |
|     if ( pd->stack )
 | |
|     { env *parent = pd->stack->parent;
 | |
| 
 | |
|       pd->tail = pd->stack->tail;
 | |
|       sgml_free(pd->stack);
 | |
|       pd->stack = parent;
 | |
|     } else
 | |
|     { if ( pd->stopat == SA_CONTENT )
 | |
| 	pd->stopped = TRUE;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( pd->stopat == SA_ELEMENT && !p->environments->parent )
 | |
|     pd->stopped = TRUE;
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_entity(dtd_parser *p, dtd_entity *e, int chr)
 | |
| { parser_data *pd = p->closure;
 | |
| 
 | |
|   if ( pd->stopped )
 | |
|     return TRUE;
 | |
| 
 | |
|   if ( pd->on_entity )
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(2);
 | |
| 
 | |
|     if ( e )
 | |
|       put_atom_wchars(av+0, e->name->name);
 | |
|     else
 | |
|       PL_put_integer(av+0, chr);
 | |
| 
 | |
|     unify_parser(av+1, p);
 | |
| 
 | |
|     call_prolog(pd, pd->on_end, av);
 | |
|     end_frame(fid, pd->exception);
 | |
|   }
 | |
| 
 | |
|   if ( pd->tail )
 | |
|   { term_t h = PL_new_term_ref();
 | |
| 
 | |
|     if ( !PL_unify_list(pd->tail, h, pd->tail) )
 | |
|       return FALSE;
 | |
| 
 | |
|     if ( e )
 | |
|       PL_unify_term(h,
 | |
| 		    PL_FUNCTOR, FUNCTOR_entity1,
 | |
| 		    PL_CHARS, e->name->name);
 | |
|     else
 | |
|       PL_unify_term(h,
 | |
| 		    PL_FUNCTOR, FUNCTOR_entity1,
 | |
| 		    PL_INT, chr);
 | |
| 		
 | |
|     PL_reset_term_refs(h);
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_data(dtd_parser *p, data_type type, int len, const wchar_t *data)
 | |
| { parser_data *pd = p->closure;
 | |
| 
 | |
|   if ( pd->on_cdata )
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(2);
 | |
| 
 | |
|     PL_unify_wchars(av+0, PL_ATOM, len, data);
 | |
| 
 | |
|     unify_parser(av+1, p);
 | |
| 
 | |
|     call_prolog(pd, pd->on_cdata, av);
 | |
|     end_frame(fid, pd->exception);
 | |
|   }
 | |
| 
 | |
|   if ( pd->tail && !pd->stopped )
 | |
|   { term_t h = PL_new_term_ref();
 | |
| 
 | |
|     if ( PL_unify_list(pd->tail, h, pd->tail) )
 | |
|     { int rval = TRUE;
 | |
|       term_t a;
 | |
| 
 | |
|       switch(type)
 | |
|       { case EC_CDATA:
 | |
| 	  a = h;
 | |
| 	  break;
 | |
| 	case EC_SDATA:
 | |
| 	{ term_t d = PL_new_term_ref();
 | |
| 
 | |
| 	  a = d;
 | |
| 	  rval = PL_unify_term(h, PL_FUNCTOR, FUNCTOR_sdata1, PL_TERM, d);
 | |
| 	  break;
 | |
| 	}
 | |
| 	case EC_NDATA:
 | |
| 	{ term_t d = PL_new_term_ref();
 | |
| 
 | |
| 	  a = d;
 | |
| 	  rval = PL_unify_term(h, PL_FUNCTOR, FUNCTOR_ndata1, PL_TERM, d);
 | |
| 	  break;
 | |
| 	}
 | |
| 	default:
 | |
| 	  rval = FALSE;
 | |
| 	  assert(0);
 | |
|       }
 | |
| 			       
 | |
|       if ( rval )
 | |
| 	rval = PL_unify_wchars(a, PL_ATOM, len, data);
 | |
| 
 | |
|       if ( rval )
 | |
|       { PL_reset_term_refs(h);
 | |
| 	return TRUE;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_cdata(dtd_parser *p, data_type type, int len, const wchar_t *data)
 | |
| { return on_data(p, type, len, data);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| can_end_omitted(dtd_parser *p)
 | |
| { sgml_environment *env;
 | |
| 
 | |
|   for(env=p->environments; env; env = env->parent)
 | |
|   { dtd_element *e = env->element;
 | |
| 
 | |
|     if ( !(e->structure && e->structure->omit_close) )
 | |
|       return FALSE;
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_error(dtd_parser *p, dtd_error *error)
 | |
| { parser_data *pd = p->closure;
 | |
|   const char *severity;
 | |
| 
 | |
|   if ( pd->stopped )
 | |
|     return TRUE;
 | |
| 
 | |
|   if ( pd->stopat == SA_ELEMENT &&
 | |
|        (error->minor == ERC_NOT_OPEN || error->minor == ERC_NOT_ALLOWED) &&
 | |
|        can_end_omitted(p) )
 | |
|   { end_document_dtd_parser(p);
 | |
|     sgml_cplocation(&p->location, &p->startloc);
 | |
|     pd->stopped = TRUE;
 | |
|     return TRUE;
 | |
|   }
 | |
| 
 | |
|   switch(error->severity)
 | |
|   { case ERS_STYLE:
 | |
|       if ( pd->error_mode != EM_STYLE )
 | |
| 	return TRUE;
 | |
|       severity = "informational";
 | |
|       break;
 | |
|     case ERS_WARNING:
 | |
|       pd->warnings++;
 | |
|       severity = "warning";
 | |
|       break;
 | |
|     case ERS_ERROR:
 | |
|     default:				/* make compiler happy */
 | |
|       pd->errors++;
 | |
|       severity = "error";
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   if ( pd->on_error )			/* msg, parser */
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(3);
 | |
|     
 | |
|     PL_put_atom_chars(av+0, severity);
 | |
|     PL_unify_wchars(av+1, PL_ATOM,
 | |
| 		    wcslen(error->plain_message), error->plain_message);
 | |
|     unify_parser(av+2, p);
 | |
| 
 | |
|     call_prolog(pd, pd->on_error, av);
 | |
|     end_frame(fid, pd->exception);
 | |
|   } else if ( pd->error_mode != EM_QUIET )
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     predicate_t pred = PL_predicate("print_message", 2, "user");
 | |
|     term_t av = PL_new_term_refs(2);
 | |
|     term_t src = PL_new_term_ref();
 | |
|     term_t parser = PL_new_term_ref();
 | |
|     dtd_srcloc *l = file_location(p, &p->startloc);
 | |
| 
 | |
|     unify_parser(parser, p);
 | |
|     PL_put_atom_chars(av+0, severity);
 | |
| 
 | |
|     if ( l->name.file )
 | |
|     { if ( l->type == IN_FILE )
 | |
| 	put_atom_wchars(src, l->name.file);
 | |
|       else
 | |
| 	put_atom_wchars(src, l->name.entity);
 | |
|     } else
 | |
|     { PL_put_nil(src);
 | |
|     }
 | |
| 
 | |
|     PL_unify_term(av+1,
 | |
| 		  PL_FUNCTOR_CHARS, "sgml", 4,
 | |
| 		    PL_TERM, parser,
 | |
| 		    PL_TERM, src,
 | |
| 		    PL_INT, l->line,
 | |
| 		    PL_NWCHARS, wcslen(error->plain_message), error->plain_message);
 | |
| 
 | |
|     PL_call_predicate(NULL, PL_Q_NODEBUG, pred, av);
 | |
| 
 | |
|     PL_discard_foreign_frame(fid);
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_xmlns(dtd_parser *p, dtd_symbol *ns, dtd_symbol *url)
 | |
| { parser_data *pd = p->closure;
 | |
| 
 | |
|   if ( pd->stopped )
 | |
|     return TRUE;
 | |
| 
 | |
|   if ( pd->on_xmlns )
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(3);
 | |
| 
 | |
|     if ( ns )
 | |
|       put_atom_wchars(av+0, ns->name);
 | |
|     else
 | |
|       PL_put_nil(av+0);
 | |
|     put_atom_wchars(av+1, url->name);
 | |
|     unify_parser(av+2, p);
 | |
| 
 | |
|     call_prolog(pd, pd->on_xmlns, av);
 | |
|     end_frame(fid, pd->exception);
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_pi(dtd_parser *p, const ichar *pi)
 | |
| { parser_data *pd = p->closure;
 | |
| 
 | |
|   if ( pd->stopped )
 | |
|     return TRUE;
 | |
| 
 | |
|   if ( pd->on_pi )
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(2);
 | |
| 
 | |
|     put_atom_wchars(av+0, pi);
 | |
|     unify_parser(av+1, p);
 | |
| 
 | |
|     call_prolog(pd, pd->on_pi, av);
 | |
|     end_frame(fid, pd->exception);
 | |
|   }
 | |
| 
 | |
|   if ( pd->tail )
 | |
|   { term_t h = PL_new_term_ref();
 | |
| 
 | |
|     if ( !PL_unify_list(pd->tail, h, pd->tail) )
 | |
|       return FALSE;
 | |
| 
 | |
|     PL_unify_term(h,
 | |
| 		  PL_FUNCTOR, FUNCTOR_pi1,
 | |
| 		  PL_NWCHARS, wcslen(pi), pi);
 | |
| 			 
 | |
|     PL_reset_term_refs(h);
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| on_decl(dtd_parser *p, const ichar *decl)
 | |
| { parser_data *pd = p->closure;
 | |
| 
 | |
|   if ( pd->stopped )
 | |
|     return TRUE;
 | |
| 
 | |
|   if ( pd->on_decl )
 | |
|   { fid_t fid = PL_open_foreign_frame();
 | |
|     term_t av = PL_new_term_refs(2);
 | |
| 
 | |
|     put_atom_wchars(av+0, decl);
 | |
|     unify_parser(av+1, p);
 | |
| 
 | |
|     call_prolog(pd, pd->on_decl, av);
 | |
|     end_frame(fid, pd->exception);
 | |
|   }
 | |
| 
 | |
|   if ( pd->stopat == SA_DECL )
 | |
|     pd->stopped = TRUE;
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| write_parser(void *h, char *buf, int len)
 | |
| { parser_data *pd = h;
 | |
|   unsigned char *s = (unsigned char *)buf;
 | |
|   unsigned char *e = s+len;
 | |
| 
 | |
|   if ( !pd->parser || pd->parser->magic != SGML_PARSER_MAGIC )
 | |
|   { errno = EINVAL;
 | |
|     return -1;
 | |
|   }
 | |
|   
 | |
|   if ( (pd->errors > pd->max_errors && pd->max_errors >= 0) || pd->stopped )
 | |
|   { errno = EIO;
 | |
|     return -1;
 | |
|   }
 | |
| 
 | |
|   for(; s<e; s++)
 | |
|     putchar_dtd_parser(pd->parser, *s);
 | |
| 
 | |
|   return len;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| close_parser(void *h)
 | |
| { parser_data *pd = h;
 | |
|   dtd_parser *p;
 | |
| 
 | |
|   if ( !(p=pd->parser) || p->magic != SGML_PARSER_MAGIC )
 | |
|   { errno = EINVAL;
 | |
|     return -1;
 | |
|   }
 | |
| 
 | |
|   if ( pd->tail )
 | |
|     PL_unify_nil(pd->tail);
 | |
| 
 | |
|   if ( p->dmode == DM_DTD )
 | |
|     p->dtd->implicit = FALSE;		/* assume we loaded a DTD */
 | |
| 
 | |
|   if ( pd->free_on_close )
 | |
|     free_dtd_parser(p);
 | |
|   else
 | |
|     p->closure = NULL;
 | |
| 
 | |
|   sgml_free(pd);
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static IOFUNCTIONS sgml_stream_functions =
 | |
| { (Sread_function)  NULL,
 | |
|   (Swrite_function) write_parser,
 | |
|   (Sseek_function)  NULL,
 | |
|   (Sclose_function) close_parser,
 | |
| 		    NULL
 | |
| };
 | |
| 
 | |
| 
 | |
| static parser_data *
 | |
| new_parser_data(dtd_parser *p)
 | |
| { parser_data *pd;
 | |
| 
 | |
|   pd = sgml_calloc(1, sizeof(*pd));
 | |
|   pd->magic = PD_MAGIC;
 | |
|   pd->parser = p;
 | |
|   pd->max_errors = MAX_ERRORS;
 | |
|   pd->max_warnings = MAX_WARNINGS;
 | |
|   pd->error_mode = EM_PRINT;
 | |
|   pd->exception = FALSE;
 | |
|   p->closure = pd;
 | |
|   
 | |
|   return pd;
 | |
| }
 | |
| 
 | |
| 
 | |
| static foreign_t
 | |
| pl_open_dtd(term_t ref, term_t options, term_t stream)
 | |
| { dtd *dtd;
 | |
|   dtd_parser *p;
 | |
|   parser_data *pd;
 | |
|   IOSTREAM *s;
 | |
|   term_t tail = PL_copy_term_ref(options);
 | |
|   term_t option = PL_new_term_ref();
 | |
| 
 | |
|   if ( !get_dtd(ref, &dtd) )
 | |
|     return FALSE;
 | |
|   p = new_dtd_parser(dtd);
 | |
|   p->dmode = DM_DTD;
 | |
|   pd = new_parser_data(p);
 | |
|   pd->free_on_close = TRUE;
 | |
| 
 | |
|   while( PL_get_list(tail, option, tail) )
 | |
|   { if ( PL_is_functor(option, FUNCTOR_dialect1) )
 | |
|     { term_t a = PL_new_term_ref();
 | |
|       char *s;
 | |
|   
 | |
|       PL_get_arg(1, option, a);
 | |
|       if ( !PL_get_atom_chars(a, &s) )
 | |
| 	return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
|   
 | |
|       if ( streq(s, "xml") )
 | |
| 	set_dialect_dtd(dtd, DL_XML);
 | |
|       else if ( streq(s, "xmlns") )
 | |
| 	set_dialect_dtd(dtd, DL_XMLNS);
 | |
|       else if ( streq(s, "sgml") )
 | |
| 	set_dialect_dtd(dtd, DL_SGML);
 | |
|       else
 | |
| 	return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", a);
 | |
|     } else
 | |
|       return sgml2pl_error(ERR_DOMAIN, "dtd_option", option);
 | |
|   }
 | |
|   if ( !PL_get_nil(tail) )
 | |
|     return sgml2pl_error(ERR_TYPE, "list", options);
 | |
| 
 | |
|   s = Snew(pd, SIO_OUTPUT|SIO_FBUF, &sgml_stream_functions);
 | |
| 
 | |
|   if ( !PL_open_stream(stream, s) )
 | |
|     return FALSE;
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| set_callback_predicates(parser_data *pd, term_t option)
 | |
| { term_t a = PL_new_term_ref();
 | |
|   char *fname;
 | |
|   atom_t pname;
 | |
|   predicate_t *pp = NULL;		/* keep compiler happy */
 | |
|   int arity;
 | |
|   module_t m = NULL;
 | |
| 
 | |
|   PL_get_arg(2, option, a);
 | |
|   PL_strip_module(a, &m, a);
 | |
|   if ( !PL_get_atom(a, &pname) )
 | |
|     return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
|   PL_get_arg(1, option, a);
 | |
|   if ( !PL_get_atom_chars(a, &fname) )
 | |
|     return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
|   
 | |
|   if ( streq(fname, "begin") )
 | |
|   { pp = &pd->on_begin;			/* tag, attributes, parser */
 | |
|     arity = 3;
 | |
|   } else if ( streq(fname, "end") )
 | |
|   { pp = &pd->on_end;			/* tag, parser */
 | |
|     arity = 2;
 | |
|   } else if ( streq(fname, "cdata") )
 | |
|   { pp = &pd->on_cdata;			/* cdata, parser */
 | |
|     arity = 2;
 | |
|   } else if ( streq(fname, "entity") )
 | |
|   { pp = &pd->on_entity;		/* name, parser */
 | |
|     arity = 2;
 | |
|   } else if ( streq(fname, "pi") )
 | |
|   { pp = &pd->on_pi;			/* pi, parser */
 | |
|     arity = 2;
 | |
|   } else if ( streq(fname, "xmlns") )
 | |
|   { pp = &pd->on_xmlns;			/* ns, url, parser */
 | |
|     arity = 3;
 | |
|   } else if ( streq(fname, "urlns") )
 | |
|   { pp = &pd->on_urlns;			/* url, ns, parser */
 | |
|     arity = 3;
 | |
|   } else if ( streq(fname, "error") )
 | |
|   { pp = &pd->on_error;			/* severity, message, parser */
 | |
|     arity = 3;
 | |
|   } else if ( streq(fname, "decl") )
 | |
|   { pp = &pd->on_decl;			/* decl, parser */
 | |
|     arity = 2;
 | |
|   } else
 | |
|     return sgml2pl_error(ERR_DOMAIN, "sgml_callback", a);
 | |
| 
 | |
|   *pp = PL_pred(PL_new_functor(pname, arity), m);
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static foreign_t
 | |
| pl_sgml_parse(term_t parser, term_t options)
 | |
| { dtd_parser *p;
 | |
|   parser_data *pd;
 | |
|   parser_data *oldpd;
 | |
|   term_t head = PL_new_term_ref();
 | |
|   term_t tail = PL_copy_term_ref(options);
 | |
|   IOSTREAM *in = NULL;
 | |
|   int recursive;
 | |
|   int has_content_length = FALSE;
 | |
|   int64_t content_length = 0;		/* content_length(Len) */
 | |
|   int count = 0;
 | |
|   int rc = TRUE;
 | |
| 
 | |
|   if ( !get_parser(parser, &p) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( p->closure )			/* recursive call */
 | |
|   { recursive = TRUE;
 | |
| 
 | |
|     oldpd = p->closure;
 | |
|     if ( oldpd->magic != PD_MAGIC || oldpd->parser != p )
 | |
|       return sgml2pl_error(ERR_MISC, "sgml",
 | |
| 			   "Parser associated with illegal data");
 | |
|     
 | |
|     pd = sgml_calloc(1, sizeof(*pd));
 | |
|     *pd = *oldpd;
 | |
|     p->closure = pd;
 | |
| 
 | |
|     in = pd->source;
 | |
|   } else
 | |
|   { recursive = FALSE;
 | |
|     oldpd = NULL;			/* keep compiler happy */
 | |
| 
 | |
|     set_mode_dtd_parser(p, DM_DATA);
 | |
| 
 | |
|     p->on_begin_element = on_begin;
 | |
|     p->on_end_element   = on_end;
 | |
|     p->on_entity	= on_entity;
 | |
|     p->on_pi		= on_pi;
 | |
|     p->on_data          = on_cdata;
 | |
|     p->on_error	        = on_error;
 | |
|     p->on_xmlns		= on_xmlns;
 | |
|     p->on_decl		= on_decl;
 | |
|   
 | |
|     pd = new_parser_data(p);
 | |
|   }
 | |
| 
 | |
|   while ( PL_get_list(tail, head, tail) )
 | |
|     { if ( PL_is_functor(head, FUNCTOR_document1) )
 | |
|     { pd->list  = PL_new_term_ref();
 | |
|       PL_get_arg(1, head, pd->list);
 | |
|       pd->tail  = PL_copy_term_ref(pd->list);
 | |
|       pd->stack = NULL;
 | |
|     } else if ( PL_is_functor(head, FUNCTOR_source1) )
 | |
|     { term_t a = PL_new_term_ref();
 | |
| 
 | |
|       PL_get_arg(1, head, a);
 | |
|       if ( !PL_get_stream_handle(a, &in) )
 | |
| 	return FALSE;
 | |
|     } else if ( PL_is_functor(head, FUNCTOR_content_length1) )
 | |
|     { term_t a = PL_new_term_ref();
 | |
| 
 | |
|       PL_get_arg(1, head, a);
 | |
|       if ( !PL_get_int64(a, &content_length) )
 | |
| 	return sgml2pl_error(ERR_TYPE, "integer", a);
 | |
|       has_content_length = TRUE;
 | |
|     } else if ( PL_is_functor(head, FUNCTOR_call2) )
 | |
|     { if ( !set_callback_predicates(pd, head) )
 | |
| 	return FALSE;
 | |
|     } else if ( PL_is_functor(head, FUNCTOR_parse1) )
 | |
|     { term_t a = PL_new_term_ref();
 | |
|       char *s;
 | |
| 
 | |
|       PL_get_arg(1, head, a);
 | |
|       if ( !PL_get_atom_chars(a, &s) )
 | |
| 	return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
|       if ( streq(s, "element") )
 | |
| 	pd->stopat = SA_ELEMENT;
 | |
|       else if ( streq(s, "content") )
 | |
| 	pd->stopat = SA_CONTENT;
 | |
|       else if ( streq(s, "file") )
 | |
| 	pd->stopat = SA_FILE;
 | |
|       else if ( streq(s, "input") )
 | |
| 	pd->stopat = SA_INPUT;
 | |
|       else if ( streq(s, "declaration") )
 | |
| 	pd->stopat = SA_DECL;
 | |
|       else
 | |
| 	return sgml2pl_error(ERR_DOMAIN, "parse", a);
 | |
|     } else if ( PL_is_functor(head, FUNCTOR_max_errors1) )
 | |
|     { term_t a = PL_new_term_ref();
 | |
| 
 | |
|       PL_get_arg(1, head, a);
 | |
|       if ( !PL_get_integer(a, &pd->max_errors) )
 | |
| 	return sgml2pl_error(ERR_TYPE, "integer", a);
 | |
|     } else if ( PL_is_functor(head, FUNCTOR_syntax_errors1) )
 | |
|     { term_t a = PL_new_term_ref();
 | |
|       char *s;
 | |
| 
 | |
|       PL_get_arg(1, head, a);
 | |
|       if ( !PL_get_atom_chars(a, &s) )
 | |
| 	return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
| 
 | |
|       if ( streq(s, "quiet") )
 | |
| 	pd->error_mode = EM_QUIET;
 | |
|       else if ( streq(s, "print") )
 | |
| 	pd->error_mode = EM_PRINT;
 | |
|       else if ( streq(s, "style") )
 | |
| 	pd->error_mode = EM_STYLE;
 | |
|       else
 | |
| 	return sgml2pl_error(ERR_DOMAIN, "syntax_error", a);
 | |
|     } else if ( PL_is_functor(head, FUNCTOR_positions1) )
 | |
|     { term_t a = PL_new_term_ref();
 | |
|       char *s;
 | |
| 
 | |
|       PL_get_arg(1, head, a);
 | |
|       if ( !PL_get_atom_chars(a, &s) )
 | |
| 	return sgml2pl_error(ERR_TYPE, "atom", a);
 | |
| 
 | |
|       if ( streq(s, "true") )
 | |
| 	pd->positions = TRUE;
 | |
|       else if ( streq(s, "false") )
 | |
| 	pd->positions = FALSE;
 | |
|       else
 | |
| 	return sgml2pl_error(ERR_DOMAIN, "positions", a);
 | |
|     } else
 | |
|       return sgml2pl_error(ERR_DOMAIN, "sgml_option", head);
 | |
|   }
 | |
|   if ( !PL_get_nil(tail) )
 | |
|     return sgml2pl_error(ERR_TYPE, "list", tail);
 | |
| 
 | |
| 					/* Parsing input from a stream */
 | |
| #define CHECKERROR \
 | |
|     { if ( pd->exception ) \
 | |
|       { rc = FALSE; \
 | |
| 	goto out; \
 | |
|       } \
 | |
|       if ( pd->errors > pd->max_errors && pd->max_errors >= 0 ) \
 | |
|       { rc = sgml2pl_error(ERR_LIMIT, "max_errors", (long)pd->max_errors); \
 | |
| 	goto out; \
 | |
|       } \
 | |
|     }
 | |
| 
 | |
|   if ( pd->stopat == SA_CONTENT && p->empty_element )
 | |
|     goto out;
 | |
| 
 | |
|   if ( in )
 | |
|   { int eof = FALSE;
 | |
| 
 | |
|     if ( in->encoding == ENC_OCTET )
 | |
|       p->encoded = TRUE;		/* parser must decode */
 | |
|     else
 | |
|       p->encoded = FALSE;		/* already decoded */
 | |
| 
 | |
|     if ( !recursive )
 | |
|     { pd->source = in;
 | |
|       begin_document_dtd_parser(p);
 | |
|     }
 | |
| 
 | |
|     while(!eof)
 | |
|     { int c, ateof;
 | |
| 
 | |
|       if ( (++count % 8192) == 0 && PL_handle_signals() < 0 )
 | |
|       { rc = FALSE;
 | |
| 	goto out;
 | |
|       }
 | |
| 
 | |
|       if ( has_content_length )
 | |
|       { if ( content_length <= 0 )
 | |
| 	  c = EOF;
 | |
| 	else
 | |
| 	  c = Sgetcode(in);
 | |
| 	ateof = (--content_length <= 0);
 | |
|       } else
 | |
|       { c = Sgetcode(in);
 | |
| 	ateof = Sfeof(in);
 | |
|       }
 | |
| 
 | |
|       if ( ateof )
 | |
|       { eof = TRUE;
 | |
| 	if ( c == LF )			/* file ends in LF */
 | |
| 	  c = CR;
 | |
| 	else if ( c != CR )		/* file ends in normal char */
 | |
| 	{ if ( has_content_length && in->position )
 | |
| 	  { int64_t bc0 = in->position->byteno;
 | |
| 	    putchar_dtd_parser(p, c);
 | |
| 	    content_length -= in->position->byteno-bc0;
 | |
| 	  } else
 | |
| 	  { putchar_dtd_parser(p, c);
 | |
| 	  }
 | |
| 	  CHECKERROR;
 | |
| 	  if ( pd->stopped )
 | |
| 	    goto stopped;
 | |
| 	  c = CR;
 | |
| 	}
 | |
|       }
 | |
| 
 | |
|       if ( has_content_length && in->position )
 | |
|       { int64_t bc0 = in->position->byteno;
 | |
| 	putchar_dtd_parser(p, c);
 | |
| 	content_length -= in->position->byteno-bc0;
 | |
|       } else
 | |
|       { putchar_dtd_parser(p, c);
 | |
|       }
 | |
|       CHECKERROR;
 | |
|       if ( pd->stopped )
 | |
|       { stopped:
 | |
| 	pd->stopped = FALSE;
 | |
| 	if ( pd->stopat != SA_CONTENT )
 | |
| 	  reset_document_dtd_parser(p);	/* ensure a clean start */
 | |
| 	goto out;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     if ( !recursive && pd->stopat != SA_INPUT )
 | |
|       end_document_dtd_parser(p);
 | |
|     CHECKERROR;
 | |
| 
 | |
|   out:
 | |
|     reset_url_cache();
 | |
|     if ( pd->tail )
 | |
|       PL_unify_nil(pd->tail);
 | |
| 
 | |
|     if ( recursive )
 | |
|     { p->closure = oldpd;
 | |
|     } else
 | |
|     { p->closure = NULL;
 | |
|     }
 | |
| 
 | |
|     pd->magic = 0;			/* invalidate */
 | |
|     sgml_free(pd);
 | |
| 
 | |
|     return rc;
 | |
|   }
 | |
| 
 | |
|   reset_url_cache();
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	  DTD PROPERTIES	*
 | |
| 		 *******************************/
 | |
| 
 | |
| static void	put_model(term_t t, dtd_model *m);
 | |
| 
 | |
| /* doctype(DocType) */
 | |
| 
 | |
| static int
 | |
| dtd_prop_doctype(dtd *dtd, term_t prop)
 | |
| { if ( dtd->doctype )
 | |
|     return PL_unify_wchars(prop, PL_ATOM, ENDSNUL, dtd->doctype);
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* elements(ListOfElements) */
 | |
| 
 | |
| static void
 | |
| make_model_list(term_t t, dtd_model *m, functor_t f)
 | |
| { if ( !m->next )
 | |
|   { put_model(t, m);
 | |
|   } else
 | |
|   { term_t av = PL_new_term_refs(2);
 | |
| 
 | |
|     put_model(av+0, m);
 | |
|     make_model_list(av+1, m->next, f);
 | |
|     PL_cons_functor_v(t, f, av);
 | |
|     PL_reset_term_refs(av);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| put_model(term_t t, dtd_model *m)
 | |
| { functor_t f;
 | |
| 
 | |
|   switch(m->type)
 | |
|   { case MT_PCDATA:
 | |
|       PL_put_atom(t, ATOM_pcdata);
 | |
|       goto card;
 | |
|     case MT_ELEMENT:
 | |
|       put_atom_wchars(t, m->content.element->name->name);
 | |
|       goto card;
 | |
|     case MT_AND:
 | |
|       f = FUNCTOR_and2;
 | |
|       break;
 | |
|     case MT_SEQ:
 | |
|       f = FUNCTOR_comma2;
 | |
|       break;
 | |
|     case MT_OR:
 | |
|       f = FUNCTOR_bar2;
 | |
|       break;
 | |
|     case MT_UNDEF:
 | |
|     default:
 | |
|       assert(0);
 | |
|       f = 0;
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   if ( !m->content.group )
 | |
|     PL_put_atom(t, ATOM_empty);
 | |
|   else
 | |
|     make_model_list(t, m->content.group, f);
 | |
|   
 | |
| card:
 | |
|   switch(m->cardinality)
 | |
|   { case MC_ONE:
 | |
|       break;
 | |
|     case MC_OPT:
 | |
|       PL_cons_functor_v(t, FUNCTOR_opt1, t);
 | |
|       break;
 | |
|     case MC_REP:
 | |
|       PL_cons_functor_v(t, FUNCTOR_rep1, t);
 | |
|       break;
 | |
|     case MC_PLUS:
 | |
|       PL_cons_functor_v(t, FUNCTOR_plus1, t);
 | |
|       break;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| put_content(term_t t, dtd_edef *def)
 | |
| { switch(def->type)
 | |
|   { case C_EMPTY:
 | |
|       PL_put_atom(t, ATOM_empty);
 | |
|       return;
 | |
|     case C_CDATA:
 | |
|       PL_put_atom(t, ATOM_cdata);
 | |
|       return;
 | |
|     case C_RCDATA:
 | |
|       PL_put_atom(t, ATOM_rcdata);
 | |
|       return;
 | |
|     case C_ANY:
 | |
|       PL_put_atom(t, ATOM_any);
 | |
|       return;
 | |
|     default:
 | |
|       if ( def->content )
 | |
| 	put_model(t, def->content);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| dtd_prop_elements(dtd *dtd, term_t prop)
 | |
| { term_t tail = PL_copy_term_ref(prop);
 | |
|   term_t head = PL_new_term_ref();
 | |
|   term_t et   = PL_new_term_ref();
 | |
|   dtd_element *e;
 | |
|   
 | |
|   for( e=dtd->elements; e; e=e->next )
 | |
|   { put_atom_wchars(et, e->name->name);
 | |
|     if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	 !PL_unify(head, et) )
 | |
|       return FALSE;
 | |
|   }
 | |
| 
 | |
|   return PL_unify_nil(tail);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| get_element(dtd *dtd, term_t name, dtd_element **elem)
 | |
| { ichar *s;
 | |
|   dtd_element *e;
 | |
|   dtd_symbol *id;
 | |
| 
 | |
|   if ( !PL_get_wchars(name, NULL, &s, CVT_ATOM|CVT_EXCEPTION) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( !(id=dtd_find_symbol(dtd, s)) ||
 | |
|        !(e=id->element) )
 | |
|     return FALSE;
 | |
| 
 | |
|   *elem = e;
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| static int
 | |
| dtd_prop_element(dtd *dtd, term_t name, term_t omit, term_t content)
 | |
| { dtd_element *e;
 | |
|   dtd_edef *def;
 | |
|   term_t model = PL_new_term_ref();
 | |
| 
 | |
|   if ( !get_element(dtd, name, &e) || !(def=e->structure) )
 | |
|     return FALSE;
 | |
|   
 | |
|   if ( !PL_unify_term(omit, PL_FUNCTOR, FUNCTOR_omit2,
 | |
| 		        PL_ATOM, def->omit_open ?  ATOM_true : ATOM_false,
 | |
| 		        PL_ATOM, def->omit_close ? ATOM_true : ATOM_false) )
 | |
|     return FALSE;
 | |
| 
 | |
|   put_content(model, def);
 | |
|   return PL_unify(content, model);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| dtd_prop_attributes(dtd *dtd, term_t ename, term_t atts)
 | |
| { dtd_element *e;
 | |
|   term_t tail = PL_copy_term_ref(atts);
 | |
|   term_t head = PL_new_term_ref();
 | |
|   term_t elem = PL_new_term_ref();
 | |
|   dtd_attr_list *al;
 | |
| 
 | |
|   if ( !get_element(dtd, ename, &e) )
 | |
|     return FALSE;
 | |
|   
 | |
|   for(al=e->attributes; al; al=al->next)
 | |
|   { put_atom_wchars(elem, al->attribute->name->name);
 | |
| 
 | |
|     if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	 !PL_unify(head, elem) )
 | |
|       return FALSE;
 | |
|   }
 | |
| 
 | |
|   return PL_unify_nil(tail);
 | |
| }
 | |
| 
 | |
| 
 | |
| typedef struct _plattrdef
 | |
| { attrtype	type;			/* AT_* */
 | |
|   const char *	name;			/* name */
 | |
|   int	       islist;			/* list-type */
 | |
|   atom_t	atom;			/* name as atom */
 | |
| } plattrdef;
 | |
| 
 | |
| static plattrdef plattrs[] = 
 | |
| {
 | |
|   { AT_CDATA,	 "cdata",    FALSE },
 | |
|   { AT_ENTITY,	 "entity",   FALSE },
 | |
|   { AT_ENTITIES, "entity",   TRUE },
 | |
|   { AT_ID,	 "id",	     FALSE },
 | |
|   { AT_IDREF,	 "idref",    FALSE },
 | |
|   { AT_IDREFS,	 "idref",    TRUE },
 | |
|   { AT_NAME,	 "name",     FALSE },
 | |
|   { AT_NAMES,	 "name",     TRUE },
 | |
| /*{ AT_NAMEOF,	 "nameof",   FALSE },*/
 | |
|   { AT_NMTOKEN,	 "nmtoken",  FALSE },
 | |
|   { AT_NMTOKENS, "nmtoken",  TRUE },
 | |
|   { AT_NUMBER,	 "number",   FALSE },
 | |
|   { AT_NUMBERS,	 "number",   TRUE },
 | |
|   { AT_NUTOKEN,	 "nutoken",  FALSE },
 | |
|   { AT_NUTOKENS, "nutoken",  TRUE },
 | |
|   { AT_NOTATION, "notation", FALSE },
 | |
| 
 | |
|   { AT_CDATA,    NULL,       FALSE }
 | |
| };
 | |
| 
 | |
| 
 | |
| static int
 | |
| unify_attribute_type(term_t type, dtd_attr *a)
 | |
| { plattrdef *ad = plattrs;
 | |
| 
 | |
|   for( ; ad->name; ad++ )
 | |
|   { if ( ad->type == a->type )
 | |
|     { if ( !ad->atom )
 | |
| 	ad->atom = PL_new_atom(ad->name);
 | |
| 
 | |
|       if ( ad->islist )
 | |
| 	return PL_unify_term(type, PL_FUNCTOR, FUNCTOR_list1,
 | |
| 			     PL_ATOM, ad->atom);
 | |
|       else
 | |
| 	return PL_unify_atom(type, ad->atom);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
 | |
|   { dtd_name_list *nl;
 | |
|     term_t tail = PL_new_term_ref();
 | |
|     term_t head = PL_new_term_ref();
 | |
|     term_t elem = PL_new_term_ref();
 | |
| 
 | |
|     if ( !PL_unify_functor(type,
 | |
| 			   a->type == AT_NAMEOF ?
 | |
| 			     FUNCTOR_nameof1 :
 | |
| 			     FUNCTOR_notation1) )
 | |
|       return FALSE;
 | |
|     PL_get_arg(1, type, tail);
 | |
| 
 | |
|     for(nl = a->typeex.nameof; nl; nl = nl->next)
 | |
|     { put_atom_wchars(elem, nl->value->name);
 | |
|       
 | |
|       if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	   !PL_unify(head, elem) )
 | |
| 	return FALSE;
 | |
|     }
 | |
|     return PL_unify_nil(tail);
 | |
|   }
 | |
| 
 | |
|   assert(0);
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static int
 | |
| unify_attribute_default(term_t defval, dtd_attr *a)
 | |
| { int v;
 | |
| 
 | |
|   switch(a->def)
 | |
|   { case AT_REQUIRED:
 | |
|       return PL_unify_atom_chars(defval, "required");
 | |
|     case AT_CURRENT:
 | |
|       return PL_unify_atom_chars(defval, "current");
 | |
|     case AT_CONREF:
 | |
|       return PL_unify_atom_chars(defval, "conref");
 | |
|     case AT_IMPLIED:
 | |
|       return PL_unify_atom_chars(defval, "implied");
 | |
|     case AT_DEFAULT:
 | |
|       v = PL_unify_functor(defval, FUNCTOR_default1);
 | |
|       goto common;
 | |
|     case AT_FIXED:
 | |
|       v = PL_unify_functor(defval, FUNCTOR_fixed1);
 | |
|     common:
 | |
|       if ( v )
 | |
|       { term_t tmp = PL_new_term_ref();
 | |
| 
 | |
| 	PL_get_arg(1, defval, tmp);
 | |
| 	switch( a->type )
 | |
| 	{ case AT_CDATA:
 | |
| 	    return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.cdata);
 | |
| 	  case AT_NAME:
 | |
| 	  case AT_NMTOKEN:
 | |
| 	  case AT_NAMEOF:
 | |
| 	  case AT_NOTATION:
 | |
| 	    return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.name->name);
 | |
| 	  case AT_NUMBER:
 | |
| 	    return PL_unify_integer(tmp, a->att_def.number);
 | |
| 	  default:
 | |
| 	    assert(0);
 | |
| 	}
 | |
|       } else
 | |
| 	return FALSE;
 | |
|     default:
 | |
|       assert(0);
 | |
|       return FALSE;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| dtd_prop_attribute(dtd *dtd, term_t ename, term_t aname,
 | |
| 		   term_t type, term_t def_value)
 | |
| { dtd_element *e;
 | |
|   ichar *achars;
 | |
|   dtd_symbol *asym;
 | |
|   dtd_attr_list *al;
 | |
| 
 | |
| 
 | |
|   if ( !get_element(dtd, ename, &e) )
 | |
|     return FALSE;
 | |
|   if ( !PL_get_wchars(aname, NULL, &achars, CVT_ATOM|CVT_EXCEPTION) )
 | |
|     return FALSE;
 | |
|   if ( !(asym=dtd_find_symbol(dtd, achars)) )
 | |
|     return FALSE;
 | |
| 
 | |
|   for(al=e->attributes; al; al=al->next)
 | |
|   { if ( al->attribute->name == asym )
 | |
|     { if ( unify_attribute_type(type, al->attribute) &&
 | |
| 	   unify_attribute_default(def_value, al->attribute) )
 | |
| 	return TRUE;
 | |
| 
 | |
|       return FALSE;
 | |
|     }
 | |
|   } 
 | |
|   
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| dtd_prop_entities(dtd *dtd, term_t list)
 | |
| { term_t tail = PL_copy_term_ref(list);
 | |
|   term_t head = PL_new_term_ref();
 | |
|   term_t et   = PL_new_term_ref();
 | |
|   dtd_entity *e;
 | |
|   
 | |
|   for( e=dtd->entities; e; e=e->next )
 | |
|   { put_atom_wchars(et, e->name->name);
 | |
|     if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	 !PL_unify(head, et) )
 | |
|       return FALSE;
 | |
|   }
 | |
| 
 | |
|   return PL_unify_nil(tail);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| dtd_prop_entity(dtd *dtd, term_t ename, term_t value)
 | |
| { ichar *s;
 | |
|   dtd_entity *e;
 | |
|   dtd_symbol *id;
 | |
| 
 | |
|   if ( !PL_get_wchars(ename, NULL, &s, CVT_ATOM|CVT_EXCEPTION) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( !(id=dtd_find_symbol(dtd, s)) ||
 | |
|        !(e=id->entity)  )
 | |
|     return FALSE;
 | |
| 
 | |
|   switch(e->type)
 | |
|   { case ET_SYSTEM:
 | |
|       return PL_unify_term(value, PL_FUNCTOR_CHARS, "system", 1,
 | |
| 			   PL_CHARS, e->exturl);
 | |
|     case ET_PUBLIC:
 | |
|       if ( e->exturl )
 | |
| 	return PL_unify_term(value, PL_FUNCTOR_CHARS, "public", 2,
 | |
| 			     PL_CHARS, e->extid,
 | |
| 			     PL_CHARS, e->exturl);
 | |
|       else
 | |
| 	return PL_unify_term(value, PL_FUNCTOR_CHARS, "public", 2,
 | |
| 			     PL_CHARS, e->extid,
 | |
| 			     PL_VARIABLE);
 | |
| 
 | |
|     case ET_LITERAL:
 | |
|     default:
 | |
|       if ( e->value )
 | |
|       { const char *wrap;
 | |
| 
 | |
| 	switch(e->content)
 | |
| 	{ case EC_SGML:     wrap = "sgml"; break;
 | |
| 	  case EC_STARTTAG: wrap = "start_tag"; break;
 | |
| 	  case EC_ENDTAG:   wrap = "end_tag"; break;
 | |
| 	  case EC_CDATA:    wrap = NULL; break;
 | |
| 	  case EC_SDATA:    wrap = "sdata"; break;
 | |
| 	  case EC_NDATA:    wrap = "ndata"; break;
 | |
| 	  case EC_PI:       wrap = "pi"; break;
 | |
| 	  default:	    wrap = NULL; assert(0);
 | |
| 	}
 | |
| 
 | |
| 	if ( wrap )
 | |
| 	  return PL_unify_term(value, PL_FUNCTOR_CHARS, wrap, 1,
 | |
| 			       PL_CHARS, e->value);
 | |
| 	else
 | |
| 	  return PL_unify_wchars(value, PL_ATOM, wcslen(e->value), e->value);
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   assert(0);
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| dtd_prop_notations(dtd *dtd, term_t list)
 | |
| { dtd_notation *n;
 | |
|   term_t tail = PL_copy_term_ref(list);
 | |
|   term_t head = PL_new_term_ref();
 | |
| 
 | |
|   for(n=dtd->notations; n; n=n->next)
 | |
|   { if ( PL_unify_list(tail, head, tail) &&
 | |
| 	 PL_unify_wchars(head, PL_ATOM, wcslen(n->name->name), n->name->name) )
 | |
|       continue;
 | |
|       
 | |
|     return FALSE;
 | |
|   }
 | |
| 
 | |
|   return PL_unify_nil(tail);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| dtd_prop_notation(dtd *dtd, term_t nname, term_t desc)
 | |
| { char *s;
 | |
|   dtd_symbol *id;
 | |
| 
 | |
|   if ( !PL_get_atom_chars(nname, &s) )
 | |
|     return sgml2pl_error(ERR_TYPE, "atom", nname);
 | |
| 
 | |
|   if ( (id=dtd_find_symbol(dtd, (ichar *)s)) )
 | |
|   { dtd_notation *n;
 | |
| 
 | |
|     for(n=dtd->notations; n; n=n->next)
 | |
|     { if ( n->name == id )
 | |
|       { term_t tail = PL_copy_term_ref(desc);
 | |
| 	term_t head = PL_new_term_ref();
 | |
| 
 | |
| 	if ( n->system )
 | |
| 	{ if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	       !PL_unify_term(head,
 | |
| 			      PL_FUNCTOR_CHARS, "system", 1,
 | |
| 			        PL_CHARS, n->system) )
 | |
| 	    return FALSE;
 | |
| 	}
 | |
| 	if ( n->public )
 | |
| 	{ if ( !PL_unify_list(tail, head, tail) ||
 | |
| 	       !PL_unify_term(head,
 | |
| 			      PL_FUNCTOR_CHARS, "public", 1,
 | |
| 			        PL_CHARS, n->public) )
 | |
| 	    return FALSE;
 | |
| 	}
 | |
| 
 | |
| 	return PL_unify_nil(tail);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| typedef struct _prop
 | |
| { int (*func)();
 | |
|   const char *name;
 | |
|   int arity;
 | |
|   functor_t functor;
 | |
| } prop;
 | |
| 
 | |
| 
 | |
| static prop dtd_props[] =
 | |
| { { dtd_prop_doctype,    "doctype",    1 },
 | |
|   { dtd_prop_elements,	 "elements",   1 },
 | |
|   { dtd_prop_element,	 "element",    3 },
 | |
|   { dtd_prop_attributes, "attributes", 2, },
 | |
|   { dtd_prop_attribute,	 "attribute",  4, },
 | |
|   { dtd_prop_entities,	 "entities",   1, },
 | |
|   { dtd_prop_entity,	 "entity",     2, },
 | |
|   { dtd_prop_notations,	 "notations",  1, },
 | |
|   { dtd_prop_notation,	 "notation",   2, },
 | |
|   { NULL }
 | |
| };
 | |
| 
 | |
| 
 | |
| static void
 | |
| initprops(void)
 | |
| { static int done = FALSE;
 | |
| 
 | |
|   if ( !done )
 | |
|   { prop *p;
 | |
| 
 | |
|     done = TRUE;
 | |
|     for(p=dtd_props; p->func; p++)
 | |
|       p->functor = PL_new_functor(PL_new_atom(p->name), p->arity);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| dtd_property(DTD, doctype(DocType))
 | |
| dtd_property(DTD, elements(ListOfNames))
 | |
| dtd_property(DTD, element(Name, Omit, Model))
 | |
| dtd_property(DTD, attributes(ElementName, ListOfAttributes)),
 | |
| dtd_property(DTD, attribute(ElementName, AttributeName, Type, Default))
 | |
| dtd_property(DTD, entities(ListOfEntityNames))
 | |
| dtd_property(DTD, entity(Name, Type))
 | |
| dtd_property(DTD, notations(ListOfNotationNames)
 | |
| dtd_property(DTD, notation(Name, File))
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| static foreign_t
 | |
| pl_dtd_property(term_t ref, term_t property)
 | |
| { dtd *dtd;
 | |
|   const prop *p;
 | |
| 
 | |
|   initprops();
 | |
| 
 | |
|   if ( !get_dtd(ref, &dtd) )
 | |
|     return FALSE;
 | |
| 
 | |
|   for(p=dtd_props; p->func; p++)
 | |
|   { if ( PL_is_functor(property, p->functor) )
 | |
|     { term_t a = PL_new_term_refs(p->arity);
 | |
|       int i;
 | |
| 
 | |
|       for(i=0; i<p->arity; i++)
 | |
| 	PL_get_arg(i+1, property, a+i);
 | |
| 
 | |
|       switch(p->arity)
 | |
|       { case 1:
 | |
| 	  return (*p->func)(dtd, a+0);
 | |
| 	case 2:
 | |
| 	  return (*p->func)(dtd, a+0, a+1);
 | |
| 	case 3:
 | |
| 	  return (*p->func)(dtd, a+0, a+1, a+2);
 | |
| 	case 4:
 | |
| 	  return (*p->func)(dtd, a+0, a+1, a+2, a+3);
 | |
| 	default:
 | |
| 	  assert(0);
 | |
| 	  return FALSE;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return sgml2pl_error(ERR_DOMAIN, "dtd_property", property);
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	     CATALOG		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static foreign_t
 | |
| pl_sgml_register_catalog_file(term_t file, term_t where)
 | |
| { wchar_t *fn;
 | |
|   char *w;
 | |
|   catalog_location loc;
 | |
| 
 | |
|   if ( !PL_get_wchars(file, NULL, &fn, CVT_ATOM|CVT_EXCEPTION) )
 | |
|     return FALSE;
 | |
|   if ( !PL_get_atom_chars(where, &w) )
 | |
|     return sgml2pl_error(ERR_TYPE, "atom", where);
 | |
| 
 | |
|   if ( streq(w, "start") )
 | |
|     loc = CTL_START;
 | |
|   else if ( streq(w, "end") )
 | |
|     loc = CTL_END;
 | |
|   else
 | |
|     return sgml2pl_error(ERR_DOMAIN, "location", where);
 | |
|   
 | |
|   return register_catalog_file(fn, loc);
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      INSTALL		*
 | |
| 		 *******************************/
 | |
| 
 | |
| extern install_t install_xml_quote(void);
 | |
| #ifdef O_STATISTICS
 | |
| extern void sgml_statistics(void);
 | |
| #endif
 | |
| 
 | |
| install_t
 | |
| install(void)
 | |
| { initConstants();
 | |
| 
 | |
|   PL_register_foreign("new_dtd",	  2, pl_new_dtd,	  0);
 | |
|   PL_register_foreign("free_dtd",	  1, pl_free_dtd,	  0);
 | |
|   PL_register_foreign("new_sgml_parser",  2, pl_new_sgml_parser,  0);
 | |
|   PL_register_foreign("free_sgml_parser", 1, pl_free_sgml_parser, 0);
 | |
|   PL_register_foreign("set_sgml_parser",  2, pl_set_sgml_parser,  0);
 | |
|   PL_register_foreign("get_sgml_parser",  2, pl_get_sgml_parser,  0);
 | |
|   PL_register_foreign("open_dtd",         3, pl_open_dtd,	  0);
 | |
|   PL_register_foreign("sgml_parse",       2, pl_sgml_parse,
 | |
| 		      PL_FA_TRANSPARENT);
 | |
|   PL_register_foreign("_sgml_register_catalog_file", 2,
 | |
| 		      pl_sgml_register_catalog_file, 0);
 | |
|   PL_register_foreign("$dtd_property",	  2, pl_dtd_property,
 | |
| 		      0);
 | |
| 
 | |
|   install_xml_quote();
 | |
| #ifdef O_STATISTICS
 | |
|   atexit(sgml_statistics);
 | |
| #endif
 | |
| }
 | |
| 
 |