230 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			230 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        jan@swi.psy.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2002, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
 | |
| 
 | |
| #ifndef SGML_PARSER_H_INCLUDED
 | |
| #define SGML_PARSER_H_INCLUDED
 | |
| #include "util.h"
 | |
| #include <wchar.h>
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      CALL-BACK		*
 | |
| 		 *******************************/
 | |
| 
 | |
| 					/* sgml_attribute->flags */
 | |
#define SGML_AT_DEFAULT		0x1	/* bit in sgml_attribute.flags */
 | |
| 
 | |
| typedef struct _sgml_attribute
 | |
| { struct				/* so we can free members */
 | |
|   { wchar_t *textW;			/* UCS textual value */
 | |
|     long   number;			/* numeric value/length */
 | |
|   } value;
 | |
|   dtd_attr *definition;			/* DTD definition */
 | |
|   unsigned flags;			/* additional flags */
 | |
| } sgml_attribute;
 | |
| 
 | |
/* Pointer to the (later defined) parser structure; lets the call-back
   typedefs refer to the parser before struct _dtd_parser is complete.
*/
typedef struct _dtd_parser *dtd_parser_p;
 | |
| 
 | |
| typedef int (*sgml_begin_element_f)(dtd_parser_p parser,
 | |
| 				    dtd_element *e,
 | |
| 				    int argc,
 | |
| 				    sgml_attribute *argv);
 | |
| typedef int (*sgml_end_element_f)(dtd_parser_p parser,
 | |
| 				  dtd_element *e);
 | |
| typedef int (*sgml_data_f)(dtd_parser_p parser,
 | |
| 			   data_type type, int len, const wchar_t *text);
 | |
| typedef int (*sgml_wdata_f)(dtd_parser_p parser,
 | |
| 			   data_type type, int len, const wchar_t *text);
 | |
| typedef int (*sgml_entity_f)(dtd_parser_p parser,
 | |
| 			     dtd_entity *entity,
 | |
| 			     int chr);
 | |
| typedef int (*sgml_pi_f)(dtd_parser_p parser, const ichar *pi);
 | |
| typedef int (*sgml_error_f)(dtd_parser_p parser,
 | |
| 			    dtd_error *error);
 | |
| typedef int (*sgml_decl_f)(dtd_parser_p parser, const ichar *decl);
 | |
| #ifdef XMLNS
 | |
| typedef int (*xmlns_f)(dtd_parser_p parser,
 | |
| 		       dtd_symbol *ns, dtd_symbol *url);
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	 PARSER AND STATES	*
 | |
| 		 *******************************/
 | |
| 
 | |
#define SGML_PARSER_MAGIC	(0x834ab663)	/* value for dtd_parser.magic */
 | |
| 
 | |
/* States of the parser's scanner.  Used for dtd_parser.state and the
   various saved-state members.  Note that S_UTF8 only exists when UTF8
   is defined, which shifts the numeric value of all later states.
*/
typedef enum
{ S_PCDATA,				/* between declarations */
#ifdef UTF8
  S_UTF8,				/* Loading UTF-8 character */
#endif
  S_CDATA,				/* non-parsed data */
  S_RCDATA,				/* CDATA+entities */
  S_MSCDATA,				/* <![CDATA[...]]> */
  S_EMSCDATA1,				/* Seen ] in S_MSCDATA */
  S_EMSCDATA2,				/* Seen ]] in S_MSCDATA */
  S_ECDATA1,				/* Seen < in CDATA */
  S_ECDATA2,				/* Seen </ in CDATA */
  S_EMSC1,				/* Seen ] in marked section */
  S_EMSC2,				/* Seen ]] in marked section */
  S_PI,					/* Seen <? */
  S_PI2,				/* Seen <?...? */
  S_DECL0,				/* Seen < */
  S_DECL,				/* inside a declaration */
  S_MDECL0,				/* Seen <! */
  S_STRING,				/* inside a "string" or 'string' */
  S_DECLCMT0,				/* Seen <...- */
  S_DECLCMT,				/* Seen <...-- */
  S_DECLCMTE0,				/* Seen <...--..- */
  S_CMTO,				/* Seen <!- (name ends in letter O) */
  S_CMT1,				/* Seen <!-- */
  S_CMT,				/* Seen <!--X... */
  S_CMTE0,				/* Seen <!--...- */
  S_CMTE1,				/* Seen <!--...-- */
  S_GROUP,				/* inside [...] */
  S_PENT,				/* Seen % */
  S_ENT0,				/* Seen & */
  S_ENT,				/* Seen &(#|\w) */
  S_ENTCR				/* Seen &entity<CR> */
} dtdstate;
 | |
| 
 | |
| 
 | |
/* Kind of declaration */
typedef enum
{ DCL_DTD,				/* DTD Declaration */
  DCL_BEGIN,				/* begin-tag */
  DCL_END				/* end-tag */
} dcl_type;
 | |
| 
 | |
| 
 | |
/* Processing type of a marked section; see dtd_marked.type and
   dtd_parser.mark_state.
*/
typedef enum
{ MS_IGNORE,				/* ignore this data */
  MS_INCLUDE,				/* process normally */
  MS_CDATA,				/* pass literally */
  MS_RCDATA				/* replace entities */
} marktype;
 | |
| 
 | |
| 
 | |
/* Class of an event; stored in dtd_parser.event_class */
typedef enum
{ EV_EXPLICIT,				/* Explicit event */
  EV_OMITTED,				/* Omitted tag event */
  EV_SHORTTAG,				/* SHORTTAG event: <tag/value/ */
  EV_SHORTREF				/* SHORTREF event */
} sgml_event_class;
 | |
| 
 | |
| 
 | |
| typedef struct _dtd_marked
 | |
| { dtd_symbol *keyword;			/* keyword of the marked section */
 | |
|   marktype	type;			/* processing type */
 | |
|   struct _dtd_marked *parent;		/* parent marked section */
 | |
| } dtd_marked;
 | |
| 
 | |
| 
 | |
/* How to handle characters; stored in dtd_parser.dmode */
typedef enum
{ DM_DTD,				/* DTD mode: no data allowed (?) */
  DM_DATA				/* Environment has only elements */
} data_mode;
 | |
| 
 | |
| 
 | |
| typedef struct _sgml_environment
 | |
| { dtd_element *element;			/* element that opened the env */
 | |
|   struct _dtd_state *state;		/* State we are in */
 | |
| #ifdef XMLNS
 | |
|   struct _xmlns *xmlns;			/* XML namespace */
 | |
|   struct _xmlns *thisns;		/* Name space of element */
 | |
| #endif
 | |
| #ifdef XMLBASE
 | |
|   ichar *uri_base;			/* xml:base handling */
 | |
| #endif
 | |
|   dtd_space_mode space_mode;		/* How to handle blanks */
 | |
|   dtd_shortref *map;			/* SHORTREF map */
 | |
|   struct _sgml_environment *parent;	/* Parent environment */
 | |
|   int	wants_net;			/* I want a net */
 | |
|   int	saved_waiting_for_net;		/* saved value of waiting for net */
 | |
| } sgml_environment;
 | |
| 
 | |
| 					/* parser->flags */
 | |
| #define SGML_PARSER_NODEFS	 0x01	/* don't handle default atts */
 | |
| #define SGML_PARSER_QUALIFY_ATTS 0x02	/* qualify attributes in XML mode */
 | |
| 
 | |
| typedef struct _dtd_parser
 | |
| { unsigned long magic;			/* SGML_PARSER_MAGIC */
 | |
|   dtd     *dtd;				/* DTD we are building */
 | |
|   dtdstate state;			/* current state */
 | |
|   dtdstate cdata_state;			/* S_CDATA/S_RCDATA */
 | |
|   dtd_marked *marked;			/* marked section stack */
 | |
|   marktype mark_state;			/* processing mode */
 | |
|   dtd_element *empty_element;		/* empty of <tag/> seen */
 | |
|   sgml_environment *environments;	/* Open environments */
 | |
|   data_mode dmode;			/* How to handle characters */
 | |
|   int	   first;			/* Just seen <tag> */
 | |
|   int	   waiting_for_net;		/* waiting for / in <shorttag/mode/ */
 | |
|   icharbuf *buffer;			/* buffer for temp data */
 | |
|   ocharbuf *cdata;			/* collected character data */
 | |
|   int	   blank_cdata;			/* CDATA is all blank */
 | |
|   int	   cdata_must_be_empty;		/* Only shortrefs allowed here */
 | |
|   const ichar *etag;			/* name of end-tag in CDATA */
 | |
|   int	   etaglen;			/* length of end-tag */
 | |
|   int	   grouplevel;			/* [..] level in declaration */
 | |
|   int	   saved;			/* saved character */
 | |
|   dtdstate lit_saved_state;		/* literal saved-state */
 | |
|   int      encoded;			/* TRUE for binary input */
 | |
|   dtd_shortref *map;			/* SHORTREF map */
 | |
| #ifdef UTF8
 | |
|   int	   utf8_decode;			/* decode UTF-8 sequences? */
 | |
|   int      utf8_char;			/* building character */
 | |
|   int	   utf8_left;			/* bytes left */
 | |
|   dtdstate utf8_saved_state;		/* state from which we come */
 | |
| #endif
 | |
|   dtd_srcloc	location;		/* Current location */
 | |
|   dtd_srcloc	startloc;		/* Start of last markup */
 | |
|   dtd_srcloc	startcdata;		/* Start of last cdata */
 | |
|   dtd_symbol   *enforce_outer_element;	/* Outer element to look for */
 | |
|   sgml_event_class event_class;		/* EV_* */
 | |
| 
 | |
|   void *closure;			/* client handle */
 | |
|   sgml_begin_element_f	on_begin_element; /* start an element */
 | |
|   sgml_end_element_f	on_end_element;	/* end an element */
 | |
|   sgml_data_f		on_data;	/* process cdata */
 | |
|   sgml_entity_f		on_entity;	/* unprocessed entity */
 | |
|   sgml_pi_f		on_pi;		/* processing instruction */
 | |
|   sgml_error_f		on_error;	/* handle error */
 | |
|   sgml_decl_f		on_decl;	/* handle declarations */
 | |
| #ifdef XMLNS
 | |
|   xmlns_f		on_xmlns;	/* handle new namespace */
 | |
| #endif
 | |
|   unsigned		flags;		/* misc flags */
 | |
| } dtd_parser;
 | |
| 
 | |
| 
 | |
| #ifdef XMLNS
 | |
| #include "xmlns.h"
 | |
| #endif
 | |
| 
 | |
| extern int		gripe(dtd_error_id e, ...);
 | |
| 
 | |
#define SGML_SUB_DOCUMENT	0x1	/* flag bit; users not in this header */
 | |
| 
 | |
| #endif /*SGML_PARSER_H_INCLUDED*/
 | |
| 
 |