230 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			230 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
								 | 
							
								/*  $Id$
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Part of SWI-Prolog
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Author:        Jan Wielemaker
							 | 
						||
| 
								 | 
							
								    E-mail:        jan@swi.psy.uva.nl
							 | 
						||
| 
								 | 
							
								    WWW:           http://www.swi-prolog.org
							 | 
						||
| 
								 | 
							
								    Copyright (C): 1985-2002, University of Amsterdam
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    This library is free software; you can redistribute it and/or
							 | 
						||
| 
								 | 
							
								    modify it under the terms of the GNU Lesser General Public
							 | 
						||
| 
								 | 
							
								    License as published by the Free Software Foundation; either
							 | 
						||
| 
								 | 
							
								    version 2.1 of the License, or (at your option) any later version.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    This library is distributed in the hope that it will be useful,
							 | 
						||
| 
								 | 
							
								    but WITHOUT ANY WARRANTY; without even the implied warranty of
							 | 
						||
| 
								 | 
							
								    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
							 | 
						||
| 
								 | 
							
								    Lesser General Public License for more details.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    You should have received a copy of the GNU Lesser General Public
							 | 
						||
| 
								 | 
							
								    License along with this library; if not, write to the Free Software
							 | 
						||
| 
								 | 
							
								    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifndef SGML_PARSER_H_INCLUDED
							 | 
						||
| 
								 | 
							
								#define SGML_PARSER_H_INCLUDED
							 | 
						||
| 
								 | 
							
								#include "util.h"
							 | 
						||
| 
								 | 
							
								#include <wchar.h>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	      CALL-BACK		*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
													/* sgml_attribute->flags */
							 | 
						||
| 
								 | 
							
								#define SGML_AT_DEFAULT		0x1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef struct _sgml_attribute
							 | 
						||
| 
								 | 
							
								{ struct				/* so we can free members */
							 | 
						||
| 
								 | 
							
								  { wchar_t *textW;			/* UCS textual value */
							 | 
						||
| 
								 | 
							
								    long   number;			/* numeric value/length */
							 | 
						||
| 
								 | 
							
								  } value;
							 | 
						||
| 
								 | 
							
								  dtd_attr *definition;			/* DTD definition */
							 | 
						||
| 
								 | 
							
								  unsigned flags;			/* additional flags */
							 | 
						||
| 
								 | 
							
								} sgml_attribute;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Pointer to the parser structure.  Declared before struct
								   _dtd_parser itself so that the call-back types can take the
								   parser as their first argument. */
								typedef struct _dtd_parser *dtd_parser_p;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef int (*sgml_begin_element_f)(dtd_parser_p parser,
							 | 
						||
| 
								 | 
							
												    dtd_element *e,
							 | 
						||
| 
								 | 
							
												    int argc,
							 | 
						||
| 
								 | 
							
												    sgml_attribute *argv);
							 | 
						||
| 
								 | 
							
								typedef int (*sgml_end_element_f)(dtd_parser_p parser,
							 | 
						||
| 
								 | 
							
												  dtd_element *e);
							 | 
						||
| 
								 | 
							
								typedef int (*sgml_data_f)(dtd_parser_p parser,
							 | 
						||
| 
								 | 
							
											   data_type type, int len, const wchar_t *text);
							 | 
						||
| 
								 | 
							
								typedef int (*sgml_wdata_f)(dtd_parser_p parser,
							 | 
						||
| 
								 | 
							
											   data_type type, int len, const wchar_t *text);
							 | 
						||
| 
								 | 
							
								typedef int (*sgml_entity_f)(dtd_parser_p parser,
							 | 
						||
| 
								 | 
							
											     dtd_entity *entity,
							 | 
						||
| 
								 | 
							
											     int chr);
							 | 
						||
| 
								 | 
							
								typedef int (*sgml_pi_f)(dtd_parser_p parser, const ichar *pi);
							 | 
						||
| 
								 | 
							
								typedef int (*sgml_error_f)(dtd_parser_p parser,
							 | 
						||
| 
								 | 
							
											    dtd_error *error);
							 | 
						||
| 
								 | 
							
								typedef int (*sgml_decl_f)(dtd_parser_p parser, const ichar *decl);
							 | 
						||
| 
								 | 
							
								#ifdef XMLNS
							 | 
						||
| 
								 | 
							
								typedef int (*xmlns_f)(dtd_parser_p parser,
							 | 
						||
| 
								 | 
							
										       dtd_symbol *ns, dtd_symbol *url);
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	 PARSER AND STATES	*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Value named by the comment on the `magic' member of dtd_parser */
								#define SGML_PARSER_MAGIC	(0x834ab663)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Parser state, as stored in dtd_parser.state and in the
								   cdata_state, lit_saved_state and utf8_saved_state members.

								   Values are assigned implicitly.  S_UTF8 only exists when UTF8 is
								   defined, which shifts the numeric value of every later state, so
								   all code sharing these values must agree on UTF8.

								   Note that S_CMTO is spelled with the letter O, unlike the other
								   ...0 states, which end in the digit zero.
								*/
								typedef enum
								{ S_PCDATA,				/* between declarations */
								#ifdef UTF8
								  S_UTF8,				/* Loading UTF-8 character */
								#endif
								  S_CDATA,				/* non-parsed data */
								  S_RCDATA,				/* CDATA+entities */
								  S_MSCDATA,				/* <![CDATA[...]]> */
								  S_EMSCDATA1,				/* Seen ] in S_MSCDATA */
								  S_EMSCDATA2,				/* Seen ]] in S_MSCDATA */
								  S_ECDATA1,				/* Seen < in CDATA */
								  S_ECDATA2,				/* Seen </ in CDATA */
								  S_EMSC1,				/* Seen ] in marked section */
								  S_EMSC2,				/* Seen ]] in marked section */
								  S_PI,					/* Seen <? */
								  S_PI2,				/* Seen <?...? */
								  S_DECL0,				/* Seen < */
								  S_DECL,				/* inside a declaration */
								  S_MDECL0,				/* Seen <! */
								  S_STRING,				/* inside a "string" or 'string' */
								  S_DECLCMT0,				/* Seen <...- */
								  S_DECLCMT,				/* Seen <...-- */
								  S_DECLCMTE0,				/* Seen <...--..- */
								  S_CMTO,				/* Seen <!- */
								  S_CMT1,				/* Seen <!-- */
								  S_CMT,				/* Seen <!--X... */
								  S_CMTE0,				/* Seen <!--...- */
								  S_CMTE1,				/* Seen <!--...-- */
								  S_GROUP,				/* inside [...] */
								  S_PENT,				/* Seen % */
								  S_ENT0,				/* Seen & */
								  S_ENT,				/* Seen &(#|\w) */
								  S_ENTCR				/* Seen &entity<CR> */
								} dtdstate;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Kind of declaration: a DTD declaration, a begin-tag or an
								   end-tag.  Not referenced by any other declaration in this
								   header. */
								typedef enum
								{ DCL_DTD,				/* DTD Declaration */
								  DCL_BEGIN,				/* begin-tag */
								  DCL_END				/* end-tag */
								} dcl_type;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* How the content of a marked section is processed.  Used for
								   dtd_marked.type and for dtd_parser.mark_state. */
								typedef enum
								{ MS_IGNORE,				/* ignore this data */
								  MS_INCLUDE,				/* process normally */
								  MS_CDATA,				/* pass literally */
								  MS_RCDATA				/* replace entities */
								} marktype;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Origin of an event; the current value is kept in
								   dtd_parser.event_class. */
								typedef enum
								{ EV_EXPLICIT,				/* Explicit event */
								  EV_OMITTED,				/* Omitted tag event */
								  EV_SHORTTAG,				/* SHORTTAG event: <tag/value/ */
								  EV_SHORTREF				/* SHORTREF event */
								} sgml_event_class;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef struct _dtd_marked
							 | 
						||
| 
								 | 
							
								{ dtd_symbol *keyword;			/* keyword of the marked section */
							 | 
						||
| 
								 | 
							
								  marktype	type;			/* processing type */
							 | 
						||
| 
								 | 
							
								  struct _dtd_marked *parent;		/* parent marked section */
							 | 
						||
| 
								 | 
							
								} dtd_marked;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* How characters are handled; the current mode is kept in
								   dtd_parser.dmode. */
								typedef enum
								{ DM_DTD,				/* DTD mode: no data allowed (?) */
								  DM_DATA				/* Environment has only elements */
								} data_mode;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef struct _sgml_environment
							 | 
						||
| 
								 | 
							
								{ dtd_element *element;			/* element that opened the env */
							 | 
						||
| 
								 | 
							
								  struct _dtd_state *state;		/* State we are in */
							 | 
						||
| 
								 | 
							
								#ifdef XMLNS
							 | 
						||
| 
								 | 
							
								  struct _xmlns *xmlns;			/* XML namespace */
							 | 
						||
| 
								 | 
							
								  struct _xmlns *thisns;		/* Name space of element */
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#ifdef XMLBASE
							 | 
						||
| 
								 | 
							
								  ichar *uri_base;			/* xml:base handling */
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  dtd_space_mode space_mode;		/* How to handle blanks */
							 | 
						||
| 
								 | 
							
								  dtd_shortref *map;			/* SHORTREF map */
							 | 
						||
| 
								 | 
							
								  struct _sgml_environment *parent;	/* Parent environment */
							 | 
						||
| 
								 | 
							
								  int	wants_net;			/* I want a net */
							 | 
						||
| 
								 | 
							
								  int	saved_waiting_for_net;		/* saved value of waiting for net */
							 | 
						||
| 
								 | 
							
								} sgml_environment;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
													/* parser->flags */
							 | 
						||
| 
								 | 
							
								/* Bits for the `flags' member of dtd_parser; may be or-ed */
								#define SGML_PARSER_NODEFS	 0x01	/* don't handle default atts */
								#define SGML_PARSER_QUALIFY_ATTS 0x02	/* qualify attributes in XML mode */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef struct _dtd_parser
							 | 
						||
| 
								 | 
							
								{ unsigned long magic;			/* SGML_PARSER_MAGIC */
							 | 
						||
| 
								 | 
							
								  dtd     *dtd;				/* DTD we are building */
							 | 
						||
| 
								 | 
							
								  dtdstate state;			/* current state */
							 | 
						||
| 
								 | 
							
								  dtdstate cdata_state;			/* S_CDATA/S_RCDATA */
							 | 
						||
| 
								 | 
							
								  dtd_marked *marked;			/* marked section stack */
							 | 
						||
| 
								 | 
							
								  marktype mark_state;			/* processing mode */
							 | 
						||
| 
								 | 
							
								  dtd_element *empty_element;		/* empty of <tag/> seen */
							 | 
						||
| 
								 | 
							
								  sgml_environment *environments;	/* Open environments */
							 | 
						||
| 
								 | 
							
								  data_mode dmode;			/* How to handle characters */
							 | 
						||
| 
								 | 
							
								  int	   first;			/* Just seen <tag> */
							 | 
						||
| 
								 | 
							
								  int	   waiting_for_net;		/* waiting for / in <shorttag/mode/ */
							 | 
						||
| 
								 | 
							
								  icharbuf *buffer;			/* buffer for temp data */
							 | 
						||
| 
								 | 
							
								  ocharbuf *cdata;			/* collected character data */
							 | 
						||
| 
								 | 
							
								  int	   blank_cdata;			/* CDATA is all blank */
							 | 
						||
| 
								 | 
							
								  int	   cdata_must_be_empty;		/* Only shortrefs allowed here */
							 | 
						||
| 
								 | 
							
								  const ichar *etag;			/* name of end-tag in CDATA */
							 | 
						||
| 
								 | 
							
								  int	   etaglen;			/* length of end-tag */
							 | 
						||
| 
								 | 
							
								  int	   grouplevel;			/* [..] level in declaration */
							 | 
						||
| 
								 | 
							
								  int	   saved;			/* saved character */
							 | 
						||
| 
								 | 
							
								  dtdstate lit_saved_state;		/* literal saved-state */
							 | 
						||
| 
								 | 
							
								  int      encoded;			/* TRUE for binary input */
							 | 
						||
| 
								 | 
							
								  dtd_shortref *map;			/* SHORTREF map */
							 | 
						||
| 
								 | 
							
								#ifdef UTF8
							 | 
						||
| 
								 | 
							
								  int	   utf8_decode;			/* decode UTF-8 sequences? */
							 | 
						||
| 
								 | 
							
								  int      utf8_char;			/* building character */
							 | 
						||
| 
								 | 
							
								  int	   utf8_left;			/* bytes left */
							 | 
						||
| 
								 | 
							
								  dtdstate utf8_saved_state;		/* state from which we come */
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  dtd_srcloc	location;		/* Current location */
							 | 
						||
| 
								 | 
							
								  dtd_srcloc	startloc;		/* Start of last markup */
							 | 
						||
| 
								 | 
							
								  dtd_srcloc	startcdata;		/* Start of last cdata */
							 | 
						||
| 
								 | 
							
								  dtd_symbol   *enforce_outer_element;	/* Outer element to look for */
							 | 
						||
| 
								 | 
							
								  sgml_event_class event_class;		/* EV_* */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void *closure;			/* client handle */
							 | 
						||
| 
								 | 
							
								  sgml_begin_element_f	on_begin_element; /* start an element */
							 | 
						||
| 
								 | 
							
								  sgml_end_element_f	on_end_element;	/* end an element */
							 | 
						||
| 
								 | 
							
								  sgml_data_f		on_data;	/* process cdata */
							 | 
						||
| 
								 | 
							
								  sgml_entity_f		on_entity;	/* unprocessed entity */
							 | 
						||
| 
								 | 
							
								  sgml_pi_f		on_pi;		/* processing instruction */
							 | 
						||
| 
								 | 
							
								  sgml_error_f		on_error;	/* handle error */
							 | 
						||
| 
								 | 
							
								  sgml_decl_f		on_decl;	/* handle declarations */
							 | 
						||
| 
								 | 
							
								#ifdef XMLNS
							 | 
						||
| 
								 | 
							
								  xmlns_f		on_xmlns;	/* handle new namespace */
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  unsigned		flags;		/* misc flags */
							 | 
						||
| 
								 | 
							
								} dtd_parser;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef XMLNS
							 | 
						||
| 
								 | 
							
								#include "xmlns.h"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Variadic routine taking an error identifier followed by further
								   arguments.  NOTE(review): defined outside this header;
								   presumably the extra arguments depend on `e' -- confirm against
								   the definition, as well as the meaning of the int result. */
								extern int		gripe(dtd_error_id e, ...);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* NOTE(review): not referenced elsewhere in this header;
								   presumably a flag used when parsing a sub-document -- confirm
								   at the place where it is used. */
								#define SGML_SUB_DOCUMENT	0x1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#endif /*SGML_PARSER_H_INCLUDED*/
							 | 
						||
| 
								 | 
							
								
							 |