d6a06fe092
as possible.
230 lines
7.4 KiB
C
230 lines
7.4 KiB
C
/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        jan@swi.psy.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2002, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

/* Include guard; closed by the matching #endif at the end of this file. */
#ifndef SGML_PARSER_H_INCLUDED
#define SGML_PARSER_H_INCLUDED
#include "util.h"
#include <wchar.h>			/* wchar_t, used by the declarations below */

/*******************************
 *          CALL-BACK          *
 *******************************/

/* Bit for sgml_attribute->flags.
   NOTE(review): presumably marks an attribute whose value came from the
   DTD default rather than from the document -- confirm against the users. */
#define SGML_AT_DEFAULT	0x1

typedef struct _sgml_attribute
|
|
{ struct /* so we can free members */
|
|
{ wchar_t *textW; /* UCS textual value */
|
|
long number; /* numeric value/length */
|
|
} value;
|
|
dtd_attr *definition; /* DTD definition */
|
|
unsigned flags; /* additional flags */
|
|
} sgml_attribute;
|
|
|
|
/* Pointer to the parser object.  Declared before struct _dtd_parser itself
   so the call-back function types can take it as their first parameter. */
typedef struct _dtd_parser *dtd_parser_p;

typedef int (*sgml_begin_element_f)(dtd_parser_p parser,
|
|
dtd_element *e,
|
|
int argc,
|
|
sgml_attribute *argv);
|
|
typedef int (*sgml_end_element_f)(dtd_parser_p parser,
|
|
dtd_element *e);
|
|
typedef int (*sgml_data_f)(dtd_parser_p parser,
|
|
data_type type, int len, const wchar_t *text);
|
|
typedef int (*sgml_wdata_f)(dtd_parser_p parser,
|
|
data_type type, int len, const wchar_t *text);
|
|
typedef int (*sgml_entity_f)(dtd_parser_p parser,
|
|
dtd_entity *entity,
|
|
int chr);
|
|
typedef int (*sgml_pi_f)(dtd_parser_p parser, const ichar *pi);
|
|
typedef int (*sgml_error_f)(dtd_parser_p parser,
|
|
dtd_error *error);
|
|
typedef int (*sgml_decl_f)(dtd_parser_p parser, const ichar *decl);
|
|
#ifdef XMLNS
|
|
typedef int (*xmlns_f)(dtd_parser_p parser,
|
|
dtd_symbol *ns, dtd_symbol *url);
|
|
#endif
|
|
|
|
|
|
/*******************************
 *      PARSER AND STATES      *
 *******************************/

/* Value for the `magic` member of dtd_parser. */
#define SGML_PARSER_MAGIC	(0x834ab663)

/* Scanner states.  dtd_parser keeps the current one in `state` and uses
   the same type for cdata_state, lit_saved_state and (with UTF8)
   utf8_saved_state.  S_UTF8 only exists when UTF8 is defined, so the
   numeric values of all later enumerators depend on that macro. */
typedef enum
{ S_PCDATA,				/* between declarations */
#ifdef UTF8
  S_UTF8,				/* Loading UTF-8 character */
#endif
  S_CDATA,				/* non-parsed data */
  S_RCDATA,				/* CDATA+entities */
  S_MSCDATA,				/* <![CDATA[...]]> */
  S_EMSCDATA1,				/* Seen ] in S_MSCDATA */
  S_EMSCDATA2,				/* Seen ]] in S_MSCDATA */
  S_ECDATA1,				/* Seen < in CDATA */
  S_ECDATA2,				/* Seen </ in CDATA */
  S_EMSC1,				/* Seen ] in marked section */
  S_EMSC2,				/* Seen ]] in marked section */
  S_PI,					/* Seen <? */
  S_PI2,				/* Seen <?...? */
  S_DECL0,				/* Seen < */
  S_DECL,				/* inside a declaration */
  S_MDECL0,				/* Seen <! */
  S_STRING,				/* inside a "string" or 'string' */
  S_DECLCMT0,				/* Seen <...- */
  S_DECLCMT,				/* Seen <...-- */
  S_DECLCMTE0,				/* Seen <...--..- */
  S_CMTO,				/* Seen <!- (name ends in letter O) */
  S_CMT1,				/* Seen <!-- */
  S_CMT,				/* Seen <!--X... */
  S_CMTE0,				/* Seen <!--...- */
  S_CMTE1,				/* Seen <!--...-- */
  S_GROUP,				/* inside [...] */
  S_PENT,				/* Seen % */
  S_ENT0,				/* Seen & */
  S_ENT,				/* Seen &(#|\w) */
  S_ENTCR				/* Seen &entity<CR> */
} dtdstate;

/* Kind of declaration. */
typedef enum
{ DCL_DTD,				/* DTD Declaration */
  DCL_BEGIN,				/* begin-tag */
  DCL_END				/* end-tag */
} dcl_type;

/* Processing type of a marked section; used by dtd_marked.type and
   dtd_parser.mark_state. */
typedef enum
{ MS_IGNORE,				/* ignore this data */
  MS_INCLUDE,				/* process normally */
  MS_CDATA,				/* pass literally */
  MS_RCDATA				/* replace entities */
} marktype;

/* Class of an event; dtd_parser stores one in its event_class member. */
typedef enum
{ EV_EXPLICIT,				/* Explicit event */
  EV_OMITTED,				/* Omitted tag event */
  EV_SHORTTAG,				/* SHORTTAG event: <tag/value/ */
  EV_SHORTREF				/* SHORTREF event */
} sgml_event_class;

typedef struct _dtd_marked
|
|
{ dtd_symbol *keyword; /* keyword of the marked section */
|
|
marktype type; /* processing type */
|
|
struct _dtd_marked *parent; /* parent marked section */
|
|
} dtd_marked;
|
|
|
|
|
|
/* Character handling mode; stored in dtd_parser.dmode. */
typedef enum
{ DM_DTD,				/* DTD mode: no data allowed (?) */
  DM_DATA				/* Environment has only elements */
} data_mode;

typedef struct _sgml_environment
|
|
{ dtd_element *element; /* element that opened the env */
|
|
struct _dtd_state *state; /* State we are in */
|
|
#ifdef XMLNS
|
|
struct _xmlns *xmlns; /* XML namespace */
|
|
struct _xmlns *thisns; /* Name space of element */
|
|
#endif
|
|
#ifdef XMLBASE
|
|
ichar *uri_base; /* xml:base handling */
|
|
#endif
|
|
dtd_space_mode space_mode; /* How to handle blanks */
|
|
dtd_shortref *map; /* SHORTREF map */
|
|
struct _sgml_environment *parent; /* Parent environment */
|
|
int wants_net; /* I want a net */
|
|
int saved_waiting_for_net; /* saved value of waiting for net */
|
|
} sgml_environment;
|
|
|
|
/* parser->flags: bits for the `flags` member of dtd_parser */
#define SGML_PARSER_NODEFS	 0x01	/* don't handle default atts */
#define SGML_PARSER_QUALIFY_ATTS 0x02	/* qualify attributes in XML mode */

typedef struct _dtd_parser
|
|
{ unsigned long magic; /* SGML_PARSER_MAGIC */
|
|
dtd *dtd; /* DTD we are building */
|
|
dtdstate state; /* current state */
|
|
dtdstate cdata_state; /* S_CDATA/S_RCDATA */
|
|
dtd_marked *marked; /* marked section stack */
|
|
marktype mark_state; /* processing mode */
|
|
dtd_element *empty_element; /* empty of <tag/> seen */
|
|
sgml_environment *environments; /* Open environments */
|
|
data_mode dmode; /* How to handle characters */
|
|
int first; /* Just seen <tag> */
|
|
int waiting_for_net; /* waiting for / in <shorttag/mode/ */
|
|
icharbuf *buffer; /* buffer for temp data */
|
|
ocharbuf *cdata; /* collected character data */
|
|
int blank_cdata; /* CDATA is all blank */
|
|
int cdata_must_be_empty; /* Only shortrefs allowed here */
|
|
const ichar *etag; /* name of end-tag in CDATA */
|
|
int etaglen; /* length of end-tag */
|
|
int grouplevel; /* [..] level in declaration */
|
|
int saved; /* saved character */
|
|
dtdstate lit_saved_state; /* literal saved-state */
|
|
int encoded; /* TRUE for binary input */
|
|
dtd_shortref *map; /* SHORTREF map */
|
|
#ifdef UTF8
|
|
int utf8_decode; /* decode UTF-8 sequences? */
|
|
int utf8_char; /* building character */
|
|
int utf8_left; /* bytes left */
|
|
dtdstate utf8_saved_state; /* state from which we come */
|
|
#endif
|
|
dtd_srcloc location; /* Current location */
|
|
dtd_srcloc startloc; /* Start of last markup */
|
|
dtd_srcloc startcdata; /* Start of last cdata */
|
|
dtd_symbol *enforce_outer_element; /* Outer element to look for */
|
|
sgml_event_class event_class; /* EV_* */
|
|
|
|
void *closure; /* client handle */
|
|
sgml_begin_element_f on_begin_element; /* start an element */
|
|
sgml_end_element_f on_end_element; /* end an element */
|
|
sgml_data_f on_data; /* process cdata */
|
|
sgml_entity_f on_entity; /* unprocessed entity */
|
|
sgml_pi_f on_pi; /* processing instruction */
|
|
sgml_error_f on_error; /* handle error */
|
|
sgml_decl_f on_decl; /* handle declarations */
|
|
#ifdef XMLNS
|
|
xmlns_f on_xmlns; /* handle new namespace */
|
|
#endif
|
|
unsigned flags; /* misc flags */
|
|
} dtd_parser;
|
|
|
|
|
|
/* Namespace declarations are only pulled in when XMLNS is defined; the
   include sits after the dtd_parser definition. */
#ifdef XMLNS
#include "xmlns.h"
#endif

/* Variadic function defined elsewhere: takes an error id followed by
   further arguments whose number and types are not visible here. */
extern int gripe(dtd_error_id e, ...);

/* NOTE(review): flag bit; the function or field that consumes it is not
   visible in this header -- confirm against the users. */
#define SGML_SUB_DOCUMENT	0x1

#endif /*SGML_PARSER_H_INCLUDED*/