This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/packages/sgml/parser.h
2010-05-06 10:59:09 +01:00

240 lines
7.6 KiB
C

/* $Id$
Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: jan@swi.psy.uva.nl
WWW: http://www.swi-prolog.org
Copyright (C): 1985-2002, University of Amsterdam
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef SGML_PARSER_H_INCLUDED
#define SGML_PARSER_H_INCLUDED
#include "util.h"
#include <wchar.h>
/*******************************
* CALL-BACK *
*******************************/
/*
 * Bit values for the `flags' member of sgml_attribute.
 *
 * NOTE(review): SGML_AT_DEFAULT presumably marks an attribute whose value
 * was taken from the DTD default rather than from the document -- confirm
 * against the code that sets it.
 */
#define SGML_AT_DEFAULT 0x1
/*
 * A single attribute.  An array of these is handed to the begin-element
 * call-back (sgml_begin_element_f) as its `argv' argument.
 */
typedef struct _sgml_attribute
{
    /* Kept together in a nested struct so we can free members */
    struct
    {
        wchar_t *textW;         /* UCS textual value */
        long     number;        /* numeric value/length */
    } value;

    dtd_attr *definition;       /* DTD definition */
    unsigned  flags;            /* additional flags */
} sgml_attribute;
/*
 * Pointer to the parser object.  struct _dtd_parser itself is defined
 * further down in this header; the call-back types below only need the
 * pointer type.
 */
typedef struct _dtd_parser *dtd_parser_p;

/*
 * Call-back types.  Each one is the type of the matching on_* member of
 * dtd_parser.  The meaning of the int result is not visible from this
 * header.
 */

/* Start an element: `argv' holds `argc' attributes of element `e' */
typedef int (*sgml_begin_element_f)(
    dtd_parser_p parser,
    dtd_element *e,
    int argc,
    sgml_attribute *argv);

/* End an element */
typedef int (*sgml_end_element_f)(
    dtd_parser_p parser,
    dtd_element *e);

/* Process character data: `text' with length `len' of kind `type' */
typedef int (*sgml_data_f)(
    dtd_parser_p parser,
    data_type type,
    int len,
    const wchar_t *text);

/*
 * Same signature as sgml_data_f.  No member of dtd_parser declared in
 * this header uses this type.
 */
typedef int (*sgml_wdata_f)(
    dtd_parser_p parser,
    data_type type,
    int len,
    const wchar_t *text);

/* Unprocessed entity */
typedef int (*sgml_entity_f)(
    dtd_parser_p parser,
    dtd_entity *entity,
    int chr);

/* Processing instruction */
typedef int (*sgml_pi_f)(
    dtd_parser_p parser,
    const ichar *pi);

/* Handle an error */
typedef int (*sgml_error_f)(
    dtd_parser_p parser,
    dtd_error *error);

/* Handle a declaration */
typedef int (*sgml_decl_f)(
    dtd_parser_p parser,
    const ichar *decl);
#ifdef XMLNS
/*
 * Call-back type of dtd_parser->on_xmlns (handle new namespace).  Only
 * available when XMLNS is defined.
 */
typedef int (*xmlns_f)(
    dtd_parser_p parser,
    dtd_symbol *ns,
    dtd_symbol *url);
#endif
/*******************************
* PARSER AND STATES *
*******************************/
/* Value held in the `magic' member of dtd_parser */
#define SGML_PARSER_MAGIC (0x834ab663)
/*
 * States of the parser.  This is the type of dtd_parser->state and of the
 * other saved-state members of the parser.
 *
 * The enumerators carry no explicit values, so S_UTF8 (present only when
 * UTF8 is defined) shifts the numeric value of every state after it.
 */
typedef enum
{
    S_PCDATA,                   /* between declarations */
#ifdef UTF8
    S_UTF8,                     /* Loading UTF-8 character */
#endif

    /* character data */
    S_CDATA,                    /* non-parsed data */
    S_RCDATA,                   /* CDATA+entities */
    S_MSCDATA,                  /* <![CDATA[...]]> */
    S_EMSCDATA1,                /* Seen ] in S_MSCDATA */
    S_EMSCDATA2,                /* Seen ]] in S_MSCDATA */
    S_ECDATA1,                  /* Seen < in CDATA */
    S_ECDATA2,                  /* Seen </ in CDATA */

    /* end of marked section */
    S_EMSC1,                    /* Seen ] in marked section */
    S_EMSC2,                    /* Seen ]] in marked section */

    /* processing instructions */
    S_PI,                       /* Seen <? */
    S_PI2,                      /* Seen <?...? */

    /* declarations */
    S_DECL0,                    /* Seen < */
    S_DECL,                     /* inside a declaration */
    S_MDECL0,                   /* Seen <! */
    S_STRING,                   /* inside a "string" or 'string' */
    S_DECLCMT0,                 /* Seen <...- */
    S_DECLCMT,                  /* Seen <...-- */
    S_DECLCMTE0,                /* Seen <...--..- */

    /* comments */
    S_CMTO,                     /* Seen <!- */
    S_CMT1,                     /* Seen <!-- */
    S_CMT,                      /* Seen <!--X... */
    S_CMTE0,                    /* Seen <!--...- */
    S_CMTE1,                    /* Seen <!--...-- */

    S_GROUP,                    /* inside [...] */

    /* entities */
    S_PENT,                     /* Seen % */
    S_ENT0,                     /* Seen & */
    S_ENT,                      /* Seen &(#|\w) */
    S_ENTCR                     /* Seen &entity<CR> */
} dtdstate;
/* Kinds of declaration */
typedef enum
{
    DCL_DTD,                    /* DTD Declaration */
    DCL_BEGIN,                  /* begin-tag */
    DCL_END                     /* end-tag */
} dcl_type;
/*
 * Processing type of a marked section (dtd_marked->type); also the type
 * of dtd_parser->mark_state.
 */
typedef enum
{
    MS_IGNORE,                  /* ignore this data */
    MS_INCLUDE,                 /* process normally */
    MS_CDATA,                   /* pass literally */
    MS_RCDATA                   /* replace entities */
} marktype;
/* Classification of an event; type of dtd_parser->event_class */
typedef enum
{
    EV_EXPLICIT,                /* Explicit event */
    EV_OMITTED,                 /* Omitted tag event */
    EV_SHORTTAG,                /* SHORTTAG event: <tag/value/ */
    EV_SHORTREF                 /* SHORTREF event */
} sgml_event_class;
/*
 * One marked section.  Sections are chained through `parent';
 * dtd_parser->marked refers to this chain as the marked section stack.
 */
typedef struct _dtd_marked
{
    dtd_symbol         *keyword;    /* keyword of the marked section */
    marktype            type;       /* processing type */
    struct _dtd_marked *parent;     /* parent marked section */
} dtd_marked;
/* How to handle characters; type of dtd_parser->dmode */
typedef enum
{
    DM_DTD,                     /* DTD mode: no data allowed (?) */
    DM_DATA                     /* Environment has only elements */
} data_mode;
#ifdef XMLNS
/*
 * What to do if a namespace does not exist; type of
 * dtd_parser->xml_no_ns.  Only declared when XMLNS is defined.
 */
typedef enum
{
    NONS_ERROR = 0,
    NONS_QUIET
} xmlnons;
#endif
/*
 * An open environment.  Environments are linked through `parent';
 * dtd_parser->environments refers to the open ones.
 *
 * The namespace members exist only when XMLNS is defined and `uri_base'
 * only when XMLBASE is defined.
 */
typedef struct _sgml_environment
{
    dtd_element       *element;     /* element that opened the env */
    struct _dtd_state *state;       /* State we are in */

#ifdef XMLNS
    struct _xmlns     *xmlns;       /* XML namespace */
    struct _xmlns     *thisns;      /* Name space of element */
#endif

#ifdef XMLBASE
    ichar             *uri_base;    /* xml:base handling */
#endif

    dtd_space_mode     space_mode;  /* How to handle blanks */
    dtd_shortref      *map;         /* SHORTREF map */
    struct _sgml_environment *parent; /* Parent environment */

    int wants_net;                  /* I want a net */
    int saved_waiting_for_net;      /* saved value of waiting for net */
} sgml_environment;
/*
 * Bit values for the `flags' member of dtd_parser.
 */
#define SGML_PARSER_NODEFS       0x01   /* don't handle default atts */
#define SGML_PARSER_QUALIFY_ATTS 0x02   /* qualify attributes in XML mode */
/*
 * The parser object.
 *
 * `magic' holds SGML_PARSER_MAGIC and `flags' holds SGML_PARSER_* bits.
 * The on_* members are the client call-backs; `closure' is the client
 * handle that goes with them.
 *
 * Conditional members:
 *   - the utf8_* members exist only when UTF8 is defined;
 *   - xml_no_ns, xmlns and on_xmlns exist only when XMLNS is defined.
 */
typedef struct _dtd_parser
{
    unsigned long     magic;            /* SGML_PARSER_MAGIC */
    dtd              *dtd;              /* DTD we are building */

    /* scanner state */
    dtdstate          state;            /* current state */
    dtdstate          cdata_state;      /* S_CDATA/S_RCDATA */
    dtd_marked       *marked;           /* marked section stack */
    marktype          mark_state;       /* processing mode */
    dtd_element      *empty_element;    /* empty of <tag/> seen */
    sgml_environment *environments;     /* Open environments */
    data_mode         dmode;            /* How to handle characters */
    int               first;            /* Just seen <tag> */
    int               waiting_for_net;  /* waiting for / in <shorttag/mode/ */

    /* buffers and character data */
    icharbuf         *buffer;           /* buffer for temp data */
    ocharbuf         *cdata;            /* collected character data */
    int               blank_cdata;      /* CDATA is all blank */
    int               cdata_must_be_empty; /* Only shortrefs allowed here */
    const ichar      *etag;             /* name of end-tag in CDATA */
    int               etaglen;          /* length of end-tag */
    int               grouplevel;       /* [..] level in declaration */
    int               saved;            /* saved character */
    dtdstate          lit_saved_state;  /* literal saved-state */
    int               encoded;          /* TRUE for binary input */
    dtd_shortref     *map;              /* SHORTREF map */

#ifdef UTF8
    int               utf8_decode;      /* decode UTF-8 sequences? */
    int               utf8_char;        /* building character */
    int               utf8_left;        /* bytes left */
    dtdstate          utf8_saved_state; /* state from which we come */
#endif

    /* source locations */
    dtd_srcloc        location;         /* Current location */
    dtd_srcloc        startloc;         /* Start of last markup */
    dtd_srcloc        startcdata;       /* Start of last cdata */

    dtd_symbol       *enforce_outer_element; /* Outer element to look for */
    sgml_event_class  event_class;      /* EV_* */

#ifdef XMLNS
    /*
     * The xmlnons type is declared only when XMLNS is defined, so this
     * member has to live under the same guard; otherwise the header does
     * not compile without XMLNS.  With XMLNS defined the layout is the
     * same as before.
     */
    xmlnons           xml_no_ns;        /* What if namespace does not exist? */
    struct _xmlns    *xmlns;            /* Outer xmlns declaration */
#endif

    /* client interface */
    void                 *closure;          /* client handle */
    sgml_begin_element_f  on_begin_element; /* start an element */
    sgml_end_element_f    on_end_element;   /* end an element */
    sgml_data_f           on_data;          /* process cdata */
    sgml_entity_f         on_entity;        /* unprocessed entity */
    sgml_pi_f             on_pi;            /* processing instruction */
    sgml_error_f          on_error;         /* handle error */
    sgml_decl_f           on_decl;          /* handle declarations */
#ifdef XMLNS
    xmlns_f               on_xmlns;         /* handle new namespace */
#endif

    unsigned          flags;            /* misc flags */
} dtd_parser;
#ifdef XMLNS
#include "xmlns.h"
#endif
/*
 * NOTE(review): presumably reports the problem identified by `e' for
 * parser `p'.  The meaning of the variadic arguments and of the return
 * value is not visible from this header -- confirm against the
 * definition.
 */
extern int gripe(dtd_parser *p,
                 dtd_error_id e,
                 ...);
/* Flag bit; the code that consumes it is not visible in this header */
#define SGML_SUB_DOCUMENT 0x1
#endif /*SGML_PARSER_H_INCLUDED*/