/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker E-mail: jan@swi.psy.uva.nl WWW: http://www.swi-prolog.org Copyright (C): 1985-2002, University of Amsterdam This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef SGML_PARSER_H_INCLUDED #define SGML_PARSER_H_INCLUDED #include "util.h" #include <wchar.h> /******************************* * CALL-BACK * *******************************/ /* sgml_attribute->flags */ #define SGML_AT_DEFAULT 0x1 typedef struct _sgml_attribute { struct /* so we can free members */ { wchar_t *textW; /* UCS textual value */ long number; /* numeric value/length */ } value; dtd_attr *definition; /* DTD definition */ unsigned flags; /* additional flags */ } sgml_attribute; typedef struct _dtd_parser *dtd_parser_p; typedef int (*sgml_begin_element_f)(dtd_parser_p parser, dtd_element *e, int argc, sgml_attribute *argv); typedef int (*sgml_end_element_f)(dtd_parser_p parser, dtd_element *e); typedef int (*sgml_data_f)(dtd_parser_p parser, data_type type, int len, const wchar_t *text); typedef int (*sgml_wdata_f)(dtd_parser_p parser, data_type type, int len, const wchar_t *text); typedef int (*sgml_entity_f)(dtd_parser_p parser, dtd_entity *entity, int chr); typedef int (*sgml_pi_f)(dtd_parser_p parser, const ichar *pi); typedef int (*sgml_error_f)(dtd_parser_p parser, dtd_error *error); typedef int (*sgml_decl_f)(dtd_parser_p parser, const ichar *decl); #ifdef XMLNS typedef int (*xmlns_f)(dtd_parser_p parser, dtd_symbol *ns, dtd_symbol *url); #endif /******************************* * PARSER AND STATES * *******************************/ #define SGML_PARSER_MAGIC (0x834ab663) typedef enum { S_PCDATA, /* between declarations */ #ifdef UTF8 S_UTF8, /* Loading UTF-8 character */ #endif S_CDATA, /* non-parsed data */ S_RCDATA, /* CDATA+entities */ S_MSCDATA, /* <![CDATA[...]]> */ S_EMSCDATA1, /* Seen ] in S_MSCDATA */ S_EMSCDATA2, /* Seen ]] in S_MSCDATA */ S_ECDATA1, /* Seen < in CDATA */ S_ECDATA2, /* Seen </ in CDATA */ S_EMSC1, /* Seen ] in marked section */ S_EMSC2, /* Seen ]] in marked section */ S_PI, /* Seen <? */ S_PI2, /* Seen <?...? */ S_DECL0, /* Seen < */ S_DECL, /* inside a declaration */ S_MDECL0, /* Seen <! */ S_STRING, /* inside a "string" or 'string' */ S_DECLCMT0, /* Seen <...- */ S_DECLCMT, /* Seen <...-- */ S_DECLCMTE0, /* Seen <...--..- */ S_CMTO, /* Seen <!- */ S_CMT1, /* Seen <!-- */ S_CMT, /* Seen <!--X... */ S_CMTE0, /* Seem <!--...- */ S_CMTE1, /* Seem <!--...-- */ S_GROUP, /* inside [...] */ S_PENT, /* Seen % */ S_ENT0, /* Seen & */ S_ENT, /* Seen &(#|\w) */ S_ENTCR /* Seen &entity<CR> */ } dtdstate; typedef enum { DCL_DTD, /* DTD Declaration */ DCL_BEGIN, /* begin-tag */ DCL_END /* end-tag */ } dcl_type; typedef enum { MS_IGNORE, /* ignore this data */ MS_INCLUDE, /* process normally */ MS_CDATA, /* pass literally */ MS_RCDATA /* replace entities */ } marktype; typedef enum { EV_EXPLICIT, /* Explicit event */ EV_OMITTED, /* Omitted tag event */ EV_SHORTTAG, /* SHORTTAG event: <tag/value/ */ EV_SHORTREF /* SHORTREF event */ } sgml_event_class; typedef struct _dtd_marked { dtd_symbol *keyword; /* keyword of the marked section */ marktype type; /* processing type */ struct _dtd_marked *parent; /* parent marked section */ } dtd_marked; typedef enum { DM_DTD, /* DTD mode: no data allowed (?) */ DM_DATA /* Environment has only elements */ } data_mode; #ifdef XMLNS typedef enum { NONS_ERROR = 0, NONS_QUIET } xmlnons; #endif typedef struct _sgml_environment { dtd_element *element; /* element that opened the env */ struct _dtd_state *state; /* State we are in */ #ifdef XMLNS struct _xmlns *xmlns; /* XML namespace */ struct _xmlns *thisns; /* Name space of element */ #endif #ifdef XMLBASE ichar *uri_base; /* xml:base handling */ #endif dtd_space_mode space_mode; /* How to handle blanks */ dtd_shortref *map; /* SHORTREF map */ struct _sgml_environment *parent; /* Parent environment */ int wants_net; /* I want a net */ int saved_waiting_for_net; /* saved value of waiting for net */ } sgml_environment; /* parser->flags */ #define SGML_PARSER_NODEFS 0x01 /* don't handle default atts */ #define SGML_PARSER_QUALIFY_ATTS 0x02 /* qualify attributes in XML mode */ typedef struct _dtd_parser { unsigned long magic; /* SGML_PARSER_MAGIC */ dtd *dtd; /* DTD we are building */ dtdstate state; /* current state */ dtdstate cdata_state; /* S_CDATA/S_RCDATA */ dtd_marked *marked; /* marked section stack */ marktype mark_state; /* processing mode */ dtd_element *empty_element; /* empty of <tag/> seen */ sgml_environment *environments; /* Open environments */ data_mode dmode; /* How to handle characters */ int first; /* Just seen <tag> */ int waiting_for_net; /* waiting for / in <shorttag/mode/ */ icharbuf *buffer; /* buffer for temp data */ ocharbuf *cdata; /* collected character data */ int blank_cdata; /* CDATA is all blank */ int cdata_must_be_empty; /* Only shortrefs allowed here */ const ichar *etag; /* name of end-tag in CDATA */ int etaglen; /* length of end-tag */ int grouplevel; /* [..] level in declaration */ int saved; /* saved character */ dtdstate lit_saved_state; /* literal saved-state */ int encoded; /* TRUE for binary input */ dtd_shortref *map; /* SHORTREF map */ #ifdef UTF8 int utf8_decode; /* decode UTF-8 sequences? */ int utf8_char; /* building character */ int utf8_left; /* bytes left */ dtdstate utf8_saved_state; /* state from which we come */ #endif dtd_srcloc location; /* Current location */ dtd_srcloc startloc; /* Start of last markup */ dtd_srcloc startcdata; /* Start of last cdata */ dtd_symbol *enforce_outer_element; /* Outer element to look for */ sgml_event_class event_class; /* EV_* */ xmlnons xml_no_ns; /* What if namespace does not exist? */ #ifdef XMLNS struct _xmlns *xmlns; /* Outer xmlns declaration */ #endif void *closure; /* client handle */ sgml_begin_element_f on_begin_element; /* start an element */ sgml_end_element_f on_end_element; /* end an element */ sgml_data_f on_data; /* process cdata */ sgml_entity_f on_entity; /* unprocessed entity */ sgml_pi_f on_pi; /* processing instruction */ sgml_error_f on_error; /* handle error */ sgml_decl_f on_decl; /* handle declarations */ #ifdef XMLNS xmlns_f on_xmlns; /* handle new namespace */ #endif unsigned flags; /* misc flags */ } dtd_parser; #ifdef XMLNS #include "xmlns.h" #endif extern int gripe(dtd_parser *p, dtd_error_id e, ...); #define SGML_SUB_DOCUMENT 0x1 #endif /*SGML_PARSER_H_INCLUDED*/