/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        jan@swi.psy.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2002, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#ifndef SGML_PARSER_H_INCLUDED
#define SGML_PARSER_H_INCLUDED
#include "util.h"
#include <wchar.h>

		 /*******************************
		 *	      CALL-BACK		*
		 *******************************/

					/* sgml_attribute->flags */
/* NOTE(review): presumably set when the value was supplied by the DTD
   default rather than written in the document -- confirm in the parser. */
#define SGML_AT_DEFAULT		0x1

/* One attribute value together with its DTD definition.  An array of
   these is what sgml_begin_element_f receives as `argv' (with `argc'
   giving the element count). */
typedef struct _sgml_attribute
{ struct				/* so we can free members */
  { wchar_t *textW;			/* UCS textual value */
    long   number;			/* numeric value/length */
  } value;
  dtd_attr *definition;			/* DTD definition */
  unsigned flags;			/* additional flags (SGML_AT_*) */
} sgml_attribute;

/* Forward pointer type so the call-back signatures below can refer to
   the parser before struct _dtd_parser is defined. */
typedef struct _dtd_parser *dtd_parser_p;

/* Call-back signatures.  Every call-back receives the parser as its
   first argument and returns int; the meaning of the return value is
   not defined in this header (NOTE(review): confirm in the parser
   implementation). */

/* Element opened: `argv' holds `argc' attributes */
typedef int (*sgml_begin_element_f)(dtd_parser_p parser,
				    dtd_element *e,
				    int argc,
				    sgml_attribute *argv);
/* Element closed */
typedef int (*sgml_end_element_f)(dtd_parser_p parser,
				  dtd_element *e);
/* Character data: `len' wide characters at `text' of kind `type' */
typedef int (*sgml_data_f)(dtd_parser_p parser,
			   data_type type, int len, const wchar_t *text);
/* Same signature as sgml_data_f; no member of dtd_parser in this header
   uses this type */
typedef int (*sgml_wdata_f)(dtd_parser_p parser,
			   data_type type, int len, const wchar_t *text);
/* Entity reference; either `entity' or the character code `chr'
   (NOTE(review): which one applies when is not visible here) */
typedef int (*sgml_entity_f)(dtd_parser_p parser,
			     dtd_entity *entity,
			     int chr);
/* Processing instruction text */
typedef int (*sgml_pi_f)(dtd_parser_p parser, const ichar *pi);
/* Error report */
typedef int (*sgml_error_f)(dtd_parser_p parser,
			    dtd_error *error);
/* Declaration text */
typedef int (*sgml_decl_f)(dtd_parser_p parser, const ichar *decl);
#ifdef XMLNS
/* Namespace declaration: prefix symbol `ns' bound to `url' */
typedef int (*xmlns_f)(dtd_parser_p parser,
		       dtd_symbol *ns, dtd_symbol *url);
#endif


		 /*******************************
		 *	 PARSER AND STATES	*
		 *******************************/

/* Value kept in dtd_parser.magic to identify a parser structure */
#define SGML_PARSER_MAGIC	(0x834ab663)

/* States of the character-level scanner.  The numeric values follow
   declaration order starting at 0; note that S_UTF8 only exists (and
   shifts all later values by one) when UTF8 is defined. */
typedef enum
{ S_PCDATA = 0,				/* in between declarations */
#ifdef UTF8
  S_UTF8,				/* reading a UTF-8 character */
#endif
  S_CDATA,				/* data that is not parsed */
  S_RCDATA,				/* CDATA with entities */
  S_MSCDATA,				/* in <![CDATA[...]]> */
  S_EMSCDATA1,				/* got ] while in S_MSCDATA */
  S_EMSCDATA2,				/* got ]] while in S_MSCDATA */
  S_ECDATA1,				/* got < while in CDATA */
  S_ECDATA2,				/* got </ while in CDATA */
  S_EMSC1,				/* got ] in a marked section */
  S_EMSC2,				/* got ]] in a marked section */
  S_PI,					/* got <? */
  S_PI2,				/* got <?...? */
  S_DECL0,				/* got < */
  S_DECL,				/* in a declaration */
  S_MDECL0,				/* got <! */
  S_STRING,				/* in a "string" or 'string' */
  S_DECLCMT0,				/* got <...- */
  S_DECLCMT,				/* got <...-- */
  S_DECLCMTE0,				/* got <...--..- */
  S_CMTO,				/* got <!- */
  S_CMT1,				/* got <!-- */
  S_CMT,				/* got <!--X... */
  S_CMTE0,				/* got <!--...- */
  S_CMTE1,				/* got <!--...-- */
  S_GROUP,				/* in [...] */
  S_PENT,				/* got % */
  S_ENT0,				/* got & */
  S_ENT,				/* got &(#|\w) */
  S_ENTCR				/* got &entity<CR> */
} dtdstate;


/* Kind of declaration being handled */
typedef enum
{ DCL_DTD   = 0,			/* declaration in the DTD */
  DCL_BEGIN = 1,			/* a begin-tag */
  DCL_END   = 2				/* an end-tag */
} dcl_type;


/* Processing type of a marked section */
typedef enum
{ MS_IGNORE  = 0,			/* data is ignored */
  MS_INCLUDE = 1,			/* data is processed normally */
  MS_CDATA   = 2,			/* data is passed literally */
  MS_RCDATA  = 3			/* entities are replaced */
} marktype;


/* Origin of an event; stored in dtd_parser.event_class */
typedef enum
{ EV_EXPLICIT = 0,			/* event is explicit */
  EV_OMITTED  = 1,			/* event from an omitted tag */
  EV_SHORTTAG = 2,			/* event from SHORTTAG: <tag/value/ */
  EV_SHORTREF = 3			/* event from SHORTREF */
} sgml_event_class;


/* One entry of the marked-section stack (see dtd_parser.marked);
   entries are chained to the enclosing section through `parent'. */
typedef struct _dtd_marked
{ dtd_symbol *keyword;			/* keyword of the marked section */
  marktype	type;			/* processing type */
  struct _dtd_marked *parent;		/* parent marked section */
} dtd_marked;


/* Character handling mode; stored in dtd_parser.dmode */
typedef enum
{ DM_DTD  = 0,				/* DTD mode: no data allowed (?) */
  DM_DATA = 1				/* Environment has only elements */
} data_mode;

/* Selects what to do when a namespace does not exist (type of
   dtd_parser.xml_no_ns).

   This typedef is deliberately NOT wrapped in #ifdef XMLNS: dtd_parser
   declares its xml_no_ns member unconditionally, so the type must be
   visible in builds without XMLNS as well or the header fails to
   compile.  With XMLNS defined nothing changes. */
typedef enum
{ NONS_ERROR = 0,			/* presumably: report it -- confirm */
  NONS_QUIET				/* presumably: stay silent -- confirm */
} xmlnons;

/* State kept for one open element.  Environments are linked to the
   enclosing one through `parent'; dtd_parser.environments refers to
   the open environments. */
typedef struct _sgml_environment
{ dtd_element *element;			/* element that opened the env */
  struct _dtd_state *state;		/* State we are in */
#ifdef XMLNS
  struct _xmlns *xmlns;			/* XML namespace */
  struct _xmlns *thisns;		/* Name space of element */
#endif
#ifdef XMLBASE
  ichar *uri_base;			/* xml:base handling */
#endif
  dtd_space_mode space_mode;		/* How to handle blanks */
  dtd_shortref *map;			/* SHORTREF map */
  struct _sgml_environment *parent;	/* Parent environment */
  int	wants_net;			/* I want a net */
  int	saved_waiting_for_net;		/* saved value of waiting for net */
} sgml_environment;

					/* parser->flags */
#define SGML_PARSER_NODEFS	 0x01	/* don't handle default atts */
#define SGML_PARSER_QUALIFY_ATTS 0x02	/* qualify attributes in XML mode */

/* Complete state of one parser instance: the DTD being built, the
   scanner state, marked-section and environment stacks, buffers,
   source locations and the client call-backs.  The layout depends on
   the UTF8 and XMLNS macros, so every translation unit sharing a
   parser must be compiled with the same settings for them. */
typedef struct _dtd_parser
{ unsigned long magic;			/* SGML_PARSER_MAGIC */
  dtd     *dtd;				/* DTD we are building */
  dtdstate state;			/* current state */
  dtdstate cdata_state;			/* S_CDATA/S_RCDATA */
  dtd_marked *marked;			/* marked section stack */
  marktype mark_state;			/* processing mode */
  dtd_element *empty_element;		/* empty of <tag/> seen */
  sgml_environment *environments;	/* Open environments */
  data_mode dmode;			/* How to handle characters */
  int	   first;			/* Just seen <tag> */
  int	   waiting_for_net;		/* waiting for / in <shorttag/mode/ */
  icharbuf *buffer;			/* buffer for temp data */
  ocharbuf *cdata;			/* collected character data */
  int	   blank_cdata;			/* CDATA is all blank */
  int	   cdata_must_be_empty;		/* Only shortrefs allowed here */
  const ichar *etag;			/* name of end-tag in CDATA */
  int	   etaglen;			/* length of end-tag */
  int	   grouplevel;			/* [..] level in declaration */
  int	   saved;			/* saved character */
  dtdstate lit_saved_state;		/* literal saved-state */
  int      encoded;			/* TRUE for binary input */
  dtd_shortref *map;			/* SHORTREF map */
#ifdef UTF8
  int	   utf8_decode;			/* decode UTF-8 sequences? */
  int      utf8_char;			/* building character */
  int	   utf8_left;			/* bytes left */
  dtdstate utf8_saved_state;		/* state from which we come */
#endif
  dtd_srcloc	location;		/* Current location */
  dtd_srcloc	startloc;		/* Start of last markup */
  dtd_srcloc	startcdata;		/* Start of last cdata */
  dtd_symbol   *enforce_outer_element;	/* Outer element to look for */
  sgml_event_class event_class;		/* EV_* */
  xmlnons	xml_no_ns;		/* What if namespace does not exist? */
#ifdef XMLNS
  struct _xmlns *xmlns;			/* Outer xmlns declaration */
#endif

					/* client side: handle + call-backs */
  void *closure;			/* client handle */
  sgml_begin_element_f	on_begin_element; /* start an element */
  sgml_end_element_f	on_end_element;	/* end an element */
  sgml_data_f		on_data;	/* process cdata */
  sgml_entity_f		on_entity;	/* unprocessed entity */
  sgml_pi_f		on_pi;		/* processing instruction */
  sgml_error_f		on_error;	/* handle error */
  sgml_decl_f		on_decl;	/* handle declarations */
#ifdef XMLNS
  xmlns_f		on_xmlns;	/* handle new namespace */
#endif
  unsigned		flags;		/* misc flags (SGML_PARSER_*) */
} dtd_parser;


/* Pulled in only after dtd_parser is complete.
   NOTE(review): presumably xmlns.h needs the types declared above --
   confirm before moving this include. */
#ifdef XMLNS
#include "xmlns.h"
#endif

/* Report error `e' for parser `p'; defined elsewhere.  Variadic:
   NOTE(review): the extra arguments presumably depend on `e' -- see
   the implementation for what each dtd_error_id expects. */
extern int		gripe(dtd_parser *p, dtd_error_id e, ...);

/* NOTE(review): not referenced in this header; purpose must be taken
   from its users. */
#define SGML_SUB_DOCUMENT	0x1


#endif /*SGML_PARSER_H_INCLUDED*/