/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        jan@swi.psy.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2002, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#ifndef DTD_H_INCLUDED
#define DTD_H_INCLUDED
#include "sgmldefs.h"

#define CH_WHITE	0x0001
#define CH_LCLETTER	0x0002
#define CH_UCLETTER	0x0004
#define CH_CNMSTRT	0x0008		/* may start a name */
#define CH_CNM		0x0010		/* may be in a name */
#define CH_DIGIT	0x0020
#define CH_RE		0x0040
#define CH_RS		0x0080

#define CH_LETTER	(CH_LCLETTER|CH_UCLETTER)
#define CH_NMSTART	(CH_LCLETTER|CH_UCLETTER|CH_CNMSTRT)
#define CH_NAME		(CH_NMSTART|CH_DIGIT|CH_CNM)
#define CH_BLANK	(CH_WHITE|CH_RE|CH_RS)

#define CHR_BLANK	0x1		/* SHORTREF 'B' */
#define CHR_DBLANK	0x2		/* SHORTREF 'BB' */

#define SGML_DTD_MAGIC	0x7364573

typedef enum
{ CF_STAGO = 0,				/* < */
  CF_STAGC,				/* > */
  CF_ETAGO1,				/* < */
  CF_ETAGO2,				/* / */
  CF_VI,				/* = */
  CF_NS,				/* : (XMLNS) */
  CF_LIT,				/* " */
  CF_LITA,				/* ' */
  CF_PERO,				/* % */
  CF_ERO,				/* & */
  CF_ERC,				/* ; */
  CF_MDO1,				/* < */
  CF_MDO2,				/* ! (MDO=<!) */
  CF_MDC,				/* > */
  CF_PRO1,				/* < */
  CF_PRO2,				/* ? (PRO=<?) */
  CF_PRC,				/* > */
  CF_GRPO,				/* ( */
  CF_GRPC,				/* ) */
  CF_SEQ,				/* , */
  CF_AND,				/* & */
  CF_OR,				/* | */
  CF_OPT,				/* ? */
  CF_PLUS,				/* + */
  CF_DSO,				/* [ */
  CF_DSC,				/* ] */
  CF_REP,				/* * */
  CF_RS,				/* \n */
  CF_RE,				/* \r */
  CF_CMT,				/* - */
  CF_NG,				/* , or & or | */
  CF_ENDTABLE				/* to find size */
} charfunc;				/* function of characters */

typedef enum
{ SGML_ENC_ISO_LATIN1 = 0,		/* ISO Latin-1 */
  SGML_ENC_UTF8				/* Multi-byte UTF-8 encoding */
} dtd_char_encoding;

typedef enum
{ C_CDATA,				/* pure cdata */
  C_PCDATA,				/* parsed character data */
  C_RCDATA,				/* pure cdata + entities */
  C_EMPTY,				/* empy element */
  C_ANY					/* element may contain anything */
} contenttype;

typedef enum
{ MC_ONE,				/* one time */
  MC_OPT,				/* optional element (?) */
  MC_REP,				/* any times (*) */
  MC_PLUS				/* one-or-more (+) */
} modelcard;

typedef enum
{ MT_UNDEF = 0,				/* undefined */
  MT_PCDATA,				/* Contains PCDATA */
  MT_ELEMENT,				/* refers to element */
  MT_SEQ,				/* Sequence (,) */
  MT_AND,				/* Ony order (&) */
  MT_OR					/* Disjunction (|) */
} modeltype;

typedef enum
{ AT_CDATA,				/* CDATA attribute */
  AT_ENTITY,				/* entity-name */
  AT_ENTITIES,				/* entity-name list */
  AT_ID,				/* identifier */
  AT_IDREF,				/* identifier reference */
  AT_IDREFS,				/* list of identifier references */
  AT_NAME,				/* name token */
  AT_NAMES,				/* list of names */
  AT_NAMEOF,				/* one of these names */
  AT_NMTOKEN,				/* name-token */
  AT_NMTOKENS,				/* name-token list */
  AT_NOTATION,				/* notation-name */
  AT_NUMBER,				/* number */
  AT_NUMBERS,				/* number list */
  AT_NUTOKEN,				/* number token */
  AT_NUTOKENS				/* number token list */
} attrtype;

typedef enum
{ AT_FIXED,				/* fixed value */
  AT_REQUIRED,				/* Required attribute */
  AT_CURRENT,				/* most recent value */
  AT_CONREF,				/* cross-reference */
  AT_IMPLIED,				/* Implied attribute */
  AT_DEFAULT				/* has default */
} attrdef;


typedef enum
{ ET_SYSTEM,				/* System (file) entity */
  ET_PUBLIC,				/* Public (external) entity */
  ET_LITERAL				/* Literal text */
} entity_type;


typedef enum
{ EC_SGML,				/* SGML data */
  EC_STARTTAG,				/* SGML start-tag */
  EC_ENDTAG,				/* SGML end-tag */
  EC_CDATA,				/* CDATA entity */
  EC_SDATA,				/* SDATA entity */
  EC_NDATA,				/* non-sgml data */
  EC_PI					/* Programming instruction */
} data_type;


typedef enum
{ DL_SGML,				/* Use SGML */
  DL_XML,				/* Use XML */
  DL_XMLNS				/* Use XML + Namespaces */
} dtd_dialect;


typedef enum
{ OPT_SHORTTAG				/* do/don't accept shorttag */
} dtd_option;


typedef enum
{ SP_PRESERVE = 0,			/* Preserve all white-space */
  SP_DEFAULT,				/* Default space handling */
  SP_REMOVE,				/* Remove all blank CDATA elements */
  SP_SGML,				/* Compliant SGML mode */
  SP_INHERIT				/* DTD: inherit from environment */
} dtd_space_mode;


typedef enum
{ NU_TOKEN,				/* Treat numbers as tokens */
  NU_INTEGER				/* Convert to integer */
} dtd_number_mode;


		 /*******************************
		 *	      ERRORS		*
		 *******************************/

#ifdef DTD_IMPLEMENTATION
#define DTD_MINOR_ERRORS 1
#endif

typedef enum
{ ERS_WARNING,				/* probably correct result */
  ERS_ERROR,				/* probably incrorrect result */
  ERS_STYLE				/* dubious/bad style; correct result */
} dtd_error_severity;


typedef enum
{ ERC_REPRESENTATION,			/* Internal limit */
	/* id */
  ERC_RESOURCE,				/* external limit */
	/* id */
  ERC_LIMIT,				/* Exceeded SGML limit */
	/* id */
  ERC_VALIDATE,				/* DTD Validation */
	/* Message */
  ERC_SYNTAX_ERROR,			/* Syntax error */
	/* Message, found */
  ERC_EXISTENCE,			/* Existence error */
	/* Type, name */
  ERC_REDEFINED				/* Redefined object */
	/* Type, name */
#ifdef DTD_MINOR_ERRORS
  ,					/* reopen list */
  ERC_SYNTAX_WARNING,			/* Syntax warning (i.e. fixed) */
	/* Message, found */
  ERC_DOMAIN,				/* Relative to declared type */
	/* Type, found */
  ERC_OMITTED_CLOSE,
	/* Element */
  ERC_OMITTED_OPEN,
	/* Element */
  ERC_NOT_OPEN,
	/* Element */
  ERC_NOT_ALLOWED,
	/* Element */
  ERC_NOT_ALLOWED_PCDATA,
        /* Text */
  ERC_NO_ATTRIBUTE,
	/* Element, Attribute */
  ERC_NO_ATTRIBUTE_VALUE,
	/* Element, Value */
  ERC_NO_VALUE,
	/* Entity */
  ERC_NO_DOCTYPE,
        /* Implicit, file */
  ERC_NO_CATALOGUE
	/* file */
#endif
} dtd_error_id;


typedef enum
{ IN_NONE,				/* unspecified input */
  IN_FILE,				/* input from file */
  IN_ENTITY				/* input from entity */
} input_type;


typedef struct _dtd_srcloc
{ input_type  type;			/* type of input */
  union
  { const ichar *file;			/* name of the file */
    const ichar *entity;		/* name of entity */
  } name;
  int	      line;			/* 1-based Line no */
  int	      linepos;			/* 1-based char  */
  long	      charpos;			/* 0-based file char  */
  struct _dtd_srcloc *parent;		/* parent location */
} dtd_srcloc;


typedef struct _dtd_error
{ dtd_error_id id;			/* ERC_* identifier */
  dtd_error_id minor;			/* Minor code */
  dtd_error_severity severity;		/* ERS_* severity */
  dtd_srcloc *location;			/* location of the error */
  wchar_t *plain_message;		/* Clean message */
  wchar_t *message;			/* complete message */
					/* (Warning: file:line: <plain>) */
  wchar_t *argv[2];			/* context arguments */
} dtd_error;


		 /*******************************
		 *	     DTD TYPES		*
		 *******************************/

typedef struct _dtd_symbol
{ const ichar *name;			/* name of the atom */
  struct _dtd_symbol *next;		/* next in atom list */
  struct _dtd_element *element;		/* connected element (if any) */
  struct _dtd_entity  *entity;		/* connected entity (if any) */
} dtd_symbol;


typedef struct _dtd_symbol_table
{ int		size;			/* Allocated size */
  dtd_symbol  **entries;		/* Entries */
} dtd_symbol_table;


typedef struct _dtd_entity
{ dtd_symbol *name;			/* its name */
  entity_type type;			/* ET_* */
  data_type content;			/* EC_* */
  int catalog_location;			/* what catalog to use for lookup */
  int length;				/* size of literal value */
  ichar *value;				/* literal value */
  ichar *extid;				/* external identifier */
  ichar *exturl;			/* url to fetch from */
  ichar *baseurl;			/* base url for exturl */
  struct _dtd_entity *next;		/* list-link */
} dtd_entity;


typedef struct _dtd_notation
{ dtd_symbol *name;			/* name of the notation */
  entity_type type;			/* ET_{PUBLIC|SYSTEM} */
  ichar *public;			/* public id */
  ichar *system;			/* file with info */
  struct _dtd_notation *next;		/* list-link */
} dtd_notation;


typedef struct _dtd_element_list
{ struct _dtd_element *value;		/* element */
  struct _dtd_element_list *next;	/* next in list */
} dtd_element_list;


typedef struct _dtd_name_list
{ dtd_symbol	*value;
  struct _dtd_name_list *next;
} dtd_name_list;


typedef struct _dtd_attr
{ dtd_symbol  *name;			/* name of attribute */
  attrtype type;			/* type (AT_*) */
  attrdef  def;				/* AT_REQUIRED/AT_IMPLIED */
  int islist;				/* attribute is a list */
  union
  { dtd_name_list *nameof;		/* (name1|name2|...) */
  } typeex;
  union
  { ichar *cdata;			/* default for CDATA */
    ichar *list;			/* text for list-data */
    dtd_symbol *name;			/* AT_NAME or AT_NAMEOF */
    long number;			/* AT_NUMBER */
  } att_def;
  int references;			/* reference count */
} dtd_attr;


typedef struct _dtd_attr_list
{ dtd_attr	*attribute;
  struct _dtd_attr_list *next;
} dtd_attr_list;


typedef struct _dtd_model
{ modeltype type;			/* MT_* */
  modelcard cardinality;		/* MC_* */

  union
  { struct _dtd_model *group;		/* ,/|/& group */
    struct _dtd_element *element;	/* element */
  } content;
  struct _dtd_model *next;		/* next in list (for groups) */
} dtd_model;


typedef struct _dtd_edef
{ contenttype	type;			/* EMPTY, MIXED, ... */
  int		omit_open;		/* allow omitted open tag? */
  int		omit_close;		/* allow omitted close tag? */
  dtd_model	*content;		/* the content model */
  dtd_element_list *included;		/* +(namegroup) */
  dtd_element_list *excluded;		/* -(namegroup) */
  struct _dtd_state *initial_state;	/* Initial state in state engine */
  struct _dtd_state *final_state;	/* Final state in state engine */
  int		references;		/* #elements using this def */
} dtd_edef;


typedef struct _dtd_map
{ ichar	       *from;			/* mapped text */
  int		len;			/* length of mapped text */
  dtd_symbol   *to;			/* name of symbol mapped onto */
  struct _dtd_map *next;		/* next in shortref map */
} dtd_map;


typedef struct _dtd_shortref
{ dtd_symbol	*name;			/* name of SHORTREF map */
  dtd_map	*map;			/* implemented map */
  char		ends[SHORTMAP_SIZE];	/* ending-characters in map */
  int		defined;		/* has been defined */
  struct _dtd_shortref *next;		/* next declared shortref */
} dtd_shortref;


typedef struct _dtd_element
{ dtd_symbol	*name;			/* its name */
  dtd_edef	*structure;		/* content structure of the element */
  dtd_attr_list *attributes;		/* defined attributes */
  dtd_space_mode space_mode;		/* How to handle white-space (SP_*) */
  dtd_shortref	*map;			/* SHORTREF map */
  int		undefined;		/* Only implicitely defined */
  struct _dtd_element *next;		/* in DTD'e element list */
} dtd_element;


typedef struct _dtd_charclass
{ unsigned char	class[INPUT_CHARSET_SIZE]; /* ichar --> class-mask */
} dtd_charclass;


typedef struct _dtd_charfunc
{ ichar func[(int)CF_ENDTABLE];		/* CF_ --> ichar */
} dtd_charfunc;


typedef struct _dtd
{ int		        magic;		/* SGML_DTD_MAGIC */
  int			implicit;	/* There is no DTD */
  dtd_dialect		dialect;	/* DL_* */
  int			case_sensitive;	/* Tags are case-sensitive */
  int			ent_case_sensitive; /* Entities are case-sensitive */
  ichar		       *doctype;	/* defined document type */
  dtd_symbol_table     *symbols;	/* symbol-table */
  dtd_entity           *pentities;	/* defined parameter entities */
  dtd_entity	       *entities;	/* defined entities */
  dtd_entity	       *default_entity;	/* default-entity (if any) */
  dtd_notation	       *notations;	/* Declared notations */
  dtd_shortref	       *shortrefs;	/* SHORTREF declarations */
  dtd_element          *elements;	/* defined elements */
  dtd_charfunc	       *charfunc;	/* CF_ --> ichar */
  dtd_charclass	       *charclass;	/* ichar -> CH_-mask */
  dtd_char_encoding	encoding;	/* document encoding */
  dtd_space_mode	space_mode;	/* Default for handling white-space */
  dtd_number_mode	number_mode;	/* How to treat number attributes */
  int			shorttag;	/* support SHORTTAG */
  int			references;	/* destruction reference count */
} dtd;

extern dtd_charfunc *new_charfunc(void);   /* default classification */
extern dtd_charclass *new_charclass(void); /* default classification */

extern dtd_symbol*	dtd_find_symbol(dtd *dtd, const ichar *name);
extern dtd_symbol*	dtd_add_symbol(dtd *dtd, const ichar *name);


		 /*******************************
		 *	       PUBLIC		*
		 *******************************/

#include "parser.h"

dtd *		file_to_dtd(const ichar *file, const ichar *doctype,
			    dtd_dialect dialect);
int		sgml_process_file(dtd_parser *p,
				  const ichar *file, unsigned flags);
int		sgml_process_stream(dtd_parser *p, FILE *in,
				    unsigned flags);
dtd_parser *	new_dtd_parser(dtd *dtd);
void		free_dtd_parser(dtd_parser *p);

void		free_dtd(dtd *dtd);
int		load_dtd_from_file(dtd_parser *p, const ichar *file);
dtd *		new_dtd(const ichar *doctype);
int		set_dialect_dtd(dtd *dtd, dtd_dialect dialect);
int		set_option_dtd(dtd *dtd, dtd_option option, int set);

int		putchar_dtd_parser(dtd_parser *p, int chr);
int		begin_document_dtd_parser(dtd_parser *p);
int		end_document_dtd_parser(dtd_parser *p);
void		reset_document_dtd_parser(dtd_parser *p);
void		set_file_dtd_parser(dtd_parser *p,
				    input_type in, const ichar *file);
void		set_mode_dtd_parser(dtd_parser *p, data_mode mode);
void		sgml_cplocation(dtd_srcloc *dst, dtd_srcloc *src);
int		xml_set_encoding(dtd_parser *p, const char *enc);

#endif /*DTD_H_INCLUDED*/