/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker E-mail: jan@swi.psy.uva.nl WWW: http://www.swi-prolog.org Copyright (C): 1985-2002, University of Amsterdam This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef DTD_H_INCLUDED #define DTD_H_INCLUDED #include "sgmldefs.h" #define CH_WHITE 0x0001 #define CH_LCLETTER 0x0002 #define CH_UCLETTER 0x0004 #define CH_CNMSTRT 0x0008 /* may start a name */ #define CH_CNM 0x0010 /* may be in a name */ #define CH_DIGIT 0x0020 #define CH_RE 0x0040 #define CH_RS 0x0080 #define CH_LETTER (CH_LCLETTER|CH_UCLETTER) #define CH_NMSTART (CH_LCLETTER|CH_UCLETTER|CH_CNMSTRT) #define CH_NAME (CH_NMSTART|CH_DIGIT|CH_CNM) #define CH_BLANK (CH_WHITE|CH_RE|CH_RS) #define CHR_BLANK 0x1 /* SHORTREF 'B' */ #define CHR_DBLANK 0x2 /* SHORTREF 'BB' */ #define SGML_DTD_MAGIC 0x7364573 typedef enum { CF_STAGO = 0, /* < */ CF_STAGC, /* > */ CF_ETAGO1, /* < */ CF_ETAGO2, /* / */ CF_VI, /* = */ CF_NS, /* : (XMLNS) */ CF_LIT, /* " */ CF_LITA, /* ' */ CF_PERO, /* % */ CF_ERO, /* & */ CF_ERC, /* ; */ CF_MDO1, /* < */ CF_MDO2, /* ! (MDO=<!) */ CF_MDC, /* > */ CF_PRO1, /* < */ CF_PRO2, /* ? (PRO=<?) */ CF_PRC, /* > */ CF_GRPO, /* ( */ CF_GRPC, /* ) */ CF_SEQ, /* , */ CF_AND, /* & */ CF_OR, /* | */ CF_OPT, /* ? */ CF_PLUS, /* + */ CF_DSO, /* [ */ CF_DSC, /* ] */ CF_REP, /* * */ CF_RS, /* \n */ CF_RE, /* \r */ CF_CMT, /* - */ CF_NG, /* , or & or | */ CF_ENDTABLE /* to find size */ } charfunc; /* function of characters */ typedef enum { SGML_ENC_ISO_LATIN1 = 0, /* ISO Latin-1 */ SGML_ENC_UTF8 /* Multi-byte UTF-8 encoding */ } dtd_char_encoding; typedef enum { C_CDATA, /* pure cdata */ C_PCDATA, /* parsed character data */ C_RCDATA, /* pure cdata + entities */ C_EMPTY, /* empy element */ C_ANY /* element may contain anything */ } contenttype; typedef enum { MC_ONE, /* one time */ MC_OPT, /* optional element (?) */ MC_REP, /* any times (*) */ MC_PLUS /* one-or-more (+) */ } modelcard; typedef enum { MT_UNDEF = 0, /* undefined */ MT_PCDATA, /* Contains PCDATA */ MT_ELEMENT, /* refers to element */ MT_SEQ, /* Sequence (,) */ MT_AND, /* Ony order (&) */ MT_OR /* Disjunction (|) */ } modeltype; typedef enum { AT_CDATA, /* CDATA attribute */ AT_ENTITY, /* entity-name */ AT_ENTITIES, /* entity-name list */ AT_ID, /* identifier */ AT_IDREF, /* identifier reference */ AT_IDREFS, /* list of identifier references */ AT_NAME, /* name token */ AT_NAMES, /* list of names */ AT_NAMEOF, /* one of these names */ AT_NMTOKEN, /* name-token */ AT_NMTOKENS, /* name-token list */ AT_NOTATION, /* notation-name */ AT_NUMBER, /* number */ AT_NUMBERS, /* number list */ AT_NUTOKEN, /* number token */ AT_NUTOKENS /* number token list */ } attrtype; typedef enum { AT_FIXED, /* fixed value */ AT_REQUIRED, /* Required attribute */ AT_CURRENT, /* most recent value */ AT_CONREF, /* cross-reference */ AT_IMPLIED, /* Implied attribute */ AT_DEFAULT /* has default */ } attrdef; typedef enum { ET_SYSTEM, /* System (file) entity */ ET_PUBLIC, /* Public (external) entity */ ET_LITERAL /* Literal text */ } entity_type; typedef enum { EC_SGML, /* SGML data */ EC_STARTTAG, /* SGML start-tag */ EC_ENDTAG, /* SGML end-tag */ EC_CDATA, /* CDATA entity */ EC_SDATA, /* SDATA entity */ EC_NDATA, /* non-sgml data */ EC_PI /* Programming instruction */ } data_type; typedef enum { DL_SGML, /* Use SGML */ DL_XML, /* Use XML */ DL_XMLNS /* Use XML + Namespaces */ } dtd_dialect; typedef enum { OPT_SHORTTAG /* do/don't accept shorttag */ } dtd_option; typedef enum { SP_PRESERVE = 0, /* Preserve all white-space */ SP_DEFAULT, /* Default space handling */ SP_REMOVE, /* Remove all blank CDATA elements */ SP_SGML, /* Compliant SGML mode */ SP_INHERIT /* DTD: inherit from environment */ } dtd_space_mode; typedef enum { NU_TOKEN, /* Treat numbers as tokens */ NU_INTEGER /* Convert to integer */ } dtd_number_mode; /******************************* * ERRORS * *******************************/ #ifdef DTD_IMPLEMENTATION #define DTD_MINOR_ERRORS 1 #endif typedef enum { ERS_WARNING, /* probably correct result */ ERS_ERROR, /* probably incrorrect result */ ERS_STYLE /* dubious/bad style; correct result */ } dtd_error_severity; typedef enum { ERC_REPRESENTATION, /* Internal limit */ /* id */ ERC_RESOURCE, /* external limit */ /* id */ ERC_LIMIT, /* Exceeded SGML limit */ /* id */ ERC_VALIDATE, /* DTD Validation */ /* Message */ ERC_SYNTAX_ERROR, /* Syntax error */ /* Message, found */ ERC_EXISTENCE, /* Existence error */ /* Type, name */ ERC_REDEFINED /* Redefined object */ /* Type, name */ #ifdef DTD_MINOR_ERRORS , /* reopen list */ ERC_SYNTAX_WARNING, /* Syntax warning (i.e. fixed) */ /* Message, found */ ERC_DOMAIN, /* Relative to declared type */ /* Type, found */ ERC_OMITTED_CLOSE, /* Element */ ERC_OMITTED_OPEN, /* Element */ ERC_NOT_OPEN, /* Element */ ERC_NOT_ALLOWED, /* Element */ ERC_NOT_ALLOWED_PCDATA, /* Text */ ERC_NO_ATTRIBUTE, /* Element, Attribute */ ERC_NO_ATTRIBUTE_VALUE, /* Element, Value */ ERC_NO_VALUE, /* Entity */ ERC_NO_DOCTYPE, /* Implicit, file */ ERC_NO_CATALOGUE /* file */ #endif } dtd_error_id; typedef enum { IN_NONE, /* unspecified input */ IN_FILE, /* input from file */ IN_ENTITY /* input from entity */ } input_type; typedef struct _dtd_srcloc { input_type type; /* type of input */ union { const ichar *file; /* name of the file */ const ichar *entity; /* name of entity */ } name; int line; /* 1-based Line no */ int linepos; /* 1-based char */ long charpos; /* 0-based file char */ struct _dtd_srcloc *parent; /* parent location */ } dtd_srcloc; typedef struct _dtd_error { dtd_error_id id; /* ERC_* identifier */ dtd_error_id minor; /* Minor code */ dtd_error_severity severity; /* ERS_* severity */ dtd_srcloc *location; /* location of the error */ wchar_t *plain_message; /* Clean message */ wchar_t *message; /* complete message */ /* (Warning: file:line: <plain>) */ wchar_t *argv[2]; /* context arguments */ } dtd_error; /******************************* * DTD TYPES * *******************************/ typedef struct _dtd_symbol { const ichar *name; /* name of the atom */ struct _dtd_symbol *next; /* next in atom list */ struct _dtd_element *element; /* connected element (if any) */ struct _dtd_entity *entity; /* connected entity (if any) */ } dtd_symbol; typedef struct _dtd_symbol_table { int size; /* Allocated size */ dtd_symbol **entries; /* Entries */ } dtd_symbol_table; typedef struct _dtd_entity { dtd_symbol *name; /* its name */ entity_type type; /* ET_* */ data_type content; /* EC_* */ int catalog_location; /* what catalog to use for lookup */ int length; /* size of literal value */ ichar *value; /* literal value */ ichar *extid; /* external identifier */ ichar *exturl; /* url to fetch from */ ichar *baseurl; /* base url for exturl */ struct _dtd_entity *next; /* list-link */ } dtd_entity; typedef struct _dtd_notation { dtd_symbol *name; /* name of the notation */ entity_type type; /* ET_{PUBLIC|SYSTEM} */ ichar *public; /* public id */ ichar *system; /* file with info */ struct _dtd_notation *next; /* list-link */ } dtd_notation; typedef struct _dtd_element_list { struct _dtd_element *value; /* element */ struct _dtd_element_list *next; /* next in list */ } dtd_element_list; typedef struct _dtd_name_list { dtd_symbol *value; struct _dtd_name_list *next; } dtd_name_list; typedef struct _dtd_attr { dtd_symbol *name; /* name of attribute */ attrtype type; /* type (AT_*) */ attrdef def; /* AT_REQUIRED/AT_IMPLIED */ int islist; /* attribute is a list */ union { dtd_name_list *nameof; /* (name1|name2|...) */ } typeex; union { ichar *cdata; /* default for CDATA */ ichar *list; /* text for list-data */ dtd_symbol *name; /* AT_NAME or AT_NAMEOF */ long number; /* AT_NUMBER */ } att_def; int references; /* reference count */ } dtd_attr; typedef struct _dtd_attr_list { dtd_attr *attribute; struct _dtd_attr_list *next; } dtd_attr_list; typedef struct _dtd_model { modeltype type; /* MT_* */ modelcard cardinality; /* MC_* */ union { struct _dtd_model *group; /* ,/|/& group */ struct _dtd_element *element; /* element */ } content; struct _dtd_model *next; /* next in list (for groups) */ } dtd_model; typedef struct _dtd_edef { contenttype type; /* EMPTY, MIXED, ... */ int omit_open; /* allow omitted open tag? */ int omit_close; /* allow omitted close tag? */ dtd_model *content; /* the content model */ dtd_element_list *included; /* +(namegroup) */ dtd_element_list *excluded; /* -(namegroup) */ struct _dtd_state *initial_state; /* Initial state in state engine */ struct _dtd_state *final_state; /* Final state in state engine */ int references; /* #elements using this def */ } dtd_edef; typedef struct _dtd_map { ichar *from; /* mapped text */ int len; /* length of mapped text */ dtd_symbol *to; /* name of symbol mapped onto */ struct _dtd_map *next; /* next in shortref map */ } dtd_map; typedef struct _dtd_shortref { dtd_symbol *name; /* name of SHORTREF map */ dtd_map *map; /* implemented map */ char ends[SHORTMAP_SIZE]; /* ending-characters in map */ int defined; /* has been defined */ struct _dtd_shortref *next; /* next declared shortref */ } dtd_shortref; typedef struct _dtd_element { dtd_symbol *name; /* its name */ dtd_edef *structure; /* content structure of the element */ dtd_attr_list *attributes; /* defined attributes */ dtd_space_mode space_mode; /* How to handle white-space (SP_*) */ dtd_shortref *map; /* SHORTREF map */ int undefined; /* Only implicitely defined */ struct _dtd_element *next; /* in DTD'e element list */ } dtd_element; typedef struct _dtd_charclass { unsigned char class[INPUT_CHARSET_SIZE]; /* ichar --> class-mask */ } dtd_charclass; typedef struct _dtd_charfunc { ichar func[(int)CF_ENDTABLE]; /* CF_ --> ichar */ } dtd_charfunc; typedef struct _dtd { int magic; /* SGML_DTD_MAGIC */ int implicit; /* There is no DTD */ dtd_dialect dialect; /* DL_* */ int case_sensitive; /* Tags are case-sensitive */ int ent_case_sensitive; /* Entities are case-sensitive */ ichar *doctype; /* defined document type */ dtd_symbol_table *symbols; /* symbol-table */ dtd_entity *pentities; /* defined parameter entities */ dtd_entity *entities; /* defined entities */ dtd_entity *default_entity; /* default-entity (if any) */ dtd_notation *notations; /* Declared notations */ dtd_shortref *shortrefs; /* SHORTREF declarations */ dtd_element *elements; /* defined elements */ dtd_charfunc *charfunc; /* CF_ --> ichar */ dtd_charclass *charclass; /* ichar -> CH_-mask */ dtd_char_encoding encoding; /* document encoding */ dtd_space_mode space_mode; /* Default for handling white-space */ dtd_number_mode number_mode; /* How to treat number attributes */ int shorttag; /* support SHORTTAG */ int references; /* destruction reference count */ } dtd; extern dtd_charfunc *new_charfunc(void); /* default classification */ extern dtd_charclass *new_charclass(void); /* default classification */ extern dtd_symbol* dtd_find_symbol(dtd *dtd, const ichar *name); extern dtd_symbol* dtd_add_symbol(dtd *dtd, const ichar *name); /******************************* * PUBLIC * *******************************/ #include "parser.h" dtd * file_to_dtd(const ichar *file, const ichar *doctype, dtd_dialect dialect); int sgml_process_file(dtd_parser *p, const ichar *file, unsigned flags); int sgml_process_stream(dtd_parser *p, FILE *in, unsigned flags); dtd_parser * new_dtd_parser(dtd *dtd); void free_dtd_parser(dtd_parser *p); void free_dtd(dtd *dtd); int load_dtd_from_file(dtd_parser *p, const ichar *file); dtd * new_dtd(const ichar *doctype); int set_dialect_dtd(dtd *dtd, dtd_dialect dialect); int set_option_dtd(dtd *dtd, dtd_option option, int set); void putchar_dtd_parser(dtd_parser *p, int chr); int begin_document_dtd_parser(dtd_parser *p); int end_document_dtd_parser(dtd_parser *p); void reset_document_dtd_parser(dtd_parser *p); void set_file_dtd_parser(dtd_parser *p, input_type in, const ichar *file); void set_mode_dtd_parser(dtd_parser *p, data_mode mode); void sgml_cplocation(dtd_srcloc *dst, dtd_srcloc *src); int xml_set_encoding(dtd_parser *p, const char *enc); #endif /*DTD_H_INCLUDED*/