d6a06fe092
as possible.
2402 lines
57 KiB
C
2402 lines
57 KiB
C
/* $Id$
|
|
|
|
Part of SWI-Prolog
|
|
|
|
Author: Jan Wielemaker
|
|
E-mail: wielemak@science.uva.nl
|
|
WWW: http://www.swi-prolog.org
|
|
Copyright (C): 1985-2008, University of Amsterdam
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
#define _ISOC99_SOURCE 1 /* fwprintf(), etc prototypes */
|
|
|
|
#ifdef __WINDOWS__
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#define DTD_MINOR_ERRORS 1 /* get detailed errors */
|
|
|
|
#include <stdio.h>
|
|
#include "dtd.h"
|
|
#include "catalog.h"
|
|
#include "model.h"
|
|
#include <SWI-Stream.h>
|
|
#include <SWI-Prolog.h>
|
|
#include <errno.h>
|
|
#include "error.h"
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include <wctype.h>
|
|
|
|
#define streq(s1, s2) (strcmp(s1, s2) == 0)
|
|
|
|
#define MAX_ERRORS 50
|
|
#define MAX_WARNINGS 50
|
|
|
|
#define ENDSNUL ((size_t)-1)
|
|
|
|
/*******************************
|
|
* PARSER CONTEXT DATA *
|
|
*******************************/
|
|
|
|
#define PD_MAGIC 0x36472ba1 /* just a number */
|
|
|
|
typedef enum
|
|
{ SA_FILE = 0, /* Stop at end-of-file */
|
|
SA_INPUT, /* Do not complete input */
|
|
SA_ELEMENT, /* Stop after first element */
|
|
SA_CONTENT, /* Stop after close */
|
|
SA_DECL /* Stop after declaration */
|
|
} stopat;
|
|
|
|
typedef enum
|
|
{ EM_QUIET = 0, /* Suppress messages */
|
|
EM_PRINT, /* Print message */
|
|
EM_STYLE /* include style-messages */
|
|
} errormode;
|
|
|
|
typedef struct _env
|
|
{ term_t tail;
|
|
struct _env *parent;
|
|
} env;
|
|
|
|
|
|
typedef struct _parser_data
|
|
{ int magic; /* PD_MAGIC */
|
|
dtd_parser *parser; /* parser itself */
|
|
|
|
int warnings; /* #warnings seen */
|
|
int errors; /* #errors seen */
|
|
int max_errors; /* error limit */
|
|
int max_warnings; /* warning limit */
|
|
errormode error_mode; /* how to handle errors */
|
|
int positions; /* report file-positions */
|
|
int exception; /* pending exception from callback */
|
|
|
|
predicate_t on_begin; /* begin element */
|
|
predicate_t on_end; /* end element */
|
|
predicate_t on_cdata; /* cdata */
|
|
predicate_t on_entity; /* entity */
|
|
predicate_t on_pi; /* processing instruction */
|
|
predicate_t on_xmlns; /* xmlns */
|
|
predicate_t on_urlns; /* url --> namespace */
|
|
predicate_t on_error; /* errors */
|
|
predicate_t on_decl; /* declarations */
|
|
|
|
stopat stopat; /* Where to stop */
|
|
int stopped; /* Environment is complete */
|
|
|
|
IOSTREAM* source; /* Where we are reading from */
|
|
|
|
term_t list; /* output term (if any) */
|
|
term_t tail; /* tail of the list */
|
|
env *stack; /* environment stack */
|
|
int free_on_close; /* sgml_free parser on close */
|
|
} parser_data;
|
|
|
|
|
|
/*******************************
|
|
* CONSTANTS *
|
|
*******************************/
|
|
|
|
static functor_t FUNCTOR_and2;
|
|
static functor_t FUNCTOR_bar2;
|
|
static functor_t FUNCTOR_comma2;
|
|
static functor_t FUNCTOR_default1;
|
|
static functor_t FUNCTOR_dialect1;
|
|
static functor_t FUNCTOR_document1;
|
|
static functor_t FUNCTOR_dtd1;
|
|
static functor_t FUNCTOR_dtd2;
|
|
static functor_t FUNCTOR_element3;
|
|
static functor_t FUNCTOR_entity1;
|
|
static functor_t FUNCTOR_equal2;
|
|
static functor_t FUNCTOR_file1;
|
|
static functor_t FUNCTOR_fixed1;
|
|
static functor_t FUNCTOR_line1;
|
|
static functor_t FUNCTOR_list1;
|
|
static functor_t FUNCTOR_max_errors1;
|
|
static functor_t FUNCTOR_nameof1;
|
|
static functor_t FUNCTOR_notation1;
|
|
static functor_t FUNCTOR_omit2;
|
|
static functor_t FUNCTOR_opt1;
|
|
static functor_t FUNCTOR_plus1;
|
|
static functor_t FUNCTOR_rep1;
|
|
static functor_t FUNCTOR_sgml_parser1;
|
|
static functor_t FUNCTOR_parse1;
|
|
static functor_t FUNCTOR_source1;
|
|
static functor_t FUNCTOR_content_length1;
|
|
static functor_t FUNCTOR_call2;
|
|
static functor_t FUNCTOR_charpos1;
|
|
static functor_t FUNCTOR_charpos2;
|
|
static functor_t FUNCTOR_ns2; /* :/2 */
|
|
static functor_t FUNCTOR_space1;
|
|
static functor_t FUNCTOR_pi1;
|
|
static functor_t FUNCTOR_sdata1;
|
|
static functor_t FUNCTOR_ndata1;
|
|
static functor_t FUNCTOR_number1;
|
|
static functor_t FUNCTOR_syntax_errors1;
|
|
static functor_t FUNCTOR_minus2;
|
|
static functor_t FUNCTOR_positions1;
|
|
static functor_t FUNCTOR_event_class1;
|
|
static functor_t FUNCTOR_doctype1;
|
|
static functor_t FUNCTOR_allowed1;
|
|
static functor_t FUNCTOR_context1;
|
|
static functor_t FUNCTOR_defaults1;
|
|
static functor_t FUNCTOR_shorttag1;
|
|
static functor_t FUNCTOR_qualify_attributes1;
|
|
static functor_t FUNCTOR_encoding1;
|
|
|
|
static atom_t ATOM_true;
|
|
static atom_t ATOM_false;
|
|
static atom_t ATOM_cdata;
|
|
static atom_t ATOM_rcdata;
|
|
static atom_t ATOM_pcdata;
|
|
static atom_t ATOM_empty;
|
|
static atom_t ATOM_any;
|
|
static atom_t ATOM_position;
|
|
|
|
#define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a)
|
|
|
|
static void
|
|
initConstants(void)
|
|
{
|
|
FUNCTOR_sgml_parser1 = mkfunctor("sgml_parser", 1);
|
|
FUNCTOR_equal2 = mkfunctor("=", 2);
|
|
FUNCTOR_dtd1 = mkfunctor("dtd", 1);
|
|
FUNCTOR_element3 = mkfunctor("element", 3);
|
|
FUNCTOR_entity1 = mkfunctor("entity", 1);
|
|
FUNCTOR_document1 = mkfunctor("document", 1);
|
|
FUNCTOR_dtd2 = mkfunctor("dtd", 2);
|
|
FUNCTOR_omit2 = mkfunctor("omit", 2);
|
|
FUNCTOR_and2 = mkfunctor("&", 2);
|
|
FUNCTOR_comma2 = mkfunctor(",", 2);
|
|
FUNCTOR_bar2 = mkfunctor("|", 2);
|
|
FUNCTOR_opt1 = mkfunctor("?", 1);
|
|
FUNCTOR_rep1 = mkfunctor("*", 1);
|
|
FUNCTOR_plus1 = mkfunctor("+", 1);
|
|
FUNCTOR_default1 = mkfunctor("default", 1);
|
|
FUNCTOR_fixed1 = mkfunctor("fixed", 1);
|
|
FUNCTOR_list1 = mkfunctor("list", 1);
|
|
FUNCTOR_nameof1 = mkfunctor("nameof", 1);
|
|
FUNCTOR_notation1 = mkfunctor("notation", 1);
|
|
FUNCTOR_file1 = mkfunctor("file", 1);
|
|
FUNCTOR_line1 = mkfunctor("line", 1);
|
|
FUNCTOR_dialect1 = mkfunctor("dialect", 1);
|
|
FUNCTOR_max_errors1 = mkfunctor("max_errors", 1);
|
|
FUNCTOR_parse1 = mkfunctor("parse", 1);
|
|
FUNCTOR_source1 = mkfunctor("source", 1);
|
|
FUNCTOR_content_length1= mkfunctor("content_length", 1);
|
|
FUNCTOR_call2 = mkfunctor("call", 2);
|
|
FUNCTOR_charpos1 = mkfunctor("charpos", 1);
|
|
FUNCTOR_charpos2 = mkfunctor("charpos", 2);
|
|
FUNCTOR_ns2 = mkfunctor(":", 2);
|
|
FUNCTOR_space1 = mkfunctor("space", 1);
|
|
FUNCTOR_pi1 = mkfunctor("pi", 1);
|
|
FUNCTOR_sdata1 = mkfunctor("sdata", 1);
|
|
FUNCTOR_ndata1 = mkfunctor("ndata", 1);
|
|
FUNCTOR_number1 = mkfunctor("number", 1);
|
|
FUNCTOR_syntax_errors1 = mkfunctor("syntax_errors", 1);
|
|
FUNCTOR_minus2 = mkfunctor("-", 2);
|
|
FUNCTOR_positions1 = mkfunctor("positions", 1);
|
|
FUNCTOR_event_class1 = mkfunctor("event_class", 1);
|
|
FUNCTOR_doctype1 = mkfunctor("doctype", 1);
|
|
FUNCTOR_allowed1 = mkfunctor("allowed", 1);
|
|
FUNCTOR_context1 = mkfunctor("context", 1);
|
|
FUNCTOR_defaults1 = mkfunctor("defaults", 1);
|
|
FUNCTOR_shorttag1 = mkfunctor("shorttag", 1);
|
|
FUNCTOR_qualify_attributes1 = mkfunctor("qualify_attributes", 1);
|
|
FUNCTOR_encoding1 = mkfunctor("encoding", 1);
|
|
|
|
ATOM_true = PL_new_atom("true");
|
|
ATOM_false = PL_new_atom("false");
|
|
ATOM_cdata = PL_new_atom("cdata");
|
|
ATOM_rcdata = PL_new_atom("rcdata");
|
|
ATOM_pcdata = PL_new_atom("#pcdata");
|
|
ATOM_empty = PL_new_atom("empty");
|
|
ATOM_any = PL_new_atom("any");
|
|
ATOM_position = PL_new_atom("#position");
|
|
}
|
|
|
|
/*******************************
|
|
* ACCESS *
|
|
*******************************/
|
|
|
|
static int
|
|
unify_parser(term_t parser, dtd_parser *p)
|
|
{ return PL_unify_term(parser, PL_FUNCTOR, FUNCTOR_sgml_parser1,
|
|
PL_POINTER, p);
|
|
}
|
|
|
|
|
|
static int
|
|
get_parser(term_t parser, dtd_parser **p)
|
|
{ if ( PL_is_functor(parser, FUNCTOR_sgml_parser1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
void *ptr;
|
|
|
|
PL_get_arg(1, parser, a);
|
|
if ( PL_get_pointer(a, &ptr) )
|
|
{ dtd_parser *tmp = ptr;
|
|
|
|
if ( tmp->magic == SGML_PARSER_MAGIC )
|
|
{ *p = tmp;
|
|
|
|
return TRUE;
|
|
}
|
|
return sgml2pl_error(ERR_EXISTENCE, "sgml_parser", parser);
|
|
}
|
|
}
|
|
|
|
return sgml2pl_error(ERR_TYPE, "sgml_parser", parser);
|
|
}
|
|
|
|
|
|
static int
|
|
unify_dtd(term_t t, dtd *dtd)
|
|
{ if ( dtd->doctype )
|
|
return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_dtd2,
|
|
PL_POINTER, dtd,
|
|
PL_CHARS, dtd->doctype);
|
|
else
|
|
return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_dtd2,
|
|
PL_POINTER, dtd,
|
|
PL_VARIABLE);
|
|
}
|
|
|
|
|
|
static int
|
|
get_dtd(term_t t, dtd **dtdp)
|
|
{ if ( PL_is_functor(t, FUNCTOR_dtd2) )
|
|
{ term_t a = PL_new_term_ref();
|
|
void *ptr;
|
|
|
|
PL_get_arg(1, t, a);
|
|
if ( PL_get_pointer(a, &ptr) )
|
|
{ dtd *tmp = ptr;
|
|
|
|
if ( tmp->magic == SGML_DTD_MAGIC )
|
|
{ *dtdp = tmp;
|
|
|
|
return TRUE;
|
|
}
|
|
return sgml2pl_error(ERR_EXISTENCE, "dtd", t);
|
|
}
|
|
}
|
|
|
|
return sgml2pl_error(ERR_TYPE, "dtd", t);
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* NEW/FREE *
|
|
*******************************/
|
|
|
|
static foreign_t
|
|
pl_new_sgml_parser(term_t ref, term_t options)
|
|
{ term_t head = PL_new_term_ref();
|
|
term_t tail = PL_copy_term_ref(options);
|
|
term_t tmp = PL_new_term_ref();
|
|
|
|
dtd *dtd = NULL;
|
|
dtd_parser *p;
|
|
|
|
while ( PL_get_list(tail, head, tail) )
|
|
{ if ( PL_is_functor(head, FUNCTOR_dtd1) )
|
|
{ PL_get_arg(1, head, tmp);
|
|
|
|
if ( PL_is_variable(tmp) ) /* dtd(X) */
|
|
{ dtd = new_dtd(NULL); /* no known doctype */
|
|
dtd->references++;
|
|
unify_dtd(tmp, dtd);
|
|
} else if ( !get_dtd(tmp, &dtd) )
|
|
return FALSE;
|
|
}
|
|
}
|
|
if ( !PL_get_nil(tail) )
|
|
return sgml2pl_error(ERR_TYPE, "list", tail);
|
|
|
|
p = new_dtd_parser(dtd);
|
|
|
|
return unify_parser(ref, p);
|
|
}
|
|
|
|
|
|
static foreign_t
|
|
pl_free_sgml_parser(term_t parser)
|
|
{ dtd_parser *p;
|
|
|
|
if ( get_parser(parser, &p) )
|
|
{ free_dtd_parser(p);
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static foreign_t
|
|
pl_new_dtd(term_t doctype, term_t ref)
|
|
{ ichar *dt;
|
|
dtd *dtd;
|
|
|
|
if ( !PL_get_wchars(doctype, NULL, &dt, CVT_ATOM|CVT_EXCEPTION) )
|
|
return FALSE;
|
|
|
|
if ( !(dtd=new_dtd(dt)) )
|
|
return FALSE;
|
|
|
|
dtd->references++;
|
|
|
|
return unify_dtd(ref, dtd);
|
|
}
|
|
|
|
|
|
static foreign_t
|
|
pl_free_dtd(term_t t)
|
|
{ dtd *dtd;
|
|
|
|
if ( get_dtd(t, &dtd) )
|
|
{ free_dtd(dtd);
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/*******************************
|
|
* DATA EXCHANGE *
|
|
*******************************/
|
|
|
|
static void
|
|
put_atom_wchars(term_t t, wchar_t const *s)
|
|
{ PL_put_variable(t);
|
|
PL_unify_wchars(t, PL_ATOM, ENDSNUL, s);
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* PROPERTIES *
|
|
*******************************/
|
|
|
|
static foreign_t
|
|
pl_set_sgml_parser(term_t parser, term_t option)
|
|
{ dtd_parser *p;
|
|
|
|
if ( !get_parser(parser, &p) )
|
|
return FALSE;
|
|
|
|
if ( PL_is_functor(option, FUNCTOR_file1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
wchar_t *file;
|
|
dtd_symbol *fs;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_wchars(a, NULL, &file, CVT_ATOM|CVT_EXCEPTION) )
|
|
return FALSE;
|
|
fs = dtd_add_symbol(p->dtd, file); /* symbol will be freed */
|
|
set_file_dtd_parser(p, IN_FILE, fs->name);
|
|
} else if ( PL_is_functor(option, FUNCTOR_line1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_integer(a, &p->location.line) )
|
|
return sgml2pl_error(ERR_TYPE, "integer", a);
|
|
} else if ( PL_is_functor(option, FUNCTOR_charpos1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_long(a, &p->location.charpos) )
|
|
return sgml2pl_error(ERR_TYPE, "integer", a);
|
|
} else if ( PL_is_functor(option, FUNCTOR_dialect1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
char *s;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_atom_chars(a, &s) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
|
|
if ( streq(s, "xml") )
|
|
set_dialect_dtd(p->dtd, DL_XML);
|
|
else if ( streq(s, "xmlns") )
|
|
set_dialect_dtd(p->dtd, DL_XMLNS);
|
|
else if ( streq(s, "sgml") )
|
|
set_dialect_dtd(p->dtd, DL_SGML);
|
|
else
|
|
return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", a);
|
|
} else if ( PL_is_functor(option, FUNCTOR_space1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
char *s;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_atom_chars(a, &s) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
|
|
if ( streq(s, "preserve") )
|
|
p->dtd->space_mode = SP_PRESERVE;
|
|
else if ( streq(s, "default") )
|
|
p->dtd->space_mode = SP_DEFAULT;
|
|
else if ( streq(s, "remove") )
|
|
p->dtd->space_mode = SP_REMOVE;
|
|
else if ( streq(s, "sgml") )
|
|
p->dtd->space_mode = SP_SGML;
|
|
|
|
else
|
|
return sgml2pl_error(ERR_DOMAIN, "space", a);
|
|
} else if ( PL_is_functor(option, FUNCTOR_defaults1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
int val;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_bool(a, &val) )
|
|
return sgml2pl_error(ERR_TYPE, "boolean", a);
|
|
|
|
if ( val )
|
|
p->flags &= ~SGML_PARSER_NODEFS;
|
|
else
|
|
p->flags |= SGML_PARSER_NODEFS;
|
|
} else if ( PL_is_functor(option, FUNCTOR_qualify_attributes1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
int val;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_bool(a, &val) )
|
|
return sgml2pl_error(ERR_TYPE, "boolean", a);
|
|
|
|
if ( val )
|
|
p->flags |= SGML_PARSER_QUALIFY_ATTS;
|
|
else
|
|
p->flags &= ~SGML_PARSER_QUALIFY_ATTS;
|
|
} else if ( PL_is_functor(option, FUNCTOR_shorttag1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
int val;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_bool(a, &val) )
|
|
return sgml2pl_error(ERR_TYPE, "boolean", a);
|
|
|
|
set_option_dtd(p->dtd, OPT_SHORTTAG, val);
|
|
} else if ( PL_is_functor(option, FUNCTOR_number1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
char *s;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_atom_chars(a, &s) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
|
|
if ( streq(s, "token") )
|
|
p->dtd->number_mode = NU_TOKEN;
|
|
else if ( streq(s, "integer") )
|
|
p->dtd->number_mode = NU_INTEGER;
|
|
else
|
|
return sgml2pl_error(ERR_DOMAIN, "number", a);
|
|
} else if ( PL_is_functor(option, FUNCTOR_encoding1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
char *val;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_atom_chars(a, &val) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
if ( !xml_set_encoding(p, val) )
|
|
return sgml2pl_error(ERR_DOMAIN, "encoding", a);
|
|
} else if ( PL_is_functor(option, FUNCTOR_doctype1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
ichar *s;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( PL_is_variable(a) )
|
|
{ p->enforce_outer_element = NULL;
|
|
} else
|
|
{ if ( !PL_get_wchars(a, NULL, &s, CVT_ATOM) )
|
|
return sgml2pl_error(ERR_TYPE, "atom_or_variable", a);
|
|
|
|
p->enforce_outer_element = dtd_add_symbol(p->dtd, s);
|
|
}
|
|
} else
|
|
return sgml2pl_error(ERR_DOMAIN, "sgml_parser_option", option);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static dtd_srcloc *
|
|
file_location(dtd_parser *p, dtd_srcloc *l)
|
|
{ while(l->parent && l->type != IN_FILE)
|
|
l = l->parent;
|
|
|
|
return l;
|
|
}
|
|
|
|
|
|
static foreign_t
|
|
pl_get_sgml_parser(term_t parser, term_t option)
|
|
{ dtd_parser *p;
|
|
|
|
if ( !get_parser(parser, &p) )
|
|
return FALSE;
|
|
|
|
if ( PL_is_functor(option, FUNCTOR_charpos1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
return PL_unify_integer(a, file_location(p, &p->startloc)->charpos);
|
|
} else if ( PL_is_functor(option, FUNCTOR_line1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
return PL_unify_integer(a, file_location(p, &p->startloc)->line);
|
|
} else if ( PL_is_functor(option, FUNCTOR_charpos2) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
if ( PL_get_arg(1, option, a) &&
|
|
PL_unify_integer(a, file_location(p, &p->startloc)->charpos) &&
|
|
PL_get_arg(2, option, a) &&
|
|
PL_unify_integer(a, file_location(p, &p->location)->charpos) )
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
} else if ( PL_is_functor(option, FUNCTOR_file1) )
|
|
{ dtd_srcloc *l = file_location(p, &p->location);
|
|
|
|
if ( l->type == IN_FILE && l->name.file )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
return PL_unify_wchars(a, PL_ATOM, ENDSNUL, l->name.file);
|
|
}
|
|
} else if ( PL_is_functor(option, FUNCTOR_source1) )
|
|
{ parser_data *pd = p->closure;
|
|
|
|
if ( pd && pd->magic == PD_MAGIC && pd->source )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
return PL_unify_stream(a, pd->source);
|
|
}
|
|
} else if ( PL_is_functor(option, FUNCTOR_dialect1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
switch(p->dtd->dialect)
|
|
{ case DL_SGML:
|
|
return PL_unify_atom_chars(a, "sgml");
|
|
case DL_XML:
|
|
return PL_unify_atom_chars(a, "xml");
|
|
case DL_XMLNS:
|
|
return PL_unify_atom_chars(a, "xmlns");
|
|
}
|
|
} else if ( PL_is_functor(option, FUNCTOR_event_class1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
switch(p->event_class)
|
|
{ case EV_EXPLICIT:
|
|
return PL_unify_atom_chars(a, "explicit");
|
|
case EV_OMITTED:
|
|
return PL_unify_atom_chars(a, "omitted");
|
|
case EV_SHORTTAG:
|
|
return PL_unify_atom_chars(a, "shorttag");
|
|
case EV_SHORTREF:
|
|
return PL_unify_atom_chars(a, "shortref");
|
|
}
|
|
} else if ( PL_is_functor(option, FUNCTOR_dtd1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
|
|
return unify_dtd(a, p->dtd);
|
|
} else if ( PL_is_functor(option, FUNCTOR_doctype1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( p->enforce_outer_element )
|
|
return PL_unify_wchars(a, PL_ATOM, ENDSNUL,
|
|
p->enforce_outer_element->name);
|
|
else
|
|
return TRUE; /* leave variable */
|
|
} else if ( PL_is_functor(option, FUNCTOR_allowed1) )
|
|
{ term_t tail = PL_new_term_ref();
|
|
term_t head = PL_new_term_ref();
|
|
term_t tmp = PL_new_term_ref();
|
|
sgml_environment *env = p->environments;
|
|
|
|
PL_get_arg(1, option, tail);
|
|
|
|
if ( env )
|
|
{ for( ; env; env = env->parent)
|
|
{ dtd_element *buf[256]; /* MAX_VISITED! */
|
|
int n = sizeof(buf)/sizeof(dtd_element *); /* not yet used! */
|
|
int i;
|
|
|
|
state_allows_for(env->state, buf, &n);
|
|
|
|
for(i=0; i<n; i++)
|
|
{ if ( buf[i] == CDATA_ELEMENT )
|
|
PL_put_atom_chars(tmp, "#pcdata");
|
|
else
|
|
put_atom_wchars(tmp, buf[i]->name->name);
|
|
|
|
if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify(head, tmp) )
|
|
return FALSE;
|
|
}
|
|
|
|
if ( !env->element->structure ||
|
|
!env->element->structure->omit_close )
|
|
break;
|
|
}
|
|
} else if ( p->enforce_outer_element )
|
|
{ put_atom_wchars(tmp, p->enforce_outer_element->name);
|
|
|
|
if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify(head, tmp) )
|
|
return FALSE;
|
|
}
|
|
|
|
return PL_unify_nil(tail);
|
|
} else if ( PL_is_functor(option, FUNCTOR_context1) )
|
|
{ term_t tail = PL_new_term_ref();
|
|
term_t head = PL_new_term_ref();
|
|
term_t tmp = PL_new_term_ref();
|
|
sgml_environment *env = p->environments;
|
|
|
|
PL_get_arg(1, option, tail);
|
|
|
|
for( ; env; env = env->parent)
|
|
{ put_atom_wchars(tmp, env->element->name->name);
|
|
|
|
if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify(head, tmp) )
|
|
return FALSE;
|
|
}
|
|
|
|
return PL_unify_nil(tail);
|
|
} else
|
|
return sgml2pl_error(ERR_DOMAIN, "parser_option", option);
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static int
|
|
call_prolog(parser_data *pd, predicate_t pred, term_t av)
|
|
{ qid_t qid = PL_open_query(NULL, PL_Q_PASS_EXCEPTION, pred, av);
|
|
int rc = PL_next_solution(qid);
|
|
|
|
if ( !rc && PL_exception(qid) )
|
|
pd->exception = TRUE;
|
|
else
|
|
pd->exception = FALSE;
|
|
|
|
PL_close_query(qid);
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
static void
|
|
end_frame(fid_t fid, int ex)
|
|
{ if ( ex )
|
|
PL_close_foreign_frame(fid);
|
|
else
|
|
PL_discard_foreign_frame(fid);
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
put_url(dtd_parser *p, term_t t, const ichar *url)
|
|
Store the url-part of a name-space qualifier in term. We call
|
|
xml:xmlns(-Canonical, +Full) trying to resolve the specified
|
|
namespace to an internal canonical namespace.
|
|
|
|
We do a little caching as there will generally be only a very
|
|
small pool of urls in use. We assume the url-pointers we get
|
|
life for the time of the parser. It might be possible that
|
|
multiple url pointers point to the same url, but this only clobbers
|
|
the cache a little.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
#define URL_CACHE 4 /* # entries cached */
|
|
|
|
typedef struct
|
|
{ const ichar *url; /* URL pointer */
|
|
atom_t canonical;
|
|
} url_cache;
|
|
|
|
static url_cache cache[URL_CACHE];
|
|
|
|
static void
|
|
reset_url_cache(void)
|
|
{ int i;
|
|
url_cache *c = cache;
|
|
|
|
for(i=0; i<URL_CACHE; i++)
|
|
{ c[i].url = NULL;
|
|
if ( c[i].canonical )
|
|
PL_unregister_atom(c[i].canonical);
|
|
c[i].canonical = 0;
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
put_url(dtd_parser *p, term_t t, const ichar *url)
|
|
{ parser_data *pd = p->closure;
|
|
int i;
|
|
|
|
if ( !pd->on_urlns )
|
|
{ put_atom_wchars(t, url);
|
|
return;
|
|
}
|
|
|
|
for(i=0; i<URL_CACHE; i++)
|
|
{ if ( cache[i].url == url ) /* cache hit */
|
|
{ if ( cache[i].canonical ) /* and a canonical value */
|
|
PL_put_atom(t, cache[i].canonical);
|
|
else
|
|
put_atom_wchars(t, url);
|
|
|
|
return;
|
|
}
|
|
}
|
|
/* shift the cache */
|
|
i = URL_CACHE-1;
|
|
if ( cache[i].canonical )
|
|
PL_unregister_atom(cache[i].canonical);
|
|
for(i=URL_CACHE-1; i>0; i--)
|
|
cache[i] = cache[i-1];
|
|
cache[0].url = url;
|
|
cache[0].canonical = 0;
|
|
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(3);
|
|
atom_t a;
|
|
|
|
put_atom_wchars(av+0, url);
|
|
unify_parser(av+2, p);
|
|
if ( PL_call_predicate(NULL, PL_Q_NORMAL, pd->on_urlns, av) &&
|
|
PL_get_atom(av+1, &a) )
|
|
{ PL_register_atom(a);
|
|
cache[0].canonical = a;
|
|
PL_put_atom(t, a);
|
|
} else
|
|
{ put_atom_wchars(t, url);
|
|
}
|
|
PL_discard_foreign_frame(fid);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
put_attribute_name(dtd_parser *p, term_t t, dtd_symbol *nm)
|
|
{ const ichar *url, *local;
|
|
|
|
if ( p->dtd->dialect == DL_XMLNS )
|
|
{ xmlns_resolve_attribute(p, nm, &local, &url);
|
|
|
|
if ( url )
|
|
{ term_t av = PL_new_term_refs(2);
|
|
|
|
put_url(p, av+0, url);
|
|
put_atom_wchars(av+1, local);
|
|
PL_cons_functor_v(t, FUNCTOR_ns2, av);
|
|
} else
|
|
put_atom_wchars(t, local);
|
|
} else
|
|
put_atom_wchars(t, nm->name);
|
|
}
|
|
|
|
|
|
static void
|
|
put_element_name(dtd_parser *p, term_t t, dtd_element *e)
|
|
{ const ichar *url, *local;
|
|
|
|
if ( p->dtd->dialect == DL_XMLNS )
|
|
{ assert(p->environments->element == e);
|
|
xmlns_resolve_element(p, &local, &url);
|
|
|
|
if ( url )
|
|
{ term_t av = PL_new_term_refs(2);
|
|
|
|
put_url(p, av+0, url);
|
|
put_atom_wchars(av+1, local);
|
|
PL_cons_functor_v(t, FUNCTOR_ns2, av);
|
|
} else
|
|
put_atom_wchars(t, local);
|
|
} else
|
|
put_atom_wchars(t, e->name->name);
|
|
}
|
|
|
|
|
|
static ichar *
|
|
istrblank(const ichar *s)
|
|
{ for( ; *s; s++ )
|
|
{ if ( iswspace(*s) )
|
|
return (ichar *)s;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static int
|
|
unify_listval(dtd_parser *p,
|
|
term_t t, attrtype type, size_t len, const ichar *text)
|
|
{ if ( type == AT_NUMBERS && p->dtd->number_mode == NU_INTEGER )
|
|
{ wchar_t *e;
|
|
|
|
#if SIZEOF_LONG == 4 && defined(HAVE_WCSTOLL)
|
|
int64_t v = wcstoll(text, &e, 10);
|
|
if ( (size_t)(e-text) == len && errno != ERANGE )
|
|
return PL_unify_int64(t, v);
|
|
#else
|
|
long v = wcstol(text, &e, 10);
|
|
|
|
if ( (size_t)(e-text) == len && errno != ERANGE )
|
|
return PL_unify_integer(t, v);
|
|
#endif
|
|
/* TBD: Error!? */
|
|
}
|
|
|
|
return PL_unify_wchars(t, PL_ATOM, len, text);
|
|
}
|
|
|
|
|
|
static int
|
|
put_att_text(term_t t, sgml_attribute *a)
|
|
{ if ( a->value.textW )
|
|
{ PL_put_variable(t);
|
|
PL_unify_wchars(t, PL_ATOM, a->value.number, a->value.textW);
|
|
return TRUE;
|
|
} else
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static void
|
|
put_attribute_value(dtd_parser *p, term_t t, sgml_attribute *a)
|
|
{ switch(a->definition->type)
|
|
{ case AT_CDATA:
|
|
put_att_text(t, a);
|
|
break;
|
|
case AT_NUMBER:
|
|
{ if ( !put_att_text(t, a) )
|
|
PL_put_integer(t, a->value.number);
|
|
break;
|
|
}
|
|
default: /* multi-valued attribute */
|
|
{ if ( a->definition->islist && a->value.textW )
|
|
{ term_t tail, head = PL_new_term_ref();
|
|
const ichar *val = a->value.textW;
|
|
const ichar *e;
|
|
|
|
PL_put_variable(t);
|
|
tail = PL_copy_term_ref(t);
|
|
|
|
for(e=istrblank(val); e; val = e+1, e=istrblank(val))
|
|
{ if ( e == val )
|
|
continue; /* skip spaces */
|
|
PL_unify_list(tail, head, tail);
|
|
unify_listval(p, head, a->definition->type, e-val, val);
|
|
}
|
|
PL_unify_list(tail, head, tail);
|
|
unify_listval(p, head, a->definition->type, istrlen(val), val);
|
|
PL_unify_nil(tail);
|
|
} else
|
|
put_att_text(t, a);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Produce a tag-location in the format
|
|
|
|
start_location=file:char-char
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static int
|
|
put_tag_position(dtd_parser *p, term_t pos)
|
|
{ dtd_srcloc *l = &p->startloc;
|
|
|
|
if ( l->type == IN_FILE && l->name.file )
|
|
{ PL_put_variable(pos);
|
|
PL_unify_term(pos,
|
|
PL_FUNCTOR, FUNCTOR_ns2,
|
|
PL_NWCHARS, wcslen(l->name.file), l->name.file,
|
|
PL_FUNCTOR, FUNCTOR_minus2,
|
|
PL_LONG, l->charpos,
|
|
PL_LONG, p->location.charpos);
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
unify_attribute_list(dtd_parser *p, term_t alist,
|
|
int argc, sgml_attribute *argv)
|
|
{ int i;
|
|
term_t tail = PL_copy_term_ref(alist);
|
|
term_t h = PL_new_term_ref();
|
|
term_t a = PL_new_term_refs(2);
|
|
parser_data *pd = p->closure;
|
|
|
|
for(i=0; i<argc; i++)
|
|
{ put_attribute_name(p, a+0, argv[i].definition->name);
|
|
put_attribute_value(p, a+1, &argv[i]);
|
|
PL_cons_functor_v(a, FUNCTOR_equal2, a);
|
|
if ( !PL_unify_list(tail, h, tail) ||
|
|
!PL_unify(h, a) )
|
|
return FALSE;
|
|
}
|
|
|
|
if ( pd->positions && put_tag_position(p, a+1) )
|
|
{ PL_put_atom(a, ATOM_position);
|
|
|
|
PL_cons_functor_v(a, FUNCTOR_equal2, a);
|
|
if ( !PL_unify_list(tail, h, tail) ||
|
|
!PL_unify(h, a) )
|
|
return FALSE;
|
|
}
|
|
|
|
if ( PL_unify_nil(tail) )
|
|
{ PL_reset_term_refs(tail);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv)
|
|
{ parser_data *pd = p->closure;
|
|
|
|
if ( pd->stopped )
|
|
return TRUE;
|
|
|
|
if ( pd->tail )
|
|
{ term_t content = PL_new_term_ref(); /* element content */
|
|
term_t alist = PL_new_term_ref(); /* attribute list */
|
|
term_t et = PL_new_term_ref(); /* element structure */
|
|
term_t h = PL_new_term_ref();
|
|
|
|
put_element_name(p, h, e);
|
|
unify_attribute_list(p, alist, argc, argv);
|
|
PL_unify_term(et, PL_FUNCTOR, FUNCTOR_element3,
|
|
PL_TERM, h,
|
|
PL_TERM, alist,
|
|
PL_TERM, content);
|
|
if ( PL_unify_list(pd->tail, h, pd->tail) &&
|
|
PL_unify(h, et) )
|
|
{ env *env = sgml_calloc(1, sizeof(*env));
|
|
|
|
env->tail = pd->tail;
|
|
env->parent = pd->stack;
|
|
pd->stack = env;
|
|
|
|
pd->tail = content;
|
|
PL_reset_term_refs(alist);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
if ( pd->on_begin )
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(3);
|
|
|
|
put_element_name(p, av+0, e);
|
|
unify_attribute_list(p, av+1, argc, argv);
|
|
unify_parser(av+2, p);
|
|
|
|
call_prolog(pd, pd->on_begin, av);
|
|
end_frame(fid, pd->exception);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
on_end(dtd_parser *p, dtd_element *e)
|
|
{ parser_data *pd = p->closure;
|
|
|
|
if ( pd->stopped )
|
|
return TRUE;
|
|
|
|
if ( pd->on_end )
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(2);
|
|
|
|
put_element_name(p, av+0, e);
|
|
unify_parser(av+1, p);
|
|
|
|
call_prolog(pd, pd->on_end, av);
|
|
end_frame(fid, pd->exception);
|
|
}
|
|
|
|
if ( pd->tail && !pd->stopped )
|
|
{ PL_unify_nil(pd->tail);
|
|
PL_reset_term_refs(pd->tail); /* ? */
|
|
|
|
if ( pd->stack )
|
|
{ env *parent = pd->stack->parent;
|
|
|
|
pd->tail = pd->stack->tail;
|
|
sgml_free(pd->stack);
|
|
pd->stack = parent;
|
|
} else
|
|
{ if ( pd->stopat == SA_CONTENT )
|
|
pd->stopped = TRUE;
|
|
}
|
|
}
|
|
|
|
if ( pd->stopat == SA_ELEMENT && !p->environments->parent )
|
|
pd->stopped = TRUE;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
on_entity(dtd_parser *p, dtd_entity *e, int chr)
|
|
{ parser_data *pd = p->closure;
|
|
|
|
if ( pd->stopped )
|
|
return TRUE;
|
|
|
|
if ( pd->on_entity )
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(2);
|
|
|
|
if ( e )
|
|
put_atom_wchars(av+0, e->name->name);
|
|
else
|
|
PL_put_integer(av+0, chr);
|
|
|
|
unify_parser(av+1, p);
|
|
|
|
call_prolog(pd, pd->on_end, av);
|
|
end_frame(fid, pd->exception);
|
|
}
|
|
|
|
if ( pd->tail )
|
|
{ term_t h = PL_new_term_ref();
|
|
|
|
if ( !PL_unify_list(pd->tail, h, pd->tail) )
|
|
return FALSE;
|
|
|
|
if ( e )
|
|
PL_unify_term(h,
|
|
PL_FUNCTOR, FUNCTOR_entity1,
|
|
PL_CHARS, e->name->name);
|
|
else
|
|
PL_unify_term(h,
|
|
PL_FUNCTOR, FUNCTOR_entity1,
|
|
PL_INT, chr);
|
|
|
|
PL_reset_term_refs(h);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
on_data(dtd_parser *p, data_type type, int len, const wchar_t *data)
|
|
{ parser_data *pd = p->closure;
|
|
|
|
if ( pd->on_cdata )
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(2);
|
|
|
|
PL_unify_wchars(av+0, PL_ATOM, len, data);
|
|
|
|
unify_parser(av+1, p);
|
|
|
|
call_prolog(pd, pd->on_cdata, av);
|
|
end_frame(fid, pd->exception);
|
|
}
|
|
|
|
if ( pd->tail && !pd->stopped )
|
|
{ term_t h = PL_new_term_ref();
|
|
|
|
if ( PL_unify_list(pd->tail, h, pd->tail) )
|
|
{ int rval = TRUE;
|
|
term_t a;
|
|
|
|
switch(type)
|
|
{ case EC_CDATA:
|
|
a = h;
|
|
break;
|
|
case EC_SDATA:
|
|
{ term_t d = PL_new_term_ref();
|
|
|
|
a = d;
|
|
rval = PL_unify_term(h, PL_FUNCTOR, FUNCTOR_sdata1, PL_TERM, d);
|
|
break;
|
|
}
|
|
case EC_NDATA:
|
|
{ term_t d = PL_new_term_ref();
|
|
|
|
a = d;
|
|
rval = PL_unify_term(h, PL_FUNCTOR, FUNCTOR_ndata1, PL_TERM, d);
|
|
break;
|
|
}
|
|
default:
|
|
rval = FALSE;
|
|
assert(0);
|
|
}
|
|
|
|
if ( rval )
|
|
rval = PL_unify_wchars(a, PL_ATOM, len, data);
|
|
|
|
if ( rval )
|
|
{ PL_reset_term_refs(h);
|
|
return TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static int
|
|
on_cdata(dtd_parser *p, data_type type, int len, const wchar_t *data)
|
|
{ return on_data(p, type, len, data);
|
|
}
|
|
|
|
|
|
static int
|
|
can_end_omitted(dtd_parser *p)
|
|
{ sgml_environment *env;
|
|
|
|
for(env=p->environments; env; env = env->parent)
|
|
{ dtd_element *e = env->element;
|
|
|
|
if ( !(e->structure && e->structure->omit_close) )
|
|
return FALSE;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
on_error(dtd_parser *p, dtd_error *error)
|
|
{ parser_data *pd = p->closure;
|
|
const char *severity;
|
|
|
|
if ( pd->stopped )
|
|
return TRUE;
|
|
|
|
if ( pd->stopat == SA_ELEMENT &&
|
|
(error->minor == ERC_NOT_OPEN || error->minor == ERC_NOT_ALLOWED) &&
|
|
can_end_omitted(p) )
|
|
{ end_document_dtd_parser(p);
|
|
sgml_cplocation(&p->location, &p->startloc);
|
|
pd->stopped = TRUE;
|
|
return TRUE;
|
|
}
|
|
|
|
switch(error->severity)
|
|
{ case ERS_STYLE:
|
|
if ( pd->error_mode != EM_STYLE )
|
|
return TRUE;
|
|
severity = "informational";
|
|
break;
|
|
case ERS_WARNING:
|
|
pd->warnings++;
|
|
severity = "warning";
|
|
break;
|
|
case ERS_ERROR:
|
|
default: /* make compiler happy */
|
|
pd->errors++;
|
|
severity = "error";
|
|
break;
|
|
}
|
|
|
|
if ( pd->on_error ) /* msg, parser */
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(3);
|
|
|
|
PL_put_atom_chars(av+0, severity);
|
|
PL_unify_wchars(av+1, PL_ATOM,
|
|
wcslen(error->plain_message), error->plain_message);
|
|
unify_parser(av+2, p);
|
|
|
|
call_prolog(pd, pd->on_error, av);
|
|
end_frame(fid, pd->exception);
|
|
} else if ( pd->error_mode != EM_QUIET )
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
predicate_t pred = PL_predicate("print_message", 2, "user");
|
|
term_t av = PL_new_term_refs(2);
|
|
term_t src = PL_new_term_ref();
|
|
term_t parser = PL_new_term_ref();
|
|
dtd_srcloc *l = file_location(p, &p->startloc);
|
|
|
|
unify_parser(parser, p);
|
|
PL_put_atom_chars(av+0, severity);
|
|
|
|
if ( l->name.file )
|
|
{ if ( l->type == IN_FILE )
|
|
put_atom_wchars(src, l->name.file);
|
|
else
|
|
put_atom_wchars(src, l->name.entity);
|
|
} else
|
|
{ PL_put_nil(src);
|
|
}
|
|
|
|
PL_unify_term(av+1,
|
|
PL_FUNCTOR_CHARS, "sgml", 4,
|
|
PL_TERM, parser,
|
|
PL_TERM, src,
|
|
PL_INT, l->line,
|
|
PL_NWCHARS, wcslen(error->plain_message), error->plain_message);
|
|
|
|
PL_call_predicate(NULL, PL_Q_NODEBUG, pred, av);
|
|
|
|
PL_discard_foreign_frame(fid);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
on_xmlns(dtd_parser *p, dtd_symbol *ns, dtd_symbol *url)
|
|
{ parser_data *pd = p->closure;
|
|
|
|
if ( pd->stopped )
|
|
return TRUE;
|
|
|
|
if ( pd->on_xmlns )
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(3);
|
|
|
|
if ( ns )
|
|
put_atom_wchars(av+0, ns->name);
|
|
else
|
|
PL_put_nil(av+0);
|
|
put_atom_wchars(av+1, url->name);
|
|
unify_parser(av+2, p);
|
|
|
|
call_prolog(pd, pd->on_xmlns, av);
|
|
end_frame(fid, pd->exception);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
on_pi(dtd_parser *p, const ichar *pi)
|
|
{ parser_data *pd = p->closure;
|
|
|
|
if ( pd->stopped )
|
|
return TRUE;
|
|
|
|
if ( pd->on_pi )
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(2);
|
|
|
|
put_atom_wchars(av+0, pi);
|
|
unify_parser(av+1, p);
|
|
|
|
call_prolog(pd, pd->on_pi, av);
|
|
end_frame(fid, pd->exception);
|
|
}
|
|
|
|
if ( pd->tail )
|
|
{ term_t h = PL_new_term_ref();
|
|
|
|
if ( !PL_unify_list(pd->tail, h, pd->tail) )
|
|
return FALSE;
|
|
|
|
PL_unify_term(h,
|
|
PL_FUNCTOR, FUNCTOR_pi1,
|
|
PL_NWCHARS, wcslen(pi), pi);
|
|
|
|
PL_reset_term_refs(h);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
on_decl(dtd_parser *p, const ichar *decl)
|
|
{ parser_data *pd = p->closure;
|
|
|
|
if ( pd->stopped )
|
|
return TRUE;
|
|
|
|
if ( pd->on_decl )
|
|
{ fid_t fid = PL_open_foreign_frame();
|
|
term_t av = PL_new_term_refs(2);
|
|
|
|
put_atom_wchars(av+0, decl);
|
|
unify_parser(av+1, p);
|
|
|
|
call_prolog(pd, pd->on_decl, av);
|
|
end_frame(fid, pd->exception);
|
|
}
|
|
|
|
if ( pd->stopat == SA_DECL )
|
|
pd->stopped = TRUE;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
write_parser(void *h, char *buf, int len)
|
|
{ parser_data *pd = h;
|
|
unsigned char *s = (unsigned char *)buf;
|
|
unsigned char *e = s+len;
|
|
|
|
if ( !pd->parser || pd->parser->magic != SGML_PARSER_MAGIC )
|
|
{ errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if ( (pd->errors > pd->max_errors && pd->max_errors >= 0) || pd->stopped )
|
|
{ errno = EIO;
|
|
return -1;
|
|
}
|
|
|
|
for(; s<e; s++)
|
|
putchar_dtd_parser(pd->parser, *s);
|
|
|
|
return len;
|
|
}
|
|
|
|
|
|
static int
|
|
close_parser(void *h)
|
|
{ parser_data *pd = h;
|
|
dtd_parser *p;
|
|
|
|
if ( !(p=pd->parser) || p->magic != SGML_PARSER_MAGIC )
|
|
{ errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if ( pd->tail )
|
|
PL_unify_nil(pd->tail);
|
|
|
|
if ( p->dmode == DM_DTD )
|
|
p->dtd->implicit = FALSE; /* assume we loaded a DTD */
|
|
|
|
if ( pd->free_on_close )
|
|
free_dtd_parser(p);
|
|
else
|
|
p->closure = NULL;
|
|
|
|
sgml_free(pd);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static IOFUNCTIONS sgml_stream_functions =
|
|
{ (Sread_function) NULL,
|
|
(Swrite_function) write_parser,
|
|
(Sseek_function) NULL,
|
|
(Sclose_function) close_parser,
|
|
NULL
|
|
};
|
|
|
|
|
|
static parser_data *
|
|
new_parser_data(dtd_parser *p)
|
|
{ parser_data *pd;
|
|
|
|
pd = sgml_calloc(1, sizeof(*pd));
|
|
pd->magic = PD_MAGIC;
|
|
pd->parser = p;
|
|
pd->max_errors = MAX_ERRORS;
|
|
pd->max_warnings = MAX_WARNINGS;
|
|
pd->error_mode = EM_PRINT;
|
|
pd->exception = FALSE;
|
|
p->closure = pd;
|
|
|
|
return pd;
|
|
}
|
|
|
|
|
|
static foreign_t
|
|
pl_open_dtd(term_t ref, term_t options, term_t stream)
|
|
{ dtd *dtd;
|
|
dtd_parser *p;
|
|
parser_data *pd;
|
|
IOSTREAM *s;
|
|
term_t tail = PL_copy_term_ref(options);
|
|
term_t option = PL_new_term_ref();
|
|
|
|
if ( !get_dtd(ref, &dtd) )
|
|
return FALSE;
|
|
p = new_dtd_parser(dtd);
|
|
p->dmode = DM_DTD;
|
|
pd = new_parser_data(p);
|
|
pd->free_on_close = TRUE;
|
|
|
|
while( PL_get_list(tail, option, tail) )
|
|
{ if ( PL_is_functor(option, FUNCTOR_dialect1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
char *s;
|
|
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_atom_chars(a, &s) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
|
|
if ( streq(s, "xml") )
|
|
set_dialect_dtd(dtd, DL_XML);
|
|
else if ( streq(s, "xmlns") )
|
|
set_dialect_dtd(dtd, DL_XMLNS);
|
|
else if ( streq(s, "sgml") )
|
|
set_dialect_dtd(dtd, DL_SGML);
|
|
else
|
|
return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", a);
|
|
} else
|
|
return sgml2pl_error(ERR_DOMAIN, "dtd_option", option);
|
|
}
|
|
if ( !PL_get_nil(tail) )
|
|
return sgml2pl_error(ERR_TYPE, "list", options);
|
|
|
|
s = Snew(pd, SIO_OUTPUT|SIO_FBUF, &sgml_stream_functions);
|
|
|
|
if ( !PL_open_stream(stream, s) )
|
|
return FALSE;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static int
|
|
set_callback_predicates(parser_data *pd, term_t option)
|
|
{ term_t a = PL_new_term_ref();
|
|
char *fname;
|
|
atom_t pname;
|
|
predicate_t *pp = NULL; /* keep compiler happy */
|
|
int arity;
|
|
module_t m = NULL;
|
|
|
|
PL_get_arg(2, option, a);
|
|
PL_strip_module(a, &m, a);
|
|
if ( !PL_get_atom(a, &pname) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
PL_get_arg(1, option, a);
|
|
if ( !PL_get_atom_chars(a, &fname) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
|
|
if ( streq(fname, "begin") )
|
|
{ pp = &pd->on_begin; /* tag, attributes, parser */
|
|
arity = 3;
|
|
} else if ( streq(fname, "end") )
|
|
{ pp = &pd->on_end; /* tag, parser */
|
|
arity = 2;
|
|
} else if ( streq(fname, "cdata") )
|
|
{ pp = &pd->on_cdata; /* cdata, parser */
|
|
arity = 2;
|
|
} else if ( streq(fname, "entity") )
|
|
{ pp = &pd->on_entity; /* name, parser */
|
|
arity = 2;
|
|
} else if ( streq(fname, "pi") )
|
|
{ pp = &pd->on_pi; /* pi, parser */
|
|
arity = 2;
|
|
} else if ( streq(fname, "xmlns") )
|
|
{ pp = &pd->on_xmlns; /* ns, url, parser */
|
|
arity = 3;
|
|
} else if ( streq(fname, "urlns") )
|
|
{ pp = &pd->on_urlns; /* url, ns, parser */
|
|
arity = 3;
|
|
} else if ( streq(fname, "error") )
|
|
{ pp = &pd->on_error; /* severity, message, parser */
|
|
arity = 3;
|
|
} else if ( streq(fname, "decl") )
|
|
{ pp = &pd->on_decl; /* decl, parser */
|
|
arity = 2;
|
|
} else
|
|
return sgml2pl_error(ERR_DOMAIN, "sgml_callback", a);
|
|
|
|
*pp = PL_pred(PL_new_functor(pname, arity), m);
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static foreign_t
|
|
pl_sgml_parse(term_t parser, term_t options)
|
|
{ dtd_parser *p;
|
|
parser_data *pd;
|
|
parser_data *oldpd;
|
|
term_t head = PL_new_term_ref();
|
|
term_t tail = PL_copy_term_ref(options);
|
|
IOSTREAM *in = NULL;
|
|
int recursive;
|
|
int has_content_length = FALSE;
|
|
int64_t content_length = 0; /* content_length(Len) */
|
|
int count = 0;
|
|
int rc = TRUE;
|
|
|
|
if ( !get_parser(parser, &p) )
|
|
return FALSE;
|
|
|
|
if ( p->closure ) /* recursive call */
|
|
{ recursive = TRUE;
|
|
|
|
oldpd = p->closure;
|
|
if ( oldpd->magic != PD_MAGIC || oldpd->parser != p )
|
|
return sgml2pl_error(ERR_MISC, "sgml",
|
|
"Parser associated with illegal data");
|
|
|
|
pd = sgml_calloc(1, sizeof(*pd));
|
|
*pd = *oldpd;
|
|
p->closure = pd;
|
|
|
|
in = pd->source;
|
|
} else
|
|
{ recursive = FALSE;
|
|
oldpd = NULL; /* keep compiler happy */
|
|
|
|
set_mode_dtd_parser(p, DM_DATA);
|
|
|
|
p->on_begin_element = on_begin;
|
|
p->on_end_element = on_end;
|
|
p->on_entity = on_entity;
|
|
p->on_pi = on_pi;
|
|
p->on_data = on_cdata;
|
|
p->on_error = on_error;
|
|
p->on_xmlns = on_xmlns;
|
|
p->on_decl = on_decl;
|
|
|
|
pd = new_parser_data(p);
|
|
}
|
|
|
|
while ( PL_get_list(tail, head, tail) )
|
|
{ if ( PL_is_functor(head, FUNCTOR_document1) )
|
|
{ pd->list = PL_new_term_ref();
|
|
PL_get_arg(1, head, pd->list);
|
|
pd->tail = PL_copy_term_ref(pd->list);
|
|
pd->stack = NULL;
|
|
} else if ( PL_is_functor(head, FUNCTOR_source1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, head, a);
|
|
if ( !PL_get_stream_handle(a, &in) )
|
|
return FALSE;
|
|
} else if ( PL_is_functor(head, FUNCTOR_content_length1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, head, a);
|
|
if ( !PL_get_int64(a, &content_length) )
|
|
return sgml2pl_error(ERR_TYPE, "integer", a);
|
|
has_content_length = TRUE;
|
|
} else if ( PL_is_functor(head, FUNCTOR_call2) )
|
|
{ if ( !set_callback_predicates(pd, head) )
|
|
return FALSE;
|
|
} else if ( PL_is_functor(head, FUNCTOR_parse1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
char *s;
|
|
|
|
PL_get_arg(1, head, a);
|
|
if ( !PL_get_atom_chars(a, &s) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
if ( streq(s, "element") )
|
|
pd->stopat = SA_ELEMENT;
|
|
else if ( streq(s, "content") )
|
|
pd->stopat = SA_CONTENT;
|
|
else if ( streq(s, "file") )
|
|
pd->stopat = SA_FILE;
|
|
else if ( streq(s, "input") )
|
|
pd->stopat = SA_INPUT;
|
|
else if ( streq(s, "declaration") )
|
|
pd->stopat = SA_DECL;
|
|
else
|
|
return sgml2pl_error(ERR_DOMAIN, "parse", a);
|
|
} else if ( PL_is_functor(head, FUNCTOR_max_errors1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, head, a);
|
|
if ( !PL_get_integer(a, &pd->max_errors) )
|
|
return sgml2pl_error(ERR_TYPE, "integer", a);
|
|
} else if ( PL_is_functor(head, FUNCTOR_syntax_errors1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
char *s;
|
|
|
|
PL_get_arg(1, head, a);
|
|
if ( !PL_get_atom_chars(a, &s) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
|
|
if ( streq(s, "quiet") )
|
|
pd->error_mode = EM_QUIET;
|
|
else if ( streq(s, "print") )
|
|
pd->error_mode = EM_PRINT;
|
|
else if ( streq(s, "style") )
|
|
pd->error_mode = EM_STYLE;
|
|
else
|
|
return sgml2pl_error(ERR_DOMAIN, "syntax_error", a);
|
|
} else if ( PL_is_functor(head, FUNCTOR_positions1) )
|
|
{ term_t a = PL_new_term_ref();
|
|
char *s;
|
|
|
|
PL_get_arg(1, head, a);
|
|
if ( !PL_get_atom_chars(a, &s) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", a);
|
|
|
|
if ( streq(s, "true") )
|
|
pd->positions = TRUE;
|
|
else if ( streq(s, "false") )
|
|
pd->positions = FALSE;
|
|
else
|
|
return sgml2pl_error(ERR_DOMAIN, "positions", a);
|
|
} else
|
|
return sgml2pl_error(ERR_DOMAIN, "sgml_option", head);
|
|
}
|
|
if ( !PL_get_nil(tail) )
|
|
return sgml2pl_error(ERR_TYPE, "list", tail);
|
|
|
|
/* Parsing input from a stream */
|
|
#define CHECKERROR \
|
|
{ if ( pd->exception ) \
|
|
{ rc = FALSE; \
|
|
goto out; \
|
|
} \
|
|
if ( pd->errors > pd->max_errors && pd->max_errors >= 0 ) \
|
|
{ rc = sgml2pl_error(ERR_LIMIT, "max_errors", (long)pd->max_errors); \
|
|
goto out; \
|
|
} \
|
|
}
|
|
|
|
if ( pd->stopat == SA_CONTENT && p->empty_element )
|
|
goto out;
|
|
|
|
if ( in )
|
|
{ int eof = FALSE;
|
|
|
|
if ( in->encoding == ENC_OCTET )
|
|
p->encoded = TRUE; /* parser must decode */
|
|
else
|
|
p->encoded = FALSE; /* already decoded */
|
|
|
|
if ( !recursive )
|
|
{ pd->source = in;
|
|
begin_document_dtd_parser(p);
|
|
}
|
|
|
|
while(!eof)
|
|
{ int c, ateof;
|
|
|
|
if ( (++count % 8192) == 0 && PL_handle_signals() < 0 )
|
|
{ rc = FALSE;
|
|
goto out;
|
|
}
|
|
|
|
if ( has_content_length )
|
|
{ if ( content_length <= 0 )
|
|
c = EOF;
|
|
else
|
|
c = Sgetcode(in);
|
|
ateof = (--content_length <= 0);
|
|
} else
|
|
{ c = Sgetcode(in);
|
|
ateof = Sfeof(in);
|
|
}
|
|
|
|
if ( ateof )
|
|
{ eof = TRUE;
|
|
if ( c == LF ) /* file ends in LF */
|
|
c = CR;
|
|
else if ( c != CR ) /* file ends in normal char */
|
|
{ if ( has_content_length && in->position )
|
|
{ int64_t bc0 = in->position->byteno;
|
|
putchar_dtd_parser(p, c);
|
|
content_length -= in->position->byteno-bc0;
|
|
} else
|
|
{ putchar_dtd_parser(p, c);
|
|
}
|
|
CHECKERROR;
|
|
if ( pd->stopped )
|
|
goto stopped;
|
|
c = CR;
|
|
}
|
|
}
|
|
|
|
if ( has_content_length && in->position )
|
|
{ int64_t bc0 = in->position->byteno;
|
|
putchar_dtd_parser(p, c);
|
|
content_length -= in->position->byteno-bc0;
|
|
} else
|
|
{ putchar_dtd_parser(p, c);
|
|
}
|
|
CHECKERROR;
|
|
if ( pd->stopped )
|
|
{ stopped:
|
|
pd->stopped = FALSE;
|
|
if ( pd->stopat != SA_CONTENT )
|
|
reset_document_dtd_parser(p); /* ensure a clean start */
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if ( !recursive && pd->stopat != SA_INPUT )
|
|
end_document_dtd_parser(p);
|
|
CHECKERROR;
|
|
|
|
out:
|
|
reset_url_cache();
|
|
if ( pd->tail )
|
|
PL_unify_nil(pd->tail);
|
|
|
|
if ( recursive )
|
|
{ p->closure = oldpd;
|
|
} else
|
|
{ p->closure = NULL;
|
|
}
|
|
|
|
pd->magic = 0; /* invalidate */
|
|
sgml_free(pd);
|
|
|
|
return rc;
|
|
}
|
|
|
|
reset_url_cache();
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* DTD PROPERTIES *
|
|
*******************************/
|
|
|
|
static void put_model(term_t t, dtd_model *m);
|
|
|
|
/* doctype(DocType) */
|
|
|
|
static int
|
|
dtd_prop_doctype(dtd *dtd, term_t prop)
|
|
{ if ( dtd->doctype )
|
|
return PL_unify_wchars(prop, PL_ATOM, ENDSNUL, dtd->doctype);
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/* elements(ListOfElements) */
|
|
|
|
static void
|
|
make_model_list(term_t t, dtd_model *m, functor_t f)
|
|
{ if ( !m->next )
|
|
{ put_model(t, m);
|
|
} else
|
|
{ term_t av = PL_new_term_refs(2);
|
|
|
|
put_model(av+0, m);
|
|
make_model_list(av+1, m->next, f);
|
|
PL_cons_functor_v(t, f, av);
|
|
PL_reset_term_refs(av);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
put_model(term_t t, dtd_model *m)
|
|
{ functor_t f;
|
|
|
|
switch(m->type)
|
|
{ case MT_PCDATA:
|
|
PL_put_atom(t, ATOM_pcdata);
|
|
goto card;
|
|
case MT_ELEMENT:
|
|
put_atom_wchars(t, m->content.element->name->name);
|
|
goto card;
|
|
case MT_AND:
|
|
f = FUNCTOR_and2;
|
|
break;
|
|
case MT_SEQ:
|
|
f = FUNCTOR_comma2;
|
|
break;
|
|
case MT_OR:
|
|
f = FUNCTOR_bar2;
|
|
break;
|
|
case MT_UNDEF:
|
|
default:
|
|
assert(0);
|
|
f = 0;
|
|
break;
|
|
}
|
|
|
|
if ( !m->content.group )
|
|
PL_put_atom(t, ATOM_empty);
|
|
else
|
|
make_model_list(t, m->content.group, f);
|
|
|
|
card:
|
|
switch(m->cardinality)
|
|
{ case MC_ONE:
|
|
break;
|
|
case MC_OPT:
|
|
PL_cons_functor_v(t, FUNCTOR_opt1, t);
|
|
break;
|
|
case MC_REP:
|
|
PL_cons_functor_v(t, FUNCTOR_rep1, t);
|
|
break;
|
|
case MC_PLUS:
|
|
PL_cons_functor_v(t, FUNCTOR_plus1, t);
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
put_content(term_t t, dtd_edef *def)
|
|
{ switch(def->type)
|
|
{ case C_EMPTY:
|
|
PL_put_atom(t, ATOM_empty);
|
|
return;
|
|
case C_CDATA:
|
|
PL_put_atom(t, ATOM_cdata);
|
|
return;
|
|
case C_RCDATA:
|
|
PL_put_atom(t, ATOM_rcdata);
|
|
return;
|
|
case C_ANY:
|
|
PL_put_atom(t, ATOM_any);
|
|
return;
|
|
default:
|
|
if ( def->content )
|
|
put_model(t, def->content);
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
dtd_prop_elements(dtd *dtd, term_t prop)
|
|
{ term_t tail = PL_copy_term_ref(prop);
|
|
term_t head = PL_new_term_ref();
|
|
term_t et = PL_new_term_ref();
|
|
dtd_element *e;
|
|
|
|
for( e=dtd->elements; e; e=e->next )
|
|
{ put_atom_wchars(et, e->name->name);
|
|
if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify(head, et) )
|
|
return FALSE;
|
|
}
|
|
|
|
return PL_unify_nil(tail);
|
|
}
|
|
|
|
|
|
static int
|
|
get_element(dtd *dtd, term_t name, dtd_element **elem)
|
|
{ ichar *s;
|
|
dtd_element *e;
|
|
dtd_symbol *id;
|
|
|
|
if ( !PL_get_wchars(name, NULL, &s, CVT_ATOM|CVT_EXCEPTION) )
|
|
return FALSE;
|
|
|
|
if ( !(id=dtd_find_symbol(dtd, s)) ||
|
|
!(e=id->element) )
|
|
return FALSE;
|
|
|
|
*elem = e;
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
dtd_prop_element(dtd *dtd, term_t name, term_t omit, term_t content)
|
|
{ dtd_element *e;
|
|
dtd_edef *def;
|
|
term_t model = PL_new_term_ref();
|
|
|
|
if ( !get_element(dtd, name, &e) || !(def=e->structure) )
|
|
return FALSE;
|
|
|
|
if ( !PL_unify_term(omit, PL_FUNCTOR, FUNCTOR_omit2,
|
|
PL_ATOM, def->omit_open ? ATOM_true : ATOM_false,
|
|
PL_ATOM, def->omit_close ? ATOM_true : ATOM_false) )
|
|
return FALSE;
|
|
|
|
put_content(model, def);
|
|
return PL_unify(content, model);
|
|
}
|
|
|
|
|
|
static int
|
|
dtd_prop_attributes(dtd *dtd, term_t ename, term_t atts)
|
|
{ dtd_element *e;
|
|
term_t tail = PL_copy_term_ref(atts);
|
|
term_t head = PL_new_term_ref();
|
|
term_t elem = PL_new_term_ref();
|
|
dtd_attr_list *al;
|
|
|
|
if ( !get_element(dtd, ename, &e) )
|
|
return FALSE;
|
|
|
|
for(al=e->attributes; al; al=al->next)
|
|
{ put_atom_wchars(elem, al->attribute->name->name);
|
|
|
|
if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify(head, elem) )
|
|
return FALSE;
|
|
}
|
|
|
|
return PL_unify_nil(tail);
|
|
}
|
|
|
|
|
|
typedef struct _plattrdef
|
|
{ attrtype type; /* AT_* */
|
|
const char * name; /* name */
|
|
int islist; /* list-type */
|
|
atom_t atom; /* name as atom */
|
|
} plattrdef;
|
|
|
|
static plattrdef plattrs[] =
|
|
{
|
|
{ AT_CDATA, "cdata", FALSE },
|
|
{ AT_ENTITY, "entity", FALSE },
|
|
{ AT_ENTITIES, "entity", TRUE },
|
|
{ AT_ID, "id", FALSE },
|
|
{ AT_IDREF, "idref", FALSE },
|
|
{ AT_IDREFS, "idref", TRUE },
|
|
{ AT_NAME, "name", FALSE },
|
|
{ AT_NAMES, "name", TRUE },
|
|
/*{ AT_NAMEOF, "nameof", FALSE },*/
|
|
{ AT_NMTOKEN, "nmtoken", FALSE },
|
|
{ AT_NMTOKENS, "nmtoken", TRUE },
|
|
{ AT_NUMBER, "number", FALSE },
|
|
{ AT_NUMBERS, "number", TRUE },
|
|
{ AT_NUTOKEN, "nutoken", FALSE },
|
|
{ AT_NUTOKENS, "nutoken", TRUE },
|
|
{ AT_NOTATION, "notation", FALSE },
|
|
|
|
{ AT_CDATA, NULL, FALSE }
|
|
};
|
|
|
|
|
|
static int
|
|
unify_attribute_type(term_t type, dtd_attr *a)
|
|
{ plattrdef *ad = plattrs;
|
|
|
|
for( ; ad->name; ad++ )
|
|
{ if ( ad->type == a->type )
|
|
{ if ( !ad->atom )
|
|
ad->atom = PL_new_atom(ad->name);
|
|
|
|
if ( ad->islist )
|
|
return PL_unify_term(type, PL_FUNCTOR, FUNCTOR_list1,
|
|
PL_ATOM, ad->atom);
|
|
else
|
|
return PL_unify_atom(type, ad->atom);
|
|
}
|
|
}
|
|
|
|
if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
|
|
{ dtd_name_list *nl;
|
|
term_t tail = PL_new_term_ref();
|
|
term_t head = PL_new_term_ref();
|
|
term_t elem = PL_new_term_ref();
|
|
|
|
if ( !PL_unify_functor(type,
|
|
a->type == AT_NAMEOF ?
|
|
FUNCTOR_nameof1 :
|
|
FUNCTOR_notation1) )
|
|
return FALSE;
|
|
PL_get_arg(1, type, tail);
|
|
|
|
for(nl = a->typeex.nameof; nl; nl = nl->next)
|
|
{ put_atom_wchars(elem, nl->value->name);
|
|
|
|
if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify(head, elem) )
|
|
return FALSE;
|
|
}
|
|
return PL_unify_nil(tail);
|
|
}
|
|
|
|
assert(0);
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
unify_attribute_default(term_t defval, dtd_attr *a)
|
|
{ int v;
|
|
|
|
switch(a->def)
|
|
{ case AT_REQUIRED:
|
|
return PL_unify_atom_chars(defval, "required");
|
|
case AT_CURRENT:
|
|
return PL_unify_atom_chars(defval, "current");
|
|
case AT_CONREF:
|
|
return PL_unify_atom_chars(defval, "conref");
|
|
case AT_IMPLIED:
|
|
return PL_unify_atom_chars(defval, "implied");
|
|
case AT_DEFAULT:
|
|
v = PL_unify_functor(defval, FUNCTOR_default1);
|
|
goto common;
|
|
case AT_FIXED:
|
|
v = PL_unify_functor(defval, FUNCTOR_fixed1);
|
|
common:
|
|
if ( v )
|
|
{ term_t tmp = PL_new_term_ref();
|
|
|
|
PL_get_arg(1, defval, tmp);
|
|
switch( a->type )
|
|
{ case AT_CDATA:
|
|
return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.cdata);
|
|
case AT_NAME:
|
|
case AT_NMTOKEN:
|
|
case AT_NAMEOF:
|
|
case AT_NOTATION:
|
|
return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.name->name);
|
|
case AT_NUMBER:
|
|
return PL_unify_integer(tmp, a->att_def.number);
|
|
default:
|
|
assert(0);
|
|
}
|
|
} else
|
|
return FALSE;
|
|
default:
|
|
assert(0);
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
dtd_prop_attribute(dtd *dtd, term_t ename, term_t aname,
|
|
term_t type, term_t def_value)
|
|
{ dtd_element *e;
|
|
ichar *achars;
|
|
dtd_symbol *asym;
|
|
dtd_attr_list *al;
|
|
|
|
|
|
if ( !get_element(dtd, ename, &e) )
|
|
return FALSE;
|
|
if ( !PL_get_wchars(aname, NULL, &achars, CVT_ATOM|CVT_EXCEPTION) )
|
|
return FALSE;
|
|
if ( !(asym=dtd_find_symbol(dtd, achars)) )
|
|
return FALSE;
|
|
|
|
for(al=e->attributes; al; al=al->next)
|
|
{ if ( al->attribute->name == asym )
|
|
{ if ( unify_attribute_type(type, al->attribute) &&
|
|
unify_attribute_default(def_value, al->attribute) )
|
|
return TRUE;
|
|
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static int
|
|
dtd_prop_entities(dtd *dtd, term_t list)
|
|
{ term_t tail = PL_copy_term_ref(list);
|
|
term_t head = PL_new_term_ref();
|
|
term_t et = PL_new_term_ref();
|
|
dtd_entity *e;
|
|
|
|
for( e=dtd->entities; e; e=e->next )
|
|
{ put_atom_wchars(et, e->name->name);
|
|
if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify(head, et) )
|
|
return FALSE;
|
|
}
|
|
|
|
return PL_unify_nil(tail);
|
|
}
|
|
|
|
|
|
static int
|
|
dtd_prop_entity(dtd *dtd, term_t ename, term_t value)
|
|
{ ichar *s;
|
|
dtd_entity *e;
|
|
dtd_symbol *id;
|
|
|
|
if ( !PL_get_wchars(ename, NULL, &s, CVT_ATOM|CVT_EXCEPTION) )
|
|
return FALSE;
|
|
|
|
if ( !(id=dtd_find_symbol(dtd, s)) ||
|
|
!(e=id->entity) )
|
|
return FALSE;
|
|
|
|
switch(e->type)
|
|
{ case ET_SYSTEM:
|
|
return PL_unify_term(value, PL_FUNCTOR_CHARS, "system", 1,
|
|
PL_CHARS, e->exturl);
|
|
case ET_PUBLIC:
|
|
if ( e->exturl )
|
|
return PL_unify_term(value, PL_FUNCTOR_CHARS, "public", 2,
|
|
PL_CHARS, e->extid,
|
|
PL_CHARS, e->exturl);
|
|
else
|
|
return PL_unify_term(value, PL_FUNCTOR_CHARS, "public", 2,
|
|
PL_CHARS, e->extid,
|
|
PL_VARIABLE);
|
|
|
|
case ET_LITERAL:
|
|
default:
|
|
if ( e->value )
|
|
{ const char *wrap;
|
|
|
|
switch(e->content)
|
|
{ case EC_SGML: wrap = "sgml"; break;
|
|
case EC_STARTTAG: wrap = "start_tag"; break;
|
|
case EC_ENDTAG: wrap = "end_tag"; break;
|
|
case EC_CDATA: wrap = NULL; break;
|
|
case EC_SDATA: wrap = "sdata"; break;
|
|
case EC_NDATA: wrap = "ndata"; break;
|
|
case EC_PI: wrap = "pi"; break;
|
|
default: wrap = NULL; assert(0);
|
|
}
|
|
|
|
if ( wrap )
|
|
return PL_unify_term(value, PL_FUNCTOR_CHARS, wrap, 1,
|
|
PL_CHARS, e->value);
|
|
else
|
|
return PL_unify_wchars(value, PL_ATOM, wcslen(e->value), e->value);
|
|
}
|
|
}
|
|
|
|
assert(0);
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static int
|
|
dtd_prop_notations(dtd *dtd, term_t list)
|
|
{ dtd_notation *n;
|
|
term_t tail = PL_copy_term_ref(list);
|
|
term_t head = PL_new_term_ref();
|
|
|
|
for(n=dtd->notations; n; n=n->next)
|
|
{ if ( PL_unify_list(tail, head, tail) &&
|
|
PL_unify_wchars(head, PL_ATOM, wcslen(n->name->name), n->name->name) )
|
|
continue;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
return PL_unify_nil(tail);
|
|
}
|
|
|
|
|
|
static int
|
|
dtd_prop_notation(dtd *dtd, term_t nname, term_t desc)
|
|
{ char *s;
|
|
dtd_symbol *id;
|
|
|
|
if ( !PL_get_atom_chars(nname, &s) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", nname);
|
|
|
|
if ( (id=dtd_find_symbol(dtd, (ichar *)s)) )
|
|
{ dtd_notation *n;
|
|
|
|
for(n=dtd->notations; n; n=n->next)
|
|
{ if ( n->name == id )
|
|
{ term_t tail = PL_copy_term_ref(desc);
|
|
term_t head = PL_new_term_ref();
|
|
|
|
if ( n->system )
|
|
{ if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify_term(head,
|
|
PL_FUNCTOR_CHARS, "system", 1,
|
|
PL_CHARS, n->system) )
|
|
return FALSE;
|
|
}
|
|
if ( n->public )
|
|
{ if ( !PL_unify_list(tail, head, tail) ||
|
|
!PL_unify_term(head,
|
|
PL_FUNCTOR_CHARS, "public", 1,
|
|
PL_CHARS, n->public) )
|
|
return FALSE;
|
|
}
|
|
|
|
return PL_unify_nil(tail);
|
|
}
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
|
|
typedef struct _prop
|
|
{ int (*func)();
|
|
const char *name;
|
|
int arity;
|
|
functor_t functor;
|
|
} prop;
|
|
|
|
|
|
static prop dtd_props[] =
|
|
{ { dtd_prop_doctype, "doctype", 1 },
|
|
{ dtd_prop_elements, "elements", 1 },
|
|
{ dtd_prop_element, "element", 3 },
|
|
{ dtd_prop_attributes, "attributes", 2, },
|
|
{ dtd_prop_attribute, "attribute", 4, },
|
|
{ dtd_prop_entities, "entities", 1, },
|
|
{ dtd_prop_entity, "entity", 2, },
|
|
{ dtd_prop_notations, "notations", 1, },
|
|
{ dtd_prop_notation, "notation", 2, },
|
|
{ NULL }
|
|
};
|
|
|
|
|
|
static void
|
|
initprops(void)
|
|
{ static int done = FALSE;
|
|
|
|
if ( !done )
|
|
{ prop *p;
|
|
|
|
done = TRUE;
|
|
for(p=dtd_props; p->func; p++)
|
|
p->functor = PL_new_functor(PL_new_atom(p->name), p->arity);
|
|
}
|
|
}
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
dtd_property(DTD, doctype(DocType))
|
|
dtd_property(DTD, elements(ListOfNames))
|
|
dtd_property(DTD, element(Name, Omit, Model))
|
|
dtd_property(DTD, attributes(ElementName, ListOfAttributes)),
|
|
dtd_property(DTD, attribute(ElementName, AttributeName, Type, Default))
|
|
dtd_property(DTD, entities(ListOfEntityNames))
|
|
dtd_property(DTD, entity(Name, Type))
|
|
dtd_property(DTD, notations(ListOfNotationNames)
|
|
dtd_property(DTD, notation(Name, File))
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
static foreign_t
|
|
pl_dtd_property(term_t ref, term_t property)
|
|
{ dtd *dtd;
|
|
const prop *p;
|
|
|
|
initprops();
|
|
|
|
if ( !get_dtd(ref, &dtd) )
|
|
return FALSE;
|
|
|
|
for(p=dtd_props; p->func; p++)
|
|
{ if ( PL_is_functor(property, p->functor) )
|
|
{ term_t a = PL_new_term_refs(p->arity);
|
|
int i;
|
|
|
|
for(i=0; i<p->arity; i++)
|
|
PL_get_arg(i+1, property, a+i);
|
|
|
|
switch(p->arity)
|
|
{ case 1:
|
|
return (*p->func)(dtd, a+0);
|
|
case 2:
|
|
return (*p->func)(dtd, a+0, a+1);
|
|
case 3:
|
|
return (*p->func)(dtd, a+0, a+1, a+2);
|
|
case 4:
|
|
return (*p->func)(dtd, a+0, a+1, a+2, a+3);
|
|
default:
|
|
assert(0);
|
|
return FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
return sgml2pl_error(ERR_DOMAIN, "dtd_property", property);
|
|
}
|
|
|
|
/*******************************
|
|
* CATALOG *
|
|
*******************************/
|
|
|
|
static foreign_t
|
|
pl_sgml_register_catalog_file(term_t file, term_t where)
|
|
{ wchar_t *fn;
|
|
char *w;
|
|
catalog_location loc;
|
|
|
|
if ( !PL_get_wchars(file, NULL, &fn, CVT_ATOM|CVT_EXCEPTION) )
|
|
return FALSE;
|
|
if ( !PL_get_atom_chars(where, &w) )
|
|
return sgml2pl_error(ERR_TYPE, "atom", where);
|
|
|
|
if ( streq(w, "start") )
|
|
loc = CTL_START;
|
|
else if ( streq(w, "end") )
|
|
loc = CTL_END;
|
|
else
|
|
return sgml2pl_error(ERR_DOMAIN, "location", where);
|
|
|
|
return register_catalog_file(fn, loc);
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* INSTALL *
|
|
*******************************/
|
|
|
|
extern install_t install_xml_quote(void);
|
|
#ifdef O_STATISTICS
|
|
extern void sgml_statistics(void);
|
|
#endif
|
|
|
|
install_t
|
|
install(void)
|
|
{ initConstants();
|
|
|
|
PL_register_foreign("new_dtd", 2, pl_new_dtd, 0);
|
|
PL_register_foreign("free_dtd", 1, pl_free_dtd, 0);
|
|
PL_register_foreign("new_sgml_parser", 2, pl_new_sgml_parser, 0);
|
|
PL_register_foreign("free_sgml_parser", 1, pl_free_sgml_parser, 0);
|
|
PL_register_foreign("set_sgml_parser", 2, pl_set_sgml_parser, 0);
|
|
PL_register_foreign("get_sgml_parser", 2, pl_get_sgml_parser, 0);
|
|
PL_register_foreign("open_dtd", 3, pl_open_dtd, 0);
|
|
PL_register_foreign("sgml_parse", 2, pl_sgml_parse,
|
|
PL_FA_TRANSPARENT);
|
|
PL_register_foreign("_sgml_register_catalog_file", 2,
|
|
pl_sgml_register_catalog_file, 0);
|
|
PL_register_foreign("$dtd_property", 2, pl_dtd_property,
|
|
0);
|
|
|
|
install_xml_quote();
|
|
#ifdef O_STATISTICS
|
|
atexit(sgml_statistics);
|
|
#endif
|
|
}
|
|
|