2009-03-13 19:39:06 +00:00
|
|
|
/* $Id$
|
|
|
|
|
|
|
|
Part of SWI-Prolog
|
|
|
|
|
|
|
|
Author: Jan Wielemaker
|
|
|
|
E-mail: jan@swi.psy.uva.nl
|
|
|
|
WWW: http://www.swi-prolog.org
|
|
|
|
Copyright (C): 1985-2002, University of Amsterdam
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <assert.h>
|
|
|
|
#include "dtd.h"
|
|
|
|
#include "model.h"
|
|
|
|
|
|
|
|
#define MAX_VISITED 256
|
|
|
|
#define MAX_ALLOWED 64
|
|
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
This module implements a finite state engine for validating the content
|
|
|
|
model of elements. A state machine is the only feasible approach for
|
|
|
|
realising an event-driven SGML parser.
|
|
|
|
|
|
|
|
The public functions are:
|
|
|
|
|
|
|
|
dtd_state *new_dtd_state(void)
|
|
|
|
Create an anonymous new state. Normally an element creates two of
|
|
|
|
these for its ->initial_state and ->final_state attributes.
|
|
|
|
|
|
|
|
dtd_state *make_state_engine(dtd_element *e)
|
|
|
|
Associate a state engine to this element and return the initial
|
|
|
|
state of the engine. If the element has an engine, simply return
|
|
|
|
the initial state.
|
|
|
|
|
|
|
|
dtd_state *make_dtd_transition(dtd_state *here, dtd_element *e)
|
|
|
|
Given the current state, see whether we can accept e and return
|
|
|
|
the resulting state. If no transition is possible return NULL.
|
|
|
|
|
|
|
|
int same_state(dtd_state *final, dtd_state *here)
|
|
|
|
See whether two states are the same, or the final state can be
|
|
|
|
reached only traversing equivalence links.
|
|
|
|
|
|
|
|
The A&B&... model
|
|
|
|
|
|
|
|
Models of the type a&b&c are hard to translate, as the resulting state
|
|
|
|
machine is of size order N! In practice only a little of this will be
|
|
|
|
used however and we `fix' this problem using a `lazy state-engine', that
|
|
|
|
expands to the next level only after reaching some level. See the
|
|
|
|
function state_transitions(). The design takes more lazy generation into
|
|
|
|
consideration.
|
|
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
|
|
|
|
typedef struct _state_transition
|
|
|
|
{ dtd_element *element; /* element on transition */
|
|
|
|
dtd_state *state; /* state to go to */
|
|
|
|
struct _state_transition *next; /* next possible transition */
|
|
|
|
} transition;
|
|
|
|
|
|
|
|
typedef struct _dtd_model_list /* list (set) of models */
|
|
|
|
{ dtd_model *model;
|
|
|
|
struct _dtd_model_list *next;
|
|
|
|
} dtd_model_list;
|
|
|
|
|
|
|
|
/* Kinds of lazy expansion a state can carry. */
typedef enum {
  EX_AND  /* expansion of an (a&b&...) group */
} expand_type;
|
|
|
|
|
|
|
|
typedef struct _state_expander
|
|
|
|
{ dtd_state *target; /* Target state to expand to */
|
|
|
|
expand_type type; /* EX_* */
|
|
|
|
union
|
|
|
|
{ struct
|
|
|
|
{ dtd_model_list *set; /* Models we should still see */
|
|
|
|
} and; /* Expand (a&b&...) */
|
|
|
|
} kind;
|
|
|
|
} expander;
|
|
|
|
|
|
|
|
typedef struct _visited
|
|
|
|
{ int size; /* set-size */
|
|
|
|
dtd_state *states[MAX_VISITED]; /* The set */
|
|
|
|
} visited;
|
|
|
|
|
|
|
|
|
|
|
|
static void translate_model(dtd_model *m, dtd_state *from, dtd_state *to);
|
|
|
|
static transition *state_transitions(dtd_state *state);
|
|
|
|
|
|
|
|
static int
|
|
|
|
visit(dtd_state *state, visited *visited)
|
|
|
|
{ int i;
|
|
|
|
|
|
|
|
for(i=0; i<visited->size; i++)
|
|
|
|
{ if ( visited->states[i] == state )
|
|
|
|
return FALSE;
|
|
|
|
}
|
2010-05-06 10:59:09 +01:00
|
|
|
|
2009-03-13 19:39:06 +00:00
|
|
|
if ( visited->size >= MAX_VISITED )
|
|
|
|
{ fprintf(stderr, "Reached MAX_VISITED!\n");
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
visited->states[visited->size++] = state;
|
|
|
|
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static dtd_state *
|
|
|
|
do_make_dtd_transition(dtd_state *here, dtd_element *e, visited *visited)
|
|
|
|
{ transition *tset = state_transitions(here);
|
|
|
|
transition *t;
|
|
|
|
|
|
|
|
for(t=tset; t; t=t->next)
|
|
|
|
{ if ( t->element == e )
|
|
|
|
return t->state;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(t=tset; t; t=t->next)
|
|
|
|
{ if ( t->element == NULL && visit(t->state, visited) )
|
|
|
|
{ dtd_state *new;
|
|
|
|
|
|
|
|
if ( (new=do_make_dtd_transition(t->state, e, visited)) )
|
|
|
|
return new;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
dtd_state *
|
|
|
|
make_dtd_transition(dtd_state *here, dtd_element *e)
|
|
|
|
{ visited visited;
|
|
|
|
visited.size = 0;
|
|
|
|
|
|
|
|
if ( !here ) /* from nowhere to nowhere */
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return do_make_dtd_transition(here, e, &visited);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
find_same_state(dtd_state *final, dtd_state *here, visited *visited)
|
|
|
|
{ transition *t;
|
|
|
|
|
|
|
|
if ( final == here )
|
|
|
|
return TRUE;
|
|
|
|
|
|
|
|
for(t=state_transitions(here); t; t=t->next)
|
|
|
|
{ if ( t->element == NULL && visit(t->state, visited) )
|
|
|
|
{ if ( find_same_state(final, t->state, visited) )
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
same_state(dtd_state *final, dtd_state *here)
|
|
|
|
{ visited visited;
|
|
|
|
visited.size = 0;
|
|
|
|
|
|
|
|
return find_same_state(final, here, &visited);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
state_allows_for(dtd_state *state, dtd_element **allow, int *n)
|
|
|
|
See what elements are allowed if we are in this state. This is
|
|
|
|
currently not used, but might prove handy for error messages or
|
|
|
|
syntax-directed editors.
|
|
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
|
|
|
|
static void
|
|
|
|
do_state_allows_for(dtd_state *here, dtd_element **allow, int *n,
|
|
|
|
visited *visited)
|
|
|
|
{ transition *t;
|
|
|
|
|
|
|
|
for(t=state_transitions(here); t; t=t->next)
|
|
|
|
{ int i;
|
|
|
|
|
|
|
|
if ( t->element == NULL )
|
|
|
|
{ if ( visit(t->state, visited) )
|
|
|
|
do_state_allows_for(t->state, allow, n, visited);
|
|
|
|
} else
|
|
|
|
{ for(i=0; i<*n; i++)
|
|
|
|
{ if ( allow[i] == t->element )
|
|
|
|
goto next;
|
|
|
|
}
|
|
|
|
allow[(*n)++] = t->element;
|
|
|
|
}
|
|
|
|
next:
|
|
|
|
;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
state_allows_for(dtd_state *state, dtd_element **allow, int *n)
|
|
|
|
{ visited visited;
|
|
|
|
visited.size = 0;
|
|
|
|
|
|
|
|
*n = 0;
|
|
|
|
if ( state )
|
|
|
|
do_state_allows_for(state, allow, n, &visited);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
do_find_omitted_path(dtd_state *state, dtd_element *e,
|
|
|
|
dtd_element **path, int *pl,
|
|
|
|
visited *visited)
|
|
|
|
{ transition *tset = state_transitions(state);
|
|
|
|
transition *t;
|
|
|
|
int pathlen = *pl;
|
|
|
|
|
|
|
|
for(t=tset; t; t=t->next)
|
|
|
|
{ if ( t->element == e )
|
|
|
|
return TRUE;
|
|
|
|
|
|
|
|
if ( t->element &&
|
|
|
|
t->element != CDATA_ELEMENT &&
|
|
|
|
t->element->structure &&
|
|
|
|
t->element->structure->omit_open &&
|
|
|
|
visit(t->state, visited) )
|
|
|
|
{ dtd_state *initial = make_state_engine(t->element);
|
|
|
|
|
|
|
|
path[pathlen] = t->element;
|
|
|
|
*pl = pathlen+1;
|
|
|
|
if ( do_find_omitted_path(initial, e, path, pl, visited) )
|
|
|
|
return TRUE;
|
|
|
|
*pl = pathlen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for(t=tset; t; t=t->next)
|
|
|
|
{ if ( !t->element &&
|
|
|
|
visit(t->state, visited) )
|
|
|
|
{ if ( do_find_omitted_path(t->state, e, path, pl, visited) )
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-05-06 10:59:09 +01:00
|
|
|
int
|
2009-03-13 19:39:06 +00:00
|
|
|
find_omitted_path(dtd_state *state, dtd_element *e, dtd_element **path)
|
|
|
|
{ int pl = 0;
|
|
|
|
visited visited;
|
|
|
|
visited.size = 0;
|
|
|
|
|
|
|
|
if ( state && do_find_omitted_path(state, e, path, &pl, &visited) )
|
|
|
|
return pl;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Create a new anonymous state.  The state is obtained from
 * sgml_calloc(), so it is zero-filled: it starts without transitions
 * and without an expander.
 *
 * Defined with an explicit (void) parameter list so that the definition
 * is a prototype, matching the signature documented at the top of this
 * file.
 */
dtd_state *
new_dtd_state(void)
{
  dtd_state *s = sgml_calloc(1, sizeof(*s));

  return s;
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
link(dtd_state *from, dtd_state *to, dtd_element *e)
|
|
|
|
{ transition *t = sgml_calloc(1, sizeof(*t));
|
|
|
|
|
|
|
|
t->state = to;
|
|
|
|
t->element = e;
|
|
|
|
t->next = from->transitions;
|
|
|
|
from->transitions = t;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*******************************
|
|
|
|
* EXPANSION *
|
|
|
|
*******************************/
|
|
|
|
|
|
|
|
static void
|
|
|
|
add_model_list(dtd_model_list **list, dtd_model *m)
|
|
|
|
{ dtd_model_list *l = sgml_calloc(1, sizeof(*l));
|
|
|
|
|
|
|
|
l->model = m;
|
|
|
|
|
|
|
|
for( ; *list; list = &(*list)->next)
|
|
|
|
;
|
|
|
|
*list = l;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static transition *
|
|
|
|
state_transitions(dtd_state *state)
|
|
|
|
{ if ( !state->transitions && state->expander )
|
|
|
|
{ expander *ex = state->expander;
|
2010-05-06 10:59:09 +01:00
|
|
|
|
2009-03-13 19:39:06 +00:00
|
|
|
switch(ex->type)
|
|
|
|
{ case EX_AND:
|
|
|
|
{ dtd_model_list *left = ex->kind.and.set;
|
|
|
|
|
|
|
|
if ( !left ) /* empty AND (should not happen) */
|
2010-05-06 10:59:09 +01:00
|
|
|
{ link(state, ex->target, NULL);
|
2009-03-13 19:39:06 +00:00
|
|
|
} else if ( !left->next ) /* only one left */
|
|
|
|
{ translate_model(left->model, state, ex->target);
|
|
|
|
} else
|
|
|
|
{ for( ; left; left = left->next )
|
|
|
|
{ dtd_state *tmp = new_dtd_state();
|
|
|
|
expander *nex = sgml_calloc(1, sizeof(*nex));
|
|
|
|
dtd_model_list *l;
|
|
|
|
|
|
|
|
translate_model(left->model, state, tmp);
|
|
|
|
tmp->expander = nex;
|
|
|
|
nex->target = ex->target;
|
|
|
|
nex->type = EX_AND;
|
|
|
|
for(l=ex->kind.and.set; l; l=l->next)
|
|
|
|
{ if ( l != left )
|
|
|
|
add_model_list(&nex->kind.and.set, l->model);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return state->transitions;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*******************************
|
|
|
|
* TRANSLATION *
|
|
|
|
*******************************/
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
translate_one(dtd_model *m, dtd_state *from, dtd_state *to)
|
|
|
|
{ switch(m->type)
|
|
|
|
{ case MT_ELEMENT:
|
|
|
|
{ dtd_element *e = m->content.element;
|
|
|
|
|
|
|
|
link(from, to, e);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case MT_SEQ: /* a,b,... */
|
|
|
|
{ dtd_model *sub;
|
|
|
|
|
|
|
|
for( sub = m->content.group; sub->next; sub = sub->next )
|
|
|
|
{ dtd_state *tmp = new_dtd_state();
|
|
|
|
translate_model(sub, from, tmp);
|
|
|
|
from = tmp;
|
|
|
|
}
|
|
|
|
translate_model(sub, from, to);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case MT_AND: /* a&b&... */
|
|
|
|
{ expander *ex = sgml_calloc(1, sizeof(*ex));
|
|
|
|
dtd_model *sub;
|
|
|
|
|
|
|
|
ex->target = to;
|
|
|
|
ex->type = EX_AND;
|
2010-05-06 10:59:09 +01:00
|
|
|
|
2009-03-13 19:39:06 +00:00
|
|
|
for( sub = m->content.group; sub; sub = sub->next )
|
|
|
|
add_model_list(&ex->kind.and.set, sub);
|
|
|
|
|
|
|
|
from->expander = ex;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case MT_OR: /* a|b|... */
|
|
|
|
{ dtd_model *sub;
|
|
|
|
|
|
|
|
for( sub = m->content.group; sub; sub = sub->next )
|
|
|
|
translate_model(sub, from, to);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case MT_PCDATA:
|
|
|
|
case MT_UNDEF:
|
|
|
|
assert(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
translate_model(dtd_model *m, dtd_state *from, dtd_state *to)
|
|
|
|
{ if ( m->type == MT_PCDATA )
|
|
|
|
{ link(from, from, CDATA_ELEMENT);
|
|
|
|
link(from, to, NULL);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch(m->cardinality)
|
|
|
|
{ case MC_OPT: /* ? */
|
|
|
|
link(from, to, NULL);
|
|
|
|
/*FALLTHROUGH*/
|
|
|
|
case MC_ONE:
|
|
|
|
translate_one(m, from, to);
|
|
|
|
return;
|
|
|
|
case MC_REP: /* * */
|
|
|
|
translate_one(m, from, from);
|
|
|
|
link(from, to, NULL);
|
|
|
|
return;
|
|
|
|
case MC_PLUS: /* + */
|
|
|
|
translate_one(m, from, to);
|
|
|
|
translate_one(m, to, to);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
dtd_state *
|
|
|
|
make_state_engine(dtd_element *e)
|
|
|
|
{ if ( e->structure )
|
|
|
|
{ dtd_edef *def = e->structure;
|
|
|
|
|
|
|
|
if ( !def->initial_state )
|
|
|
|
{ if ( def->content )
|
|
|
|
{ def->initial_state = new_dtd_state();
|
|
|
|
def->final_state = new_dtd_state();
|
2010-05-06 10:59:09 +01:00
|
|
|
|
2009-03-13 19:39:06 +00:00
|
|
|
translate_model(def->content, def->initial_state, def->final_state);
|
|
|
|
} else if ( def->type == C_CDATA || def->type == C_RCDATA )
|
|
|
|
{ def->initial_state = new_dtd_state();
|
|
|
|
def->final_state = new_dtd_state();
|
|
|
|
|
|
|
|
link(def->initial_state, def->initial_state, CDATA_ELEMENT);
|
|
|
|
link(def->initial_state, def->final_state, NULL);
|
|
|
|
} else
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return def->initial_state;
|
|
|
|
}
|
2010-05-06 10:59:09 +01:00
|
|
|
|
2009-03-13 19:39:06 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*******************************
|
|
|
|
* FREE *
|
|
|
|
*******************************/
|
|
|
|
|
|
|
|
static void do_free_state_engine(dtd_state *state, visited *visited);
|
|
|
|
|
|
|
|
static void
|
|
|
|
free_model_list(dtd_model_list *l)
|
|
|
|
{ dtd_model_list *next;
|
|
|
|
|
|
|
|
for( ; l; l=next)
|
|
|
|
{ next = l->next;
|
|
|
|
|
|
|
|
sgml_free(l);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
free_expander(expander *e, visited *visited)
|
|
|
|
{ if ( visit(e->target, visited) )
|
|
|
|
do_free_state_engine(e->target, visited);
|
|
|
|
|
|
|
|
switch(e->type)
|
|
|
|
{ case EX_AND:
|
|
|
|
free_model_list(e->kind.and.set);
|
|
|
|
default:
|
|
|
|
;
|
|
|
|
}
|
|
|
|
|
|
|
|
sgml_free(e);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
do_free_state_engine(dtd_state *state, visited *visited)
|
|
|
|
{ transition *t, *next;
|
2010-05-06 10:59:09 +01:00
|
|
|
|
2009-03-13 19:39:06 +00:00
|
|
|
for(t=state->transitions; t; t=next)
|
|
|
|
{ next = t->next;
|
|
|
|
|
|
|
|
if ( visit(t->state, visited) )
|
|
|
|
do_free_state_engine(t->state, visited);
|
|
|
|
|
|
|
|
sgml_free(t);
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( state->expander )
|
|
|
|
free_expander(state->expander, visited);
|
|
|
|
|
|
|
|
sgml_free(state);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
free_state_engine(dtd_state *state)
|
|
|
|
{ if ( state )
|
|
|
|
{ visited visited;
|
|
|
|
visited.size = 0;
|
|
|
|
|
|
|
|
visit(state, &visited);
|
|
|
|
do_free_state_engine(state, &visited);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|