525 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			525 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*  $Id$
 | 
						|
 | 
						|
    Part of SWI-Prolog
 | 
						|
 | 
						|
    Author:        Jan Wielemaker
 | 
						|
    E-mail:        jan@swi.psy.uva.nl
 | 
						|
    WWW:           http://www.swi-prolog.org
 | 
						|
    Copyright (C): 1985-2002, University of Amsterdam
 | 
						|
 | 
						|
    This library is free software; you can redistribute it and/or
 | 
						|
    modify it under the terms of the GNU Lesser General Public
 | 
						|
    License as published by the Free Software Foundation; either
 | 
						|
    version 2.1 of the License, or (at your option) any later version.
 | 
						|
 | 
						|
    This library is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
    Lesser General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU Lesser General Public
 | 
						|
    License along with this library; if not, write to the Free Software
 | 
						|
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
*/
 | 
						|
 | 
						|
#include <stdio.h>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <assert.h>
 | 
						|
#include "dtd.h"
 | 
						|
#include "model.h"
 | 
						|
 | 
						|
/* Number of slots in the fixed-size `visited' set used by the graph walks
   in this file; visit() refuses to add states beyond this. */
#define MAX_VISITED 256
/* Not referenced by any code visible in this file.
   NOTE(review): presumably the intended capacity of the `allow' array
   passed to state_allows_for() -- confirm against the callers. */
#define MAX_ALLOWED 64
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
This module implements a finite state  engine for validating the content
 | 
						|
model of elements. A state machine  is   the  only feasible approach for
 | 
						|
realising an event-driven SGML parser.
 | 
						|
 | 
						|
The public functions are:
 | 
						|
 | 
						|
dtd_state *new_dtd_state(void)
 | 
						|
    Create an anonymous new state.  Normally an element creates two of
 | 
						|
    these for its ->initial_state and ->final_state attributes.
 | 
						|
 | 
						|
dtd_state *make_state_engine(dtd_element *e)
 | 
						|
    Associate a state engine to this element and return the initial
 | 
						|
    state of the engine.  If the element has an engine, simply return
 | 
						|
    the initial state.
 | 
						|
 | 
						|
dtd_state *make_dtd_transition(dtd_state *here, dtd_element *e)
 | 
						|
    Given the current state, see whether we can accept e and return
 | 
						|
    the resulting state.  If no transition is possible return NULL.
 | 
						|
 | 
						|
int same_state(dtd_state *final, dtd_state *here)
 | 
						|
    See whether two states are the same, or the final state can be
 | 
						|
    reached only traversing equivalence links.
 | 
						|
 | 
						|
The A&B&... model
 | 
						|
 | 
						|
Models of the type a&b&c are hard   to translate, as the resulting state
 | 
						|
machine is of size order N! In practice   only  a little of this will be
 | 
						|
used however and we `fix' this problem using a `lazy state-engine', that
 | 
						|
expands to the next level  only  after   reaching  some  level.  See the
 | 
						|
function state_transitions(). The design takes more lazy generation into
 | 
						|
consideration.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
typedef struct _state_transition
 | 
						|
{ dtd_element	     *element;		/* element on transition */
 | 
						|
  dtd_state	     *state;		/* state to go to */
 | 
						|
  struct _state_transition *next;	/* next possible transition */
 | 
						|
} transition;
 | 
						|
 | 
						|
typedef struct _dtd_model_list		/* list (set) of models */
 | 
						|
{ dtd_model *model;
 | 
						|
  struct _dtd_model_list *next;
 | 
						|
} dtd_model_list;
 | 
						|
 | 
						|
/* Kinds of lazy expansion a state may carry; currently only one. */
typedef enum {
  EX_AND                              /* expand (a&b&...) */
} expand_type;
 | 
						|
 | 
						|
typedef struct _state_expander
 | 
						|
{ dtd_state	       *target;		/* Target state to expand to */
 | 
						|
  expand_type		type;		/* EX_* */
 | 
						|
  union
 | 
						|
  { struct
 | 
						|
    { dtd_model_list *set;		/* Models we should still see */
 | 
						|
    } and;				/* Expand (a&b&...) */
 | 
						|
  } kind;
 | 
						|
} expander;
 | 
						|
 | 
						|
typedef struct _visited
 | 
						|
{ int	size;				/* set-size */
 | 
						|
  dtd_state *states[MAX_VISITED];	/* The set */
 | 
						|
} visited;
 | 
						|
 | 
						|
 | 
						|
static void	translate_model(dtd_model *m, dtd_state *from, dtd_state *to);
 | 
						|
static transition *state_transitions(dtd_state *state);
 | 
						|
 | 
						|
static int
 | 
						|
visit(dtd_state *state, visited *visited)
 | 
						|
{ int i;
 | 
						|
 | 
						|
  for(i=0; i<visited->size; i++)
 | 
						|
  { if ( visited->states[i] == state )
 | 
						|
      return FALSE;
 | 
						|
  }
 | 
						|
      
 | 
						|
  if ( visited->size >= MAX_VISITED )
 | 
						|
  { fprintf(stderr, "Reached MAX_VISITED!\n");
 | 
						|
    return FALSE;
 | 
						|
  }
 | 
						|
 | 
						|
  visited->states[visited->size++] = state;
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static dtd_state *
 | 
						|
do_make_dtd_transition(dtd_state *here, dtd_element *e, visited *visited)
 | 
						|
{ transition *tset = state_transitions(here);
 | 
						|
  transition *t;
 | 
						|
 | 
						|
  for(t=tset; t; t=t->next)
 | 
						|
  { if ( t->element == e )
 | 
						|
      return t->state;
 | 
						|
  }
 | 
						|
 | 
						|
  for(t=tset; t; t=t->next)
 | 
						|
  { if ( t->element == NULL && visit(t->state, visited) )
 | 
						|
    { dtd_state *new;
 | 
						|
 | 
						|
      if ( (new=do_make_dtd_transition(t->state, e, visited)) )
 | 
						|
	return new;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dtd_state *
 | 
						|
make_dtd_transition(dtd_state *here, dtd_element *e)
 | 
						|
{ visited visited;
 | 
						|
  visited.size = 0;
 | 
						|
 | 
						|
  if ( !here )				/* from nowhere to nowhere */
 | 
						|
    return NULL;
 | 
						|
 | 
						|
  return do_make_dtd_transition(here, e, &visited);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
find_same_state(dtd_state *final, dtd_state *here, visited *visited)
 | 
						|
{ transition *t;
 | 
						|
 | 
						|
  if ( final == here )
 | 
						|
    return TRUE;
 | 
						|
 | 
						|
  for(t=state_transitions(here); t; t=t->next)
 | 
						|
  { if ( t->element == NULL && visit(t->state, visited) )
 | 
						|
    { if ( find_same_state(final, t->state, visited) )
 | 
						|
	return TRUE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
same_state(dtd_state *final, dtd_state *here)
 | 
						|
{ visited visited;
 | 
						|
  visited.size = 0;
 | 
						|
 | 
						|
  return find_same_state(final, here, &visited);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
state_allows_for(dtd_state *state, dtd_element **allow, int *n)
 | 
						|
    See what elements are allowed if we are in this state.  This is
 | 
						|
    currently not used, but might prove handy for error messages or
 | 
						|
    syntax-directed editors.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static void
 | 
						|
do_state_allows_for(dtd_state *here, dtd_element **allow, int *n,
 | 
						|
		    visited *visited)
 | 
						|
{ transition *t;
 | 
						|
 | 
						|
  for(t=state_transitions(here); t; t=t->next)
 | 
						|
  { int i;
 | 
						|
 | 
						|
    if ( t->element == NULL )
 | 
						|
    { if ( visit(t->state, visited) )
 | 
						|
	do_state_allows_for(t->state, allow, n, visited);
 | 
						|
    } else
 | 
						|
    { for(i=0; i<*n; i++)
 | 
						|
      { if ( allow[i] == t->element )
 | 
						|
	  goto next;
 | 
						|
      }
 | 
						|
      allow[(*n)++] = t->element;
 | 
						|
    }
 | 
						|
  next:
 | 
						|
    ;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void
 | 
						|
state_allows_for(dtd_state *state, dtd_element **allow, int *n)
 | 
						|
{ visited visited;
 | 
						|
  visited.size = 0;
 | 
						|
 | 
						|
  *n = 0;
 | 
						|
  if ( state )
 | 
						|
    do_state_allows_for(state, allow, n, &visited);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
do_find_omitted_path(dtd_state *state, dtd_element *e,
 | 
						|
		     dtd_element **path, int *pl,
 | 
						|
		     visited *visited)
 | 
						|
{ transition *tset = state_transitions(state);
 | 
						|
  transition *t;
 | 
						|
  int pathlen = *pl;
 | 
						|
 | 
						|
  for(t=tset; t; t=t->next)
 | 
						|
  { if ( t->element == e )
 | 
						|
      return TRUE;
 | 
						|
 | 
						|
    if ( t->element &&
 | 
						|
	 t->element != CDATA_ELEMENT &&
 | 
						|
	 t->element->structure &&
 | 
						|
	 t->element->structure->omit_open &&
 | 
						|
	 visit(t->state, visited) )
 | 
						|
    { dtd_state *initial = make_state_engine(t->element);
 | 
						|
 | 
						|
      path[pathlen] = t->element;
 | 
						|
      *pl = pathlen+1;
 | 
						|
      if ( do_find_omitted_path(initial, e, path, pl, visited) )
 | 
						|
	return TRUE;
 | 
						|
      *pl = pathlen;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  for(t=tset; t; t=t->next)
 | 
						|
  { if ( !t->element &&
 | 
						|
	 visit(t->state, visited) )
 | 
						|
    { if ( do_find_omitted_path(t->state, e, path, pl, visited) )
 | 
						|
	return TRUE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
int 
 | 
						|
find_omitted_path(dtd_state *state, dtd_element *e, dtd_element **path)
 | 
						|
{ int pl = 0;
 | 
						|
  visited visited;
 | 
						|
  visited.size = 0;
 | 
						|
 | 
						|
  if ( state && do_find_omitted_path(state, e, path, &pl, &visited) )
 | 
						|
    return pl;
 | 
						|
 | 
						|
  return -1;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Create an anonymous new state.

   The state is obtained from sgml_calloc(1, ...), i.e. one zero-filled
   object, so it starts without transitions and without an expander.

   The parameter list is spelled (void) so that the definition is a real
   prototype, as the description at the top of this file documents; an
   empty `()' list leaves the arguments unchecked in pre-C23 C. */
dtd_state *
new_dtd_state(void)
{
  dtd_state *s = sgml_calloc(1, sizeof(*s));

  return s;
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
link(dtd_state *from, dtd_state *to, dtd_element *e)
 | 
						|
{ transition *t = sgml_calloc(1, sizeof(*t));
 | 
						|
 | 
						|
  t->state = to;
 | 
						|
  t->element = e;
 | 
						|
  t->next = from->transitions;
 | 
						|
  from->transitions = t;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      EXPANSION		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static void
 | 
						|
add_model_list(dtd_model_list **list, dtd_model *m)
 | 
						|
{ dtd_model_list *l = sgml_calloc(1, sizeof(*l));
 | 
						|
 | 
						|
  l->model = m;
 | 
						|
 | 
						|
  for( ; *list; list = &(*list)->next)
 | 
						|
    ;
 | 
						|
  *list = l;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static transition *
 | 
						|
state_transitions(dtd_state *state)
 | 
						|
{ if ( !state->transitions && state->expander )
 | 
						|
  { expander *ex = state->expander;
 | 
						|
    
 | 
						|
    switch(ex->type)
 | 
						|
    { case EX_AND:
 | 
						|
      { dtd_model_list *left = ex->kind.and.set;
 | 
						|
 | 
						|
	if ( !left )			/* empty AND (should not happen) */
 | 
						|
	{ link(state, ex->target, NULL); 
 | 
						|
	} else if ( !left->next )	/* only one left */
 | 
						|
	{ translate_model(left->model, state, ex->target);
 | 
						|
	} else
 | 
						|
	{ for( ; left; left = left->next )
 | 
						|
	  { dtd_state *tmp = new_dtd_state();
 | 
						|
	    expander *nex = sgml_calloc(1, sizeof(*nex));
 | 
						|
	    dtd_model_list *l;
 | 
						|
 | 
						|
	    translate_model(left->model, state, tmp);
 | 
						|
	    tmp->expander = nex;
 | 
						|
	    nex->target = ex->target;
 | 
						|
	    nex->type = EX_AND;
 | 
						|
	    for(l=ex->kind.and.set; l; l=l->next)
 | 
						|
	    { if ( l != left )
 | 
						|
		add_model_list(&nex->kind.and.set, l->model);
 | 
						|
	    }
 | 
						|
	  }
 | 
						|
	}
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return state->transitions;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	   TRANSLATION		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
translate_one(dtd_model *m, dtd_state *from, dtd_state *to)
 | 
						|
{ switch(m->type)
 | 
						|
  { case MT_ELEMENT:
 | 
						|
    { dtd_element *e = m->content.element;
 | 
						|
 | 
						|
      link(from, to, e);
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    case MT_SEQ:			/* a,b,... */
 | 
						|
    { dtd_model *sub;
 | 
						|
 | 
						|
      for( sub = m->content.group; sub->next; sub = sub->next )
 | 
						|
      { dtd_state *tmp = new_dtd_state();
 | 
						|
	translate_model(sub, from, tmp);
 | 
						|
	from = tmp;
 | 
						|
      }
 | 
						|
      translate_model(sub, from, to);
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    case MT_AND:			/* a&b&... */
 | 
						|
    { expander *ex = sgml_calloc(1, sizeof(*ex));
 | 
						|
      dtd_model *sub;
 | 
						|
 | 
						|
      ex->target = to;
 | 
						|
      ex->type   = EX_AND;
 | 
						|
      
 | 
						|
      for( sub = m->content.group; sub; sub = sub->next )
 | 
						|
	add_model_list(&ex->kind.and.set, sub);
 | 
						|
 | 
						|
      from->expander = ex;
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    case MT_OR:				/* a|b|... */
 | 
						|
    { dtd_model *sub;
 | 
						|
 | 
						|
      for( sub = m->content.group; sub; sub = sub->next )
 | 
						|
	translate_model(sub, from, to);
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    case MT_PCDATA:
 | 
						|
    case MT_UNDEF:
 | 
						|
      assert(0);
 | 
						|
  }
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
translate_model(dtd_model *m, dtd_state *from, dtd_state *to)
 | 
						|
{ if ( m->type == MT_PCDATA )
 | 
						|
  { link(from, from, CDATA_ELEMENT);
 | 
						|
    link(from, to, NULL);
 | 
						|
    return;
 | 
						|
  }
 | 
						|
 | 
						|
  switch(m->cardinality)
 | 
						|
  { case MC_OPT:			/* ? */
 | 
						|
      link(from, to, NULL);
 | 
						|
    /*FALLTHROUGH*/
 | 
						|
    case MC_ONE:
 | 
						|
      translate_one(m, from, to);
 | 
						|
      return;
 | 
						|
    case MC_REP:			/* * */
 | 
						|
      translate_one(m, from, from);
 | 
						|
      link(from, to, NULL);
 | 
						|
      return;
 | 
						|
    case MC_PLUS:			/* + */
 | 
						|
      translate_one(m, from, to);
 | 
						|
      translate_one(m, to, to);
 | 
						|
      return;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dtd_state *
 | 
						|
make_state_engine(dtd_element *e)
 | 
						|
{ if ( e->structure )
 | 
						|
  { dtd_edef *def = e->structure;
 | 
						|
 | 
						|
    if ( !def->initial_state )
 | 
						|
    { if ( def->content )
 | 
						|
      { def->initial_state = new_dtd_state();
 | 
						|
	def->final_state   = new_dtd_state();
 | 
						|
    
 | 
						|
	translate_model(def->content, def->initial_state, def->final_state);
 | 
						|
      } else if ( def->type == C_CDATA || def->type == C_RCDATA )
 | 
						|
      { def->initial_state = new_dtd_state();
 | 
						|
	def->final_state   = new_dtd_state();
 | 
						|
 | 
						|
	link(def->initial_state, def->initial_state, CDATA_ELEMENT);
 | 
						|
	link(def->initial_state, def->final_state, NULL);
 | 
						|
      } else
 | 
						|
	return NULL;
 | 
						|
    }
 | 
						|
 | 
						|
    return def->initial_state;
 | 
						|
  }
 | 
						|
  
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	       FREE		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static void do_free_state_engine(dtd_state *state, visited *visited);
 | 
						|
 | 
						|
static void
 | 
						|
free_model_list(dtd_model_list *l)
 | 
						|
{ dtd_model_list *next;
 | 
						|
 | 
						|
  for( ; l; l=next)
 | 
						|
  { next = l->next;
 | 
						|
 | 
						|
    sgml_free(l);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_expander(expander *e, visited *visited)
 | 
						|
{ if ( visit(e->target, visited) )
 | 
						|
    do_free_state_engine(e->target, visited);
 | 
						|
 | 
						|
  switch(e->type)
 | 
						|
  { case EX_AND:
 | 
						|
      free_model_list(e->kind.and.set);
 | 
						|
    default:
 | 
						|
      ;
 | 
						|
  }
 | 
						|
 | 
						|
  sgml_free(e);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
do_free_state_engine(dtd_state *state, visited *visited)
 | 
						|
{ transition *t, *next;
 | 
						|
  
 | 
						|
  for(t=state->transitions; t; t=next)
 | 
						|
  { next = t->next;
 | 
						|
 | 
						|
    if ( visit(t->state, visited) )
 | 
						|
      do_free_state_engine(t->state, visited);
 | 
						|
 | 
						|
    sgml_free(t);
 | 
						|
  }
 | 
						|
 | 
						|
  if ( state->expander )
 | 
						|
    free_expander(state->expander, visited);
 | 
						|
 | 
						|
  sgml_free(state);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void
 | 
						|
free_state_engine(dtd_state *state)
 | 
						|
{ if ( state )
 | 
						|
  { visited visited;
 | 
						|
    visited.size = 0;
 | 
						|
 | 
						|
    visit(state, &visited);
 | 
						|
    do_free_state_engine(state, &visited);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 |