/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        jan@swi.psy.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2002, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "dtd.h"
#include "model.h"

/* Capacity of the `visited' state set that guards the graph walks in
   this file; visit() refuses to record more states than this. */
#define MAX_VISITED 256
/* NOTE(review): not referenced in the visible part of this file.
   Presumably the intended capacity of the `allow' array filled by
   state_allows_for() -- confirm against the callers. */
#define MAX_ALLOWED 64

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This module implements a finite state  engine for validating the content
model of elements. A state machine  is   the  only feasible approach for
realising an event-driven SGML parser.

The public functions are:

dtd_state *new_dtd_state(void)
    Create an anonymous new state.  Normally an element creates two of
    these for its ->initial_state and ->final_state attributes.

dtd_state *make_state_engine(dtd_element *e)
    Associate a state engine to this element and return the initial
    state of the engine.  If the element has an engine, simply return
    the initial state.

dtd_state *make_dtd_transition(dtd_state *here, dtd_element *e)
    Given the current state, see whether we can accept e and return
    the resulting state.  If no transition is possible return NULL.

int same_state(dtd_state *final, dtd_state *here)
    See whether two states are the same, or the final state can be
    reached only traversing equivalence links.

The A&B&... model

Models of the type a&b&c are hard   to translate, as the resulting state
machine is of size order N! In practice   only  a little of this will be
used however and we `fix' this problem using a `lazy state-engine', that
expands to the next level  only  after   reaching  some  level.  See the
function state_transitions(). The design takes more lazy generation into
consideration.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* One outgoing edge of a state.  Edges of a state form a singly linked
   list.  An edge whose element is NULL is an equivalence link: the
   walkers in this file follow it without matching an element. */
typedef struct _state_transition
{ dtd_element *element;                 /* label of the edge, or NULL */
  dtd_state *state;                     /* state the edge leads to */
  struct _state_transition *next;       /* next edge of the same state */
} transition;

/* Cell of a singly linked list of content models, used as a set of the
   members of an (a&b&...) group that still have to be matched. */
typedef struct _dtd_model_list
{ dtd_model *model;                     /* the member model */
  struct _dtd_model_list *next;         /* next cell, NULL at the end */
} dtd_model_list;

/* Kinds of lazy expansion a state can carry.  Only the AND-group
   expansion exists at the moment. */
typedef enum
{ EX_AND                                /* lazily expand (a&b&...) */
} expand_type;

/* Description of transitions that are generated on demand.  A state
   that has an expander and no transitions yet is expanded by
   state_transitions() the first time its transitions are requested. */
typedef struct _state_expander
{ dtd_state *target;                    /* state reached when all is matched */
  expand_type type;                     /* one of the EX_* constants */
  union
  { struct
    { dtd_model_list *set;              /* members not yet seen */
    } and;                              /* data for EX_AND */
  } kind;
} expander;

/* Fixed-capacity set of states already seen during one graph walk.
   states[0] .. states[size-1] are the recorded members; see visit(). */
typedef struct _visited
{ int size;                             /* number of recorded states */
  dtd_state *states[MAX_VISITED];       /* the recorded states */
} visited;


/* Forward declarations: the walkers below call state_transitions(),
   which in turn calls translate_model(); both are defined further down. */
static void	translate_model(dtd_model *m, dtd_state *from, dtd_state *to);
static transition *state_transitions(dtd_state *state);

/* Record `state' in the set `visited'.

   Returns TRUE if the state was not yet in the set and has been added.
   Returns FALSE if it was already a member, or if the set is full; in
   the latter case a diagnostic is printed on stderr and the state is
   not recorded. */
static int
visit(dtd_state *state, visited *visited)
{ dtd_state **slot = visited->states;
  dtd_state **end  = slot + visited->size;

  while ( slot < end )                  /* already a member? */
  { if ( *slot++ == state )
      return FALSE;
  }

  if ( visited->size < MAX_VISITED )    /* room left: append */
  { visited->states[visited->size] = state;
    visited->size++;

    return TRUE;
  }

  fprintf(stderr, "Reached MAX_VISITED!\n");
  return FALSE;
}


/* Worker for make_dtd_transition().

   First looks for a transition of `here' labelled with `e' and returns
   its destination.  If there is none, follows each unlabelled (NULL)
   transition whose destination has not been visited yet and searches
   from there.  Returns NULL if nothing accepts `e'. */
static dtd_state *
do_make_dtd_transition(dtd_state *here, dtd_element *e, visited *visited)
{ transition *first = state_transitions(here);
  transition *tr;

  tr = first;                           /* pass 1: direct match */
  while ( tr )
  { if ( tr->element == e )
      return tr->state;
    tr = tr->next;
  }

  for(tr = first; tr; tr = tr->next)    /* pass 2: via equivalence links */
  { dtd_state *found;

    if ( tr->element != NULL )
      continue;
    if ( !visit(tr->state, visited) )
      continue;

    found = do_make_dtd_transition(tr->state, e, visited);
    if ( found )
      return found;
  }

  return NULL;
}


/* Given the current state `here', return the state reached by accepting
   element `e', or NULL if `here' is NULL or no transition is possible. */
dtd_state *
make_dtd_transition(dtd_state *here, dtd_element *e)
{ visited seen;

  seen.size = 0;                        /* start with an empty set */

  return here ? do_make_dtd_transition(here, e, &seen) : NULL;
}


/* Worker for same_state(): TRUE if `here' is `final', or `final' can be
   reached from `here' following only unlabelled (NULL) transitions to
   states that have not been visited before. */
static int
find_same_state(dtd_state *final, dtd_state *here, visited *visited)
{ transition *edge;

  if ( here == final )
    return TRUE;

  edge = state_transitions(here);
  while ( edge )
  { if ( !edge->element &&
         visit(edge->state, visited) &&
         find_same_state(final, edge->state, visited) )
      return TRUE;

    edge = edge->next;
  }

  return FALSE;
}


/* TRUE if `here' and `final' are the same state, or `final' is
   reachable from `here' through equivalence links only. */
int
same_state(dtd_state *final, dtd_state *here)
{ visited seen;

  seen.size = 0;

  return find_same_state(final, here, &seen);
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
state_allows_for(dtd_state *state, dtd_element **allow, int *n)
    See what elements are allowed if we are in this state.  This is
    currently not used, but might prove handy for error messages or
    syntax-directed editors.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Worker for state_allows_for().

   Appends to allow[0..*n-1] every element that labels a transition of
   `here', or of a state reachable from it through unlabelled links,
   skipping elements that are already in the array.  *n is updated to
   the new number of entries.

   NOTE(review): nothing bounds *n, so the caller must supply an array
   large enough for all distinct elements; presumably MAX_ALLOWED is
   the intended size -- confirm against the callers. */
static void
do_state_allows_for(dtd_state *here, dtd_element **allow, int *n,
                    visited *visited)
{ transition *edge = state_transitions(here);

  while ( edge )
  { dtd_element *label = edge->element;

    if ( label )
    { int k = 0;

      while ( k < *n && allow[k] != label )     /* linear membership test */
        k++;

      if ( k == *n )                            /* not found: append */
      { allow[k] = label;
        *n = k+1;
      }
    } else if ( visit(edge->state, visited) )
    { do_state_allows_for(edge->state, allow, n, visited);
    }

    edge = edge->next;
  }
}


/* Collect in `allow' the distinct elements that can be accepted in
   `state' and store their number in *n.  A NULL state yields *n == 0.
   The caller provides the storage for `allow'. */
void
state_allows_for(dtd_state *state, dtd_element **allow, int *n)
{ visited seen;

  seen.size = 0;
  *n = 0;

  if ( state == NULL )
    return;

  do_state_allows_for(state, allow, n, &seen);
}


/* Worker for find_omitted_path().

   Searches from `state' for a way to accept element `e', possibly by
   first entering elements whose definition has the omit_open flag set.
   Each such implicitly entered element is stored in path[], and *pl
   holds the number of entries in use.

   Returns TRUE if a path was found; *pl then is the path length.
   Returns FALSE otherwise, with *pl restored to its value on entry.

   NOTE(review): path[] is written without a bound check; the caller
   must supply sufficient room. */
static int
do_find_omitted_path(dtd_state *state, dtd_element *e,
                     dtd_element **path, int *pl,
                     visited *visited)
{ transition *tset = state_transitions(state);
  transition *t;
  int pathlen = *pl;

  for(t=tset; t; t=t->next)
  { if ( t->element == e )
      return TRUE;

    if ( t->element &&
         t->element != CDATA_ELEMENT &&
         t->element->structure &&
         t->element->structure->omit_open &&
         visit(t->state, visited) )
    { dtd_state *initial = make_state_engine(t->element);

      /* make_state_engine() returns NULL for an element without a
         content model that is not CDATA/RCDATA.  There is nothing to
         descend into then, and recursing would dereference NULL in
         state_transitions(). */
      if ( !initial )
        continue;

      path[pathlen] = t->element;
      *pl = pathlen+1;
      if ( do_find_omitted_path(initial, e, path, pl, visited) )
        return TRUE;
      *pl = pathlen;                    /* undo: this branch failed */
    }
  }

  for(t=tset; t; t=t->next)             /* retry through equivalence links */
  { if ( !t->element &&
         visit(t->state, visited) )
    { if ( do_find_omitted_path(t->state, e, path, pl, visited) )
        return TRUE;
    }
  }

  return FALSE;
}


/* Find the elements that must be opened implicitly, starting in
   `state', before `e' can be accepted.  The elements are stored in
   `path'.  Returns the number of elements stored (0 if `e' is accepted
   directly), or -1 if `state' is NULL or no path exists. */
int
find_omitted_path(dtd_state *state, dtd_element *e, dtd_element **path)
{ visited seen;
  int length = 0;

  seen.size = 0;

  if ( !state )
    return -1;
  if ( !do_find_omitted_path(state, e, path, &length, &seen) )
    return -1;

  return length;
}


/* Create an anonymous new state, allocated with sgml_calloc() and so
   presumably zero-filled (no transitions, no expander).  The state is
   released by free_state_engine() once it is linked into an engine.

   The parameter list is an explicit `void' so that the definition is a
   prototype, as documented in the comment at the top of this file. */
dtd_state *
new_dtd_state(void)
{ dtd_state *s = sgml_calloc(1, sizeof(*s));

  return s;
}


/* Add a transition from `from' to `to' labelled with element `e'; a
   NULL `e' creates an equivalence link.  The new transition is pushed
   on the front of the transition list of `from'.

   NOTE(review): this static function has the same name as POSIX
   link(2); including <unistd.h> in this file would cause a conflicting
   declaration. */
static void
link(dtd_state *from, dtd_state *to, dtd_element *e)
{ transition *edge = sgml_calloc(1, sizeof(*edge));

  edge->element = e;
  edge->state   = to;
  edge->next    = from->transitions;    /* push on the front */

  from->transitions = edge;
}


		 /*******************************
		 *	      EXPANSION		*
		 *******************************/

/* Append model `m' at the end of the list `*list'.  The list is walked
   to its last cell, so the insertion order is preserved. */
static void
add_model_list(dtd_model_list **list, dtd_model *m)
{ dtd_model_list *cell = sgml_calloc(1, sizeof(*cell));
  dtd_model_list **tail = list;

  cell->model = m;

  while ( *tail )                       /* find the terminating NULL */
    tail = &(*tail)->next;

  *tail = cell;
}


/* Return the transition list of `state', first expanding the state if
   it has an expander and no transitions yet.

   EX_AND expansion for the set of models still to be seen:
     - empty set: an equivalence link to the target of the expander;
     - one model left: that model is translated from `state' to the
       target;
     - otherwise, for each model M in the set, M is translated from
       `state' to a fresh state, which is connected by an equivalence
       link to another fresh state carrying an expander for the set
       without M.

   The continuation expander is placed on its own state because
   translate_model() may already have given the destination of M
   outgoing transitions (MC_PLUS adds a self-loop).  An expander is
   only expanded while its state has no transitions, so attaching it to
   that destination would mean the remaining models are never
   reachable. */
static transition *
state_transitions(dtd_state *state)
{ if ( !state->transitions && state->expander )
  { expander *ex = state->expander;

    switch(ex->type)
    { case EX_AND:
      { dtd_model_list *left = ex->kind.and.set;

        if ( !left )                    /* empty AND (should not happen) */
        { link(state, ex->target, NULL);
        } else if ( !left->next )       /* only one left */
        { translate_model(left->model, state, ex->target);
        } else
        { for( ; left; left = left->next )
          { dtd_state *tmp  = new_dtd_state();  /* reached after left->model */
            dtd_state *rest = new_dtd_state();  /* hosts the continuation */
            expander *nex = sgml_calloc(1, sizeof(*nex));
            dtd_model_list *l;

            nex->target = ex->target;
            nex->type = EX_AND;
            for(l=ex->kind.and.set; l; l=l->next)
            { if ( l != left )
                add_model_list(&nex->kind.and.set, l->model);
            }
            rest->expander = nex;

            translate_model(left->model, state, tmp);
            link(tmp, rest, NULL);
          }
        }
        break;
      }
    }
  }

  return state->transitions;
}


		 /*******************************
		 *	   TRANSLATION		*
		 *******************************/


/* Translate a single occurrence of model `m' (ignoring its
   cardinality) into transitions leading from `from' to `to'.

     MT_ELEMENT  one transition labelled with the element
     MT_SEQ      a chain through fresh intermediate states
     MT_AND      a lazily expanded state, see state_transitions()
     MT_OR       every alternative is translated from `from' to `to'

   MT_PCDATA is handled by translate_model() and must not get here.

   For MT_AND the expander is placed on a fresh state reached from
   `from' by an equivalence link, not on `from' itself.  An expander is
   only expanded while its state has no transitions, and a state holds
   a single expander.  `from' may already have, or later receive,
   transitions (the NULL link of MC_OPT/MC_REP, another OR alternative)
   or another expander, which would leave the AND group unreachable or
   leak the expander that is overwritten. */
static void
translate_one(dtd_model *m, dtd_state *from, dtd_state *to)
{ switch(m->type)
  { case MT_ELEMENT:
    { dtd_element *e = m->content.element;

      link(from, to, e);
      return;
    }
    case MT_SEQ:                        /* a,b,... */
    { dtd_model *sub = m->content.group;

      if ( !sub )                       /* empty group: nothing to match */
      { link(from, to, NULL);
        return;
      }

      for( ; sub->next; sub = sub->next )
      { dtd_state *tmp = new_dtd_state();
        translate_model(sub, from, tmp);
        from = tmp;
      }
      translate_model(sub, from, to);   /* last member ends in `to' */
      return;
    }
    case MT_AND:                        /* a&b&... */
    { expander *ex = sgml_calloc(1, sizeof(*ex));
      dtd_state *entry = new_dtd_state();
      dtd_model *sub;

      ex->target = to;
      ex->type   = EX_AND;

      for( sub = m->content.group; sub; sub = sub->next )
        add_model_list(&ex->kind.and.set, sub);

      entry->expander = ex;
      link(from, entry, NULL);
      return;
    }
    case MT_OR:                         /* a|b|... */
    { dtd_model *sub;

      for( sub = m->content.group; sub; sub = sub->next )
        translate_model(sub, from, to);
      return;
    }
    case MT_PCDATA:
    case MT_UNDEF:
      assert(0);
  }

}


/* Translate model `m', including its cardinality, into transitions
   between `from' and `to'.

     #PCDATA  CDATA self-loop on `from' plus an equivalence link to `to'
     ?        equivalence link to `to', then one occurrence from->to
     (one)    one occurrence from->to
     *        one occurrence from->from, then an equivalence link to `to'
     +        one occurrence from->to and one to->to

   The order of the calls is kept as is, because link() pushes new
   transitions on the front of the list. */
static void
translate_model(dtd_model *m, dtd_state *from, dtd_state *to)
{ if ( m->type == MT_PCDATA )
  { link(from, from, CDATA_ELEMENT);
    link(from, to, NULL);
  } else if ( m->cardinality == MC_REP )        /* * */
  { translate_one(m, from, from);
    link(from, to, NULL);
  } else if ( m->cardinality == MC_PLUS )       /* + */
  { translate_one(m, from, to);
    translate_one(m, to, to);
  } else if ( m->cardinality == MC_OPT ||       /* ? */
              m->cardinality == MC_ONE )
  { if ( m->cardinality == MC_OPT )
      link(from, to, NULL);
    translate_one(m, from, to);
  }
}


/* Return the initial state of the state engine of element `e',
   building the engine on first use.

   An element with a content model gets its model translated between a
   fresh initial and final state.  An element without a model whose
   type is C_CDATA or C_RCDATA gets a CDATA self-loop on the initial
   state and an equivalence link to the final state.

   Returns NULL if the element has no definition, or has neither a
   content model nor one of these two types. */
dtd_state *
make_state_engine(dtd_element *e)
{ dtd_edef *def = e->structure;

  if ( !def )
    return NULL;
  if ( def->initial_state )             /* engine already built */
    return def->initial_state;

  if ( def->content )
  { def->initial_state = new_dtd_state();
    def->final_state   = new_dtd_state();

    translate_model(def->content, def->initial_state, def->final_state);
  } else if ( def->type == C_CDATA || def->type == C_RCDATA )
  { def->initial_state = new_dtd_state();
    def->final_state   = new_dtd_state();

    link(def->initial_state, def->initial_state, CDATA_ELEMENT);
    link(def->initial_state, def->final_state, NULL);
  } else
  { return NULL;
  }

  return def->initial_state;
}


		 /*******************************
		 *	       FREE		*
		 *******************************/

/* Forward declaration: free_expander() and do_free_state_engine() call
   each other. */
static void do_free_state_engine(dtd_state *state, visited *visited);

/* Release all cells of the list `l'.  The models the cells point to are
   not released here. */
static void
free_model_list(dtd_model_list *l)
{ while ( l )
  { dtd_model_list *rest = l->next;     /* fetch before the cell is gone */

    sgml_free(l);
    l = rest;
  }
}


/* Release expander `e': first the engine behind its target state,
   unless that state was visited already, then the kind-specific data
   and finally the expander itself. */
static void
free_expander(expander *e, visited *visited)
{ dtd_state *target = e->target;

  if ( visit(target, visited) )
    do_free_state_engine(target, visited);

  if ( e->type == EX_AND )
    free_model_list(e->kind.and.set);

  sgml_free(e);
}


/* Release `state' and everything reachable from it that has not been
   visited yet: for each transition the destination state (recursively)
   and the transition itself, then the expander, then the state.

   The caller must have recorded `state' in `visited'; that is what
   keeps self-loops and cycles from releasing a state twice.  States
   that visit() refuses because the set is full are not released. */
static void
do_free_state_engine(dtd_state *state, visited *visited)
{ transition *edge = state->transitions;

  while ( edge )
  { transition *rest = edge->next;
    dtd_state *dest = edge->state;

    if ( visit(dest, visited) )
      do_free_state_engine(dest, visited);

    sgml_free(edge);
    edge = rest;
  }

  if ( state->expander )
    free_expander(state->expander, visited);

  sgml_free(state);
}


/* Release the state engine whose initial state is `state'.  Passing
   NULL is allowed and does nothing. */
void
free_state_engine(dtd_state *state)
{ visited seen;

  if ( !state )
    return;

  seen.size = 0;
  visit(state, &seen);                  /* mark the root before walking */
  do_free_state_engine(state, &seen);
}
}