/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        jan@swi.psy.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2002, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
#include <stdio.h>
#include <wchar.h>
#include <assert.h>
#include <string.h>
#include <wctype.h>
#include <time.h>
#include "dtd.h"
#include "util.h"
#include "prolog.h"

/* Number of problems reported while printing the current DTD.  Reset
   at the start of prolog_print_dtd() and incremented by the element
   printers whenever they emit a warning.
*/
static int errors;

		 /*******************************
		 *	  PROLOG SYNTAX		*
		 *******************************/

typedef enum
{ AT_LOWER,
  AT_QUOTE,
  AT_FULLSTOP,
  AT_SYMBOL,
  AT_SOLO,
  AT_SPECIAL
} atomtype;


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Contributed by Richard O'Keefe.  Thanks!
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Classify the `len' wide characters starting at `s' by the way they
   have to be written as a Prolog atom.  Returns one of the atomtype
   values:

     AT_LOWER     lowercase letter followed by alphanumerics or '_'
     AT_SYMBOL    consists only of characters from `symbols'
     AT_FULLSTOP  the single character "."
     AT_SOLO      the single character "!", ";" or ","
     AT_SPECIAL   "[]" or "{}"
     AT_QUOTE     everything else, including the empty text

   The text is inspected as ichar (wide) characters.  Walking it through
   a byte pointer would examine the individual bytes of the first
   character rather than the characters that follow it.
*/
static int
atomType(ichar const *s, int len)
{ static ichar const symbols[] = L"#$&*+-./:<=>?@\\^`~";
  ichar const *u = s;

  switch (len)
  { case 0:
      return AT_QUOTE;
    case 1:
      return iswlower(u[0]) ? AT_LOWER
	   : u[0] == '.'    ? AT_FULLSTOP
	   : u[0] == '!'    ? AT_SOLO
	   : u[0] == ';'    ? AT_SOLO
	   : u[0] == ','    ? AT_SOLO
	   :                  AT_QUOTE;
    case 2:
      if (u[0] == '[' && u[1] == ']') return AT_SPECIAL;
      if (u[0] == '{' && u[1] == '}') return AT_SPECIAL;
      break;
    default:
      break;
  }

  /* len >= 1 here; the first character decides which class to try and
     `len' counts down to 0 only if all remaining characters fit it.
  */
  if (iswlower(u[0]))
  { do ++u; while (--len > 0 && (iswalnum(*u) || *u == '_'));
    return len == 0 ? AT_LOWER : AT_QUOTE;
  } else if (wcschr(symbols, *u) != NULL)
  { do ++u; while (--len > 0 && wcschr(symbols, *u) != NULL);
    return len == 0 ? AT_SYMBOL : AT_QUOTE;
  } else
  { return AT_QUOTE;
  }
}


/* Return `text' in the form in which it must be written as a Prolog
   atom.  If atomType() says no quoting is needed, `text' itself is
   returned.  Otherwise the result is a single-quoted copy in which
   newline, carriage return and tab are written as \n, \r and \t, and
   single quote and backslash are preceded by a backslash.

   The quoted copy is stored in a buffer obtained from ringallo().
   NOTE(review): the lifetime of that buffer is decided by ringallo();
   callers in this file use several results in one wprintf() call, so
   presumably a number of buffers stay valid at the same time -- confirm.
*/
static const ichar *
atom(const ichar *text)
{ int len = wcslen(text);

  switch(atomType(text, len))
  { case AT_QUOTE:
    case AT_FULLSTOP:
    { /* Worst case every character is written as two, plus the opening
         quote, the closing quote and the terminating 0.
      */
      ichar *tmp = ringallo((len*2+3)*sizeof(ichar));
      ichar *o = tmp;

      *o++ = '\'';
      for( ; --len >= 0; text++)
      { switch( *text )
	{ case '\n':
	    *o++ = '\\';
	    *o++ = 'n';
	    break;
	  case '\r':
	    *o++ = '\\';
	    *o++ = 'r';
	    break;
	  case '\t':
	    *o++ = '\\';
	    *o++ = 't';
	    break;
	  case '\'':
	  case '\\':			/* else read back as an escape */
	    *o++ = '\\';
	    /*FALLTHROUGH*/
	  default:
	    *o++ = *text;
	}
      }
      *o++ = '\'';
      *o   = '\0';

      return tmp;
    }
    default:
      return text;
  }
}


/* Map a C truth value onto the text "true" (non-zero) or "false" (zero). */
static const char *
bool(int val)
{ if ( val )
    return "true";

  return "false";
}


/* Write one entity as a Prolog fact on stdout.  `which' is the functor
   of the fact.  Depending on the entity type the second argument is the
   literal value, system(Url) or public(Id, Url).  Entities of any other
   type produce no output.
*/
static void
prolog_print_entity(const char *which, dtd_entity *e)
{ if ( e->type == ET_LITERAL )
  { const ichar *name  = atom(e->name->name);
    const ichar *value = atom(e->value);

    wprintf(L"%s(%ls, %ls).\n", which, name, value);
  } else if ( e->type == ET_SYSTEM )
  { const ichar *name = atom(e->name->name);
    const ichar *url  = atom(e->exturl);

    wprintf(L"%s(%ls, system(%ls)).\n", which, name, url);
  } else if ( e->type == ET_PUBLIC )
  { const ichar *name = atom(e->name->name);
    const ichar *id   = atom(e->extid);
    const ichar *url  = atom(e->exturl);

    wprintf(L"%s(%ls, public(%ls, %ls)).\n", which, name, id, url);
  }
}


/* Write the content model `m' on stdout as a Prolog term.  #PCDATA is
   written as '#pcdata', an element as its (quoted) name and a group as
   its members between parentheses, separated by " & ", ", " or "|" for
   AND, SEQ and OR groups.  The cardinality is appended as "?", "*" or
   "+"; nothing is appended for MC_ONE.

   All output uses wprintf(): stdout is written with wide-character
   functions elsewhere in this file and ISO C does not allow mixing
   byte and wide output on a single stream.
*/
static void
prolog_print_model(dtd_model *m)
{ dtd_model *sub;
  int n = 0;
  const char *sep;

  switch(m->type)
  { case MT_PCDATA:
      wprintf(L"'#pcdata'");
      goto card;
    case MT_ELEMENT:
      wprintf(L"%ls", atom(m->content.element->name->name));
      goto card;
    case MT_AND:
      sep = " & ";
      break;
    case MT_SEQ:
      sep = ", ";
      break;
    case MT_OR:
      sep = "|";
      break;
    case MT_UNDEF:
    default:
      assert(0);
      sep = NULL;			/* should not be used */
      break;
  }

  wprintf(L"(");
  for(sub = m->content.group; sub; sub=sub->next)
  { if ( n++ > 0 )
      wprintf(L"%s", sep);		/* %s: byte string argument */
    prolog_print_model(sub);
  }
  wprintf(L")");

card:
  switch(m->cardinality)
  { case MC_ONE:
      break;
    case MC_OPT:
      wprintf(L"?");
      break;
    case MC_REP:
      wprintf(L"*");
      break;
    case MC_PLUS:
      wprintf(L"+");
      break;
  }
}


/* Write the content specification of element `e' on stdout: one of
   empty, cdata, rcdata or any, or model(Model) for any other type.
   If such an element has no content model, [] is written, a warning is
   sent to stderr and the file-wide error counter is incremented.

   `e->structure' is dereferenced unconditionally, so the caller must
   ensure it is not NULL.  Output uses the wide-character functions
   because ISO C does not allow mixing byte and wide output on one
   stream and stdout is written with wprintf() elsewhere in this file.
*/
static void
prolog_print_content(dtd_element *e)
{ dtd_edef *def = e->structure;

  switch( def->type )
  { case C_EMPTY:
      wprintf(L"empty");
      break;
    case C_CDATA:
      wprintf(L"cdata");
      break;
    case C_RCDATA:
      wprintf(L"rcdata");
      break;
    case C_ANY:
      wprintf(L"any");
      break;
    default:
      if ( def->content )
      { wprintf(L"model(");
	prolog_print_model(def->content);
	wprintf(L")");
      } else
      { wprintf(L"[]");
	fwprintf(stderr,		/* name is a wide string: %ls */
		 L"Warning: element %ls has no content model\n",
		 e->name->name);
	errors++;
      }
      break;
  }
}


/* Return a pointer to the first character in `s' for which iswspace()
   is true, or NULL if the string contains no such character.
*/
static ichar *
istrblank(const ichar *s)
{ while ( *s && !iswspace(*s) )
    s++;

  return *s ? (ichar *)s : NULL;
}


/* Write the first `len' characters of `text' on stdout as one list
   element.  For AT_NUMBERS the characters are written as they are; for
   all other types they are passed through atom() first.  A temporary
   0-terminated copy is made because `text' continues after `len'.
*/
static void
print_listval(attrtype type, int len, const ichar *text)
{ ichar *copy = sgml_malloc((len+1)*sizeof(ichar));
  const ichar *out;

  istrncpy(copy, text, len);
  copy[len] = '\0';

  out = (type == AT_NUMBERS ? copy : atom(copy));
  wprintf(L"%ls", out);			/* before freeing: out may be copy */

  sgml_free(copy);
}


/* Write attribute `at' of element `e' on stdout as the fact

	attribute(Element, Name, Type, Default).

   Type is derived from at->type and Default from at->def.  For
   AT_DEFAULT and AT_FIXED the value is wrapped in default(Value) or
   fixed(Value); list-valued attributes print the blank-separated tokens
   of the value as a Prolog list.

   All output uses wprintf(): ISO C does not allow mixing byte and wide
   output on a single stream.
*/
static void
prolog_print_attribute(dtd_element *e, dtd_attr *at)
{ wprintf(L"    attribute(%ls, %ls, ",
	  atom(e->name->name), atom(at->name->name));

  switch(at->type)			/* print type */
  { case AT_CDATA:
      wprintf(L"cdata");
      break;
    case AT_ENTITY:
      wprintf(L"entity");
      break;
    case AT_ENTITIES:
      wprintf(L"entities");
      break;
    case AT_ID:
      wprintf(L"id");
      break;
    case AT_IDREF:
      wprintf(L"idref");
      break;
    case AT_IDREFS:
      wprintf(L"list(idref)");
      break;
    case AT_NAME:
      wprintf(L"name");
      break;
    case AT_NAMES:
      wprintf(L"list(name)");
      break;
    case AT_NMTOKEN:
      wprintf(L"nmtoken");
      break;
    case AT_NMTOKENS:
      wprintf(L"list(nmtoken)");
      break;
    case AT_NOTATION:
      wprintf(L"notation");
      break;
    case AT_NUMBER:
      wprintf(L"number");
      break;
    case AT_NUMBERS:
      wprintf(L"list(number)");
      break;
    case AT_NAMEOF:
    { dtd_name_list *nl;
      int n = 0;

      wprintf(L"nameof([");
      for(nl = at->typeex.nameof; nl; nl = nl->next)
      { if ( n++ > 0 )
	  wprintf(L", ");
	wprintf(L"%ls", atom(nl->value->name));
      }
      wprintf(L"])");
    }
      break;
    case AT_NUTOKEN:
      wprintf(L"nutoken");
      break;
    case AT_NUTOKENS:
      wprintf(L"list(nutoken)");
      break;
  }

  wprintf(L", ");			/* print default */
  switch(at->def)
  { case AT_REQUIRED:
      wprintf(L"required");
      break;
    case AT_CURRENT:
      wprintf(L"current");
      break;
    case AT_CONREF:
      wprintf(L"conref");
      break;
    case AT_IMPLIED:
      wprintf(L"implied");
      break;
    case AT_DEFAULT:
    case AT_FIXED:
    { const char *f = (at->def == AT_DEFAULT ? "default" : "fixed");

      wprintf(L"%s(", f);		/* %s: byte string argument */

      switch( at->type )
      { case AT_CDATA:
	  wprintf(L"%ls", atom(at->att_def.cdata));
	  break;
	case AT_NUMBER:
	  wprintf(L"%ld", at->att_def.number);
	  break;
	case AT_NAME:
	case AT_NUTOKEN:
	case AT_NMTOKEN:
	  wprintf(L"%ls", atom(at->att_def.name->name));
	  break;
	default:
	  if ( at->islist )
	  { const ichar *val = at->att_def.list;
	    const ichar *e;
	    int an = 0;

	    wprintf(L"[");
	    /* Each blank ends a token; runs of blanks are skipped */
	    for(e=istrblank(val); e; val = e+1, e=istrblank(val))
	    { if ( e == val )
		continue;			/* skip spaces */
	      if ( an++ > 0 )
		wprintf(L", ");
	      print_listval(at->type, e-val, val);
	    }
	    /* What follows the last blank is the final token.
	       NOTE(review): it is printed even when empty, i.e. when
	       the value is empty or ends in a blank.
	    */
	    if ( an++ > 0 )
	      wprintf(L", ");
	    print_listval(at->type, istrlen(val), val);
	    wprintf(L"]");
	    break;
	  }
	  assert(0);
      }

      wprintf(L")");
    }
  }

  wprintf(L").\n");
}


/* Write everything known about element `e' on stdout: a comment line
   with the uppercased element name, the element/3 fact, an exclude/2
   and include/2 fact for every excluded and included element and, if
   PL_PRINT_ATTRIBUTES is set in `flags', its attributes.  An element
   without definition only produces the comment, a warning on stderr
   and an increment of the file-wide error counter.

   Names are ichar (wide) strings and are therefore printed with %ls;
   all output uses the wide-character functions because ISO C does not
   allow mixing byte and wide output on a single stream.
*/
static void
prolog_print_element(dtd_element *e, unsigned int flags)
{ ichar nbuf[MAXNMLEN];

  /* NOTE(review): unbounded copy; presumably element names never
     exceed MAXNMLEN characters -- confirm against the parser.
  */
  istrcpy(nbuf, e->name->name);
  istrupper(nbuf);

  wprintf(L"\n%% Element <%ls>\n", nbuf);

  if ( e->structure )
  { dtd_edef *def = e->structure;
    dtd_element_list *el;

    wprintf(L"element(%ls, omit(%s, %s), ",	/* %s: bool() is a byte string */
	    atom(e->name->name),
	    bool(def->omit_open),
	    bool(def->omit_close));
    prolog_print_content(e);
    wprintf(L").\n");

    for(el = def->excluded; el; el=el->next)
      wprintf(L"exclude(%ls, %ls).\n",
	      atom(e->name->name),
	      atom(el->value->name->name));

    for(el = def->included; el; el=el->next)
      wprintf(L"include(%ls, %ls).\n",
	      atom(e->name->name),
	      atom(el->value->name->name));

    if ( flags & PL_PRINT_ATTRIBUTES )
    { dtd_attr_list *al;

      for(al=e->attributes; al; al=al->next)
	prolog_print_attribute(e, al->attribute);
    }
  } else
  { fwprintf(stderr, L"Warning: element %ls has no definition\n",
	     e->name->name);
    errors++;
  }
}


/* Write `dtd' on stdout as a Prolog module.  `flags' is a bitwise or of
   the PL_PRINT_* constants selecting what is printed (parameter
   entities, entities, elements, attributes); 0 means PL_PRINT_ALL.

   Returns TRUE if no problems were counted while printing and FALSE
   otherwise, in which case a summary is written to stderr.

   All output uses the wide-character functions because ISO C does not
   allow mixing byte and wide output on a single stream.
   NOTE(review): assumes dtd->doctype is an ichar (wide) string like the
   other names in the DTD, hence %ls -- confirm against dtd.h.
*/
int
prolog_print_dtd(dtd *dtd, unsigned int flags)
{ dtd_entity *et;
  dtd_element *e;
  time_t now;
  const ichar *doctype = dtd->doctype;

  if ( !doctype )
  { fwprintf(stderr, L"DTD has no document type\n");
    doctype = L"(null)";		/* never pass NULL to %ls */
  }

  time(&now);

  if ( !flags )
    flags = PL_PRINT_ALL;

  errors = 0;

  wprintf(L"/*  This file represents the SGML DOCTYPE \"%ls\"\n", doctype);
  wprintf(L"    converted using dtd2pl version %s\n", DTD2PL_VERSION);
  wprintf(L"    Conversion date: %s\n\n", ctime(&now));
  wprintf(L"    dtd2pl is written by Jan Wielemaker\n");
  wprintf(L"    E-mail: jan@swi.psy.uva.nl\n");
  wprintf(L"*/\n\n");

  wprintf(L":- module(%ls_dtd, []).\n\n", doctype);
  wprintf(L":- op(100, xf,  ?).\n");
  wprintf(L":- op(100, xf,  +).\n");
  wprintf(L":- op(100, xf,  *).\n");
  wprintf(L":- op(200, xfy, &).\n");

  wprintf(L"\n");
  wprintf(L":- discontiguous\n");
  wprintf(L"\tattribute/4,\n");
  wprintf(L"\telement/3,\n");
  wprintf(L"\texclude/2,\n");
  wprintf(L"\tinclude/2.\n");

  if ( flags & PL_PRINT_PENTITIES )
  { wprintf(L"\n");
    for( et=dtd->pentities; et; et=et->next )
      prolog_print_entity("parameter_entity", et);
  }

  if ( flags & PL_PRINT_ENTITIES )
  { wprintf(L"\n");
    for( et=dtd->entities; et; et=et->next )
      prolog_print_entity("entity", et);
  }

  if ( flags & PL_PRINT_ELEMENTS )
  { wprintf(L"\n");
    for( e=dtd->elements; e; e=e->next )
      prolog_print_element(e, flags);
  }

  if ( errors )
  { fwprintf(stderr, L"Warning: DTD contained %d errors\n", errors);
    return FALSE;
  }

  return TRUE;
}