Experiment with porting SGML to YAP, and trying to preserve SWI code as much
as possible.
This commit is contained in:
520
packages/sgml/prolog.c
Normal file
520
packages/sgml/prolog.c
Normal file
@@ -0,0 +1,520 @@
|
||||
/* $Id$
|
||||
|
||||
Part of SWI-Prolog
|
||||
|
||||
Author: Jan Wielemaker
|
||||
E-mail: jan@swi.psy.uva.nl
|
||||
WWW: http://www.swi-prolog.org
|
||||
Copyright (C): 1985-2002, University of Amsterdam
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#define _ISOC99_SOURCE 1 /* fwprintf(), etc prototypes */
|
||||
#include <stdio.h>
|
||||
#include <wchar.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <wctype.h>
|
||||
#include <time.h>
|
||||
#include "dtd.h"
|
||||
#include "util.h"
|
||||
#include "prolog.h"
|
||||
|
||||
static int errors;
|
||||
|
||||
/*******************************
|
||||
* PROLOG SYNTAX *
|
||||
*******************************/
|
||||
|
||||
typedef enum
|
||||
{ AT_LOWER,
|
||||
AT_QUOTE,
|
||||
AT_FULLSTOP,
|
||||
AT_SYMBOL,
|
||||
AT_SOLO,
|
||||
AT_SPECIAL
|
||||
} atomtype;
|
||||
|
||||
|
||||
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
Contributed by Richard O'Keefe. Thanks!
|
||||
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
||||
|
||||
static int
|
||||
atomType(ichar const *s, int len)
|
||||
{ static ichar const symbols[] = L"#$&*+-./:<=>?@\\^`~";
|
||||
unsigned char const *u = (unsigned char const *)s;
|
||||
|
||||
switch (len)
|
||||
{ case 0:
|
||||
return AT_QUOTE;
|
||||
case 1:
|
||||
return iswlower(u[0]) ? AT_LOWER
|
||||
: u[0] == '.' ? AT_FULLSTOP
|
||||
: u[0] == '!' ? AT_SOLO
|
||||
: u[0] == ';' ? AT_SOLO
|
||||
: u[0] == ',' ? AT_SOLO
|
||||
: AT_QUOTE;
|
||||
case 2:
|
||||
if (u[0] == '[' && u[1] == ']') return AT_SPECIAL;
|
||||
if (u[0] == '{' && u[1] == '}') return AT_SPECIAL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (iswlower(u[0]))
|
||||
{ do ++u; while (--len > 0 && (iswalnum(*u) || *u == '_'));
|
||||
return len == 0 ? AT_LOWER : AT_QUOTE;
|
||||
} else if (wcschr(symbols, *u) != NULL)
|
||||
{ do ++u; while (--len > 0 && wcschr(symbols, *u) != 0);
|
||||
return len == 0 ? AT_SYMBOL : AT_QUOTE;
|
||||
} else
|
||||
{ return AT_QUOTE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const ichar *
|
||||
atom(const ichar *text)
|
||||
{ int len = wcslen(text);
|
||||
|
||||
switch(atomType(text, len))
|
||||
{ case AT_QUOTE:
|
||||
case AT_FULLSTOP:
|
||||
{ ichar *tmp = ringallo((len*2+1)*sizeof(ichar));
|
||||
ichar *o = tmp;
|
||||
|
||||
*o++ = '\'';
|
||||
for( ; --len >= 0; text++)
|
||||
{ switch( *text )
|
||||
{ case '\n':
|
||||
*o++ = '\\';
|
||||
*o++ = 'n';
|
||||
break;
|
||||
case '\r':
|
||||
*o++ = '\\';
|
||||
*o++ = 'r';
|
||||
break;
|
||||
case '\t':
|
||||
*o++ = '\\';
|
||||
*o++ = 't';
|
||||
break;
|
||||
case '\'':
|
||||
*o++ = '\\';
|
||||
default:
|
||||
*o++ = *text;
|
||||
}
|
||||
}
|
||||
*o++ = '\'';
|
||||
*o = '\0';
|
||||
|
||||
return tmp;
|
||||
}
|
||||
default:
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const char *
|
||||
bool(int val)
|
||||
{ return val ? "true" : "false";
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
prolog_print_entity(const char *which, dtd_entity *e)
|
||||
{ switch( e->type )
|
||||
{ case ET_LITERAL:
|
||||
wprintf(L"%s(%ls, %ls).\n",
|
||||
which,
|
||||
atom(e->name->name),
|
||||
atom(e->value));
|
||||
break;
|
||||
case ET_SYSTEM:
|
||||
wprintf(L"%s(%ls, system(%ls)).\n",
|
||||
which,
|
||||
atom(e->name->name),
|
||||
atom(e->exturl));
|
||||
break;
|
||||
case ET_PUBLIC:
|
||||
wprintf(L"%s(%ls, public(%ls, %ls)).\n",
|
||||
which,
|
||||
atom(e->name->name),
|
||||
atom(e->extid),
|
||||
atom(e->exturl));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
prolog_print_model(dtd_model *m)
|
||||
{ dtd_model *sub;
|
||||
int n = 0;
|
||||
const char *sep;
|
||||
|
||||
switch(m->type)
|
||||
{ case MT_PCDATA:
|
||||
printf("'#pcdata'");
|
||||
goto card;
|
||||
case MT_ELEMENT:
|
||||
wprintf(L"%ls", atom(m->content.element->name->name));
|
||||
goto card;
|
||||
case MT_AND:
|
||||
sep = " & ";
|
||||
break;
|
||||
case MT_SEQ:
|
||||
sep = ", ";
|
||||
break;
|
||||
case MT_OR:
|
||||
sep = "|";
|
||||
break;
|
||||
case MT_UNDEF:
|
||||
default:
|
||||
assert(0);
|
||||
sep = NULL; /* should not be used */
|
||||
break;
|
||||
}
|
||||
|
||||
printf("(");
|
||||
for(sub = m->content.group; sub; sub=sub->next)
|
||||
{ if ( n++ > 0 )
|
||||
printf("%s", sep);
|
||||
prolog_print_model(sub);
|
||||
}
|
||||
printf(")");
|
||||
|
||||
card:
|
||||
switch(m->cardinality)
|
||||
{ case MC_ONE:
|
||||
break;
|
||||
case MC_OPT:
|
||||
printf("?");
|
||||
break;
|
||||
case MC_REP:
|
||||
printf("*");
|
||||
break;
|
||||
case MC_PLUS:
|
||||
printf("+");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
prolog_print_content(dtd_element *e)
|
||||
{ dtd_edef *def = e->structure;
|
||||
|
||||
switch( def->type )
|
||||
{ case C_EMPTY:
|
||||
printf("empty");
|
||||
break;
|
||||
case C_CDATA:
|
||||
printf("cdata");
|
||||
break;
|
||||
case C_RCDATA:
|
||||
printf("rcdata");
|
||||
break;
|
||||
case C_ANY:
|
||||
printf("any");
|
||||
break;
|
||||
default:
|
||||
if ( def->content )
|
||||
{ printf("model(");
|
||||
prolog_print_model(def->content);
|
||||
printf(")");
|
||||
} else
|
||||
{ printf("[]");
|
||||
fwprintf(stderr,
|
||||
L"Warning: element %s has no content model\n",
|
||||
e->name->name);
|
||||
errors++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static ichar *
|
||||
istrblank(const ichar *s)
|
||||
{ for( ; *s; s++ )
|
||||
{ if ( iswspace(*s) )
|
||||
return (ichar *)s;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
print_listval(attrtype type, int len, const ichar *text)
|
||||
{ ichar *t = sgml_malloc((len+1)*sizeof(ichar));
|
||||
|
||||
istrncpy(t, text, len);
|
||||
t[len] = '\0';
|
||||
|
||||
if ( type == AT_NUMBERS )
|
||||
wprintf(L"%ls", t);
|
||||
else
|
||||
wprintf(L"%ls", atom(t));
|
||||
|
||||
sgml_free(t);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
prolog_print_attribute(dtd_element *e, dtd_attr *at)
|
||||
{ wprintf(L" attribute(%ls, %ls, ",
|
||||
atom(e->name->name), atom(at->name->name));
|
||||
|
||||
switch(at->type) /* print type */
|
||||
{ case AT_CDATA:
|
||||
printf("cdata");
|
||||
break;
|
||||
case AT_ENTITY:
|
||||
printf("entity");
|
||||
break;
|
||||
case AT_ENTITIES:
|
||||
printf("entities");
|
||||
break;
|
||||
case AT_ID:
|
||||
printf("id");
|
||||
break;
|
||||
case AT_IDREF:
|
||||
printf("idref");
|
||||
break;
|
||||
case AT_IDREFS:
|
||||
printf("list(idref)");
|
||||
break;
|
||||
case AT_NAME:
|
||||
printf("name");
|
||||
break;
|
||||
case AT_NAMES:
|
||||
printf("list(name)");
|
||||
break;
|
||||
case AT_NMTOKEN:
|
||||
printf("nmtoken");
|
||||
break;
|
||||
case AT_NMTOKENS:
|
||||
printf("list(nmtoken)");
|
||||
break;
|
||||
case AT_NOTATION:
|
||||
printf("notation");
|
||||
break;
|
||||
case AT_NUMBER:
|
||||
printf("number");
|
||||
break;
|
||||
case AT_NUMBERS:
|
||||
printf("list(number)");
|
||||
break;
|
||||
case AT_NAMEOF:
|
||||
{ dtd_name_list *nl;
|
||||
int n = 0;
|
||||
|
||||
printf("nameof([");
|
||||
for(nl = at->typeex.nameof; nl; nl = nl->next)
|
||||
{ if ( n++ > 0 )
|
||||
printf(", ");
|
||||
wprintf(L"%ls", atom(nl->value->name));
|
||||
}
|
||||
printf("])");
|
||||
}
|
||||
break;
|
||||
case AT_NUTOKEN:
|
||||
printf("nutoken");
|
||||
break;
|
||||
case AT_NUTOKENS:
|
||||
printf("list(nutoken)");
|
||||
break;
|
||||
}
|
||||
|
||||
printf(", "); /* print default */
|
||||
switch(at->def)
|
||||
{ case AT_REQUIRED:
|
||||
printf("required");
|
||||
break;
|
||||
case AT_CURRENT:
|
||||
printf("current");
|
||||
break;
|
||||
case AT_CONREF:
|
||||
printf("conref");
|
||||
break;
|
||||
case AT_IMPLIED:
|
||||
printf("implied");
|
||||
break;
|
||||
case AT_DEFAULT:
|
||||
case AT_FIXED:
|
||||
{ char *f = (at->def == AT_DEFAULT ? "default" : "fixed");
|
||||
|
||||
printf("%s(", f);
|
||||
|
||||
switch( at->type )
|
||||
{ case AT_CDATA:
|
||||
wprintf(L"%ls", atom(at->att_def.cdata));
|
||||
break;
|
||||
case AT_NUMBER:
|
||||
printf("%ld", at->att_def.number);
|
||||
break;
|
||||
case AT_NAME:
|
||||
case AT_NUTOKEN:
|
||||
case AT_NMTOKEN:
|
||||
wprintf(L"%ls", atom(at->att_def.name->name));
|
||||
break;
|
||||
default:
|
||||
if ( at->islist )
|
||||
{ const ichar *val = at->att_def.list;
|
||||
const ichar *e;
|
||||
int an = 0;
|
||||
|
||||
printf("[");
|
||||
for(e=istrblank(val); e; val = e+1, e=istrblank(val))
|
||||
{ if ( e == val )
|
||||
continue; /* skip spaces */
|
||||
if ( an++ > 0 )
|
||||
printf(", ");
|
||||
print_listval(at->type, e-val, val);
|
||||
}
|
||||
if ( an++ > 0 )
|
||||
printf(", ");
|
||||
print_listval(at->type, istrlen(val), val);
|
||||
printf("]");
|
||||
break;
|
||||
}
|
||||
assert(0);
|
||||
}
|
||||
|
||||
printf(")");
|
||||
}
|
||||
}
|
||||
|
||||
printf(").\n");
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
prolog_print_element(dtd_element *e, unsigned int flags)
|
||||
{ ichar nbuf[MAXNMLEN];
|
||||
|
||||
istrcpy(nbuf, e->name->name);
|
||||
istrupper(nbuf);
|
||||
|
||||
wprintf(L"\n%% Element <%s>\n", nbuf);
|
||||
|
||||
if ( e->structure )
|
||||
{ dtd_edef *def = e->structure;
|
||||
|
||||
wprintf(L"element(%ls, omit(%s, %s), ",
|
||||
atom(e->name->name),
|
||||
bool(def->omit_open),
|
||||
bool(def->omit_close));
|
||||
prolog_print_content(e);
|
||||
printf(").\n");
|
||||
|
||||
if ( def->excluded )
|
||||
{ dtd_element_list *el;
|
||||
|
||||
for(el = def->excluded; el; el=el->next)
|
||||
wprintf(L"exclude(%ls, %ls).\n",
|
||||
atom(e->name->name),
|
||||
atom(el->value->name->name));
|
||||
}
|
||||
if ( def->included )
|
||||
{ dtd_element_list *el;
|
||||
|
||||
for(el = def->included; el; el=el->next)
|
||||
wprintf(L"include(%ls, %ls).\n",
|
||||
atom(e->name->name),
|
||||
atom(el->value->name->name));
|
||||
}
|
||||
|
||||
if ( flags & PL_PRINT_ATTRIBUTES )
|
||||
{ dtd_attr_list *al;
|
||||
|
||||
for(al=e->attributes; al; al=al->next)
|
||||
prolog_print_attribute(e, al->attribute);
|
||||
}
|
||||
} else
|
||||
{ fwprintf(stderr, L"Warning: element %s has no definition\n",
|
||||
e->name->name);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
prolog_print_dtd(dtd *dtd, unsigned int flags)
|
||||
{ dtd_entity *et;
|
||||
dtd_element *e;
|
||||
time_t now;
|
||||
|
||||
if ( !dtd->doctype )
|
||||
fprintf(stderr, "DTD has no document type\n");
|
||||
|
||||
time(&now);
|
||||
|
||||
if ( !flags )
|
||||
flags = PL_PRINT_ALL;
|
||||
|
||||
errors = 0;
|
||||
|
||||
wprintf(L"/* This file represents the SGML DOCTYPE \"%s\"\n", dtd->doctype);
|
||||
printf(" converted using dtd2pl version %s\n", DTD2PL_VERSION);
|
||||
printf(" Conversion date: %s\n\n", ctime(&now));
|
||||
printf(" dtd2pl is written by Jan Wielemaker\n");
|
||||
printf(" E-mail: jan@swi.psy.uva.nl\n");
|
||||
printf("*/\n\n");
|
||||
|
||||
wprintf(L":- module(%s_dtd, []).\n\n", dtd->doctype);
|
||||
printf(":- op(100, xf, ?).\n");
|
||||
printf(":- op(100, xf, +).\n");
|
||||
printf(":- op(100, xf, *).\n");
|
||||
printf(":- op(200, xfy, &).\n");
|
||||
|
||||
printf("\n");
|
||||
printf(":- discontiguous\n");
|
||||
printf("\tattribute/4,\n");
|
||||
printf("\telement/3,\n");
|
||||
printf("\texclude/2,\n");
|
||||
printf("\tinclude/2.\n");
|
||||
|
||||
if ( flags & PL_PRINT_PENTITIES )
|
||||
{ printf("\n");
|
||||
for( et=dtd->pentities; et; et=et->next )
|
||||
prolog_print_entity("parameter_entity", et);
|
||||
}
|
||||
|
||||
if ( flags & PL_PRINT_ENTITIES )
|
||||
{ printf("\n");
|
||||
for( et=dtd->entities; et; et=et->next )
|
||||
prolog_print_entity("entity", et);
|
||||
}
|
||||
|
||||
if ( flags & PL_PRINT_ELEMENTS )
|
||||
{ printf("\n");
|
||||
for( e=dtd->elements; e; e=e->next )
|
||||
prolog_print_element(e, flags);
|
||||
}
|
||||
|
||||
if ( errors )
|
||||
{ fprintf(stderr, "Warning: DTD contained %d errors\n", errors);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
Reference in New Issue
Block a user