This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/packages/sgml/catalog.c

673 lines
15 KiB
C
Raw Normal View History

/* $Id$
Part of SWI-Prolog
Author: Jan Wielemaker and Richard O'Keefe
E-mail: wielemak@science.uva.nl
WWW: http://www.swi-prolog.org
Copyright (C): 1985-2006, University of Amsterdam
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define _ISOC99_SOURCE 1 /* fwprintf(), etc prototypes */
#include "util.h"
#include "catalog.h"
#include <stdio.h>
#include <wctype.h>
#include <string.h>
#include <stdlib.h>
#define DTD_MINOR_ERRORS 1
#include <dtd.h> /* error codes */
#ifdef __WINDOWS__
#define swprintf _snwprintf
#endif
#ifdef _REENTRANT
#include <pthread.h>
static pthread_mutex_t catalog_mutex = PTHREAD_MUTEX_INITIALIZER;
#define LOCK() pthread_mutex_lock(&catalog_mutex)
#define UNLOCK() pthread_mutex_unlock(&catalog_mutex)
#else
#define LOCK()
#define UNLOCK()
#endif
#ifndef MAXPATHLEN
#define MAXPATHLEN 1024
#endif
#ifndef MAXLINE
#define MAXLINE 1024
#endif
#ifndef EOS
#define EOS '\0'
#endif
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif
#define streq(s1, s2) istreq(s1, s2)
#define uc(p) (*(p))
typedef struct catalogue_item *catalogue_item_ptr;
struct catalogue_item
{ catalogue_item_ptr next;
int kind;
ichar const *target;
ichar const *replacement;
};
static catalogue_item_ptr first_item = 0, last_item = 0;
typedef struct _catalog_file
{ ichar *file;
struct _catalog_file *next;
int loaded; /* did we parse this file? */
catalogue_item_ptr first_item; /* List of items in the file */
catalogue_item_ptr last_item;
} catalog_file;
static catalog_file *catalog;
#ifdef __WINDOWS__
#define isDirSep(c) ((c) == '/' || (c) == '\\')
#define DIRSEPSTR L"\\"
#else
#define isDirSep(c) ((c) == '/')
#define DIRSEPSTR L"/"
#endif
static ichar *
DirName(const ichar *f, ichar *dir)
{ const ichar *base, *p;
for (base = p = f; *p; p++)
{ if (isDirSep(*p) && p[1] != EOS)
base = p;
}
if (base == f)
{ if (isDirSep(*f))
istrcpy(dir, DIRSEPSTR);
else
istrcpy(dir, L".");
} else
{ istrncpy(dir, f, base - f);
dir[base - f] = EOS;
}
return dir;
}
int
is_absolute_path(const ichar *name)
{ if (isDirSep(name[0])
#ifdef __WINDOWS__
|| (iswalpha(uc(name)) && name[1] == ':')
#endif
)
return TRUE;
return FALSE;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
localpath() creates an absolute path for name relative to ref. The
returned path must be freed using sgml_free() when done.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
ichar *
localpath(const ichar *ref, const ichar *name)
{ ichar *local;
if (!ref || is_absolute_path(name))
local = istrdup(name);
else
{ ichar buf[MAXPATHLEN];
DirName(ref, buf);
istrcat(buf, DIRSEPSTR);
istrcat(buf, name);
local = istrdup(buf);
}
if (!local)
sgml_nomem();
return local;
}
2010-05-06 10:59:09 +01:00
int
register_catalog_file_unlocked(const ichar *file, catalog_location where)
{ catalog_file **f = &catalog;
catalog_file *cf;
for (; *f; f = &(*f)->next)
{ cf = *f;
if (istreq(cf->file, file))
return TRUE; /* existing, move? */
}
cf = sgml_malloc(sizeof(*cf));
memset(cf, 0, sizeof(*cf));
cf->file = istrdup(file);
if (!cf->file)
sgml_nomem();
if (where == CTL_END)
{ cf->next = NULL;
*f = cf;
} else
{ cf->next = catalog;
catalog = cf;
}
return TRUE;
}
static wchar_t *
wgetenv(const char *name)
{ const char *vs;
if ( (vs = getenv(name)) )
{ size_t wl = mbstowcs(NULL, vs, 0);
if ( wl > 0 )
{ wchar_t *ws = sgml_malloc((wl+1)*sizeof(wchar_t));
mbstowcs(ws, vs, wl+1);
return ws;
}
}
return NULL;
}
static void
2010-05-06 10:59:09 +01:00
init_catalog()
{ static int done = FALSE;
LOCK();
if ( !done++ )
{ ichar *path = wgetenv("SGML_CATALOG_FILES");
if (!path)
{ UNLOCK();
return;
}
while (*path)
{ ichar buf[MAXPATHLEN];
ichar *s;
if ((s = istrchr(path, L':')))
{ istrncpy(buf, path, s - path);
buf[s - path] = '\0';
path = s + 1;
if ( buf[0] ) /* skip empty entries */
register_catalog_file_unlocked(buf, CTL_START);
} else
{ if ( path[0] ) /* skip empty entries */
register_catalog_file_unlocked(path, CTL_START);
break;
}
}
}
UNLOCK();
}
int
register_catalog_file(const ichar *file, catalog_location where)
{ int rc;
2010-05-06 10:59:09 +01:00
init_catalog();
LOCK();
rc = register_catalog_file_unlocked(file, where);
UNLOCK();
return rc;
}
/*******************************
* CATALOG FILE PARSING *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
The code from here to the end of this file was written by Richard
O'Keefe and modified by Jan Wielemaker to fit in with the rest of the
parser.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* OVERRIDE YES/NO
sets a boolean flag initialised to NO.
The value of this flag is stored as part of each entry.
(PUBLIC|DOCTYPE|ENTITY)&YES will match whether a system identifier
was provided in the source document or not;
(PUBLIC|DOCTYPE|ENTITY)&NO will only match if a system identifier
was not provided.
*/
/* catalogue =
( PUBLIC pubid filename
| SYSTEM sysid filename
| DOCTYPE name filename
| ENTITY name filename
| OVERRIDE YES
| OVERRIDE NO
| BASE filename
| junk
)*
*/
/* Keywords are matched ignoring case. */
static int
ci_streql(ichar const *a, ichar const *b)
{ return istrcaseeq(a, b);
}
/* Names may be matched heading case in XML. */
static int
cs_streql(ichar const *a, ichar const *b)
{ return istreq(a, b);
}
/* Any other word or any quoted string is reported as CAT_OTHER.
When we are not looking for the beginning of an entry, the only
positive outcome is CAT_OTHER.
*/
static int
scan_overflow(size_t buflen)
2010-05-06 10:59:09 +01:00
{ gripe(NULL, ERC_REPRESENTATION, L"token length");
return EOF;
}
static int
scan(FILE* src, ichar *buffer, size_t buflen, int kw_expected)
{ int c, q;
ichar *p = buffer, *e = p + buflen - 1;
for (;;)
{ c = getc(src);
if (c <= ' ')
{ if (c < 0)
return EOF;
continue;
}
if (c == '-')
{ c = getc(src);
if (c != '-')
{ *p++ = '-';
break;
}
for (;;)
{ c = getc(src);
if (c < 0)
return EOF;
if (c == '-')
{ c = getc(src);
if (c < 0)
return EOF;
if (c == '-')
break;
}
}
continue;
}
if (c == '"' || c == '\'')
{ q = c;
for (;;)
{ c = getc(src);
if (c < 0)
return EOF;
if (c == q)
{ *p = '\0';
return CAT_OTHER;
}
if (p == e)
return scan_overflow(buflen);
*p++ = c;
}
}
break;
}
/* We reach here if there is an unquoted token. */
/* Don't try "PUBLIC--well/sortof--'foo' 'bar'" */
/* because hyphens are allowed in unquoted words */
/* and so are slashes and a bunch of other stuff. */
/* To keep this code simple, an unquoted token */
/* ends at EOF, ', ", or layout. */
while (c > ' ' && c != '"' && c != '\'')
{ if (p == e)
return scan_overflow(buflen);
*p++ = c;
c = getc(src);
}
*p = '\0';
if (kw_expected)
{ if (ci_streql(buffer, L"public"))
return CAT_PUBLIC;
if (ci_streql(buffer, L"system"))
return CAT_SYSTEM;
if (ci_streql(buffer, L"entity"))
return CAT_ENTITY;
if (ci_streql(buffer, L"doctype"))
return CAT_DOCTYPE;
if (ci_streql(buffer, L"override"))
return CAT_OVERRIDE;
if (ci_streql(buffer, L"base"))
return CAT_BASE;
}
return CAT_OTHER;
}
/* The strings can represent names (taken verbatim),
system identifiers (ditto), or public identifiers (squished).
We need to squish, and we need to copy. When it comes to
squishing, we don't need to worry about Unicode spaces,
because public identifiers aren't allow to have any characters
that aren't in ASCII.
*/
static void
squish(ichar *pubid)
{ ichar const *s = (ichar const *) pubid;
ichar *d = (ichar *) pubid;
ichar c;
int w;
w = 1;
while ((c = *s++) != '\0')
{ if (c <= ' ')
{ if (!w)
*d++ = ' ', w = 1;
} else
{ *d++ = c, w = 0;
}
}
if (w && d != (ichar *) pubid)
d--;
*d = '\0';
}
/* We represent a catalogue internally by a list of
(CAT_xxx, string, string)
triples.
*/
static void
load_one_catalogue(catalog_file * file)
{ FILE *src = wfopen(file->file, "r");
ichar buffer[2 * FILENAME_MAX];
ichar base[2 * FILENAME_MAX];
ichar *p;
int t;
catalogue_item_ptr this_item;
int override = 0;
if ( !src )
2010-05-06 10:59:09 +01:00
{ gripe(NULL, ERC_NO_CATALOGUE, file->file);
return;
}
(void) istrcpy(base, file->file);
p = base + istrlen(base);
while (p != base && !isDirSep(p[-1]))
p--;
for (;;)
{ t = scan(src, buffer, sizeof(buffer), 1);
switch (t)
{ case CAT_BASE:
if (scan(src, buffer, sizeof(buffer), 0) == EOF)
break;
(void) istrcpy(base, buffer);
p = base + istrlen(base);
if (p != base && !isDirSep(p[-1]))
*p++ = '/';
continue;
case CAT_OVERRIDE:
if (scan(src, buffer, sizeof(buffer), 0) == EOF)
break;
override = towlower(buffer[0]) == 'y' ? CAT_OVERRIDE : 0;
continue;
case CAT_PUBLIC:
case CAT_SYSTEM:
case CAT_ENTITY:
case CAT_DOCTYPE:
this_item = sgml_malloc(sizeof *this_item);
if (scan(src, buffer, sizeof buffer, 0) == EOF)
break;
if (t == CAT_PUBLIC)
squish(buffer);
this_item->next = 0;
this_item->kind = t == CAT_SYSTEM ? t : t + override;
this_item->target = istrdup(buffer);
if (scan(src, buffer, sizeof buffer, 0) == EOF)
break;
if (is_absolute_path(buffer) || p == base)
{ this_item->replacement = istrdup(buffer);
} else
{ (void) istrcpy(p, buffer);
this_item->replacement = istrdup(base);
}
if (file->first_item == 0)
{ file->first_item = this_item;
} else
{ file->last_item->next = this_item;
}
file->last_item = this_item;
continue;
case EOF:
break;
default:
continue;
}
break;
}
fclose(src);
}
/* To look up a DTD:
f = find_in_catalogue(CAT_DOCTYPE, name, pubid, sysid, ci);
If it cannot otherwise be found and name is not null,
${name}.dtd will be returned.
To look up a parameter entity:
f = find_in_catalogue(CAT_PENTITY, name, pubid, sysid, ci);
2010-05-06 10:59:09 +01:00
The name may begin with a % but need not; if it doesn't
a % will be prefixed for the search.
If it cannot otherwise be found ${name}.pen will be returned.
To look up an ordinary entity:
f = find_in_catalogue(CAT_ENTITY, name, pubid, sysid, ci);
If the name begins with a % this is just like a CAT_PENTITY search.
If it cannot otherwise be found %{name}.ent will be returned.
The full catalogue format allows for NOTATION (which we still need
for XML), SGMLDECL, DTDDECL, and LINKTYPE. At the moment, only
notation is plausible. To handle such things,
f = find_in_catalogue(CAT_OTHER, name, pubid, sysid, ci);
If it cannot be found, NULL is returned.
The name, pubid, and sysid may each be NULL. It doesn't really
make sense for them all to be NULL.
For SGML, name matching (DOCTYPE, ENTITY) should normally ignore
alphabetic case. Pass ci=1 to make this happen. For XML, name
matching must heed alphabetic case. Pass ci=0 to make that happen.
A CAT_DOCTYPE, CAT_ENTITY, or CAT_PENTITY search doesn't really make
sense withint a name, so if the name should happen to be 0, the search
kind is converted to CAT_OTHER.
*/
ichar const *
find_in_catalogue(int kind,
ichar const *name,
ichar const *pubid, ichar const *sysid, int ci)
{ ichar penname[FILENAME_MAX];
const size_t penlen = sizeof(penname)/sizeof(ichar);
catalogue_item_ptr item;
ichar const *result;
catalog_file *catfile;
init_catalog();
if ( name == 0 )
{ kind = CAT_OTHER;
} else
{ switch (kind)
{ case CAT_OTHER:
case CAT_DOCTYPE:
break;
case CAT_PENTITY:
if (name[0] != '%')
{ penname[0] = '%';
(void) istrcpy(penname + 1, name);
name = penname;
}
break;
case CAT_ENTITY:
if (name[0] == '%')
{ kind = CAT_PENTITY;
}
break;
default:
return 0;
}
}
result = 0;
for (catfile = catalog;; catfile = catfile->next)
{ if (catfile)
{ if (!catfile->loaded)
{ load_one_catalogue(catfile);
catfile->loaded = TRUE;
}
item = catfile->first_item;
} else
item = first_item;
for (; item != 0; item = item->next)
{ switch (item->kind)
{ case CAT_PUBLIC:
if (sysid != 0)
break;
/*FALLTHROUGH*/
case OVR_PUBLIC:
if (pubid != 0 && result == 0 && cs_streql(pubid, item->target))
result = item->replacement;
break;
case CAT_SYSTEM:
if (sysid != 0 && cs_streql(sysid, item->target))
return item->replacement;
break;
case CAT_DOCTYPE:
if (sysid != 0)
break;
/*FALLTHROUGH*/
case OVR_DOCTYPE:
if (name != 0 && kind == CAT_DOCTYPE && result == 0
&& (ci ? ci_streql : cs_streql) (name, item->target))
result = item->replacement;
break;
case CAT_ENTITY:
if (sysid != 0)
break;
/*FALLTHROUGH*/ case OVR_ENTITY:
if (name != 0 && kind >= CAT_ENTITY && result == 0
&& (ci ? ci_streql : cs_streql) (name, item->target))
result = item->replacement;
break;
default:
break;
}
}
if (!catfile)
break;
}
if ( result != 0 )
return result;
if ( sysid != 0 )
return sysid;
if ( kind == CAT_OTHER || kind == CAT_DOCTYPE )
return 0;
if ( istrlen(name)+4+1 > penlen )
2010-05-06 10:59:09 +01:00
{ gripe(NULL, ERC_REPRESENTATION, L"entity name");
return NULL;
}
item = sgml_malloc(sizeof(*item));
item->next = 0;
item->kind = kind;
item->target = istrdup(name);
switch (kind)
{ case CAT_DOCTYPE:
(void) swprintf(penname, penlen, L"%ls.dtd", name);
break;
case CAT_PENTITY:
item->kind = CAT_ENTITY;
(void) swprintf(penname, penlen, L"%ls.pen", name + 1);
break;
case CAT_ENTITY:
(void) swprintf(penname, penlen, L"%ls.ent", name);
break;
default:
abort();
}
item->replacement = istrdup(penname);
if (first_item == 0)
{ first_item = item;
} else
{ last_item->next = item;
}
last_item = item;
return item->replacement;
}