/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker and Richard O'Keefe E-mail: wielemak@science.uva.nl WWW: http://www.swi-prolog.org Copyright (C): 1985-2006, University of Amsterdam This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _ISOC99_SOURCE 1 /* fwprintf(), etc prototypes */ #include "util.h" #include "catalog.h" #include #include #include #include #define DTD_MINOR_ERRORS 1 #include /* error codes */ #ifdef __WINDOWS__ #define swprintf _snwprintf #endif #ifdef _REENTRANT #include static pthread_mutex_t catalog_mutex = PTHREAD_MUTEX_INITIALIZER; #define LOCK() pthread_mutex_lock(&catalog_mutex) #define UNLOCK() pthread_mutex_unlock(&catalog_mutex) #else #define LOCK() #define UNLOCK() #endif #ifndef MAXPATHLEN #define MAXPATHLEN 1024 #endif #ifndef MAXLINE #define MAXLINE 1024 #endif #ifndef EOS #define EOS '\0' #endif #ifndef TRUE #define TRUE 1 #define FALSE 0 #endif #define streq(s1, s2) istreq(s1, s2) #define uc(p) (*(p)) typedef struct catalogue_item *catalogue_item_ptr; struct catalogue_item { catalogue_item_ptr next; int kind; ichar const *target; ichar const *replacement; }; static catalogue_item_ptr first_item = 0, last_item = 0; typedef struct _catalog_file { ichar *file; struct _catalog_file *next; int loaded; /* did we parse this file? */ catalogue_item_ptr first_item; /* List of items in the file */ catalogue_item_ptr last_item; } catalog_file; static catalog_file *catalog; #ifdef __WINDOWS__ #define isDirSep(c) ((c) == '/' || (c) == '\\') #define DIRSEPSTR L"\\" #else #define isDirSep(c) ((c) == '/') #define DIRSEPSTR L"/" #endif static ichar * DirName(const ichar *f, ichar *dir) { const ichar *base, *p; for (base = p = f; *p; p++) { if (isDirSep(*p) && p[1] != EOS) base = p; } if (base == f) { if (isDirSep(*f)) istrcpy(dir, DIRSEPSTR); else istrcpy(dir, L"."); } else { istrncpy(dir, f, base - f); dir[base - f] = EOS; } return dir; } int is_absolute_path(const ichar *name) { if (isDirSep(name[0]) #ifdef __WINDOWS__ || (iswalpha(uc(name)) && name[1] == ':') #endif ) return TRUE; return FALSE; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - localpath() creates an absolute path for name relative to ref. The returned path must be freed using sgml_free() when done. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ichar * localpath(const ichar *ref, const ichar *name) { ichar *local; if (!ref || is_absolute_path(name)) local = istrdup(name); else { ichar buf[MAXPATHLEN]; DirName(ref, buf); istrcat(buf, DIRSEPSTR); istrcat(buf, name); local = istrdup(buf); } if (!local) sgml_nomem(); return local; } static int register_catalog_file_unlocked(const ichar *file, catalog_location where) { catalog_file **f = &catalog; catalog_file *cf; for (; *f; f = &(*f)->next) { cf = *f; if (istreq(cf->file, file)) return TRUE; /* existing, move? */ } cf = sgml_malloc(sizeof(*cf)); memset(cf, 0, sizeof(*cf)); cf->file = istrdup(file); if (!cf->file) sgml_nomem(); if (where == CTL_END) { cf->next = NULL; *f = cf; } else { cf->next = catalog; catalog = cf; } return TRUE; } static wchar_t * wgetenv(const char *name) { const char *vs; if ( (vs = getenv(name)) ) { size_t wl = mbstowcs(NULL, vs, 0); if ( wl > 0 ) { wchar_t *ws = sgml_malloc((wl+1)*sizeof(wchar_t)); mbstowcs(ws, vs, wl+1); return ws; } } return NULL; } static void init_catalog(void) { static int done = FALSE; LOCK(); if ( !done++ ) { ichar *path = wgetenv("SGML_CATALOG_FILES"); if (!path) { UNLOCK(); return; } while (*path) { ichar buf[MAXPATHLEN]; ichar *s; if ((s = istrchr(path, L':'))) { istrncpy(buf, path, s - path); buf[s - path] = '\0'; path = s + 1; if ( buf[0] ) /* skip empty entries */ register_catalog_file_unlocked(buf, CTL_START); } else { if ( path[0] ) /* skip empty entries */ register_catalog_file_unlocked(path, CTL_START); break; } } } UNLOCK(); } int register_catalog_file(const ichar *file, catalog_location where) { int rc; init_catalog(); LOCK(); rc = register_catalog_file_unlocked(file, where); UNLOCK(); return rc; } /******************************* * CATALOG FILE PARSING * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - The code from here to the end of this file was written by Richard O'Keefe and modified by Jan Wielemaker to fit in with the rest of the parser. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #include #include #include #include /* OVERRIDE YES/NO sets a boolean flag initialised to NO. The value of this flag is stored as part of each entry. (PUBLIC|DOCTYPE|ENTITY)&YES will match whether a system identifier was provided in the source document or not; (PUBLIC|DOCTYPE|ENTITY)&NO will only match if a system identifier was not provided. */ /* catalogue = ( PUBLIC pubid filename | SYSTEM sysid filename | DOCTYPE name filename | ENTITY name filename | OVERRIDE YES | OVERRIDE NO | BASE filename | junk )* */ /* Keywords are matched ignoring case. */ static int ci_streql(ichar const *a, ichar const *b) { return istrcaseeq(a, b); } /* Names may be matched heading case in XML. */ static int cs_streql(ichar const *a, ichar const *b) { return istreq(a, b); } /* Any other word or any quoted string is reported as CAT_OTHER. When we are not looking for the beginning of an entry, the only positive outcome is CAT_OTHER. */ static int scan_overflow(size_t buflen) { gripe(ERC_REPRESENTATION, L"token length"); return EOF; } static int scan(FILE* src, ichar *buffer, size_t buflen, int kw_expected) { int c, q; ichar *p = buffer, *e = p + buflen - 1; for (;;) { c = getc(src); if (c <= ' ') { if (c < 0) return EOF; continue; } if (c == '-') { c = getc(src); if (c != '-') { *p++ = '-'; break; } for (;;) { c = getc(src); if (c < 0) return EOF; if (c == '-') { c = getc(src); if (c < 0) return EOF; if (c == '-') break; } } continue; } if (c == '"' || c == '\'') { q = c; for (;;) { c = getc(src); if (c < 0) return EOF; if (c == q) { *p = '\0'; return CAT_OTHER; } if (p == e) return scan_overflow(buflen); *p++ = c; } } break; } /* We reach here if there is an unquoted token. */ /* Don't try "PUBLIC--well/sortof--'foo' 'bar'" */ /* because hyphens are allowed in unquoted words */ /* and so are slashes and a bunch of other stuff. */ /* To keep this code simple, an unquoted token */ /* ends at EOF, ', ", or layout. */ while (c > ' ' && c != '"' && c != '\'') { if (p == e) return scan_overflow(buflen); *p++ = c; c = getc(src); } *p = '\0'; if (kw_expected) { if (ci_streql(buffer, L"public")) return CAT_PUBLIC; if (ci_streql(buffer, L"system")) return CAT_SYSTEM; if (ci_streql(buffer, L"entity")) return CAT_ENTITY; if (ci_streql(buffer, L"doctype")) return CAT_DOCTYPE; if (ci_streql(buffer, L"override")) return CAT_OVERRIDE; if (ci_streql(buffer, L"base")) return CAT_BASE; } return CAT_OTHER; } /* The strings can represent names (taken verbatim), system identifiers (ditto), or public identifiers (squished). We need to squish, and we need to copy. When it comes to squishing, we don't need to worry about Unicode spaces, because public identifiers aren't allow to have any characters that aren't in ASCII. */ static void squish(ichar *pubid) { ichar const *s = (ichar const *) pubid; ichar *d = (ichar *) pubid; ichar c; int w; w = 1; while ((c = *s++) != '\0') { if (c <= ' ') { if (!w) *d++ = ' ', w = 1; } else { *d++ = c, w = 0; } } if (w && d != (ichar *) pubid) d--; *d = '\0'; } /* We represent a catalogue internally by a list of (CAT_xxx, string, string) triples. */ static void load_one_catalogue(catalog_file * file) { FILE *src = wfopen(file->file, "r"); ichar buffer[2 * FILENAME_MAX]; ichar base[2 * FILENAME_MAX]; ichar *p; int t; catalogue_item_ptr this_item; int override = 0; if ( !src ) { gripe(ERC_NO_CATALOGUE, file->file); return; } (void) istrcpy(base, file->file); p = base + istrlen(base); while (p != base && !isDirSep(p[-1])) p--; for (;;) { t = scan(src, buffer, sizeof(buffer), 1); switch (t) { case CAT_BASE: if (scan(src, buffer, sizeof(buffer), 0) == EOF) break; (void) istrcpy(base, buffer); p = base + istrlen(base); if (p != base && !isDirSep(p[-1])) *p++ = '/'; continue; case CAT_OVERRIDE: if (scan(src, buffer, sizeof(buffer), 0) == EOF) break; override = towlower(buffer[0]) == 'y' ? CAT_OVERRIDE : 0; continue; case CAT_PUBLIC: case CAT_SYSTEM: case CAT_ENTITY: case CAT_DOCTYPE: this_item = sgml_malloc(sizeof *this_item); if (scan(src, buffer, sizeof buffer, 0) == EOF) break; if (t == CAT_PUBLIC) squish(buffer); this_item->next = 0; this_item->kind = t == CAT_SYSTEM ? t : t + override; this_item->target = istrdup(buffer); if (scan(src, buffer, sizeof buffer, 0) == EOF) break; if (is_absolute_path(buffer) || p == base) { this_item->replacement = istrdup(buffer); } else { (void) istrcpy(p, buffer); this_item->replacement = istrdup(base); } if (file->first_item == 0) { file->first_item = this_item; } else { file->last_item->next = this_item; } file->last_item = this_item; continue; case EOF: break; default: continue; } break; } fclose(src); } /* To look up a DTD: f = find_in_catalogue(CAT_DOCTYPE, name, pubid, sysid, ci); If it cannot otherwise be found and name is not null, ${name}.dtd will be returned. To look up a parameter entity: f = find_in_catalogue(CAT_PENTITY, name, pubid, sysid, ci); The name may begin with a % but need not; if it doesn't a % will be prefixed for the search. If it cannot otherwise be found ${name}.pen will be returned. To look up an ordinary entity: f = find_in_catalogue(CAT_ENTITY, name, pubid, sysid, ci); If the name begins with a % this is just like a CAT_PENTITY search. If it cannot otherwise be found %{name}.ent will be returned. The full catalogue format allows for NOTATION (which we still need for XML), SGMLDECL, DTDDECL, and LINKTYPE. At the moment, only notation is plausible. To handle such things, f = find_in_catalogue(CAT_OTHER, name, pubid, sysid, ci); If it cannot be found, NULL is returned. The name, pubid, and sysid may each be NULL. It doesn't really make sense for them all to be NULL. For SGML, name matching (DOCTYPE, ENTITY) should normally ignore alphabetic case. Pass ci=1 to make this happen. For XML, name matching must heed alphabetic case. Pass ci=0 to make that happen. A CAT_DOCTYPE, CAT_ENTITY, or CAT_PENTITY search doesn't really make sense withint a name, so if the name should happen to be 0, the search kind is converted to CAT_OTHER. */ ichar const * find_in_catalogue(int kind, ichar const *name, ichar const *pubid, ichar const *sysid, int ci) { ichar penname[FILENAME_MAX]; const size_t penlen = sizeof(penname)/sizeof(ichar); catalogue_item_ptr item; ichar const *result; catalog_file *catfile; init_catalog(); if ( name == 0 ) { kind = CAT_OTHER; } else { switch (kind) { case CAT_OTHER: case CAT_DOCTYPE: break; case CAT_PENTITY: if (name[0] != '%') { penname[0] = '%'; (void) istrcpy(penname + 1, name); name = penname; } break; case CAT_ENTITY: if (name[0] == '%') { kind = CAT_PENTITY; } break; default: return 0; } } result = 0; for (catfile = catalog;; catfile = catfile->next) { if (catfile) { if (!catfile->loaded) { load_one_catalogue(catfile); catfile->loaded = TRUE; } item = catfile->first_item; } else item = first_item; for (; item != 0; item = item->next) { switch (item->kind) { case CAT_PUBLIC: if (sysid != 0) break; /*FALLTHROUGH*/ case OVR_PUBLIC: if (pubid != 0 && result == 0 && cs_streql(pubid, item->target)) result = item->replacement; break; case CAT_SYSTEM: if (sysid != 0 && cs_streql(sysid, item->target)) return item->replacement; break; case CAT_DOCTYPE: if (sysid != 0) break; /*FALLTHROUGH*/ case OVR_DOCTYPE: if (name != 0 && kind == CAT_DOCTYPE && result == 0 && (ci ? ci_streql : cs_streql) (name, item->target)) result = item->replacement; break; case CAT_ENTITY: if (sysid != 0) break; /*FALLTHROUGH*/ case OVR_ENTITY: if (name != 0 && kind >= CAT_ENTITY && result == 0 && (ci ? ci_streql : cs_streql) (name, item->target)) result = item->replacement; break; default: break; } } if (!catfile) break; } if ( result != 0 ) return result; if ( sysid != 0 ) return sysid; if ( kind == CAT_OTHER || kind == CAT_DOCTYPE ) return 0; if ( istrlen(name)+4+1 > penlen ) { gripe(ERC_REPRESENTATION, L"entity name"); return NULL; } item = sgml_malloc(sizeof(*item)); item->next = 0; item->kind = kind; item->target = istrdup(name); switch (kind) { case CAT_DOCTYPE: (void) swprintf(penname, penlen, L"%ls.dtd", name); break; case CAT_PENTITY: item->kind = CAT_ENTITY; (void) swprintf(penname, penlen, L"%ls.pen", name + 1); break; case CAT_ENTITY: (void) swprintf(penname, penlen, L"%ls.ent", name); break; default: abort(); } item->replacement = istrdup(penname); if (first_item == 0) { first_item = item; } else { last_item->next = item; } last_item = item; return item->replacement; }