| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | /*  $Id$
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Part of SWI-Prolog | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Author:        Jan Wielemaker and Richard O'Keefe | 
					
						
							|  |  |  |     E-mail:        wielemak@science.uva.nl | 
					
						
							|  |  |  |     WWW:           http://www.swi-prolog.org
 | 
					
						
							|  |  |  |     Copyright (C): 1985-2006, University of Amsterdam | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     This library is free software; you can redistribute it and/or | 
					
						
							|  |  |  |     modify it under the terms of the GNU Lesser General Public | 
					
						
							|  |  |  |     License as published by the Free Software Foundation; either | 
					
						
							|  |  |  |     version 2.1 of the License, or (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     This library is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
					
						
							|  |  |  |     Lesser General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     You should have received a copy of the GNU Lesser General Public | 
					
						
							|  |  |  |     License along with this library; if not, write to the Free Software | 
					
						
							|  |  |  |     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
 | 
					
						
							|  |  |  | #include "util.h"
 | 
					
						
							|  |  |  | #include "catalog.h"
 | 
					
						
							|  |  |  | #include <stdio.h>
 | 
					
						
							|  |  |  | #include <wctype.h>
 | 
					
						
							|  |  |  | #include <string.h>
 | 
					
						
							|  |  |  | #include <stdlib.h>
 | 
					
						
							|  |  |  | #define DTD_MINOR_ERRORS 1
 | 
					
						
							|  |  |  | #include <dtd.h>			/* error codes */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __WINDOWS__
 | 
					
						
							|  |  |  | #define swprintf _snwprintf
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef _REENTRANT
 | 
					
						
							|  |  |  | #include <pthread.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static pthread_mutex_t catalog_mutex = PTHREAD_MUTEX_INITIALIZER; | 
					
						
							|  |  |  | #define LOCK() pthread_mutex_lock(&catalog_mutex)
 | 
					
						
							|  |  |  | #define UNLOCK() pthread_mutex_unlock(&catalog_mutex)
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #define LOCK()
 | 
					
						
							|  |  |  | #define UNLOCK()
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifndef MAXPATHLEN
 | 
					
						
							|  |  |  | #define MAXPATHLEN 1024
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #ifndef MAXLINE
 | 
					
						
							|  |  |  | #define MAXLINE 1024
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #ifndef EOS
 | 
					
						
							|  |  |  | #define EOS '\0'
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #ifndef TRUE
 | 
					
						
							|  |  |  | #define TRUE 1
 | 
					
						
							|  |  |  | #define FALSE 0
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define streq(s1, s2) istreq(s1, s2)
 | 
					
						
							|  |  |  | #define uc(p) (*(p))
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct catalogue_item *catalogue_item_ptr; | 
					
						
							|  |  |  | struct catalogue_item | 
					
						
							|  |  |  | { catalogue_item_ptr next; | 
					
						
							|  |  |  |   int kind; | 
					
						
							|  |  |  |   ichar const *target; | 
					
						
							|  |  |  |   ichar const *replacement; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static catalogue_item_ptr first_item = 0, last_item = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct _catalog_file | 
					
						
							|  |  |  | { ichar *file; | 
					
						
							|  |  |  |   struct _catalog_file *next; | 
					
						
							|  |  |  |   int loaded;				/* did we parse this file? */ | 
					
						
							|  |  |  |   catalogue_item_ptr first_item;	/* List of items in the file */ | 
					
						
							|  |  |  |   catalogue_item_ptr last_item; | 
					
						
							|  |  |  | } catalog_file; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static catalog_file *catalog; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __WINDOWS__
 | 
					
						
							|  |  |  | #define isDirSep(c) ((c) == '/' || (c) == '\\')
 | 
					
						
							|  |  |  | #define DIRSEPSTR L"\\"
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #define isDirSep(c) ((c) == '/')
 | 
					
						
							|  |  |  | #define DIRSEPSTR L"/"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static ichar * | 
					
						
							|  |  |  | DirName(const ichar *f, ichar *dir) | 
					
						
							|  |  |  | { const ichar *base, *p; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for (base = p = f; *p; p++) | 
					
						
							|  |  |  |   { if (isDirSep(*p) && p[1] != EOS) | 
					
						
							|  |  |  |       base = p; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if (base == f) | 
					
						
							|  |  |  |   { if (isDirSep(*f)) | 
					
						
							|  |  |  |       istrcpy(dir, DIRSEPSTR); | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       istrcpy(dir, L"."); | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { istrncpy(dir, f, base - f); | 
					
						
							|  |  |  |     dir[base - f] = EOS; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return dir; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | is_absolute_path(const ichar *name) | 
					
						
							|  |  |  | { if (isDirSep(name[0]) | 
					
						
							|  |  |  | #ifdef __WINDOWS__
 | 
					
						
							|  |  |  |       || (iswalpha(uc(name)) && name[1] == ':') | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     return TRUE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return FALSE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | localpath() creates an absolute  path  for   name  relative  to ref. The | 
					
						
							|  |  |  | returned path must be freed using sgml_free() when done. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ichar * | 
					
						
							|  |  |  | localpath(const ichar *ref, const ichar *name) | 
					
						
							|  |  |  | { ichar *local; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (!ref || is_absolute_path(name)) | 
					
						
							|  |  |  |     local = istrdup(name); | 
					
						
							|  |  |  |   else | 
					
						
							|  |  |  |   { ichar buf[MAXPATHLEN]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     DirName(ref, buf); | 
					
						
							|  |  |  |     istrcat(buf, DIRSEPSTR); | 
					
						
							|  |  |  |     istrcat(buf, name); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     local = istrdup(buf); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (!local) | 
					
						
							|  |  |  |     sgml_nomem(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return local; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | int | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | register_catalog_file_unlocked(const ichar *file, catalog_location where) | 
					
						
							|  |  |  | { catalog_file **f = &catalog; | 
					
						
							|  |  |  |   catalog_file *cf; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for (; *f; f = &(*f)->next) | 
					
						
							|  |  |  |   { cf = *f; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (istreq(cf->file, file)) | 
					
						
							|  |  |  |       return TRUE;		/* existing, move? */ | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   cf = sgml_malloc(sizeof(*cf)); | 
					
						
							|  |  |  |   memset(cf, 0, sizeof(*cf)); | 
					
						
							|  |  |  |   cf->file = istrdup(file); | 
					
						
							|  |  |  |   if (!cf->file) | 
					
						
							|  |  |  |     sgml_nomem(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (where == CTL_END) | 
					
						
							|  |  |  |   { cf->next = NULL; | 
					
						
							|  |  |  |     *f = cf; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { cf->next = catalog; | 
					
						
							|  |  |  |     catalog = cf; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return TRUE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static wchar_t * | 
					
						
							|  |  |  | wgetenv(const char *name) | 
					
						
							|  |  |  | { const char *vs; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( (vs = getenv(name)) ) | 
					
						
							|  |  |  |   { size_t wl = mbstowcs(NULL, vs, 0); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( wl > 0 ) | 
					
						
							|  |  |  |     { wchar_t *ws = sgml_malloc((wl+1)*sizeof(wchar_t)); | 
					
						
							|  |  |  |       mbstowcs(ws, vs, wl+1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return ws; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | init_catalog() | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | { static int done = FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   LOCK(); | 
					
						
							|  |  |  |   if ( !done++ ) | 
					
						
							|  |  |  |   { ichar *path = wgetenv("SGML_CATALOG_FILES"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!path) | 
					
						
							|  |  |  |     { UNLOCK(); | 
					
						
							|  |  |  |       return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while (*path) | 
					
						
							|  |  |  |     { ichar buf[MAXPATHLEN]; | 
					
						
							|  |  |  |       ichar *s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ((s = istrchr(path, L':'))) | 
					
						
							|  |  |  |       { istrncpy(buf, path, s - path); | 
					
						
							|  |  |  | 	buf[s - path] = '\0'; | 
					
						
							|  |  |  | 	path = s + 1; | 
					
						
							|  |  |  | 	if ( buf[0] )			/* skip empty entries */ | 
					
						
							|  |  |  | 	  register_catalog_file_unlocked(buf, CTL_START); | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { if ( path[0] )			/* skip empty entries */ | 
					
						
							|  |  |  | 	  register_catalog_file_unlocked(path, CTL_START); | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   UNLOCK(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | register_catalog_file(const ichar *file, catalog_location where) | 
					
						
							|  |  |  | { int rc; | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |   init_catalog(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   LOCK(); | 
					
						
							|  |  |  |   rc = register_catalog_file_unlocked(file, where); | 
					
						
							|  |  |  |   UNLOCK(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return rc; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *     CATALOG FILE PARSING	* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | The code from here to the  end  of   this  file  was  written by Richard | 
					
						
							|  |  |  | O'Keefe and modified by Jan Wielemaker to fit   in  with the rest of the | 
					
						
							|  |  |  | parser. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <ctype.h>
 | 
					
						
							|  |  |  | #include <stdio.h>
 | 
					
						
							|  |  |  | #include <stdlib.h>
 | 
					
						
							|  |  |  | #include <string.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  OVERRIDE YES/NO
 | 
					
						
							|  |  |  |     sets a boolean flag initialised to NO. | 
					
						
							|  |  |  |     The value of this flag is stored as part of each entry. | 
					
						
							|  |  |  |     (PUBLIC|DOCTYPE|ENTITY)&YES will match whether a system identifier | 
					
						
							|  |  |  |     was provided in the source document or not; | 
					
						
							|  |  |  |     (PUBLIC|DOCTYPE|ENTITY)&NO will only match if a system identifier | 
					
						
							|  |  |  |     was not provided. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  catalogue =
 | 
					
						
							|  |  |  |     (   PUBLIC  pubid filename | 
					
						
							|  |  |  |     |   SYSTEM  sysid filename | 
					
						
							|  |  |  |     |   DOCTYPE name  filename | 
					
						
							|  |  |  |     |   ENTITY  name  filename | 
					
						
							|  |  |  |     |   OVERRIDE YES | 
					
						
							|  |  |  |     |   OVERRIDE NO | 
					
						
							|  |  |  |     |   BASE          filename | 
					
						
							|  |  |  |     |   junk | 
					
						
							|  |  |  |     )* | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  Keywords are matched ignoring case.  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | ci_streql(ichar const *a, ichar const *b) | 
					
						
							|  |  |  | { return istrcaseeq(a, b); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  Names may be matched heeding case in XML.  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | cs_streql(ichar const *a, ichar const *b) | 
					
						
							|  |  |  | { return istreq(a, b); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  Any other word or any quoted string is reported as CAT_OTHER.
 | 
					
						
							|  |  |  |     When we are not looking for the beginning of an entry, the only | 
					
						
							|  |  |  |     positive outcome is CAT_OTHER. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | scan_overflow(size_t buflen) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  | { gripe(NULL, ERC_REPRESENTATION, L"token length"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   return EOF; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | scan(FILE* src, ichar *buffer, size_t buflen, int kw_expected) | 
					
						
							|  |  |  | { int c, q; | 
					
						
							|  |  |  |   ichar *p = buffer, *e = p + buflen - 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for (;;) | 
					
						
							|  |  |  |   { c = getc(src); | 
					
						
							|  |  |  |     if (c <= ' ') | 
					
						
							|  |  |  |     { if (c < 0) | 
					
						
							|  |  |  | 	return EOF; | 
					
						
							|  |  |  |       continue; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (c == '-') | 
					
						
							|  |  |  |     { c = getc(src); | 
					
						
							|  |  |  |       if (c != '-') | 
					
						
							|  |  |  |       { *p++ = '-'; | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       for (;;) | 
					
						
							|  |  |  |       { c = getc(src); | 
					
						
							|  |  |  | 	if (c < 0) | 
					
						
							|  |  |  | 	  return EOF; | 
					
						
							|  |  |  | 	if (c == '-') | 
					
						
							|  |  |  | 	{ c = getc(src); | 
					
						
							|  |  |  | 	  if (c < 0) | 
					
						
							|  |  |  | 	    return EOF; | 
					
						
							|  |  |  | 	  if (c == '-') | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       continue; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (c == '"' || c == '\'') | 
					
						
							|  |  |  |     { q = c; | 
					
						
							|  |  |  |       for (;;) | 
					
						
							|  |  |  |       { c = getc(src); | 
					
						
							|  |  |  | 	if (c < 0) | 
					
						
							|  |  |  | 	  return EOF; | 
					
						
							|  |  |  | 	if (c == q) | 
					
						
							|  |  |  | 	{ *p = '\0'; | 
					
						
							|  |  |  | 	  return CAT_OTHER; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if (p == e) | 
					
						
							|  |  |  | 	  return scan_overflow(buflen); | 
					
						
							|  |  |  | 	*p++ = c; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   /*  We reach here if there is an unquoted token.   */ | 
					
						
							|  |  |  |   /*  Don't try "PUBLIC--well/sortof--'foo' 'bar'"   */ | 
					
						
							|  |  |  |   /*  because hyphens are allowed in unquoted words  */ | 
					
						
							|  |  |  |   /*  and so are slashes and a bunch of other stuff. */ | 
					
						
							|  |  |  |   /*  To keep this code simple, an unquoted token    */ | 
					
						
							|  |  |  |   /*  ends at EOF, ', ", or layout.                  */ | 
					
						
							|  |  |  |   while (c > ' ' && c != '"' && c != '\'') | 
					
						
							|  |  |  |   { if (p == e) | 
					
						
							|  |  |  |       return scan_overflow(buflen); | 
					
						
							|  |  |  |     *p++ = c; | 
					
						
							|  |  |  |     c = getc(src); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   *p = '\0'; | 
					
						
							|  |  |  |   if (kw_expected) | 
					
						
							|  |  |  |   { if (ci_streql(buffer, L"public")) | 
					
						
							|  |  |  |       return CAT_PUBLIC; | 
					
						
							|  |  |  |     if (ci_streql(buffer, L"system")) | 
					
						
							|  |  |  |       return CAT_SYSTEM; | 
					
						
							|  |  |  |     if (ci_streql(buffer, L"entity")) | 
					
						
							|  |  |  |       return CAT_ENTITY; | 
					
						
							|  |  |  |     if (ci_streql(buffer, L"doctype")) | 
					
						
							|  |  |  |       return CAT_DOCTYPE; | 
					
						
							|  |  |  |     if (ci_streql(buffer, L"override")) | 
					
						
							|  |  |  |       return CAT_OVERRIDE; | 
					
						
							|  |  |  |     if (ci_streql(buffer, L"base")) | 
					
						
							|  |  |  |       return CAT_BASE; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   return CAT_OTHER; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  The strings can represent names (taken verbatim),
 | 
					
						
							|  |  |  |     system identifiers (ditto), or public identifiers (squished). | 
					
						
							|  |  |  |     We need to squish, and we need to copy.  When it comes to | 
					
						
							|  |  |  |     squishing, we don't need to worry about Unicode spaces, | 
					
						
							|  |  |  |     because public identifiers aren't allowed to have any characters | 
					
						
							|  |  |  |     that aren't in ASCII. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | squish(ichar *pubid) | 
					
						
							|  |  |  | { ichar const *s = (ichar const *) pubid; | 
					
						
							|  |  |  |   ichar *d = (ichar *) pubid; | 
					
						
							|  |  |  |   ichar c; | 
					
						
							|  |  |  |   int w; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   w = 1; | 
					
						
							|  |  |  |   while ((c = *s++) != '\0') | 
					
						
							|  |  |  |   { if (c <= ' ') | 
					
						
							|  |  |  |     { if (!w) | 
					
						
							|  |  |  | 	*d++ = ' ', w = 1; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |     { *d++ = c, w = 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if (w && d != (ichar *) pubid) | 
					
						
							|  |  |  |     d--; | 
					
						
							|  |  |  |   *d = '\0'; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  We represent a catalogue internally by a list of
 | 
					
						
							|  |  |  |     (CAT_xxx, string, string) | 
					
						
							|  |  |  |     triples. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | load_one_catalogue(catalog_file * file) | 
					
						
							|  |  |  | { FILE *src = wfopen(file->file, "r"); | 
					
						
							|  |  |  |   ichar buffer[2 * FILENAME_MAX]; | 
					
						
							|  |  |  |   ichar base[2 * FILENAME_MAX]; | 
					
						
							|  |  |  |   ichar *p; | 
					
						
							|  |  |  |   int t; | 
					
						
							|  |  |  |   catalogue_item_ptr this_item; | 
					
						
							|  |  |  |   int override = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !src ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(NULL, ERC_NO_CATALOGUE, file->file); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   (void) istrcpy(base, file->file); | 
					
						
							|  |  |  |   p = base + istrlen(base); | 
					
						
							|  |  |  |   while (p != base && !isDirSep(p[-1])) | 
					
						
							|  |  |  |     p--; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for (;;) | 
					
						
							|  |  |  |   { t = scan(src, buffer, sizeof(buffer), 1); | 
					
						
							|  |  |  |     switch (t) | 
					
						
							|  |  |  |     { case CAT_BASE: | 
					
						
							|  |  |  | 	if (scan(src, buffer, sizeof(buffer), 0) == EOF) | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	(void) istrcpy(base, buffer); | 
					
						
							|  |  |  | 	p = base + istrlen(base); | 
					
						
							|  |  |  | 	if (p != base && !isDirSep(p[-1])) | 
					
						
							|  |  |  | 	  *p++ = '/'; | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |       case CAT_OVERRIDE: | 
					
						
							|  |  |  | 	if (scan(src, buffer, sizeof(buffer), 0) == EOF) | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	override = towlower(buffer[0]) == 'y' ? CAT_OVERRIDE : 0; | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |       case CAT_PUBLIC: | 
					
						
							|  |  |  |       case CAT_SYSTEM: | 
					
						
							|  |  |  |       case CAT_ENTITY: | 
					
						
							|  |  |  |       case CAT_DOCTYPE: | 
					
						
							|  |  |  | 	this_item = sgml_malloc(sizeof *this_item); | 
					
						
							|  |  |  | 	if (scan(src, buffer, sizeof buffer, 0) == EOF) | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	if (t == CAT_PUBLIC) | 
					
						
							|  |  |  | 	  squish(buffer); | 
					
						
							|  |  |  | 	this_item->next = 0; | 
					
						
							|  |  |  | 	this_item->kind = t == CAT_SYSTEM ? t : t + override; | 
					
						
							|  |  |  | 	this_item->target = istrdup(buffer); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (scan(src, buffer, sizeof buffer, 0) == EOF) | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (is_absolute_path(buffer) || p == base) | 
					
						
							|  |  |  | 	{ this_item->replacement = istrdup(buffer); | 
					
						
							|  |  |  | 	} else | 
					
						
							|  |  |  |         { (void) istrcpy(p, buffer); | 
					
						
							|  |  |  |           this_item->replacement = istrdup(base); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (file->first_item == 0) | 
					
						
							|  |  |  | 	{ file->first_item = this_item; | 
					
						
							|  |  |  | 	} else | 
					
						
							|  |  |  | 	{ file->last_item->next = this_item; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	file->last_item = this_item; | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |       case EOF: | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	continue; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   fclose(src); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*  To look up a DTD:
 | 
					
						
							|  |  |  |     f = find_in_catalogue(CAT_DOCTYPE, name, pubid, sysid, ci); | 
					
						
							|  |  |  |     If it cannot otherwise be found and name is not null, | 
					
						
							|  |  |  |     ${name}.dtd will be returned. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     To look up a parameter entity: | 
					
						
							|  |  |  |     f = find_in_catalogue(CAT_PENTITY, name, pubid, sysid, ci); | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |     The name may begin with a % but need not; if it doesn't | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     a % will be prefixed for the search. | 
					
						
							|  |  |  |     If it cannot otherwise be found ${name}.pen will be returned. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     To look up an ordinary entity: | 
					
						
							|  |  |  |     f = find_in_catalogue(CAT_ENTITY, name, pubid, sysid, ci); | 
					
						
							|  |  |  |     If the name begins with a % this is just like a CAT_PENTITY search. | 
					
						
							|  |  |  |     If it cannot otherwise be found ${name}.ent will be returned. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     The full catalogue format allows for NOTATION (which we still need | 
					
						
							|  |  |  |     for XML), SGMLDECL, DTDDECL, and LINKTYPE.  At the moment, only | 
					
						
							|  |  |  |     notation is plausible.  To handle such things, | 
					
						
							|  |  |  |     f = find_in_catalogue(CAT_OTHER, name, pubid, sysid, ci); | 
					
						
							|  |  |  |     If it cannot be found, NULL is returned. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     The name, pubid, and sysid may each be NULL.   It doesn't really | 
					
						
							|  |  |  |     make sense for them all to be NULL. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     For SGML, name matching (DOCTYPE, ENTITY) should normally ignore | 
					
						
							|  |  |  |     alphabetic case.  Pass ci=1 to make this happen.  For XML, name | 
					
						
							|  |  |  |     matching must heed alphabetic case.  Pass ci=0 to make that happen. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     A CAT_DOCTYPE, CAT_ENTITY, or CAT_PENTITY search doesn't really make | 
					
						
							|  |  |  |     sense without a name, so if the name should happen to be 0, the search | 
					
						
							|  |  |  |     kind is converted to CAT_OTHER. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ichar const * | 
					
						
							|  |  |  | find_in_catalogue(int kind, | 
					
						
							|  |  |  | 		  ichar const *name, | 
					
						
							|  |  |  | 		  ichar const *pubid, ichar const *sysid, int ci) | 
					
						
							|  |  |  | { ichar penname[FILENAME_MAX]; | 
					
						
							|  |  |  |   const size_t penlen = sizeof(penname)/sizeof(ichar); | 
					
						
							|  |  |  |   catalogue_item_ptr item; | 
					
						
							|  |  |  |   ichar const *result; | 
					
						
							|  |  |  |   catalog_file *catfile; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   init_catalog(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( name == 0 ) | 
					
						
							|  |  |  |   { kind = CAT_OTHER; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { switch (kind) | 
					
						
							|  |  |  |     { case CAT_OTHER: | 
					
						
							|  |  |  |       case CAT_DOCTYPE: | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       case CAT_PENTITY: | 
					
						
							|  |  |  | 	if (name[0] != '%') | 
					
						
							|  |  |  | 	{ penname[0] = '%'; | 
					
						
							|  |  |  | 	  (void) istrcpy(penname + 1, name); | 
					
						
							|  |  |  | 	  name = penname; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       case CAT_ENTITY: | 
					
						
							|  |  |  | 	if (name[0] == '%') | 
					
						
							|  |  |  | 	{ kind = CAT_PENTITY; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	break; | 
					
						
							|  |  |  |       default: | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   result = 0; | 
					
						
							|  |  |  |   for (catfile = catalog;; catfile = catfile->next) | 
					
						
							|  |  |  |   { if (catfile) | 
					
						
							|  |  |  |     { if (!catfile->loaded) | 
					
						
							|  |  |  |       { load_one_catalogue(catfile); | 
					
						
							|  |  |  | 	catfile->loaded = TRUE; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       item = catfile->first_item; | 
					
						
							|  |  |  |     } else | 
					
						
							|  |  |  |       item = first_item; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for (; item != 0; item = item->next) | 
					
						
							|  |  |  |     { switch (item->kind) | 
					
						
							|  |  |  |       { case CAT_PUBLIC: | 
					
						
							|  |  |  | 	  if (sysid != 0) | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	/*FALLTHROUGH*/ | 
					
						
							|  |  |  | 	case OVR_PUBLIC: | 
					
						
							|  |  |  | 	  if (pubid != 0 && result == 0 && cs_streql(pubid, item->target)) | 
					
						
							|  |  |  | 	    result = item->replacement; | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	case CAT_SYSTEM: | 
					
						
							|  |  |  | 	  if (sysid != 0 && cs_streql(sysid, item->target)) | 
					
						
							|  |  |  | 	    return item->replacement; | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	case CAT_DOCTYPE: | 
					
						
							|  |  |  | 	  if (sysid != 0) | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	/*FALLTHROUGH*/ | 
					
						
							|  |  |  | 	case OVR_DOCTYPE: | 
					
						
							|  |  |  | 	  if (name != 0 && kind == CAT_DOCTYPE && result == 0 | 
					
						
							|  |  |  | 	      && (ci ? ci_streql : cs_streql) (name, item->target)) | 
					
						
							|  |  |  | 	    result = item->replacement; | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	case CAT_ENTITY: | 
					
						
							|  |  |  | 	  if (sysid != 0) | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	 /*FALLTHROUGH*/ case OVR_ENTITY: | 
					
						
							|  |  |  | 	  if (name != 0 && kind >= CAT_ENTITY && result == 0 | 
					
						
							|  |  |  | 	      && (ci ? ci_streql : cs_streql) (name, item->target)) | 
					
						
							|  |  |  | 	    result = item->replacement; | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 	  break; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!catfile) | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( result != 0 ) | 
					
						
							|  |  |  |     return result; | 
					
						
							|  |  |  |   if ( sysid != 0 ) | 
					
						
							|  |  |  |     return sysid; | 
					
						
							|  |  |  |   if ( kind == CAT_OTHER || kind == CAT_DOCTYPE ) | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( istrlen(name)+4+1 > penlen ) | 
					
						
							| 
									
										
										
										
											2010-05-06 10:59:09 +01:00
										 |  |  |   { gripe(NULL, ERC_REPRESENTATION, L"entity name"); | 
					
						
							| 
									
										
										
										
											2009-03-13 19:39:06 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   item = sgml_malloc(sizeof(*item)); | 
					
						
							|  |  |  |   item->next = 0; | 
					
						
							|  |  |  |   item->kind = kind; | 
					
						
							|  |  |  |   item->target = istrdup(name); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   switch (kind) | 
					
						
							|  |  |  |   { case CAT_DOCTYPE: | 
					
						
							|  |  |  |       (void) swprintf(penname, penlen, L"%ls.dtd", name); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case CAT_PENTITY: | 
					
						
							|  |  |  |       item->kind = CAT_ENTITY; | 
					
						
							|  |  |  |       (void) swprintf(penname, penlen, L"%ls.pen", name + 1); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case CAT_ENTITY: | 
					
						
							|  |  |  |       (void) swprintf(penname, penlen, L"%ls.ent", name); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       abort(); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   item->replacement = istrdup(penname); | 
					
						
							|  |  |  |   if (first_item == 0) | 
					
						
							|  |  |  |   { first_item = item; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { last_item->next = item; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   last_item = item; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return item->replacement; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 |