yap-6.3/os/pl-ctype.c

/*  Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2013, University of Amsterdam
			      VU University Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/

#include "pl-incl.h"
#include <ctype.h>
#include "pl-ctype.h"

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This module defines:

	char_type(?Char, ?Type)
	code_type(?Char, ?Type)

See manual for details.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

#define CTX_CHAR 0			/* Class(Char) */
#define CTX_CODE 1			/* Class(Int) */

typedef struct
{ atom_t	name;			/* name of the class */
  int (*test)(wint_t chr);		/* boolean */
  int (*reverse)(wint_t chr);		/* reverse mapping */
  short		arity;			/* arity of class (i.e. lower('A')) */
  short		ctx_type;		/* CTX_* */
} char_type;

#define ENUM_NONE	0x00
#define ENUM_CHAR	0x01
#define ENUM_CLASS	0x02
#define ENUM_BOTH	0x03

typedef struct
{ int		current;		/* current character */
  const char_type   *class;		/* current class */
  int		do_enum;		/* what to enumerate */
} generator;


static int
iswhite(wint_t chr)
{ return chr == ' ' || chr == '\t';
}


static int
fiscsym(wint_t chr)
{ return iswalnum(chr) || chr == '_';
}


static int
fiscsymf(wint_t chr)
{ return iswalpha(chr) || chr == '_';
}

static int
iseof(wint_t chr)
{ return chr == (wint_t)-1;
}

static int
iseol(wint_t chr)
{ return chr >= 10 && chr <= 13;
}

static int
isnl(wint_t chr)
{ return chr == '\n';
}

static int
isperiod(wint_t chr)
{ return chr && strchr(".?!", chr) != NULL;
}

static int
isquote(wint_t chr)
{ return chr && strchr("'`\"", chr) != NULL;
}

static int
fupper(wint_t chr)
{ return iswlower(chr) ? (int)towupper(chr) : -1;
}

static int
flower(wint_t chr)
{ return iswupper(chr) ? (int)towlower(chr) : -1;
}

static int
ftoupper(wint_t chr)
{ return towupper(chr);
}

static int
ftolower(wint_t chr)
{ return towlower(chr);
}

static int
fparen(wint_t chr)
{ switch(chr)
  { case '(':
      return ')';
    case '{':
      return '}';
    case '[':
      return ']';
    default:
      return -1;
  }
}


static int
rparen(wint_t chr)
{ switch(chr)
  { case ')':
      return '(';
    case '}':
      return '{';
    case ']':
      return '[';
    default:
      return -1;
  }
}


static int
fdigit(wint_t chr)
{ if ( chr <= 0xff && isdigit(chr) )
    return chr - '0';
  return -1;
}


static int
rdigit(wint_t d)
{ if ( (int)d >= 0 && d <= 9 )
    return d+'0';
  return -1;
}


static int
fxdigit(wint_t chr)
{ if ( chr > 0xff )
    return -1;
  if ( isdigit(chr) )
    return chr - '0';
  if ( chr >= 'a' && chr <= 'f' )
    return chr - 'a' + 10;
  if ( chr >= 'A' && chr <= 'F' )
    return chr - 'A' + 10;
  return -1;
}


static int
rxdigit(wint_t d)
{ if ( (int)d >= 0 && d <= 9 )
    return d+'0';
  if ( d >= 10 && d <= 15 )
    return d-10+'a';
  return -1;
}


#define mkfunction(name) \
	static int f ## name(wint_t chr) { return name(chr); }

mkfunction(iswalnum)
mkfunction(iswalpha)
mkfunction(isascii)
mkfunction(iswcntrl)
mkfunction(iswdigit)
mkfunction(iswgraph)
mkfunction(iswlower)
mkfunction(iswupper)
mkfunction(iswpunct)
mkfunction(iswspace)

static const char_type char_types[] =
{ { ATOM_alnum,			     fiswalnum },
  { ATOM_alpha,			     fiswalpha },
  { ATOM_csym,			     fiscsym },
  { ATOM_csymf,			     fiscsymf },
  { ATOM_prolog_var_start,	     f_is_prolog_var_start },
  { ATOM_prolog_atom_start,	     f_is_prolog_atom_start },
  { ATOM_prolog_identifier_continue, f_is_prolog_identifier_continue },
  { ATOM_prolog_symbol,		     f_is_prolog_symbol },
  { ATOM_csymf,			     fiscsymf },
  { ATOM_ascii,			     fisascii },
  { ATOM_white,			     iswhite },
  { ATOM_cntrl,			     fiswcntrl },
  { ATOM_digit,			     fiswdigit },
  { ATOM_graph,			     fiswgraph },
  { ATOM_lower,			     fiswlower },
  { ATOM_upper,			     fiswupper },
  { ATOM_punct,			     fiswpunct },
  { ATOM_space,			     fiswspace },
  { ATOM_end_of_file,		     iseof },
  { ATOM_end_of_line,		     iseol },
  { ATOM_newline,		     isnl },
  { ATOM_period,		     isperiod },
  { ATOM_quote,			     isquote },
  { ATOM_lower,			     fupper,	flower,   1, CTX_CHAR },
  { ATOM_upper,			     flower,	fupper,   1, CTX_CHAR },
  { ATOM_to_lower,		     ftoupper,	ftolower, 1, CTX_CHAR },
  { ATOM_to_upper,		     ftolower,	ftoupper, 1, CTX_CHAR },
  { ATOM_paren,			     fparen,	rparen,   1, CTX_CHAR },
  { ATOM_digit,			     fdigit,	rdigit,   1, CTX_CODE  },
  { ATOM_xdigit,		     fxdigit,	rxdigit,  1, CTX_CODE  },
  { NULL_ATOM,			     NULL }
};


static const char_type *
char_type_by_name(atom_t name, int arity)
{ const char_type *cc;

  for(cc = char_types; cc->name; cc++)
  { if ( cc->name == name && cc->arity == arity )
      return cc;
  }

  return NULL;
}


static int
advanceGen(generator *gen)
{ if ( gen->do_enum & ENUM_CHAR )
  { if ( ++gen->current == 256 )
      fail;
  } else
  { gen->class++;
    if ( !gen->class->name )
      fail;
  }

  succeed;
}


static int
unify_char_type(term_t type, const char_type *ct, int context, int how)
{ GET_LD

  if ( ct->arity == 0 )
  { return PL_unify_atom(type, ct->name);
  } else /*if ( ct->arity == 1 )*/
  { if ( PL_unify_functor(type, PL_new_functor(ct->name, 1)) )
    { term_t a = PL_new_term_ref();

      _PL_get_arg(1, type, a);

      if ( ct->ctx_type == CTX_CHAR )
	return PL_unify_char(a, context, how);
      else
	return PL_unify_integer(a, context);
    }
  }

  fail;
}


static foreign_t
do_char_type(term_t chr, term_t class, control_t h, int how)
{ GET_LD
  generator *gen;
  fid_t fid;

  switch( ForeignControl(h) )
  { case FRG_FIRST_CALL:
    { const char_type *cc = NULL;
      int c;
      int do_enum = ENUM_NONE;
      atom_t cn;
      int arity;

      if ( PL_is_variable(chr) )
	do_enum |= ENUM_CHAR;
      if ( PL_is_variable(class) )
	do_enum |= ENUM_CLASS;

      if ( do_enum == ENUM_BOTH )
	return PL_error("char_type", 2, NULL, ERR_INSTANTIATION);

      if ( !(do_enum & ENUM_CHAR) )
      { if ( !PL_get_char(chr, &c, TRUE) )
	  fail;
	if ( c == -1 )
	  return PL_unify_atom(class, ATOM_end_of_file);
      }

      if ( !(do_enum & ENUM_CLASS) )
      { if ( !PL_get_name_arity(class, &cn, &arity) ||
	     !(cc = char_type_by_name(cn, arity)) )
	  return PL_error("char_type", 2, NULL,
			  ERR_TYPE, ATOM_char_type, class);
      }

      if ( do_enum == ENUM_NONE )
      { if ( arity == 0 )
	  return (*cc->test)((wint_t)c) ? TRUE : FALSE;
	else
	{ int rval = (*cc->test)((wint_t)c);

	  if ( rval >= 0 )
	  { term_t a = PL_new_term_ref();
	    int ok;

	    _PL_get_arg(1, class, a);

	    if ( cc->ctx_type == CTX_CHAR )
	      ok = PL_unify_char(a, rval, how);
	    else
	      ok = PL_unify_integer(a, rval);

	    if ( ok )
	      return TRUE;
	    else
	      do_enum = ENUM_CHAR;	/* try the other way around */
	  } else
	    fail;
	}
      }

      if ( do_enum == ENUM_CHAR && arity == 1 )
      {	term_t a = PL_new_term_ref();	/* char_type(X, lower('A')) */
	int ca;

	_PL_get_arg(1, class, a);
	if ( !PL_is_variable(a) )
	{ if ( PL_get_char(a, &ca, FALSE) )
	  { int c = (*cc->reverse)((wint_t)ca);

	    if ( c < 0 )
	      fail;

	    return PL_unify_char(chr, c, how);
	  }
	  fail;				/* error */
	}
      }

      gen = allocForeignState(sizeof(*gen));
      gen->do_enum = do_enum;

      if ( do_enum & ENUM_CHAR )
      { gen->class      = cc;
	gen->current    = -1;
      } else if ( do_enum & ENUM_CLASS )
      { gen->class	= char_types;
	gen->current    = c;
      }

      break;
    }
    case FRG_REDO:
      gen = ForeignContextPtr(h);
      break;
    case FRG_CUTTED:
      gen = ForeignContextPtr(h);
      if ( gen )
	freeForeignState(gen, sizeof(*gen));
    default:
      succeed;
  }

  if ( !(fid = PL_open_foreign_frame()) )
    goto error;

  for(;;)
  { int rval;

    if ( (rval = (*gen->class->test)((wint_t)gen->current)) )
    { if ( gen->do_enum & ENUM_CHAR )
      { if ( !PL_unify_char(chr, gen->current, how) )
	  goto next;
      }
      if ( gen->class->arity > 0 )
      { if ( rval < 0 ||
	     !unify_char_type(class, gen->class, rval, how) )
	  goto next;

      } else if ( gen->do_enum & ENUM_CLASS )
      { if ( !unify_char_type(class, gen->class, rval, how) )
	  goto next;
      }

      if ( advanceGen(gen) )		/* ok, found one */
      { ForeignRedoPtr(gen);
      } else
      { freeForeignState(gen, sizeof(*gen));	/* the only one */
	succeed;
      }
    }
  next:
    PL_rewind_foreign_frame(fid);

    if ( !advanceGen(gen) )
      break;
  }

error:
  freeForeignState(gen, sizeof(*gen));
  fail;
}


static
PRED_IMPL("char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
/** @pred  char_type(? _Char_, ? _Type_)


Tests or generates alternative  _Types_ or  _Chars_. The
character-types are inspired by the standard `C`
`<ctype.h>` primitives.

+  `alnum`
     _Char_ is a letter (upper- or lowercase) or digit.

+ `alpha`
    _Char_ is a letter (upper- or lowercase).

+ `csym`
    _Char_ is a letter (upper- or lowercase), digit or the underscore (_). These are valid C- and Prolog symbol characters.

+ `csymf`
    _Char_ is a letter (upper- or lowercase) or the underscore (_). These are valid first characters for C- and Prolog symbols

+ `ascii`
    _Char_ is a 7-bits ASCII character (0..127).

+ `white`
    _Char_ is a space or tab. E.i. white space inside a line.

+ `cntrl`
    _Char_ is an ASCII control-character (0..31).

+ `digit`
    _Char_ is a digit.

+ `digit( _Weight_)`
    _Char_ is a digit with value _Weight_. I.e. `char_type(X, digit(6))` yields X =  aaasaá'6'. Useful for parsing numbers.

+ `xdigit( _Weight_)`
    _Char_ is a hexa-decimal digit with value  _Weight_. I.e. char_type(a, xdigit(X) yields X = '10'. Useful for parsing numbers.

+ `graph`
    _Char_ produces a visible mark on a page when printed. Note that the space is not included!

+ `lower`
    _Char_ is a lower-case letter.

+ `lower(Upper)`
    _Char_ is a lower-case version of  _Upper_. Only true if _Char_ is lowercase and  _Upper_ uppercase.

+ `to_lower(Upper)`
 _Char_ is a lower-case version of Upper. For non-letters, or letter without case,  _Char_ and Lower are the same. See also upcase_atom/2 and downcase_atom/2.

+ `upper`
 _Char_ is an upper-case letter.

+ `upper(Lower)`
 _Char_ is an upper-case version of Lower. Only true if  _Char_ is uppercase and Lower lowercase.

+ `to_upper(Lower)`
 _Char_ is an upper-case version of Lower. For non-letters, or letter without case,  _Char_ and Lower are the same. See also upcase_atom/2 and downcase_atom/2.

+ `punct`
 _Char_ is a punctuation character. This is a graph character that is not a letter or digit.

+ `space`
 _Char_ is some form of layout character (tab, vertical-tab, newline, etc.).

+ `end_of_file`
 _Char_ is -1.

+ `end_of_line`
 _Char_ ends a line (ASCII: 10..13).

+ `newline`
 _Char_ is a the newline character (10).

+ `period`
 _Char_ counts as the end of a sentence (.,!,?).

+ `quote`
 _Char_ is a quote-character.

+ `paren(Close)`
 _Char_ is an open-parenthesis and Close is the corresponding close-parenthesis.


+ `code_type(? _Code_, ? _Type_)`


As char_type/2, but uses character-codes rather than
one-character atoms. Please note that both predicates are as
flexible as possible. They handle either representation if the
argument is instantiated and only will instantiate with an integer
code or one-character atom depending of the version used. See also
the prolog-flag double_quotes and the built-in predicates
atom_chars/2 and atom_codes/2.


 */
{ return do_char_type(A1, A2, PL__ctx, PL_CHAR);
}


static
PRED_IMPL("code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
{ return do_char_type(A1, A2, PL__ctx, PL_CODE);
}


#if 0
static
PRED_IMPL("iswctype", 2, iswctype, 0)
{ char *s;
  int chr;
  wctype_t t;

  if ( !PL_get_char_ex(A1, &chr, FALSE) ||
       !PL_get_chars(A2, &s, CVT_ATOM|CVT_EXCEPTION) )
    return FALSE;

  if ( !(t=wctype(s)) )
    return PL_error(NULL, 0, NULL, ERR_EXISTENCE, ATOM_type, A2);

  return iswctype(chr, t) ? TRUE : FALSE;
}
#endif


static int
init_tout(PL_chars_t *t, size_t len)
{ switch(t->encoding)
  { case ENC_ISO_LATIN_1:
      if ( len < sizeof(t->buf) )
      { t->text.t = t->buf;
	t->storage = PL_CHARS_LOCAL;
      } else
      { t->text.t = PL_malloc(len);
	t->storage = PL_CHARS_MALLOC;
      }
      succeed;
    case ENC_WCHAR:
      if ( len*sizeof(pl_wchar_t) < sizeof(t->buf) )
      { t->text.w = (pl_wchar_t*)t->buf;
	t->storage = PL_CHARS_LOCAL;
      } else
      { t->text.w = PL_malloc(len*sizeof(pl_wchar_t));
	t->storage = PL_CHARS_MALLOC;
      }
      succeed;
    default:
      assert(0);
      fail;
  }
}


static inline wint_t
get_chr_from_text(const PL_chars_t *t, size_t index)
{ switch(t->encoding)
  { case ENC_ISO_LATIN_1:
      return t->text.t[index]&0xff;
    case ENC_WCHAR:
      return t->text.w[index];
    default:
      assert(0);
      return 0;
  }
}


static foreign_t
modify_case_atom(term_t in, term_t out, int down)
{ GET_LD
  PL_chars_t tin, tout;

  if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
    return FALSE;

  if ( PL_get_text(out, &tout, CVT_ATOMIC) )
  { unsigned int i;

    if ( tin.length != tout.length )
      fail;

    for(i=0; i<tin.length; i++)
    { wint_t ci = get_chr_from_text(&tin, i);
      wint_t co = get_chr_from_text(&tout, i);

      if ( down )
      { if ( co != towlower(ci) )
	  fail;
      } else
      { if ( co != towupper(ci) )
	  fail;
      }
    }

    succeed;
  } else if ( PL_is_variable(out) )
  { unsigned int i;

    tout.encoding  = tin.encoding;
    tout.length    = tin.length;
    tout.canonical = FALSE;		/* or TRUE? Can WCHAR map to ISO? */

    init_tout(&tout, tin.length);

    if ( tin.encoding == ENC_ISO_LATIN_1 )
    { const unsigned char *in = (const unsigned char*)tin.text.t;

      if ( down )
      { for(i=0; i<tin.length; i++)
	{ wint_t c = towlower(in[i]);

	  if ( c > 255 )
	  { PL_promote_text(&tout);
	    for( ; i<tin.length; i++)
	    { tout.text.w[i] = towlower(in[i]);
	    }
	    break;
	  } else
	  { tout.text.t[i] = (char)c;
	  }
	}
      } else				/* upcase */
      { for(i=0; i<tin.length; i++)
	{ wint_t c = towupper(in[i]);

	  if ( c > 255 )
	  { PL_promote_text(&tout);
	    for( ; i<tin.length; i++)
	    { tout.text.w[i] = towupper(in[i]);
	    }
	    break;
	  } else
	  { tout.text.t[i] = (char)c;
	  }
	}
      }
    } else
    { if ( down )
      { for(i=0; i<tin.length; i++)
	{ tout.text.w[i] = towlower(tin.text.w[i]);
	}
      } else
      { for(i=0; i<tin.length; i++)
	{ tout.text.w[i] = towupper(tin.text.w[i]);
	}
      }
    }

    PL_unify_text(out, 0, &tout, PL_ATOM);
    PL_free_text(&tout);

    succeed;
  } else
  { return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_atom, out);
  }
}


static
PRED_IMPL("downcase_atom", 2, downcase_atom, 0)
{ return modify_case_atom(A1, A2, TRUE);
}


static
PRED_IMPL("upcase_atom", 2, upcase_atom, 0)
{ return modify_case_atom(A1, A2, FALSE);
}


		 /*******************************
		 *	    WHITE SPACE		*
		 *******************************/

static int
write_normalize_space(IOSTREAM *out, term_t in)
{ GET_LD
  PL_chars_t tin;
  size_t i, end;

  if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
    return FALSE;

  end = tin.length;
  i = 0;

  while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
    i++;
  while( i<end )
  { wint_t c;

    while(i<end && !unicode_separator((c=get_chr_from_text(&tin, i))))
    { if ( Sputcode(c, out) < 0 )
	fail;
      i++;
    }
    while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
      i++;
    if ( i < end )
    { if (  Sputcode(' ', out) < 0 )
	fail;
    }
  }

  succeed;
}


static
PRED_IMPL("normalize_space", 2, normalize_space, 0)
{ redir_context ctx;
  word rc;

  if ( (rc = setupOutputRedirect(A1, &ctx, FALSE)) )
  { if ( (rc = write_normalize_space(ctx.stream, A2)) )
      rc = closeOutputRedirect(&ctx);
    else
      discardOutputRedirect(&ctx);
  }

  return rc;
}


		 /*******************************
		 *	       LOCALE		*
		 *******************************/

#if O_LOCALE
#include <locale.h>

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Note: on some installations, locale doesn't   work correctly. Printing a
message isn't really cute. It would be better to use printMessage(), but
the system isn't yet initialised far enough.   Maybe we should store the
failure and print a message at the end of the initialisation?

We only return FALSE if LC_CTYPE  fails.   This  is a serious indication
that locale support is broken. We don't   depend too much on the others,
so we ignore possible problems.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
init_locale(void)
{ int rc = TRUE;

  if ( !setlocale(LC_CTYPE, "") )
  { rc = FALSE;
    DEBUG(0, Sdprintf("Failed to set LC_CTYPE locale\n"));
  }
  if ( !setlocale(LC_TIME, "") )
  { DEBUG(0, Sdprintf("Failed to set LC_TIME locale\n"));
  }
  if ( !setlocale(LC_COLLATE, "") )
  { DEBUG(0, Sdprintf("Failed to set LC_COLLATE locale\n"));
  }

  return rc;
}

typedef struct
{ int category;
  const char *name;
} lccat;

static lccat lccats[] =
{ { LC_ALL,      "all" },
  { LC_COLLATE,  "collate" },
  { LC_CTYPE,    "ctype" },
#ifdef LC_MESSAGES
  { LC_MESSAGES, "messages" },
#endif
  { LC_MONETARY, "monetary" },
  { LC_NUMERIC,  "numeric" },
  { LC_TIME,     "time" },
  { 0,           NULL }
};


static
PRED_IMPL("setlocale", 3, setlocale, 0)
{ PRED_LD
  char *what;
  char *locale;
  const lccat *lcp;


  if ( !PL_get_chars(A1, &what, CVT_ATOM|CVT_EXCEPTION) )
    fail;
  if ( PL_is_variable(A3) )
    locale = NULL;
  else if ( !PL_get_chars(A3, &locale, CVT_ATOM|CVT_EXCEPTION) )
    fail;

  for ( lcp = lccats; lcp->name; lcp++ )
  { if ( streq(lcp->name, what) )
    { char *old = setlocale(lcp->category, NULL);

      if ( !PL_unify_chars(A2, PL_ATOM, -1, old) )
	fail;

      if ( PL_compare(A2, A3) != 0 )
      { if ( !setlocale(lcp->category, locale) )
	{ if ( errno == ENOENT )
	    return PL_existence_error("locale", A3);
	  return PL_error(NULL, 0, MSG_ERRNO, ERR_SYSCALL, "setlocale");
	}
      }

#ifdef O_LOCALE
      updateLocale(lcp->category, locale);
#endif

      succeed;
    }
  }

  return PL_domain_error("category", A1);
}

#else

#define init_locale() 1

static
PRED_IMPL("setlocale", 3, setlocale, 0)
{ return notImplemented("setlocale", 3);
}

#endif


		 /*******************************
		 *      PUBLISH PREDICATES	*
		 *******************************/

BeginPredDefs(ctype)
  PRED_DEF("char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
  PRED_DEF("code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
  PRED_DEF("setlocale", 3, setlocale, 0)
  PRED_DEF("downcase_atom", 2, downcase_atom, 0)
  PRED_DEF("upcase_atom", 2, upcase_atom, 0)
  PRED_DEF("normalize_space", 2, normalize_space, 0)
EndPredDefs


		 /*******************************
		 *	PROLOG CHARACTERS	*
		 *******************************/

const char _PL_char_types[] = {
/* ^@  ^A  ^B  ^C  ^D  ^E  ^F  ^G  ^H  ^I  ^J  ^K  ^L  ^M  ^N  ^O    0-15 */
   CT, CT, CT, CT, CT, CT, CT, CT, CT, SP, SP, SP, SP, SP, CT, CT,
/* ^P  ^Q  ^R  ^S  ^T  ^U  ^V  ^W  ^X  ^Y  ^Z  ^[  ^\  ^]  ^^  ^_   16-31 */
   CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
/* sp   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   32-47 */
   SP, SO, DQ, SY, SY, SO, SY, SQ, PU, PU, SY, SY, PU, SY, SY, SY,
/*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?   48-63 */
   DI, DI, DI, DI, DI, DI, DI, DI, DI, DI, SY, SO, SY, SY, SY, SY,
/*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   64-79 */
   SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
/*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _   80-95 */
   UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, PU, SY, PU, SY, UC,
/*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   96-111 */
   SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
/*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  ^?   112-127 */
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, PU, PU, PU, SY, CT,
			  /* 128-159 (C1 controls) */
   CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
   CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
			  /* 160-255 (G1 graphics) */
			  /* ISO Latin 1 (=Unicode) is assumed */
/*  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   SP, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SO, SY, SY, /*00AX*/
   SY, SY, SO, SO, SY, LC, SY, SY, SY, SO, LC, SY, SO, SO, SO, SY, /*00BX*/
   UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, /*00CX*/
   UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC, /*00DX*/
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, /*00EX*/
   LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC  /*00FX*/
};


typedef struct
{ const char *name;
  IOENC encoding;
} enc_map;

static const enc_map map[] =
{ { "UTF-8",	  ENC_UTF8 },
  { "utf8",	  ENC_UTF8 },
  { "ISO8859-1",  ENC_ISO_LATIN_1 },
  { "ISO8859_1",  ENC_ISO_LATIN_1 },
  { "iso88591",   ENC_ISO_LATIN_1 },
  { "iso_8859_1", ENC_ISO_LATIN_1 },
  { NULL, ENC_UNKNOWN }
};

IOENC
initEncoding(void)
{ GET_LD
#if HAVE_SETLOCALE
    char *enc;
#endif

  if ( LD )
  { if ( !LD->encoding )
      {

      if ( !init_locale() )
      { LD->encoding = ENC_ISO_LATIN_1;
#if HAVE_SETLOCALE
      } else if ( (enc = setlocale(LC_CTYPE, NULL)) )
      {  char *encp;
	LD->encoding = ENC_ANSI;		/* text encoding */

	if ( (encp = strchr(enc, '.')) )
	{ const enc_map *m;
	  encp++;				/* skip '.' */

	  for ( m=map; m->name; m++ )
	  { if ( strcmp(encp, m->name) == 0 )
	    { LD->encoding = m->encoding;
	      break;
	    }
	  }
	} else {
	  /* vsc: just test LC_CTYPE, works on Macs */
	  const enc_map *m;

	  for ( m=map; m->name; m++ )
	  { if ( strcmp(enc, m->name) == 0 )
	    { LD->encoding = m->encoding;
	      break;
	    }
	  }
	}
#endif
      } else { LD->encoding = ENC_ISO_LATIN_1;
      }
    }

    return LD->encoding;
  }

  return ENC_ANSI;
}


void
initCharTypes(void)
{ initEncoding();
}