This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/os/pl-ctype.c

997 lines
22 KiB
C

/* Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: J.Wielemaker@vu.nl
WWW: http://www.swi-prolog.org
Copyright (C): 1985-2013, University of Amsterdam
VU University Amsterdam
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "pl-incl.h"
#include <ctype.h>
#include "pl-ctype.h"
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This module defines:
char_type(?Char, ?Type)
code_type(?Char, ?Type)
See manual for details.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#define CTX_CHAR 0 /* Class(Char) */
#define CTX_CODE 1 /* Class(Int) */
typedef struct
{ atom_t name; /* name of the class */
int (*test)(wint_t chr); /* boolean */
int (*reverse)(wint_t chr); /* reverse mapping */
short arity; /* arity of class (i.e. lower('A')) */
short ctx_type; /* CTX_* */
} char_type;
#define ENUM_NONE 0x00
#define ENUM_CHAR 0x01
#define ENUM_CLASS 0x02
#define ENUM_BOTH 0x03
typedef struct
{ int current; /* current character */
const char_type *class; /* current class */
int do_enum; /* what to enumerate */
} generator;
static int
iswhite(wint_t chr)
{ return chr == ' ' || chr == '\t';
}
static int
fiscsym(wint_t chr)
{ return iswalnum(chr) || chr == '_';
}
static int
fiscsymf(wint_t chr)
{ return iswalpha(chr) || chr == '_';
}
static int
iseof(wint_t chr)
{ return chr == (wint_t)-1;
}
static int
iseol(wint_t chr)
{ return chr >= 10 && chr <= 13;
}
static int
isnl(wint_t chr)
{ return chr == '\n';
}
static int
isperiod(wint_t chr)
{ return chr && strchr(".?!", chr) != NULL;
}
static int
isquote(wint_t chr)
{ return chr && strchr("'`\"", chr) != NULL;
}
static int
fupper(wint_t chr)
{ return iswlower(chr) ? (int)towupper(chr) : -1;
}
static int
flower(wint_t chr)
{ return iswupper(chr) ? (int)towlower(chr) : -1;
}
static int
ftoupper(wint_t chr)
{ return towupper(chr);
}
static int
ftolower(wint_t chr)
{ return towlower(chr);
}
static int
fparen(wint_t chr)
{ switch(chr)
{ case '(':
return ')';
case '{':
return '}';
case '[':
return ']';
default:
return -1;
}
}
static int
rparen(wint_t chr)
{ switch(chr)
{ case ')':
return '(';
case '}':
return '{';
case ']':
return '[';
default:
return -1;
}
}
static int
fdigit(wint_t chr)
{ if ( chr <= 0xff && isdigit(chr) )
return chr - '0';
return -1;
}
static int
rdigit(wint_t d)
{ if ( (int)d >= 0 && d <= 9 )
return d+'0';
return -1;
}
static int
fxdigit(wint_t chr)
{ if ( chr > 0xff )
return -1;
if ( isdigit(chr) )
return chr - '0';
if ( chr >= 'a' && chr <= 'f' )
return chr - 'a' + 10;
if ( chr >= 'A' && chr <= 'F' )
return chr - 'A' + 10;
return -1;
}
static int
rxdigit(wint_t d)
{ if ( (int)d >= 0 && d <= 9 )
return d+'0';
if ( d >= 10 && d <= 15 )
return d-10+'a';
return -1;
}
#define mkfunction(name) \
static int f ## name(wint_t chr) { return name(chr); }
mkfunction(iswalnum)
mkfunction(iswalpha)
mkfunction(isascii)
mkfunction(iswcntrl)
mkfunction(iswdigit)
mkfunction(iswgraph)
mkfunction(iswlower)
mkfunction(iswupper)
mkfunction(iswpunct)
mkfunction(iswspace)
static const char_type char_types[] =
{ { ATOM_alnum, fiswalnum },
{ ATOM_alpha, fiswalpha },
{ ATOM_csym, fiscsym },
{ ATOM_csymf, fiscsymf },
{ ATOM_prolog_var_start, f_is_prolog_var_start },
{ ATOM_prolog_atom_start, f_is_prolog_atom_start },
{ ATOM_prolog_identifier_continue, f_is_prolog_identifier_continue },
{ ATOM_prolog_symbol, f_is_prolog_symbol },
{ ATOM_csymf, fiscsymf },
{ ATOM_ascii, fisascii },
{ ATOM_white, iswhite },
{ ATOM_cntrl, fiswcntrl },
{ ATOM_digit, fiswdigit },
{ ATOM_graph, fiswgraph },
{ ATOM_lower, fiswlower },
{ ATOM_upper, fiswupper },
{ ATOM_punct, fiswpunct },
{ ATOM_space, fiswspace },
{ ATOM_end_of_file, iseof },
{ ATOM_end_of_line, iseol },
{ ATOM_newline, isnl },
{ ATOM_period, isperiod },
{ ATOM_quote, isquote },
{ ATOM_lower, fupper, flower, 1, CTX_CHAR },
{ ATOM_upper, flower, fupper, 1, CTX_CHAR },
{ ATOM_to_lower, ftoupper, ftolower, 1, CTX_CHAR },
{ ATOM_to_upper, ftolower, ftoupper, 1, CTX_CHAR },
{ ATOM_paren, fparen, rparen, 1, CTX_CHAR },
{ ATOM_digit, fdigit, rdigit, 1, CTX_CODE },
{ ATOM_xdigit, fxdigit, rxdigit, 1, CTX_CODE },
{ NULL_ATOM, NULL }
};
static const char_type *
char_type_by_name(atom_t name, int arity)
{ const char_type *cc;
for(cc = char_types; cc->name; cc++)
{ if ( cc->name == name && cc->arity == arity )
return cc;
}
return NULL;
}
static int
advanceGen(generator *gen)
{ if ( gen->do_enum & ENUM_CHAR )
{ if ( ++gen->current == 256 )
fail;
} else
{ gen->class++;
if ( !gen->class->name )
fail;
}
succeed;
}
static int
unify_char_type(term_t type, const char_type *ct, int context, int how)
{ GET_LD
if ( ct->arity == 0 )
{ return PL_unify_atom(type, ct->name);
} else /*if ( ct->arity == 1 )*/
{ if ( PL_unify_functor(type, PL_new_functor(ct->name, 1)) )
{ term_t a = PL_new_term_ref();
_PL_get_arg(1, type, a);
if ( ct->ctx_type == CTX_CHAR )
return PL_unify_char(a, context, how);
else
return PL_unify_integer(a, context);
}
}
fail;
}
static foreign_t
do_char_type(term_t chr, term_t class, control_t h, int how)
{ GET_LD
generator *gen;
fid_t fid;
switch( ForeignControl(h) )
{ case FRG_FIRST_CALL:
{ const char_type *cc = NULL;
int c;
int do_enum = ENUM_NONE;
atom_t cn;
int arity;
if ( PL_is_variable(chr) )
do_enum |= ENUM_CHAR;
if ( PL_is_variable(class) )
do_enum |= ENUM_CLASS;
if ( do_enum == ENUM_BOTH )
return PL_error("char_type", 2, NULL, ERR_INSTANTIATION);
if ( !(do_enum & ENUM_CHAR) )
{ if ( !PL_get_char(chr, &c, TRUE) )
fail;
if ( c == -1 )
return PL_unify_atom(class, ATOM_end_of_file);
}
if ( !(do_enum & ENUM_CLASS) )
{ if ( !PL_get_name_arity(class, &cn, &arity) ||
!(cc = char_type_by_name(cn, arity)) )
return PL_error("char_type", 2, NULL,
ERR_TYPE, ATOM_char_type, class);
}
if ( do_enum == ENUM_NONE )
{ if ( arity == 0 )
return (*cc->test)((wint_t)c) ? TRUE : FALSE;
else
{ int rval = (*cc->test)((wint_t)c);
if ( rval >= 0 )
{ term_t a = PL_new_term_ref();
int ok;
_PL_get_arg(1, class, a);
if ( cc->ctx_type == CTX_CHAR )
ok = PL_unify_char(a, rval, how);
else
ok = PL_unify_integer(a, rval);
if ( ok )
return TRUE;
else
do_enum = ENUM_CHAR; /* try the other way around */
} else
fail;
}
}
if ( do_enum == ENUM_CHAR && arity == 1 )
{ term_t a = PL_new_term_ref(); /* char_type(X, lower('A')) */
int ca;
_PL_get_arg(1, class, a);
if ( !PL_is_variable(a) )
{ if ( PL_get_char(a, &ca, FALSE) )
{ int c = (*cc->reverse)((wint_t)ca);
if ( c < 0 )
fail;
return PL_unify_char(chr, c, how);
}
fail; /* error */
}
}
gen = allocForeignState(sizeof(*gen));
gen->do_enum = do_enum;
if ( do_enum & ENUM_CHAR )
{ gen->class = cc;
gen->current = -1;
} else if ( do_enum & ENUM_CLASS )
{ gen->class = char_types;
gen->current = c;
}
break;
}
case FRG_REDO:
gen = ForeignContextPtr(h);
break;
case FRG_CUTTED:
gen = ForeignContextPtr(h);
if ( gen )
freeForeignState(gen, sizeof(*gen));
default:
succeed;
}
if ( !(fid = PL_open_foreign_frame()) )
goto error;
for(;;)
{ int rval;
if ( (rval = (*gen->class->test)((wint_t)gen->current)) )
{ if ( gen->do_enum & ENUM_CHAR )
{ if ( !PL_unify_char(chr, gen->current, how) )
goto next;
}
if ( gen->class->arity > 0 )
{ if ( rval < 0 ||
!unify_char_type(class, gen->class, rval, how) )
goto next;
} else if ( gen->do_enum & ENUM_CLASS )
{ if ( !unify_char_type(class, gen->class, rval, how) )
goto next;
}
if ( advanceGen(gen) ) /* ok, found one */
{ ForeignRedoPtr(gen);
} else
{ freeForeignState(gen, sizeof(*gen)); /* the only one */
succeed;
}
}
next:
PL_rewind_foreign_frame(fid);
if ( !advanceGen(gen) )
break;
}
error:
freeForeignState(gen, sizeof(*gen));
fail;
}
static
PRED_IMPL("char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
/** @pred char_type(? _Char_, ? _Type_)
Tests or generates alternative _Types_ or _Chars_. The
character-types are inspired by the standard `C`
`<ctype.h>` primitives.
+ `alnum`
_Char_ is a letter (upper- or lowercase) or digit.
+ `alpha`
_Char_ is a letter (upper- or lowercase).
+ `csym`
_Char_ is a letter (upper- or lowercase), digit or the underscore (_). These are valid C- and Prolog symbol characters.
+ `csymf`
_Char_ is a letter (upper- or lowercase) or the underscore (_). These are valid first characters for C- and Prolog symbols
+ `ascii`
_Char_ is a 7-bits ASCII character (0..127).
+ `white`
_Char_ is a space or tab. E.i. white space inside a line.
+ `cntrl`
_Char_ is an ASCII control-character (0..31).
+ `digit`
_Char_ is a digit.
+ `digit( _Weight_)`
_Char_ is a digit with value _Weight_. I.e. `char_type(X, digit(6))` yields X = aaasaá'6'. Useful for parsing numbers.
+ `xdigit( _Weight_)`
_Char_ is a hexa-decimal digit with value _Weight_. I.e. char_type(a, xdigit(X) yields X = '10'. Useful for parsing numbers.
+ `graph`
_Char_ produces a visible mark on a page when printed. Note that the space is not included!
+ `lower`
_Char_ is a lower-case letter.
+ `lower(Upper)`
_Char_ is a lower-case version of _Upper_. Only true if _Char_ is lowercase and _Upper_ uppercase.
+ `to_lower(Upper)`
_Char_ is a lower-case version of Upper. For non-letters, or letter without case, _Char_ and Lower are the same. See also upcase_atom/2 and downcase_atom/2.
+ `upper`
_Char_ is an upper-case letter.
+ `upper(Lower)`
_Char_ is an upper-case version of Lower. Only true if _Char_ is uppercase and Lower lowercase.
+ `to_upper(Lower)`
_Char_ is an upper-case version of Lower. For non-letters, or letter without case, _Char_ and Lower are the same. See also upcase_atom/2 and downcase_atom/2.
+ `punct`
_Char_ is a punctuation character. This is a graph character that is not a letter or digit.
+ `space`
_Char_ is some form of layout character (tab, vertical-tab, newline, etc.).
+ `end_of_file`
_Char_ is -1.
+ `end_of_line`
_Char_ ends a line (ASCII: 10..13).
+ `newline`
_Char_ is a the newline character (10).
+ `period`
_Char_ counts as the end of a sentence (.,!,?).
+ `quote`
_Char_ is a quote-character.
+ `paren(Close)`
_Char_ is an open-parenthesis and Close is the corresponding close-parenthesis.
+ `code_type(? _Code_, ? _Type_)`
As char_type/2, but uses character-codes rather than
one-character atoms. Please note that both predicates are as
flexible as possible. They handle either representation if the
argument is instantiated and only will instantiate with an integer
code or one-character atom depending of the version used. See also
the prolog-flag double_quotes and the built-in predicates
atom_chars/2 and atom_codes/2.
*/
{ return do_char_type(A1, A2, PL__ctx, PL_CHAR);
}
static
PRED_IMPL("code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
{ return do_char_type(A1, A2, PL__ctx, PL_CODE);
}
#if 0
static
PRED_IMPL("iswctype", 2, iswctype, 0)
{ char *s;
int chr;
wctype_t t;
if ( !PL_get_char_ex(A1, &chr, FALSE) ||
!PL_get_chars(A2, &s, CVT_ATOM|CVT_EXCEPTION) )
return FALSE;
if ( !(t=wctype(s)) )
return PL_error(NULL, 0, NULL, ERR_EXISTENCE, ATOM_type, A2);
return iswctype(chr, t) ? TRUE : FALSE;
}
#endif
static int
init_tout(PL_chars_t *t, size_t len)
{ switch(t->encoding)
{ case ENC_ISO_LATIN_1:
if ( len < sizeof(t->buf) )
{ t->text.t = t->buf;
t->storage = PL_CHARS_LOCAL;
} else
{ t->text.t = PL_malloc(len);
t->storage = PL_CHARS_MALLOC;
}
succeed;
case ENC_WCHAR:
if ( len*sizeof(pl_wchar_t) < sizeof(t->buf) )
{ t->text.w = (pl_wchar_t*)t->buf;
t->storage = PL_CHARS_LOCAL;
} else
{ t->text.w = PL_malloc(len*sizeof(pl_wchar_t));
t->storage = PL_CHARS_MALLOC;
}
succeed;
default:
assert(0);
fail;
}
}
static inline wint_t
get_chr_from_text(const PL_chars_t *t, size_t index)
{ switch(t->encoding)
{ case ENC_ISO_LATIN_1:
return t->text.t[index]&0xff;
case ENC_WCHAR:
return t->text.w[index];
default:
assert(0);
return 0;
}
}
static foreign_t
modify_case_atom(term_t in, term_t out, int down)
{ GET_LD
PL_chars_t tin, tout;
if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
return FALSE;
if ( PL_get_text(out, &tout, CVT_ATOMIC) )
{ unsigned int i;
if ( tin.length != tout.length )
fail;
for(i=0; i<tin.length; i++)
{ wint_t ci = get_chr_from_text(&tin, i);
wint_t co = get_chr_from_text(&tout, i);
if ( down )
{ if ( co != towlower(ci) )
fail;
} else
{ if ( co != towupper(ci) )
fail;
}
}
succeed;
} else if ( PL_is_variable(out) )
{ unsigned int i;
tout.encoding = tin.encoding;
tout.length = tin.length;
tout.canonical = FALSE; /* or TRUE? Can WCHAR map to ISO? */
init_tout(&tout, tin.length);
if ( tin.encoding == ENC_ISO_LATIN_1 )
{ const unsigned char *in = (const unsigned char*)tin.text.t;
if ( down )
{ for(i=0; i<tin.length; i++)
{ wint_t c = towlower(in[i]);
if ( c > 255 )
{ PL_promote_text(&tout);
for( ; i<tin.length; i++)
{ tout.text.w[i] = towlower(in[i]);
}
break;
} else
{ tout.text.t[i] = (char)c;
}
}
} else /* upcase */
{ for(i=0; i<tin.length; i++)
{ wint_t c = towupper(in[i]);
if ( c > 255 )
{ PL_promote_text(&tout);
for( ; i<tin.length; i++)
{ tout.text.w[i] = towupper(in[i]);
}
break;
} else
{ tout.text.t[i] = (char)c;
}
}
}
} else
{ if ( down )
{ for(i=0; i<tin.length; i++)
{ tout.text.w[i] = towlower(tin.text.w[i]);
}
} else
{ for(i=0; i<tin.length; i++)
{ tout.text.w[i] = towupper(tin.text.w[i]);
}
}
}
PL_unify_text(out, 0, &tout, PL_ATOM);
PL_free_text(&tout);
succeed;
} else
{ return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_atom, out);
}
}
static
PRED_IMPL("downcase_atom", 2, downcase_atom, 0)
{ return modify_case_atom(A1, A2, TRUE);
}
static
PRED_IMPL("upcase_atom", 2, upcase_atom, 0)
{ return modify_case_atom(A1, A2, FALSE);
}
/*******************************
* WHITE SPACE *
*******************************/
static int
write_normalize_space(IOSTREAM *out, term_t in)
{ GET_LD
PL_chars_t tin;
size_t i, end;
if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
return FALSE;
end = tin.length;
i = 0;
while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
i++;
while( i<end )
{ wint_t c;
while(i<end && !unicode_separator((c=get_chr_from_text(&tin, i))))
{ if ( Sputcode(c, out) < 0 )
fail;
i++;
}
while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
i++;
if ( i < end )
{ if ( Sputcode(' ', out) < 0 )
fail;
}
}
succeed;
}
static
PRED_IMPL("normalize_space", 2, normalize_space, 0)
{ redir_context ctx;
word rc;
if ( (rc = setupOutputRedirect(A1, &ctx, FALSE)) )
{ if ( (rc = write_normalize_space(ctx.stream, A2)) )
rc = closeOutputRedirect(&ctx);
else
discardOutputRedirect(&ctx);
}
return rc;
}
/*******************************
* LOCALE *
*******************************/
#if O_LOCALE
#include <locale.h>
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Note: on some installations, locale doesn't work correctly. Printing a
message isn't really cute. It would be better to use printMessage(), but
the system isn't yet initialised far enough. Maybe we should store the
failure and print a message at the end of the initialisation?
We only return FALSE if LC_CTYPE fails. This is a serious indication
that locale support is broken. We don't depend too much on the others,
so we ignore possible problems.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static int
init_locale(void)
{ int rc = TRUE;
if ( !setlocale(LC_CTYPE, "") )
{ rc = FALSE;
DEBUG(0, Sdprintf("Failed to set LC_CTYPE locale\n"));
}
if ( !setlocale(LC_TIME, "") )
{ DEBUG(0, Sdprintf("Failed to set LC_TIME locale\n"));
}
if ( !setlocale(LC_COLLATE, "") )
{ DEBUG(0, Sdprintf("Failed to set LC_COLLATE locale\n"));
}
return rc;
}
typedef struct
{ int category;
const char *name;
} lccat;
static lccat lccats[] =
{ { LC_ALL, "all" },
{ LC_COLLATE, "collate" },
{ LC_CTYPE, "ctype" },
#ifdef LC_MESSAGES
{ LC_MESSAGES, "messages" },
#endif
{ LC_MONETARY, "monetary" },
{ LC_NUMERIC, "numeric" },
{ LC_TIME, "time" },
{ 0, NULL }
};
static
PRED_IMPL("setlocale", 3, setlocale, 0)
{ PRED_LD
char *what;
char *locale;
const lccat *lcp;
if ( !PL_get_chars(A1, &what, CVT_ATOM|CVT_EXCEPTION) )
fail;
if ( PL_is_variable(A3) )
locale = NULL;
else if ( !PL_get_chars(A3, &locale, CVT_ATOM|CVT_EXCEPTION) )
fail;
for ( lcp = lccats; lcp->name; lcp++ )
{ if ( streq(lcp->name, what) )
{ char *old = setlocale(lcp->category, NULL);
if ( !PL_unify_chars(A2, PL_ATOM, -1, old) )
fail;
if ( PL_compare(A2, A3) != 0 )
{ if ( !setlocale(lcp->category, locale) )
{ if ( errno == ENOENT )
return PL_existence_error("locale", A3);
return PL_error(NULL, 0, MSG_ERRNO, ERR_SYSCALL, "setlocale");
}
}
#ifdef O_LOCALE
updateLocale(lcp->category, locale);
#endif
succeed;
}
}
return PL_domain_error("category", A1);
}
#else
#define init_locale() 1
static
PRED_IMPL("setlocale", 3, setlocale, 0)
{ return notImplemented("setlocale", 3);
}
#endif
/*******************************
* PUBLISH PREDICATES *
*******************************/
BeginPredDefs(ctype)
PRED_DEF("char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
PRED_DEF("code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
PRED_DEF("setlocale", 3, setlocale, 0)
PRED_DEF("downcase_atom", 2, downcase_atom, 0)
PRED_DEF("upcase_atom", 2, upcase_atom, 0)
PRED_DEF("normalize_space", 2, normalize_space, 0)
EndPredDefs
/*******************************
* PROLOG CHARACTERS *
*******************************/
const char _PL_char_types[] = {
/* ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N ^O 0-15 */
CT, CT, CT, CT, CT, CT, CT, CT, CT, SP, SP, SP, SP, SP, CT, CT,
/* ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ 16-31 */
CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
/* sp ! " # $ % & ' ( ) * + , - . / 32-47 */
SP, SO, DQ, SY, SY, SO, SY, SQ, PU, PU, SY, SY, PU, SY, SY, SY,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 48-63 */
DI, DI, DI, DI, DI, DI, DI, DI, DI, DI, SY, SO, SY, SY, SY, SY,
/* @ A B C D E F G H I J K L M N O 64-79 */
SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
/* P Q R S T U V W X Y Z [ \ ] ^ _ 80-95 */
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, PU, SY, PU, SY, UC,
/* ` a b c d e f g h i j k l m n o 96-111 */
SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
/* p q r s t u v w x y z { | } ~ ^? 112-127 */
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, PU, PU, PU, SY, CT,
/* 128-159 (C1 controls) */
CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
/* 160-255 (G1 graphics) */
/* ISO Latin 1 (=Unicode) is assumed */
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
SP, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SO, SY, SY, /*00AX*/
SY, SY, SO, SO, SY, LC, SY, SY, SY, SO, LC, SY, SO, SO, SO, SY, /*00BX*/
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, /*00CX*/
UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC, /*00DX*/
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, /*00EX*/
LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC /*00FX*/
};
typedef struct
{ const char *name;
IOENC encoding;
} enc_map;
static const enc_map map[] =
{ { "UTF-8", ENC_UTF8 },
{ "utf8", ENC_UTF8 },
{ "ISO8859-1", ENC_ISO_LATIN_1 },
{ "ISO8859_1", ENC_ISO_LATIN_1 },
{ "iso88591", ENC_ISO_LATIN_1 },
{ "iso_8859_1", ENC_ISO_LATIN_1 },
{ NULL, ENC_UNKNOWN }
};
IOENC
initEncoding(void)
{ GET_LD
#if HAVE_SETLOCALE
char *enc;
#endif
if ( LD )
{ if ( !LD->encoding )
{
if ( !init_locale() )
{ LD->encoding = ENC_ISO_LATIN_1;
#if HAVE_SETLOCALE
} else if ( (enc = setlocale(LC_CTYPE, NULL)) )
{ char *encp;
LD->encoding = ENC_ANSI; /* text encoding */
if ( (encp = strchr(enc, '.')) )
{ const enc_map *m;
encp++; /* skip '.' */
for ( m=map; m->name; m++ )
{ if ( strcmp(encp, m->name) == 0 )
{ LD->encoding = m->encoding;
break;
}
}
} else {
/* vsc: just test LC_CTYPE, works on Macs */
const enc_map *m;
for ( m=map; m->name; m++ )
{ if ( strcmp(enc, m->name) == 0 )
{ LD->encoding = m->encoding;
break;
}
}
}
#endif
} else { LD->encoding = ENC_ISO_LATIN_1;
}
}
return LD->encoding;
}
return ENC_ANSI;
}
void
initCharTypes(void)
{ initEncoding();
}