encoding fixes

This commit is contained in:
Vítor Santos Costa 2016-02-14 04:14:20 +00:00
parent 7b8c1815ae
commit 42091deffc

View File

@ -20,13 +20,13 @@
#define ENCODING_H 1 #define ENCODING_H 1
/// Text/stream encodings.
/// ENC_OCTET is 0; every other encoding is a distinct single bit, so the
/// numeric values below are part of the interface and must not change.
typedef enum {
  ENC_OCTET = 0,          /// binary files
  ENC_ISO_LATIN1 = 1,     /// US+West Europe
  ENC_ISO_ASCII = 2,      /// US only
  ENC_ISO_ANSI = 4,       /// Who cares
  ENC_ISO_UTF8 = 8,       /// Most everyone nowadays
  ENC_UTF16_BE = 16,      /// People who made a mistake
  ENC_UTF16_LE = 32,      /// People who made the same mistake
  ENC_ISO_UTF32_BE = 64,  /// nobody
  ENC_ISO_UTF32_LE = 128, /// yes, nobody
} encoding_t;
@ -40,21 +40,21 @@ typedef enum {
#ifdef YAP_H #ifdef YAP_H
/// read the current environment, as set by the user or as Initial /// read the current environment, as set by the user or as Initial
encoding_t Yap_DefaultEncoding( void ); encoding_t Yap_DefaultEncoding(void);
void Yap_SetDefaultEncoding(encoding_t new_encoding); void Yap_SetDefaultEncoding(encoding_t new_encoding);
#if HAVE_XLOCALE_H #if HAVE_XLOCALE_H
/// Sequential numbering (0, 1, 2, ...) of the encodings, listed in the same
/// order as the ENC_* constants of encoding_t.
typedef enum {
  SEQ_ENC_OCTET,        /// binary files
  SEQ_ENC_ISO_LATIN1,   /// US+West Europe
  SEQ_ENC_ISO_ASCII,    /// US only
  SEQ_ENC_ISO_ANSI,     /// Who cares
  SEQ_ENC_ISO_UTF8,     /// Most everyone nowadays
  SEQ_ENC_UTF16_BE,     /// People who made a mistake
  SEQ_ENC_UTF16_LE,     /// People who made the same mistake
  SEQ_ENC_ISO_UTF32_BE, /// nobody
  SEQ_ENC_ISO_UTF32_LE  /// yes, nobody
} seq_encoding_t;
/// convert from unary to binary representation. /// convert from unary to binary representation.
static inline seq_encoding_t seq_encoding(encoding_t inp) { static inline seq_encoding_t seq_encoding(encoding_t inp) {
@ -65,60 +65,84 @@ static inline seq_encoding_t seq_encoding(encoding_t inp) {
#else #else
unsigned int out; unsigned int out;
// supports max 16 different encodings. // supports max 16 different encodings.
if (inp==0) if (inp == 0)
return 0L; return 0L;
// if (inp & ((CELL)0xffffL << 16)) {inp >>= 16; out += 16;} // if (inp & ((CELL)0xffffL << 16)) {inp >>= 16; out += 16;}
if (inp & ((CELL)0xffL << 8)) {inp >>= 8; out += 8;} if (inp & ((CELL)0xffL << 8)) {
if (inp & ((CELL)0xfL << 4)) {inp >>= 4; out += 4;} inp >>= 8;
if (inp & ((CELL)0x3L << 2)) {inp >>= 2; out += 2;} out += 8;
if (inp & ((CELL)0x1 << 1)) out++; }
if (inp & ((CELL)0xfL << 4)) {
inp >>= 4;
out += 4;
}
if (inp & ((CELL)0x3L << 2)) {
inp >>= 2;
out += 2;
}
if (inp & ((CELL)0x1 << 1))
out++;
#endif #endif
return out; return out;
} }
extern xlocale enc_locales[SEQ_ENC_ISO_UTF32_LE+1]; extern xlocale enc_locales[SEQ_ENC_ISO_UTF32_LE + 1];
#endif #endif
/// Map an encoding_t value to its textual name.
///
/// @param enc the encoding to name.
/// @return a pointer to a constant string. Values that are not one of the
///         ENC_* constants yield the "thanks for watching!!" placeholder.
static inline const char *enc_name(encoding_t enc) {
  switch (enc) {
  case ENC_OCTET:
    return "octet";
  case ENC_ISO_LATIN1:
    return "iso_latin_1";
  case ENC_ISO_ASCII:
    return "ascii";
  case ENC_ISO_ANSI:
    // NOTE(review): named "octet" rather than "iso_ansi"; kept exactly as
    // found -- confirm this is intentional.
    return "octet";
  case ENC_ISO_UTF8:
    return "utf8";
  case ENC_UTF16_BE:
    return "utf16_be";
  case ENC_UTF16_LE:
    return "utf16_le";
  case ENC_ISO_UTF32_BE:
    return "utf32_be";
  case ENC_ISO_UTF32_LE:
    return "utf32_le";
  default:
    return "thanks for watching!!";
  }
}
static inline static inline encoding_t enc_id(char *s) {
encoding_t enc_id(char *s) {
{ if (!strcmp(s, "iso_utf8"))
{ return ENC_ISO_UTF8;
if (!strcmp(s, "iso_utf8")) return ENC_ISO_UTF8; if (!strcmp(s, "utf8"))
if (!strcmp(s, "utf16_be")) return ENC_UTF16_BE; return ENC_ISO_UTF8;
if (!strcmp(s, "utf16_le")) return ENC_UTF16_LE; if (!strcmp(s, "utf16_le"))
if (!strcmp(s, "octet")) return ENC_OCTET; return ENC_UTF16_LE;
if (!strcmp(s, "iso_latin_1")) return ENC_ISO_LATIN1; if (!strcmp(s, "octet"))
if (!strcmp(s, "iso_ascii")) return ENC_ISO_ASCII; return ENC_OCTET;
if (!strcmp(s, "iso_ansi")) return ENC_ISO_ANSI; if (!strcmp(s, "iso_latin_1"))
if (!strcmp(s, "utf32_be")) return ENC_ISO_UTF32_BE; return ENC_ISO_LATIN1;
if (!strcmp(s, "utf32_le")) return ENC_ISO_UTF32_LE; if (!strcmp(s, "iso_ascii"))
if (!strcmp(s, "default")) return Yap_DefaultEncoding(); return ENC_ISO_ASCII;
else { if (!strcmp(s, "iso_ansi"))
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, 0, "bad encoding %s", s); return ENC_ISO_ANSI;
return ENC_OCTET; if (!strcmp(s, "utf32_be"))
} return ENC_ISO_UTF32_BE;
if (!strcmp(s, "utf32_le"))
return ENC_ISO_UTF32_LE;
if (!strcmp(s, "default"))
return Yap_DefaultEncoding();
else {
Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, 0, "bad encoding %s", s);
return ENC_OCTET;
} }
}
} }
#endif #endif
#endif #endif