2015-06-18 01:34:39 +01:00
|
|
|
/*************************************************************************
*                                                                        *
*        YAP Prolog     %W% %G%                                          *
*                                                                        *
*       Yap Prolog was developed at NCCUP - Universidade do Porto        *
*                                                                        *
* Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003       *
*                                                                        *
**************************************************************************
*                                                                        *
* File:         yapio.h                                                  *
* Last rev:     22/1/03                                                  *
* mods:                                                                  *
* comments:     UNICODE encoding support (based on SWI-Prolog)           *
*                                                                        *
*************************************************************************/
|
|
|
|
|
|
|
|
#ifndef ENCODING_H
|
|
|
|
|
|
|
|
#define ENCODING_H 1
|
|
|
|
|
|
|
|
/**
 * Text encodings, in "one bit per encoding" form.
 *
 * ENC_OCTET is 0; every other enumerator is a distinct power of two, so
 * the position of its single set bit identifies the encoding.
 */
typedef enum {
  ENC_OCTET = 0,          ///< binary files
  ENC_ISO_LATIN1 = 1,     ///< US+West Europe
  ENC_ISO_ASCII = 2,      ///< US only
  ENC_ISO_ANSI = 4,       ///< Who cares
  ENC_ISO_UTF8 = 8,       ///< Most everyone nowadays
  ENC_UTF16_BE = 16,      ///< People who made a mistake
  ENC_UTF16_LE = 32,      ///< People who made the same mistake
  ENC_ISO_UTF32_BE = 64,  ///< nobody
  ENC_ISO_UTF32_LE = 128, ///< yes, nobody
  ENC_UCS2_BE = 256,      ///< nobody
  ENC_UCS2_LE = 512,      ///< yes, nobody
} encoding_t;
|
2015-10-08 02:23:45 +01:00
|
|
|
|
2016-02-18 12:10:58 +00:00
|
|
|
/// ENC_WCHAR names the UTF-32 encoding whose byte order is selected by
/// WORDS_BIGENDIAN: big-endian when that macro is non-zero, little-endian
/// otherwise (including when it is undefined).
/// NOTE(review): presumably this is the encoding assumed for wchar_t data
/// and WORDS_BIGENDIAN comes from the configure step -- confirm.
#if WORDS_BIGENDIAN
#define ENC_WCHAR ENC_ISO_UTF32_BE
#else
#define ENC_WCHAR ENC_ISO_UTF32_LE
#endif
|
|
|
|
|
|
|
|
#ifdef YAP_H
|
|
|
|
|
2015-06-18 01:34:39 +01:00
|
|
|
/// read the current environment, as set by the user or as Initial
|
2016-02-14 04:14:20 +00:00
|
|
|
encoding_t Yap_DefaultEncoding(void);
|
2016-04-14 12:00:09 +01:00
|
|
|
encoding_t Yap_SystemEncoding(void);
|
2015-06-18 01:34:39 +01:00
|
|
|
void Yap_SetDefaultEncoding(encoding_t new_encoding);
|
|
|
|
|
2015-10-08 02:23:45 +01:00
|
|
|
#if HAVE_XLOCALE_H
|
|
|
|
/**
 * Text encodings, in dense sequential form (suitable as an array index).
 *
 * Each value is the 1-based position of the single bit set in the
 * matching encoding_t enumerator (0 for ENC_OCTET), which is what
 * seq_encoding() computes. The values are spelled out so that the
 * correspondence cannot drift silently.
 */
typedef enum {
  SEQ_ENC_OCTET = 0,        ///< binary files
  SEQ_ENC_ISO_LATIN1 = 1,   ///< US+West Europe
  SEQ_ENC_ISO_ASCII = 2,    ///< US only
  SEQ_ENC_ISO_ANSI = 3,     ///< Who cares
  SEQ_ENC_ISO_UTF8 = 4,     ///< Most everyone nowadays
  SEQ_ENC_UTF16_BE = 5,     ///< People who made a mistake
  SEQ_ENC_UTF16_LE = 6,     ///< People who made the same mistake
  SEQ_ENC_ISO_UTF32_BE = 7, ///< nobody
  SEQ_ENC_ISO_UTF32_LE = 8, ///< yes, nobody
  SEQ_ENC_UCS2_BE = 9,      ///< nobody (counterpart of ENC_UCS2_BE)
  SEQ_ENC_UCS2_LE = 10      ///< yes, nobody (counterpart of ENC_UCS2_LE)
} seq_encoding_t;
|
2015-10-08 02:23:45 +01:00
|
|
|
|
|
|
|
/// convert from unary to binary representation.
|
|
|
|
static inline seq_encoding_t seq_encoding(encoding_t inp) {
|
|
|
|
#if HAVE__BUILTIN_FFSLL
|
|
|
|
return __builtin_ffsll(inp);
|
|
|
|
#elif HAVE_FFSLL
|
|
|
|
return ffsll(inp);
|
|
|
|
#else
|
|
|
|
unsigned int out;
|
|
|
|
// supports max 16 different encodings.
|
2016-02-14 04:14:20 +00:00
|
|
|
if (inp == 0)
|
2015-10-08 02:23:45 +01:00
|
|
|
return 0L;
|
|
|
|
// if (inp & ((CELL)0xffffL << 16)) {inp >>= 16; out += 16;}
|
2016-02-14 04:14:20 +00:00
|
|
|
if (inp & ((CELL)0xffL << 8)) {
|
|
|
|
inp >>= 8;
|
|
|
|
out += 8;
|
|
|
|
}
|
|
|
|
if (inp & ((CELL)0xfL << 4)) {
|
|
|
|
inp >>= 4;
|
|
|
|
out += 4;
|
|
|
|
}
|
|
|
|
if (inp & ((CELL)0x3L << 2)) {
|
|
|
|
inp >>= 2;
|
|
|
|
out += 2;
|
|
|
|
}
|
|
|
|
if (inp & ((CELL)0x1 << 1))
|
|
|
|
out++;
|
2015-10-08 02:23:45 +01:00
|
|
|
#endif
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
2016-02-14 04:14:20 +00:00
|
|
|
/// Table with one entry for each seq_encoding_t value from SEQ_ENC_OCTET
/// up to and including SEQ_ENC_ISO_UTF32_LE; defined elsewhere.
/// NOTE(review): presumably indexed by the result of seq_encoding(). If so,
/// ENC_UCS2_BE / ENC_UCS2_LE (bits 256 and 512) would index past the end
/// of this table -- confirm how callers handle those encodings.
extern xlocale enc_locales[SEQ_ENC_ISO_UTF32_LE + 1];
|
2015-10-08 02:23:45 +01:00
|
|
|
#endif
|
|
|
|
|
2016-02-14 04:14:20 +00:00
|
|
|
/// Return the textual name of an encoding.
///
/// @param enc  encoding to name
/// @return     a pointer to a string literal (never NULL); values that are
///             not a known encoding_t enumerator yield a placeholder text.
///
/// NOTE(review): ENC_ISO_ANSI is reported as "octet", the same name as
/// ENC_OCTET, although enc_id() spells it "iso_ansi". Left unchanged here
/// because callers may depend on the string -- confirm whether intended.
static inline const char *enc_name(encoding_t enc) {
  switch (enc) {
  case ENC_OCTET:
    return "octet";
  case ENC_ISO_LATIN1:
    return "iso_latin_1";
  case ENC_ISO_ASCII:
    return "ascii";
  case ENC_ISO_ANSI:
    return "octet";
  case ENC_ISO_UTF8:
    return "utf8";
  case ENC_UTF16_BE:
    return "utf16_be";
  case ENC_UTF16_LE:
    return "utf16_le";
  case ENC_UCS2_BE:
    return "ucs2_be";
  case ENC_UCS2_LE:
    return "ucs2_le";
  case ENC_ISO_UTF32_BE:
    return "utf32_be";
  case ENC_ISO_UTF32_LE:
    return "utf32_le";
  default:
    return "thanks for watching!!";
  }
}
|
|
|
|
|
2016-02-18 12:10:58 +00:00
|
|
|
/// Map an encoding name to its encoding_t value.
///
/// Both the Prolog-style names ("utf8", "iso_latin_1", ...) and the
/// charset-style names ("UTF-8", "ISO-8859-1", ...) are accepted; matching
/// is case-sensitive. Every name produced by enc_name() for a distinct
/// encoding ("ascii", "ucs2_be", "ucs2_le", ...) is accepted as well, so
/// that names round-trip.
///
/// @param s        NUL-terminated encoding name (must not be NULL)
/// @param enc_bom  encoding detected from a byte-order mark, or ENC_OCTET
///                 if none; it decides the byte order for the names
///                 "UTF-16", "UTF-32" and "UCS-2" (big-endian unless the
///                 BOM says little-endian) and overrides "default"
/// @return         the encoding; for an unknown name a domain error is
///                 raised through Yap_Error() and, if that call returns,
///                 the default encoding is returned
static inline encoding_t enc_id(const char *s, encoding_t enc_bom) {
  // Names whose meaning does not depend on enc_bom.
  static const struct {
    const char *name;
    encoding_t enc;
  } fixed_names[] = {
      {"iso_utf8", ENC_ISO_UTF8},
      {"utf8", ENC_ISO_UTF8},
      {"UTF-8", ENC_ISO_UTF8},
      {"utf16_le", ENC_UTF16_LE},
      {"utf16_be", ENC_UTF16_BE},
      {"UTF-16LE", ENC_UTF16_LE},
      {"UTF-16BE", ENC_UTF16_BE},
      {"octet", ENC_OCTET},
      {"iso_latin_1", ENC_ISO_LATIN1},
      {"ISO-8859-1", ENC_ISO_LATIN1},
      {"iso_ascii", ENC_ISO_ASCII},
      {"ascii", ENC_ISO_ASCII}, // name produced by enc_name()
      {"US_ASCII", ENC_ISO_ASCII},
      {"US-ASCII", ENC_ISO_ASCII}, // registered charset spelling
      {"iso_ansi", ENC_ISO_ANSI},
      {"utf32_be", ENC_ISO_UTF32_BE},
      {"utf32_le", ENC_ISO_UTF32_LE},
      {"UTF-32BE", ENC_ISO_UTF32_BE},
      {"UTF-32LE", ENC_ISO_UTF32_LE},
      // just for SWI compat, this actually refers to UCS-2
      {"unicode_be", ENC_UCS2_BE},
      {"unicode_le", ENC_UCS2_LE},
      {"ucs2_be", ENC_UCS2_BE}, // names produced by enc_name()
      {"ucs2_le", ENC_UCS2_LE},
      {"UCS-2LE", ENC_UCS2_LE},
      {"UCS-2BE", ENC_UCS2_BE},
  };
  unsigned int i;

  for (i = 0; i < sizeof(fixed_names) / sizeof(fixed_names[0]); i++) {
    if (!strcmp(s, fixed_names[i].name))
      return fixed_names[i].enc;
  }

  // Names without an explicit byte order: trust the BOM, else big-endian.
  if (!strcmp(s, "UTF-16"))
    return enc_bom == ENC_UTF16_LE ? ENC_UTF16_LE : ENC_UTF16_BE;
  if (!strcmp(s, "UTF-32"))
    return enc_bom == ENC_ISO_UTF32_LE ? ENC_ISO_UTF32_LE : ENC_ISO_UTF32_BE;
  if (!strcmp(s, "UCS-2")) {
    // A UCS-2 BOM is the same byte sequence as a UTF-16 one, so accept
    // either classification from the BOM detector.
    if (enc_bom == ENC_UTF16_LE || enc_bom == ENC_UCS2_LE)
      return ENC_UCS2_LE;
    return ENC_UCS2_BE;
  }

  // "default": a detected BOM wins over the configured default.
  if (!strcmp(s, "default")) {
    if (enc_bom != ENC_OCTET)
      return enc_bom;
    return Yap_DefaultEncoding();
  }

  Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, MkAtomTerm(Yap_LookupAtom(s)), "bad encoding %s", s);
  return Yap_DefaultEncoding();
}
|
|
|
|
|
2015-10-08 02:23:45 +01:00
|
|
|
#endif
|
2015-06-18 01:34:39 +01:00
|
|
|
|
|
|
|
#endif
|