200 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			200 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*************************************************************************
*									 *
*	 YAP Prolog 	%W% %G%
*									 *
*	Yap Prolog was developed at NCCUP - Universidade do Porto	 *
*									 *
* Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003	 *
*									 *
**************************************************************************
*									 *
* File:		yapio.h							 *
* Last rev:	22/1/03							 *
* mods:									 *
* comments:	UNICODE encoding support (based on SWI-Prolog)		 *
*									 *
*************************************************************************/
 | 
						|
 | 
						|
#ifndef ENCODING_H
 | 
						|
 | 
						|
#define ENCODING_H 1
 | 
						|
 | 
						|
#include "YapError.h"
 | 
						|
 | 
						|
/// Character encodings, stored as one-hot flags.
///
/// Apart from ENC_OCTET (0), every enumerator is a distinct single bit, so a
/// value can be turned into a dense index by locating its set bit.
typedef enum {
  ENC_OCTET = 0,             ///< binary files
  ENC_ISO_LATIN1 = 1 << 0,   ///< ISO Latin-1: US + West Europe
  ENC_ISO_ASCII = 1 << 1,    ///< ASCII: US only
  ENC_ISO_ANSI = 1 << 2,     ///< ANSI ("who cares")
  ENC_ISO_UTF8 = 1 << 3,     ///< UTF-8: most everyone nowadays
  ENC_UTF16_BE = 1 << 4,     ///< UTF-16, big-endian
  ENC_UTF16_LE = 1 << 5,     ///< UTF-16, little-endian
  ENC_ISO_UTF32_BE = 1 << 6, ///< UTF-32, big-endian
  ENC_ISO_UTF32_LE = 1 << 7, ///< UTF-32, little-endian
  ENC_UCS2_BE = 1 << 8,      ///< UCS-2, big-endian
  ENC_UCS2_LE = 1 << 9,      ///< UCS-2, little-endian
} encoding_t;
 | 
						|
 | 
						|
/// ENC_WCHAR names the UTF-32 flavour matching the build's byte order:
/// big-endian when WORDS_BIGENDIAN is non-zero, little-endian otherwise
/// (including when WORDS_BIGENDIAN is not defined at all).
#if !WORDS_BIGENDIAN
#define ENC_WCHAR ENC_ISO_UTF32_LE
#else
#define ENC_WCHAR ENC_ISO_UTF32_BE
#endif
 | 
						|
 | 
						|
#ifdef YAP_H
 | 
						|
 | 
						|
/// read the current environment, as set by the user or as Initial
 | 
						|
encoding_t Yap_DefaultEncoding(void);
 | 
						|
encoding_t Yap_SystemEncoding(void);
 | 
						|
void Yap_SetDefaultEncoding(encoding_t new_encoding);
 | 
						|
 | 
						|
#if HAVE_XLOCALE_H
 | 
						|
/// Dense (sequential) numbering of the encodings: 0, 1, 2, ... in the same
/// order as the one-hot encoding_t flags, up to the UTF-32 variants.
typedef enum {
  SEQ_ENC_OCTET = 0,    ///< binary files
  SEQ_ENC_ISO_LATIN1,   ///< ISO Latin-1: US + West Europe
  SEQ_ENC_ISO_ASCII,    ///< ASCII: US only
  SEQ_ENC_ISO_ANSI,     ///< ANSI ("who cares")
  SEQ_ENC_ISO_UTF8,     ///< UTF-8: most everyone nowadays
  SEQ_ENC_UTF16_BE,     ///< UTF-16, big-endian
  SEQ_ENC_UTF16_LE,     ///< UTF-16, little-endian
  SEQ_ENC_ISO_UTF32_BE, ///< UTF-32, big-endian
  SEQ_ENC_ISO_UTF32_LE  ///< UTF-32, little-endian
} seq_encoding_t;
 | 
						|
 | 
						|
/// convert from unary to binary representation.
 | 
						|
static inline seq_encoding_t seq_encoding(encoding_t inp) {
 | 
						|
#if HAVE__BUILTIN_FFSLL
 | 
						|
  return __builtin_ffsll(inp);
 | 
						|
#elif HAVE_FFSLL
 | 
						|
  return ffsll(inp);
 | 
						|
#else
 | 
						|
  unsigned int out;
 | 
						|
  // supports max 16 different encodings.
 | 
						|
  if (inp == 0)
 | 
						|
    return 0L;
 | 
						|
  // if (inp &     ((CELL)0xffffL << 16)) {inp >>= 16; out += 16;}
 | 
						|
  if (inp & ((CELL)0xffL << 8)) {
 | 
						|
    inp >>= 8;
 | 
						|
    out += 8;
 | 
						|
  }
 | 
						|
  if (inp & ((CELL)0xfL << 4)) {
 | 
						|
    inp >>= 4;
 | 
						|
    out += 4;
 | 
						|
  }
 | 
						|
  if (inp & ((CELL)0x3L << 2)) {
 | 
						|
    inp >>= 2;
 | 
						|
    out += 2;
 | 
						|
  }
 | 
						|
  if (inp & ((CELL)0x1 << 1))
 | 
						|
    out++;
 | 
						|
#endif
 | 
						|
  return out;
 | 
						|
}
 | 
						|
 | 
						|
/// Table of xlocale entries with one slot per seq_encoding_t value
/// (SEQ_ENC_OCTET .. SEQ_ENC_ISO_UTF32_LE); defined in another translation unit.
/// NOTE(review): presumably indexed by the result of seq_encoding() — confirm.
extern xlocale enc_locales[SEQ_ENC_ISO_UTF32_LE + 1];
 | 
						|
#endif
 | 
						|
 | 
						|
/// Return the textual name of an encoding.
///
/// @param enc  a single encoding_t flag.
/// @return     a pointer to a constant string naming the encoding, or the
///             placeholder "thanks for watching!!" when enc is not one of
///             the known flags.
///
/// NOTE(review): ENC_ISO_ANSI reports "octet", the same name as ENC_OCTET —
/// confirm this aliasing is intended.
static inline const char *enc_name(encoding_t enc) {
  static const struct {
    encoding_t id;
    const char *name;
  } names[] = {
      {ENC_OCTET, "octet"},
      {ENC_ISO_LATIN1, "iso_latin_1"},
      {ENC_ISO_ASCII, "ascii"},
      {ENC_ISO_ANSI, "octet"},
      {ENC_ISO_UTF8, "utf8"},
      {ENC_UTF16_BE, "utf16_be"},
      {ENC_UTF16_LE, "utf16_le"},
      {ENC_UCS2_BE, "ucs2_be"},
      {ENC_UCS2_LE, "ucs2_le"},
      {ENC_ISO_UTF32_BE, "utf32_be"},
      {ENC_ISO_UTF32_LE, "utf32_le"},
  };
  unsigned int i;

  // Every flag appears once, so the first hit is the only hit.
  for (i = 0; i < sizeof names / sizeof names[0]; i++) {
    if (names[i].id == enc)
      return names[i].name;
  }
  return "thanks for watching!!";
}
 | 
						|
 | 
						|
/// Map an encoding name to its encoding_t flag.
///
/// Names are compared exactly (case-sensitive). Besides the Prolog-style
/// names ("utf8", "iso_latin_1", ...) and the IANA-style names ("UTF-8",
/// "ISO-8859-1", ...), every name produced by enc_name() is accepted, so
/// enc_id(enc_name(e), ...) gives back e for all encodings with a unique name.
///
/// @param s        encoding name; a null pointer is treated like an unknown
///                 name.
/// @param enc_bom  byte-order hint used for the endian-neutral names
///                 "UTF-16", "UTF-32" and "UCS-2" (little-endian only if the
///                 hint says so, big-endian otherwise) and for "default"
///                 (returned as is unless it is ENC_OCTET).
///                 NOTE(review): presumably the encoding detected from a
///                 byte-order mark — confirm against callers.
/// @return         the matching flag, or Yap_DefaultEncoding() when the name
///                 is null, unknown, or "default" without a hint.
static inline encoding_t enc_id(const char *s, encoding_t enc_bom) {
  // Names whose meaning does not depend on the byte-order hint.
  static const struct {
    const char *name;
    encoding_t id;
  } fixed_names[] = {
      {"iso_utf8", ENC_ISO_UTF8},
      {"utf8", ENC_ISO_UTF8},
      {"UTF-8", ENC_ISO_UTF8},
      {"utf16_le", ENC_UTF16_LE},
      {"utf16_be", ENC_UTF16_BE},
      {"UTF-16LE", ENC_UTF16_LE},
      {"UTF-16BE", ENC_UTF16_BE},
      {"octet", ENC_OCTET},
      {"iso_latin_1", ENC_ISO_LATIN1},
      {"ISO-8859-1", ENC_ISO_LATIN1},
      {"iso_ascii", ENC_ISO_ASCII},
      {"US_ASCII", ENC_ISO_ASCII},
      {"ascii", ENC_ISO_ASCII}, // as emitted by enc_name()
      {"iso_ansi", ENC_ISO_ANSI},
      {"utf32_be", ENC_ISO_UTF32_BE},
      {"utf32_le", ENC_ISO_UTF32_LE},
      {"UTF-32BE", ENC_ISO_UTF32_BE},
      {"UTF-32LE", ENC_ISO_UTF32_LE},
      // just for SWI compat, "unicode_*" actually refers to UCS-2
      {"unicode_be", ENC_UCS2_BE},
      {"unicode_le", ENC_UCS2_LE},
      {"UCS-2BE", ENC_UCS2_BE},
      {"UCS-2LE", ENC_UCS2_LE},
      {"ucs2_be", ENC_UCS2_BE}, // as emitted by enc_name()
      {"ucs2_le", ENC_UCS2_LE}, // as emitted by enc_name()
  };
  unsigned int i;

  // strcmp() on a null pointer is undefined; treat it as an unknown name.
  if (!s)
    return Yap_DefaultEncoding();

  for (i = 0; i < sizeof fixed_names / sizeof fixed_names[0]; i++) {
    if (strcmp(s, fixed_names[i].name) == 0)
      return fixed_names[i].id;
  }

  // Endian-neutral names: big-endian unless the hint says little-endian.
  if (strcmp(s, "UTF-16") == 0)
    return enc_bom == ENC_UTF16_LE ? ENC_UTF16_LE : ENC_UTF16_BE;
  if (strcmp(s, "UTF-32") == 0)
    return enc_bom == ENC_ISO_UTF32_LE ? ENC_ISO_UTF32_LE : ENC_ISO_UTF32_BE;
  if (strcmp(s, "UCS-2") == 0) {
    // A UTF-16 little-endian hint selects UCS-2 LE; a UCS-2 LE hint is
    // honoured as well.
    if (enc_bom == ENC_UTF16_LE || enc_bom == ENC_UCS2_LE)
      return ENC_UCS2_LE;
    return ENC_UCS2_BE;
  }

  // "default" prefers the hint when there is one (ENC_OCTET means none).
  if (strcmp(s, "default") == 0 && enc_bom != ENC_OCTET)
    return enc_bom;

  return Yap_DefaultEncoding();
}
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
#endif
 |