200 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			200 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*************************************************************************
 | |
| *									 *
 | |
| *	 YAP Prolog 	%W% %G%
 | |
| *									 *
 | |
| *	Yap Prolog was developed at NCCUP - Universidade do Porto	 *
 | |
| *									 *
 | |
| * Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003	 *
 | |
| *									 *
 | |
| **************************************************************************
 | |
| *									 *
 | |
| * File:		yapio.h							 *
 | |
| * Last rev:	22/1/03							 *
 | |
| * mods:									 *
 | |
| * comments:	UNICODE encoding support (based on SWI-Prolog)		 *
 | |
| *									 *
 | |
| *************************************************************************/
 | |
| 
 | |
| #ifndef ENCODING_H
 | |
| 
 | |
| #define ENCODING_H 1
 | |
| 
 | |
/**
 * Text encodings known to the I/O layer.
 *
 * ENC_OCTET is 0; every other encoding owns exactly one bit, so a value
 * can be turned into a dense index by locating its set bit.
 */
typedef enum {
  ENC_OCTET = 0,                /// raw bytes, binary files
  ENC_ISO_LATIN1 = 1 << 0,      /// ISO-8859-1
  ENC_ISO_ASCII = 1 << 1,       /// 7-bit US ASCII
  ENC_ISO_ANSI = 1 << 2,        /// ANSI
  ENC_ISO_UTF8 = 1 << 3,        /// UTF-8
  ENC_UTF16_BE = 1 << 4,        /// UTF-16, big-endian
  ENC_UTF16_LE = 1 << 5,        /// UTF-16, little-endian
  ENC_ISO_UTF32_BE = 1 << 6,    /// UTF-32, big-endian
  ENC_ISO_UTF32_LE = 1 << 7,    /// UTF-32, little-endian
  ENC_UCS2_BE = 1 << 8,         /// UCS-2, big-endian
  ENC_UCS2_LE = 1 << 9,         /// UCS-2, little-endian
} encoding_t;
 | |
| 
 | |
/*
 * ENC_WCHAR names one of the two UTF-32 encodings; which one is decided at
 * compile time by WORDS_BIGENDIAN (big-endian when it is non-zero,
 * little-endian otherwise).
 */
#if !WORDS_BIGENDIAN
#define ENC_WCHAR ENC_ISO_UTF32_LE
#else
#define ENC_WCHAR ENC_ISO_UTF32_BE
#endif
 | |
| 
 | |
| #ifdef YAP_H
 | |
| 
 | |
| /// read the current environment, as set by the user or as Initial
 | |
| encoding_t Yap_DefaultEncoding(void);
 | |
| encoding_t Yap_SystemEncoding(void);
 | |
| void Yap_SetDefaultEncoding(encoding_t new_encoding);
 | |
| 
 | |
| #if HAVE_XLOCALE_H
 | |
/**
 * Dense (sequential) numbering of the encodings, suitable for indexing
 * arrays such as enc_locales.
 *
 * NOTE(review): encoding_t also defines ENC_UCS2_BE and ENC_UCS2_LE, which
 * have no counterpart here.
 */
typedef enum {
  SEQ_ENC_OCTET = 0,        /// raw bytes, binary files
  SEQ_ENC_ISO_LATIN1 = 1,   /// ISO-8859-1
  SEQ_ENC_ISO_ASCII = 2,    /// 7-bit US ASCII
  SEQ_ENC_ISO_ANSI = 3,     /// ANSI
  SEQ_ENC_ISO_UTF8 = 4,     /// UTF-8
  SEQ_ENC_UTF16_BE = 5,     /// UTF-16, big-endian
  SEQ_ENC_UTF16_LE = 6,     /// UTF-16, little-endian
  SEQ_ENC_ISO_UTF32_BE = 7, /// UTF-32, big-endian
  SEQ_ENC_ISO_UTF32_LE = 8  /// UTF-32, little-endian
} seq_encoding_t;
 | |
| 
 | |
| /// convert from unary to binary representation.
 | |
| static inline seq_encoding_t seq_encoding(encoding_t inp) {
 | |
| #if HAVE__BUILTIN_FFSLL
 | |
|   return __builtin_ffsll(inp);
 | |
| #elif HAVE_FFSLL
 | |
|   return ffsll(inp);
 | |
| #else
 | |
|   unsigned int out;
 | |
|   // supports max 16 different encodings.
 | |
|   if (inp == 0)
 | |
|     return 0L;
 | |
|   // if (inp &     ((CELL)0xffffL << 16)) {inp >>= 16; out += 16;}
 | |
|   if (inp & ((CELL)0xffL << 8)) {
 | |
|     inp >>= 8;
 | |
|     out += 8;
 | |
|   }
 | |
|   if (inp & ((CELL)0xfL << 4)) {
 | |
|     inp >>= 4;
 | |
|     out += 4;
 | |
|   }
 | |
|   if (inp & ((CELL)0x3L << 2)) {
 | |
|     inp >>= 2;
 | |
|     out += 2;
 | |
|   }
 | |
|   if (inp & ((CELL)0x1 << 1))
 | |
|     out++;
 | |
| #endif
 | |
|   return out;
 | |
| }
 | |
| 
 | |
| extern xlocale enc_locales[SEQ_ENC_ISO_UTF32_LE + 1];
 | |
| #endif
 | |
| 
 | |
| static inline const char *enc_name(encoding_t enc) {
 | |
|   switch (enc) {
 | |
|   case ENC_OCTET:
 | |
|     return "octet";
 | |
|   case ENC_ISO_LATIN1:
 | |
|     return "iso_latin_1";
 | |
|   case ENC_ISO_ASCII:
 | |
|     return "ascii";
 | |
|   case ENC_ISO_ANSI:
 | |
|     return "octet";
 | |
|   case ENC_ISO_UTF8:
 | |
|     return "utf8";
 | |
|   case ENC_UTF16_BE:
 | |
|     return "utf16_be";
 | |
|   case ENC_UTF16_LE:
 | |
|     return "utf16_le";
 | |
|   case ENC_UCS2_BE:
 | |
|     return "ucs2_be";
 | |
|   case ENC_UCS2_LE:
 | |
|     return "ucs2_le";
 | |
|   case ENC_ISO_UTF32_BE:
 | |
|     return "utf32_be";
 | |
|   case ENC_ISO_UTF32_LE:
 | |
|     return "utf32_le";
 | |
|   default:
 | |
|     return "thanks for watching!!";
 | |
|   }
 | |
| }
 | |
| 
 | |
| static inline encoding_t enc_id(const char *s, encoding_t enc_bom) {
 | |
|   {
 | |
|     if (!strcmp(s, "iso_utf8"))
 | |
|       return ENC_ISO_UTF8;
 | |
|     if (!strcmp(s, "utf8"))
 | |
|       return ENC_ISO_UTF8;
 | |
|     if (!strcmp(s, "UTF-8"))
 | |
|       return ENC_ISO_UTF8;
 | |
|     if (!strcmp(s, "utf16_le"))
 | |
|       return ENC_UTF16_LE;
 | |
|     if (!strcmp(s, "utf16_be"))
 | |
|       return ENC_UTF16_BE;
 | |
|     if (!strcmp(s, "UTF-16")) {
 | |
|        if (enc_bom == ENC_UTF16_LE)
 | |
|         return ENC_UTF16_LE;
 | |
|       return ENC_UTF16_BE;
 | |
|     }
 | |
|     if (!strcmp(s, "UTF-16LE"))
 | |
|       return ENC_UTF16_LE;
 | |
|     if (!strcmp(s, "UTF-16BE"))
 | |
|       return ENC_UTF16_BE;
 | |
|     if (!strcmp(s, "octet"))
 | |
|       return ENC_OCTET;
 | |
|     if (!strcmp(s, "iso_latin_1"))
 | |
|       return ENC_ISO_LATIN1;
 | |
|     if (!strcmp(s, "iso_ascii"))
 | |
|       return ENC_ISO_ASCII;
 | |
|     if (!strcmp(s, "iso_ansi"))
 | |
|       return ENC_ISO_ANSI;
 | |
|     if (!strcmp(s, "utf32_be"))
 | |
|       return ENC_ISO_UTF32_BE;
 | |
|     if (!strcmp(s, "utf32_le"))
 | |
|       return ENC_ISO_UTF32_LE;
 | |
|     if (!strcmp(s, "UTF-32")) {
 | |
|       if (enc_bom == ENC_ISO_UTF32_LE)
 | |
|         return ENC_ISO_UTF32_LE;
 | |
|       return ENC_ISO_UTF32_BE;
 | |
|     }
 | |
|     if (!strcmp(s, "UTF-32BE"))
 | |
|       return ENC_ISO_UTF32_BE;
 | |
|     if (!strcmp(s, "UTF-32LE"))
 | |
|       return ENC_ISO_UTF32_LE;
 | |
|     if (!strcmp(s, "ISO-8859-1"))
 | |
|       return ENC_ISO_LATIN1;
 | |
|     if (!strcmp(s, "US_ASCII"))
 | |
|       return ENC_ISO_ASCII;
 | |
|     // just for SWI compat, this actually refers to
 | |
|     // UCS-2
 | |
|     if (!strcmp(s, "unicode_be"))
 | |
|       return ENC_UCS2_BE;
 | |
|     if (!strcmp(s, "unicode_le"))
 | |
|       return ENC_UCS2_LE;
 | |
|     if (!strcmp(s, "UCS-2")) {
 | |
|        if (enc_bom == ENC_UTF16_LE)
 | |
|         return ENC_UCS2_LE;
 | |
|       return ENC_UCS2_BE;
 | |
|     }
 | |
|     if (!strcmp(s, "UCS-2LE"))
 | |
|       return ENC_UCS2_LE;
 | |
|     if (!strcmp(s, "UCS-2BE"))
 | |
|       return ENC_UCS2_BE;
 | |
|     if (!strcmp(s, "default")) {
 | |
|         if (enc_bom != ENC_OCTET)
 | |
|           return enc_bom;
 | |
|         return Yap_DefaultEncoding();
 | |
|     }
 | |
|     else {
 | |
|       Yap_Error(DOMAIN_ERROR_OUT_OF_RANGE, MkAtomTerm(Yap_LookupAtom(s)), "bad encoding %s", s);
 | |
|       return Yap_DefaultEncoding();
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #endif
 |