forked from GNUsocial/gnu-social
		
	
		
			
				
	
	
		
			183 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			183 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| /**
 | |
|  * Phergie
 | |
|  *
 | |
|  * PHP version 5
 | |
|  *
 | |
|  * LICENSE
 | |
|  *
 | |
|  * This source file is subject to the new BSD license that is bundled
 | |
|  * with this package in the file LICENSE.
 | |
|  * It is also available through the world-wide-web at this URL:
 | |
|  * http://phergie.org/license
 | |
|  *
 | |
|  * @category  Phergie
 | |
|  * @package   Phergie_Plugin_Encoding
 | |
|  * @author    Phergie Development Team <team@phergie.org>
 | |
|  * @copyright 2008-2010 Phergie Development Team (http://phergie.org)
 | |
|  * @license   http://phergie.org/license New BSD License
 | |
|  * @link      http://pear.phergie.org/package/Phergie_Plugin_Encoding
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * Handles decoding markup entities and converting text between character
 | |
|  * encodings.
 | |
|  *
 | |
|  * @category Phergie
 | |
|  * @package  Phergie_Plugin_Encoding
 | |
|  * @author   Phergie Development Team <team@phergie.org>
 | |
|  * @license  http://phergie.org/license New BSD License
 | |
|  * @link     http://pear.phergie.org/package/Phergie_Plugin_Encoding
 | |
|  */
 | |
class Phergie_Plugin_Encoding extends Phergie_Plugin_Abstract
{
    /**
     * Lookup table of named entities that are rewritten before
     * html_entity_decode() runs, mapped to the Unicode code point each one
     * is decoded to.
     *
     * Names are matched case-sensitively; capitalised variants such as
     * &Alpha; are left for html_entity_decode() to handle. &apos;, &circ;,
     * &minus; and &tilde; intentionally map to their plain ASCII
     * look-alikes (', ^, - and ~).
     *
     * @var array
     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73409
     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73410
     */
    protected static $entities = array(
        '&alpha;' => 945,
        '&apos;' => 39,
        '&beta;' => 946,
        '&bull;' => 8226,
        '&chi;' => 967,
        '&circ;' => 94,
        '&delta;' => 948,
        '&epsilon;' => 949,
        '&eta;' => 951,
        '&fnof;' => 402,
        '&gamma;' => 947,
        '&iota;' => 953,
        '&kappa;' => 954,
        '&lambda;' => 955,
        '&ldquo;' => 8220,
        '&lsaquo;' => 8249,
        '&lsquo;' => 8216,
        '&mdash;' => 8212,
        '&minus;' => 45,
        '&mu;' => 956,
        '&ndash;' => 8211,
        '&nu;' => 957,
        '&oelig;' => 339,
        '&omega;' => 969,
        '&omicron;' => 959,
        '&phi;' => 966,
        '&pi;' => 960,
        '&piv;' => 982,
        '&psi;' => 968,
        '&rdquo;' => 8221,
        '&rho;' => 961,
        '&rsaquo;' => 8250,
        '&rsquo;' => 8217,
        '&scaron;' => 353,
        '&sigma;' => 963,
        '&sigmaf;' => 962,
        '&tau;' => 964,
        '&theta;' => 952,
        '&thetasym;' => 977,
        '&tilde;' => 126,
        '&trade;' => 8482,
        '&upsih;' => 978,
        '&upsilon;' => 965,
        '&xi;' => 958,
        '&yuml;' => 255,
        '&zeta;' => 950,
    );

    /**
     * Decodes markup entities in a given string.
     *
     * Decoding happens in three passes:
     *  1. named entities from the lookup table are rewritten as decimal
     *     character references;
     *  2. html_entity_decode() decodes every entity it can express in
     *     $charset;
     *  3. numeric references that are still present afterwards are
     *     replaced by their UTF-8 byte sequence, whatever $charset is.
     *     References that do not name a valid code point are left as-is.
     *
     * @param string $string  String containing markup entities
     * @param string $charset Optional character set name to use in decoding
     *        entities, defaults to UTF-8
     *
     * @return string String with markup entities decoded
     */
    public function decodeEntities($string, $charset = 'UTF-8')
    {
        // Emit numeric references rather than raw bytes: chr() only
        // produces a single byte and silently wraps code points above 255,
        // whereas html_entity_decode() knows how to write the character in
        // the requested character set.
        $references = array();
        foreach (self::$entities as $entity => $codePoint) {
            $references[$entity] = '&#' . $codePoint . ';';
        }
        $string = strtr($string, $references);

        $string = html_entity_decode($string, ENT_QUOTES, $charset);

        // The digit counts are capped so that the captured value always
        // fits in an integer; longer references cannot be valid code points
        // and are left alone.
        return preg_replace_callback(
            '/&#(?:x0*([0-9a-f]{1,6})|0*([0-9]{1,7}));/i',
            array($this, 'decodeNumericEntity'),
            $string
        );
    }

    /**
     * preg_replace_callback() handler that turns one numeric character
     * reference into its UTF-8 representation.
     *
     * @param array $matches Match data: index 0 is the whole reference,
     *        index 1 the hexadecimal digits (empty for a decimal
     *        reference), index 2 the decimal digits (absent for a
     *        hexadecimal reference)
     *
     * @return string UTF-8 character, or the untouched reference if it does
     *         not denote a valid code point
     */
    protected function decodeNumericEntity(array $matches)
    {
        $code = isset($matches[2]) ? (int) $matches[2] : hexdec($matches[1]);
        $character = $this->codeToUtf8($code);

        return ($character === null) ? $matches[0] : $character;
    }

    /**
     * Converts a given Unicode code point to its UTF-8 equivalent.
     *
     * @param int $code Code point to convert
     *
     * @return string|null UTF-8 byte sequence for the code point, or null
     *         if $code is negative, greater than U+10FFFF or a UTF-16
     *         surrogate (U+D800 to U+DFFF), none of which can be encoded
     *         as valid UTF-8
     */
    public function codeToUtf8($code)
    {
        $code = (int) $code;

        if ($code < 0 || $code > 0x10FFFF
            || ($code >= 0xD800 && $code <= 0xDFFF)
        ) {
            return null;
        }

        // 1 byte, 7 bits
        if ($code < 0x80) {
            return chr($code);
        }

        // 2 bytes, 11 bits
        if ($code < 0x800) {
            return chr(0xC0 | ($code >> 6)) .
                   chr(0x80 | ($code & 0x3F));
        }

        // 3 bytes, 16 bits
        if ($code < 0x10000) {
            return chr(0xE0 | ($code >> 12)) .
                   chr(0x80 | (($code >> 6) & 0x3F)) .
                   chr(0x80 | ($code & 0x3F));
        }

        // 4 bytes, 21 bits
        return chr(0xF0 | ($code >> 18)) .
               chr(0x80 | (($code >> 12) & 0x3F)) .
               chr(0x80 | (($code >> 6) & 0x3F)) .
               chr(0x80 | ($code & 0x3F));
    }

    /**
     * Transliterates characters in a given string where possible.
     *
     * Uses the first available strategy: the PECL translit extension,
     * then iconv with //TRANSLIT, then a pure-PHP fallback that strips
     * diacritics by way of HTML entity names (the fallback does not
     * convert between character sets).
     *
     * @param string $string      String containing characters to
     *        transliterate
     * @param string $charsetFrom Optional character set of the string,
     *        defaults to UTF-8
     * @param string $charsetTo   Optional character set to which the string
     *        should be converted, defaults to ISO-8859-1
     *
     * @return string String with characters transliterated or the original
     *         string if transliteration was not possible
     */
    public function transliterate($string, $charsetFrom = 'UTF-8', $charsetTo = 'ISO-8859-1')
    {
        // @link http://pecl.php.net/package/translit
        if (function_exists('transliterate')) {
            return transliterate(
                $string,
                array('han_transliterate', 'diacritical_remove'),
                $charsetFrom,
                $charsetTo
            );
        }

        if (function_exists('iconv')) {
            $converted = iconv($charsetFrom, $charsetTo . '//TRANSLIT', $string);

            // iconv() returns false when the conversion fails; honour the
            // documented contract instead of leaking the boolean.
            return ($converted === false) ? $string : $converted;
        }

        // @link http://stackoverflow.com/questions/1284535/php-transliteration/1285491#1285491
        $encoded = htmlentities($string, ENT_COMPAT, $charsetFrom);
        if ($encoded === '' && $string !== '') {
            // htmlentities() yields an empty string for input that is not
            // valid in $charsetFrom.
            return $string;
        }

        // Reduce accented-letter entities (e.g. &eacute;) to their base
        // letter, then turn every other entity htmlentities() introduced
        // (&amp;, &lt;, &copy;, ...) back into the character it stood for.
        $stripped = preg_replace(
            '~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i',
            '$1',
            $encoded
        );

        return html_entity_decode($stripped, ENT_COMPAT, $charsetFrom);
    }
}
 |