forked from GNUsocial/gnu-social
		
	
		
			
				
	
	
		
			183 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			183 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
/**
 | 
						|
 * Phergie
 | 
						|
 *
 | 
						|
 * PHP version 5
 | 
						|
 *
 | 
						|
 * LICENSE
 | 
						|
 *
 | 
						|
 * This source file is subject to the new BSD license that is bundled
 | 
						|
 * with this package in the file LICENSE.
 | 
						|
 * It is also available through the world-wide-web at this URL:
 | 
						|
 * http://phergie.org/license
 | 
						|
 *
 | 
						|
 * @category  Phergie
 | 
						|
 * @package   Phergie_Plugin_Encoding
 | 
						|
 * @author    Phergie Development Team <team@phergie.org>
 | 
						|
 * @copyright 2008-2010 Phergie Development Team (http://phergie.org)
 | 
						|
 * @license   http://phergie.org/license New BSD License
 | 
						|
 * @link      http://pear.phergie.org/package/Phergie_Plugin_Encoding
 | 
						|
 */
 | 
						|
 | 
						|
/**
 | 
						|
 * Handles decoding markup entities and converting text between character
 | 
						|
 * encodings.
 | 
						|
 *
 | 
						|
 * @category Phergie
 | 
						|
 * @package  Phergie_Plugin_Encoding
 | 
						|
 * @author   Phergie Development Team <team@phergie.org>
 | 
						|
 * @license  http://phergie.org/license New BSD License
 | 
						|
 * @link     http://pear.phergie.org/package/Phergie_Plugin_Encoding
 | 
						|
 */
 | 
						|
class Phergie_Plugin_Encoding extends Phergie_Plugin_Abstract
 | 
						|
{
 | 
						|
    /**
 | 
						|
     * Lookup table for entity conversions not supported by
 | 
						|
     * html_entity_decode()
 | 
						|
     *
 | 
						|
     * @var array
 | 
						|
     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73409
 | 
						|
     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73410
 | 
						|
     */
 | 
						|
    protected static $entities = array(
 | 
						|
        'α' => 913,
 | 
						|
        ''' => 39,
 | 
						|
        'β' => 914,
 | 
						|
        '•' => 149,
 | 
						|
        'χ' => 935,
 | 
						|
        'ˆ' => 94,
 | 
						|
        'δ' => 916,
 | 
						|
        'ε' => 917,
 | 
						|
        'η' => 919,
 | 
						|
        'ƒ' => 402,
 | 
						|
        'γ' => 915,
 | 
						|
        'ι' => 921,
 | 
						|
        'κ' => 922,
 | 
						|
        'λ' => 923,
 | 
						|
        '“' => 147,
 | 
						|
        '‹' => 139,
 | 
						|
        '‘' => 145,
 | 
						|
        '—' => 151,
 | 
						|
        '−' => 45,
 | 
						|
        'μ' => 924,
 | 
						|
        '–' => 150,
 | 
						|
        'ν' => 925,
 | 
						|
        'œ' => 140,
 | 
						|
        'ω' => 937,
 | 
						|
        'ο' => 927,
 | 
						|
        'φ' => 934,
 | 
						|
        'π' => 928,
 | 
						|
        'ϖ' => 982,
 | 
						|
        'ψ' => 936,
 | 
						|
        '”' => 148,
 | 
						|
        'ρ' => 929,
 | 
						|
        '›' => 155,
 | 
						|
        '’' => 146,
 | 
						|
        'š' => 138,
 | 
						|
        'σ' => 931,
 | 
						|
        'ς' => 962,
 | 
						|
        'τ' => 932,
 | 
						|
        'θ' => 920,
 | 
						|
        'ϑ' => 977,
 | 
						|
        '˜' => 126,
 | 
						|
        '™' => 153,
 | 
						|
        'ϒ' => 978,
 | 
						|
        'υ' => 933,
 | 
						|
        'ξ' => 926,
 | 
						|
        'ÿ' => 159,
 | 
						|
        'ζ' => 918,
 | 
						|
    );
 | 
						|
 | 
						|
    /**
 | 
						|
     * Decodes markup entities in a given string.
 | 
						|
     *
 | 
						|
     * @param string $string  String containing markup entities
 | 
						|
     * @param string $charset Optional character set name to use in decoding
 | 
						|
     *        entities, defaults to UTF-8
 | 
						|
     *
 | 
						|
     * @return string String with markup entities decoded
 | 
						|
     */
 | 
						|
    public function decodeEntities($string, $charset = 'UTF-8')
 | 
						|
    {
 | 
						|
        $string = str_ireplace(
 | 
						|
            array_keys(self::$entities),
 | 
						|
            array_map('chr', self::$entities),
 | 
						|
            $string
 | 
						|
        );
 | 
						|
        $string = html_entity_decode($string, ENT_QUOTES, $charset);
 | 
						|
        $string = preg_replace(
 | 
						|
            array('/�*([0-9]+);/me', '/�*([a-f0-9]+);/mei'),
 | 
						|
            array('$this->codeToUtf(\\1)', '$this->codeToUtf(hexdec(\\1))'),
 | 
						|
            $string
 | 
						|
        );
 | 
						|
        return $string;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Converts a given unicode to its UTF-8 equivalent.
 | 
						|
     *
 | 
						|
     * @param int $code Code to convert
 | 
						|
     * @return string Character corresponding to code
 | 
						|
     */
 | 
						|
    public function codeToUtf8($code)
 | 
						|
    {
 | 
						|
        $code = (int) $code;
 | 
						|
        switch ($code) {
 | 
						|
            // 1 byte, 7 bits
 | 
						|
            case 0:
 | 
						|
                return chr(0);
 | 
						|
            case ($code & 0x7F):
 | 
						|
                return chr($code);
 | 
						|
 | 
						|
            // 2 bytes, 11 bits
 | 
						|
            case ($code & 0x7FF):
 | 
						|
                return chr(0xC0 | (($code >> 6) & 0x1F)) .
 | 
						|
                       chr(0x80 | ($code & 0x3F));
 | 
						|
 | 
						|
            // 3 bytes, 16 bits
 | 
						|
            case ($code & 0xFFFF):
 | 
						|
                return chr(0xE0 | (($code >> 12) & 0x0F)) .
 | 
						|
                       chr(0x80 | (($code >> 6) & 0x3F)) .
 | 
						|
                       chr(0x80 | ($code & 0x3F));
 | 
						|
 | 
						|
            // 4 bytes, 21 bits
 | 
						|
            case ($code & 0x1FFFFF):
 | 
						|
                return chr(0xF0 | ($code >> 18)) .
 | 
						|
                       chr(0x80 | (($code >> 12) & 0x3F)) .
 | 
						|
                       chr(0x80 | (($code >> 6) & 0x3F)) .
 | 
						|
                       chr(0x80 | ($code & 0x3F));
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Transliterates characters in a given string where possible.
 | 
						|
     *
 | 
						|
     * @param string $string      String containing characters to
 | 
						|
     *        transliterate
 | 
						|
     * @param string $charsetFrom Optional character set of the string,
 | 
						|
     *        defaults to UTF-8
 | 
						|
     * @param string $charsetTo   Optional character set to which the string
 | 
						|
     *        should be converted, defaults to ISO-8859-1
 | 
						|
     *
 | 
						|
     * @return string String with characters transliterated or the original
 | 
						|
     *         string if transliteration was not possible
 | 
						|
     */
 | 
						|
    public function transliterate($string, $charsetFrom = 'UTF-8', $charsetTo = 'ISO-8859-1')
 | 
						|
    {
 | 
						|
        // @link http://pecl.php.net/package/translit
 | 
						|
        if (function_exists('transliterate')) {
 | 
						|
            $string = transliterate($string, array('han_transliterate', 'diacritical_remove'), $charsetFrom, $charsetTo);
 | 
						|
        } elseif (function_exists('iconv')) {
 | 
						|
            $string = iconv($charsetFrom, $charsetTo . '//TRANSLIT', $string);
 | 
						|
        } else {
 | 
						|
            // @link http://stackoverflow.com/questions/1284535/php-transliteration/1285491#1285491
 | 
						|
            $string = preg_replace(
 | 
						|
                '~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i',
 | 
						|
                '$1',
 | 
						|
                htmlentities($string, ENT_COMPAT, $charsetFrom)
 | 
						|
            );
 | 
						|
        }
 | 
						|
        return $string;
 | 
						|
    }
 | 
						|
}
 |