forked from GNUsocial/gnu-social
		
	
		
			
	
	
		
			183 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
		
		
			
		
	
	
			183 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| 
								 | 
							
								<?php
							 | 
						||
| 
								 | 
							
								/**
							 | 
						||
| 
								 | 
							
								 * Phergie
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * PHP version 5
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * LICENSE
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This source file is subject to the new BSD license that is bundled
							 | 
						||
| 
								 | 
							
								 * with this package in the file LICENSE.
							 | 
						||
| 
								 | 
							
								 * It is also available through the world-wide-web at this URL:
							 | 
						||
| 
								 | 
							
								 * http://phergie.org/license
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * @category  Phergie
							 | 
						||
| 
								 | 
							
								 * @package   Phergie_Plugin_Encoding
							 | 
						||
| 
								 | 
							
								 * @author    Phergie Development Team <team@phergie.org>
							 | 
						||
| 
								 | 
							
								 * @copyright 2008-2010 Phergie Development Team (http://phergie.org)
							 | 
						||
| 
								 | 
							
								 * @license   http://phergie.org/license New BSD License
							 | 
						||
| 
								 | 
							
								 * @link      http://pear.phergie.org/package/Phergie_Plugin_Encoding
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Handles decoding markup entities and converting text between character
								 * encodings.
								 *
								 * @category Phergie
								 * @package  Phergie_Plugin_Encoding
								 * @author   Phergie Development Team <team@phergie.org>
								 * @license  http://phergie.org/license New BSD License
								 * @link     http://pear.phergie.org/package/Phergie_Plugin_Encoding
								 */
								class Phergie_Plugin_Encoding extends Phergie_Plugin_Abstract
								{
								    /**
								     * Lookup table for entity conversions not supported by
								     * html_entity_decode()
								     *
								     * Keys are the entity strings searched for (case-insensitively) by
								     * decodeEntities(); each value is the Unicode code point of the
								     * character that replaces the entity, which decodeEntities() encodes
								     * as UTF-8 through codeToUtf8().
								     *
								     * NOTE(review): the code points denote the same characters the table
								     * referred to before (values 128-159 looked like Windows-1252 byte
								     * values and were mapped to their Unicode equivalents). That means the
								     * Greek names, "&oelig;", "&scaron;" and "&yuml;" resolve to upper-case
								     * letters, and "&apos;", "&circ;", "&minus;" and "&tilde;" resolve to
								     * plain ASCII characters. Confirm this is intended.
								     *
								     * @var array
								     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73409
								     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73410
								     */
								    protected static $entities = array(
								        '&alpha;' => 913,
								        '&apos;' => 39,
								        '&beta;' => 914,
								        '&bull;' => 8226,
								        '&chi;' => 935,
								        '&circ;' => 94,
								        '&delta;' => 916,
								        '&epsilon;' => 917,
								        '&eta;' => 919,
								        '&fnof;' => 402,
								        '&gamma;' => 915,
								        '&iota;' => 921,
								        '&kappa;' => 922,
								        '&lambda;' => 923,
								        '&ldquo;' => 8220,
								        '&lsaquo;' => 8249,
								        '&lsquo;' => 8216,
								        '&mdash;' => 8212,
								        '&minus;' => 45,
								        '&mu;' => 924,
								        '&ndash;' => 8211,
								        '&nu;' => 925,
								        '&oelig;' => 338,
								        '&omega;' => 937,
								        '&omicron;' => 927,
								        '&phi;' => 934,
								        '&pi;' => 928,
								        '&piv;' => 982,
								        '&psi;' => 936,
								        '&rdquo;' => 8221,
								        '&rho;' => 929,
								        '&rsaquo;' => 8250,
								        '&rsquo;' => 8217,
								        '&scaron;' => 352,
								        '&sigma;' => 931,
								        '&sigmaf;' => 962,
								        '&tau;' => 932,
								        '&theta;' => 920,
								        '&thetasym;' => 977,
								        '&tilde;' => 126,
								        '&trade;' => 8482,
								        '&upsih;' => 978,
								        '&upsilon;' => 933,
								        '&xi;' => 926,
								        '&yuml;' => 376,
								        '&zeta;' => 918,
								    );

								    /**
								     * Decodes markup entities in a given string.
								     *
								     * Decoding runs in three passes: the entities listed in
								     * self::$entities, then html_entity_decode() for the given character
								     * set, then any decimal or hexadecimal numeric references that are
								     * still present. The first and last pass always emit UTF-8, whatever
								     * $charset is.
								     *
								     * @param string $string  String containing markup entities
								     * @param string $charset Optional character set name to use in decoding
								     *        entities, defaults to UTF-8
								     *
								     * @return string String with markup entities decoded
								     */
								    public function decodeEntities($string, $charset = 'UTF-8')
								    {
								        // chr() wraps its argument modulo 256, so code points are encoded
								        // through codeToUtf8() rather than chr()
								        $string = str_ireplace(
								            array_keys(self::$entities),
								            array_map(
								                array($this, 'codeToUtf8'),
								                array_values(self::$entities)
								            ),
								            $string
								        );

								        $string = html_entity_decode($string, ENT_QUOTES, $charset);

								        // Callbacks replace the /e pattern modifier, which PHP 7 removed.
								        // The 0* in each pattern drops leading zeros before the digits.
								        $passes = array(
								            '/&#0*([0-9]+);/' => 'decodeDecimalReference',
								            '/&#x0*([a-f0-9]+);/i' => 'decodeHexReference',
								        );
								        foreach ($passes as $pattern => $method) {
								            $decoded = preg_replace_callback(
								                $pattern,
								                array($this, $method),
								                $string
								            );
								            // preg_replace_callback() yields null on a PCRE error; keep
								            // what has been decoded so far in that case
								            if ($decoded !== null) {
								                $string = $decoded;
								            }
								        }

								        return $string;
								    }

								    /**
								     * Callback converting a matched decimal numeric reference.
								     *
								     * @param array $matches Match data, index 1 holds the decimal digits
								     *
								     * @return string UTF-8 character for the referenced code point
								     */
								    protected function decodeDecimalReference(array $matches)
								    {
								        return $this->codeToUtf8($matches[1]);
								    }

								    /**
								     * Callback converting a matched hexadecimal numeric reference.
								     *
								     * @param array $matches Match data, index 1 holds the hex digits
								     *
								     * @return string UTF-8 character for the referenced code point
								     */
								    protected function decodeHexReference(array $matches)
								    {
								        return $this->codeToUtf8(hexdec($matches[1]));
								    }

								    /**
								     * Converts a given unicode to its UTF-8 equivalent.
								     *
								     * @param int $code Code to convert
								     *
								     * @return string Character corresponding to code, or an empty string
								     *         when the code is negative, a surrogate (0xD800-0xDFFF) or
								     *         above 0x10FFFF and so has no valid UTF-8 encoding
								     */
								    public function codeToUtf8($code)
								    {
								        // hexdec() returns a float for very long digit strings; reject
								        // those (and NAN) before the integer cast can mangle them
								        if (is_float($code) && !($code >= 0 && $code <= 0x10FFFF)) {
								            return '';
								        }

								        $code = (int) $code;

								        if ($code < 0
								            || $code > 0x10FFFF
								            || ($code >= 0xD800 && $code <= 0xDFFF)
								        ) {
								            return '';
								        }

								        // 1 byte, 7 bits
								        if ($code <= 0x7F) {
								            return chr($code);
								        }

								        // 2 bytes, 11 bits
								        if ($code <= 0x7FF) {
								            return chr(0xC0 | ($code >> 6)) .
								                   chr(0x80 | ($code & 0x3F));
								        }

								        // 3 bytes, 16 bits
								        if ($code <= 0xFFFF) {
								            return chr(0xE0 | ($code >> 12)) .
								                   chr(0x80 | (($code >> 6) & 0x3F)) .
								                   chr(0x80 | ($code & 0x3F));
								        }

								        // 4 bytes, 21 bits
								        return chr(0xF0 | ($code >> 18)) .
								               chr(0x80 | (($code >> 12) & 0x3F)) .
								               chr(0x80 | (($code >> 6) & 0x3F)) .
								               chr(0x80 | ($code & 0x3F));
								    }

								    /**
								     * Transliterates characters in a given string where possible.
								     *
								     * Uses the first available of: the transliterate() function, iconv()
								     * with //TRANSLIT, or a regular-expression fallback that strips
								     * diacritic suffixes from the entities produced by htmlentities().
								     *
								     * @param string $string      String containing characters to
								     *        transliterate
								     * @param string $charsetFrom Optional character set of the string,
								     *        defaults to UTF-8
								     * @param string $charsetTo   Optional character set to which the string
								     *        should be converted, defaults to ISO-8859-1
								     *
								     * @return string String with characters transliterated or the original
								     *         string if transliteration was not possible
								     */
								    public function transliterate($string, $charsetFrom = 'UTF-8', $charsetTo = 'ISO-8859-1')
								    {
								        // @link http://pecl.php.net/package/translit
								        if (function_exists('transliterate')) {
								            $result = transliterate(
								                $string,
								                array('han_transliterate', 'diacritical_remove'),
								                $charsetFrom,
								                $charsetTo
								            );
								        } elseif (function_exists('iconv')) {
								            $result = iconv($charsetFrom, $charsetTo . '//TRANSLIT', $string);
								        } else {
								            // @link http://stackoverflow.com/questions/1284535/php-transliteration/1285491#1285491
								            // NOTE(review): entities htmlentities() produces that do not
								            // match the pattern below (e.g. &amp;, &lt;) stay encoded in
								            // the result, and $charsetTo is not applied on this path.
								            $result = preg_replace(
								                '~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i',
								                '$1',
								                htmlentities($string, ENT_COMPAT, $charsetFrom)
								            );
								        }

								        // iconv() returns false and preg_replace() returns null on failure;
								        // hand back the input instead, as the contract above promises
								        return is_string($result) ? $result : $string;
								    }
								}
							 |