Lower mbstring dep, remove it for Yaml and CssSelector components

This commit is contained in:
Nicolas Grekas 2014-05-12 10:37:25 +02:00
parent 1a64fef828
commit a4b805dbf2
9 changed files with 51 additions and 64 deletions

View File

@ -54,7 +54,7 @@ class FormatterHelperTest extends \PHPUnit_Framework_TestCase
public function testFormatBlockWithDiacriticLetters()
{
if (!extension_loaded('mbstring')) {
if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('This test requires mbstring to work.');
}

View File

@ -65,14 +65,18 @@ class TokenizerEscaping
*/
private function replaceUnicodeSequences($value)
{
return preg_replace_callback($this->patterns->getUnicodeEscapePattern(), function (array $match) {
$code = $match[1];
return preg_replace_callback($this->patterns->getUnicodeEscapePattern(), function ($match) {
$c = hexdec($match[1]);
if (bin2hex($code) > 0xFFFD) {
$code = '\\FFFD';
if (0x80 > $c %= 0x200000) {
return chr($c);
}
if (0x800 > $c) {
return chr(0xC0 | $c>>6).chr(0x80 | $c & 0x3F);
}
if (0x10000 > $c) {
return chr(0xE0 | $c>>12).chr(0x80 | $c>>6 & 0x3F).chr(0x80 | $c & 0x3F);
}
return mb_convert_encoding(pack('H*', $code), 'UTF-8', 'UCS-2BE');
}, $value);
}
}

View File

@ -133,25 +133,19 @@ class NumberToLocalizedStringTransformer implements DataTransformerInterface
}
if (function_exists('mb_detect_encoding') && false !== $encoding = mb_detect_encoding($value)) {
$strlen = function ($string) use ($encoding) {
return mb_strlen($string, $encoding);
};
$substr = function ($string, $offset, $length) use ($encoding) {
return mb_substr($string, $offset, $length, $encoding);
};
$length = mb_strlen($value, $encoding);
$remainder = mb_substr($value, $position, $length, $encoding);
} else {
$strlen = 'strlen';
$substr = 'substr';
$length = strlen($value);
$remainder = substr($value, $position, $length);
}
$length = $strlen($value);
// After parsing, position holds the index of the character where the
// parsing stopped
if ($position < $length) {
// Check if there are unrecognized characters at the end of the
// number (excluding whitespace characters)
$remainder = trim($substr($value, $position, $length), " \t\n\r\0\x0b\xc2\xa0");
$remainder = trim($remainder, " \t\n\r\0\x0b\xc2\xa0");
if ('' !== $remainder) {
throw new TransformationFailedException(

View File

@ -119,7 +119,7 @@ class NumberToLocalizedStringTransformerTest extends \PHPUnit_Framework_TestCase
// https://github.com/symfony/symfony/issues/7609
public function testReverseTransformWithGroupingAndFixedSpaces()
{
if (!extension_loaded('mbstring')) {
if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('The "mbstring" extension is required for this test.');
}
@ -335,7 +335,7 @@ class NumberToLocalizedStringTransformerTest extends \PHPUnit_Framework_TestCase
*/
public function testReverseTransformDisallowsCenteredExtraCharactersMultibyte()
{
if (!extension_loaded('mbstring')) {
if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('The "mbstring" extension is required for this test.');
}
@ -352,7 +352,7 @@ class NumberToLocalizedStringTransformerTest extends \PHPUnit_Framework_TestCase
*/
public function testReverseTransformIgnoresTrailingSpacesInExceptionMessage()
{
if (!extension_loaded('mbstring')) {
if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('The "mbstring" extension is required for this test.');
}
@ -380,7 +380,7 @@ class NumberToLocalizedStringTransformerTest extends \PHPUnit_Framework_TestCase
*/
public function testReverseTransformDisallowsTrailingExtraCharactersMultibyte()
{
if (!extension_loaded('mbstring')) {
if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('The "mbstring" extension is required for this test.');
}

View File

@ -52,8 +52,8 @@ class TrimListenerTest extends \PHPUnit_Framework_TestCase
*/
public function testTrimUtf8Separators($hex)
{
if (!function_exists('mb_check_encoding')) {
$this->markTestSkipped('The "mb_check_encoding" function is not available');
if (!function_exists('mb_convert_encoding')) {
$this->markTestSkipped('The "mb_convert_encoding" function is not available');
}
// Convert hexadecimal representation into binary

View File

@ -18,7 +18,7 @@ class IcuResFileDumperTest extends \PHPUnit_Framework_TestCase
{
public function testDump()
{
if (!extension_loaded('mbstring')) {
if (!function_exists('mb_convert_encoding')) {
$this->markTestSkipped('This test requires mbstring to work.');
}

View File

@ -55,7 +55,7 @@ class Parser
$this->currentLine = '';
$this->lines = explode("\n", $this->cleanup($value));
if (function_exists('mb_detect_encoding') && false === mb_detect_encoding($value, 'UTF-8', true)) {
if (!preg_match('//u', $value)) {
throw new ParseException('The YAML value does not appear to be valid UTF-8.');
}

View File

@ -33,12 +33,6 @@ class ParserTest extends \PHPUnit_Framework_TestCase
*/
public function testSpecifications($file, $expected, $yaml, $comment)
{
if ('escapedCharacters' == $file) {
if (!function_exists('iconv') && !function_exists('mb_convert_encoding')) {
$this->markTestSkipped('The iconv and mbstring extensions are not available.');
}
}
$this->assertEquals($expected, var_export($this->parser->parse($yaml), true), $comment);
}
@ -446,8 +440,8 @@ EOF;
public function testNonUtf8Exception()
{
if (!function_exists('mb_detect_encoding') || !function_exists('iconv')) {
$this->markTestSkipped('Exceptions for non-utf8 charsets require the mb_detect_encoding() and iconv() functions.');
if (!function_exists('iconv')) {
$this->markTestSkipped('Exceptions for non-utf8 charsets require the iconv() function.');
return;
}

View File

@ -21,6 +21,7 @@ class Unescaper
{
// Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
// must be converted to that encoding.
// @deprecated since 2.5, to be removed in 3.0
const ENCODING = 'UTF-8';
// Regex fragment that matches an escaped character in a double quoted
@ -80,13 +81,13 @@ class Unescaper
case 'n':
return "\n";
case 'v':
return "\xb";
return "\xB";
case 'f':
return "\xc";
return "\xC";
case 'r':
return "\xd";
return "\r";
case 'e':
return "\x1b";
return "\x1B";
case ' ':
return ' ';
case '"':
@ -97,50 +98,44 @@ class Unescaper
return '\\';
case 'N':
// U+0085 NEXT LINE
return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE');
return "\xC2\x85";
case '_':
// U+00A0 NO-BREAK SPACE
return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE');
return "\xC2\xA0";
case 'L':
// U+2028 LINE SEPARATOR
return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE');
return "\xE2\x80\xA8";
case 'P':
// U+2029 PARAGRAPH SEPARATOR
return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE');
return "\xE2\x80\xA9";
case 'x':
$char = pack('n', hexdec(substr($value, 2, 2)));
return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
return self::utf8chr(hexdec(substr($value, 2, 2)));
case 'u':
$char = pack('n', hexdec(substr($value, 2, 4)));
return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
return self::utf8chr(hexdec(substr($value, 2, 4)));
case 'U':
$char = pack('N', hexdec(substr($value, 2, 8)));
return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
return self::utf8chr(hexdec(substr($value, 2, 8)));
}
}
/**
* Convert a string from one encoding to another.
* Get the UTF-8 character for the given code point.
*
* @param string $value The string to convert
* @param string $to The input encoding
* @param string $from The output encoding
* @param int $c The unicode code point
*
* @return string The string with the new encoding
*
* @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
* @return string The corresponding UTF-8 character
*/
private function convertEncoding($value, $to, $from)
private static function utf8chr($c)
{
if (function_exists('mb_convert_encoding')) {
return mb_convert_encoding($value, $to, $from);
} elseif (function_exists('iconv')) {
return iconv($from, $to, $value);
if (0x80 > $c %= 0x200000) {
return chr($c);
}
if (0x800 > $c) {
return chr(0xC0 | $c>>6).chr(0x80 | $c & 0x3F);
}
if (0x10000 > $c) {
return chr(0xE0 | $c>>12).chr(0x80 | $c>>6 & 0x3F).chr(0x80 | $c & 0x3F);
}
throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
return chr(0xF0 | $c>>18).chr(0x80 | $c>>12 & 0x3F).chr(0x80 | $c>>6 & 0x3F).chr(0x80 | $c & 0x3F);
}
}