Lower mbstring dep, remove it for Yaml and CssSelector components

This commit is contained in:
Nicolas Grekas 2014-05-12 10:37:25 +02:00
parent 1a64fef828
commit a4b805dbf2
9 changed files with 51 additions and 64 deletions

View File

@ -54,7 +54,7 @@ class FormatterHelperTest extends \PHPUnit_Framework_TestCase
public function testFormatBlockWithDiacriticLetters() public function testFormatBlockWithDiacriticLetters()
{ {
if (!extension_loaded('mbstring')) { if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('This test requires mbstring to work.'); $this->markTestSkipped('This test requires mbstring to work.');
} }

View File

@ -65,14 +65,18 @@ class TokenizerEscaping
*/ */
private function replaceUnicodeSequences($value) private function replaceUnicodeSequences($value)
{ {
return preg_replace_callback($this->patterns->getUnicodeEscapePattern(), function (array $match) { return preg_replace_callback($this->patterns->getUnicodeEscapePattern(), function ($match) {
$code = $match[1]; $c = hexdec($match[1]);
if (bin2hex($code) > 0xFFFD) { if (0x80 > $c %= 0x200000) {
$code = '\\FFFD'; return chr($c);
}
if (0x800 > $c) {
return chr(0xC0 | $c>>6).chr(0x80 | $c & 0x3F);
}
if (0x10000 > $c) {
return chr(0xE0 | $c>>12).chr(0x80 | $c>>6 & 0x3F).chr(0x80 | $c & 0x3F);
} }
return mb_convert_encoding(pack('H*', $code), 'UTF-8', 'UCS-2BE');
}, $value); }, $value);
} }
} }

View File

@ -133,25 +133,19 @@ class NumberToLocalizedStringTransformer implements DataTransformerInterface
} }
if (function_exists('mb_detect_encoding') && false !== $encoding = mb_detect_encoding($value)) { if (function_exists('mb_detect_encoding') && false !== $encoding = mb_detect_encoding($value)) {
$strlen = function ($string) use ($encoding) { $length = mb_strlen($value, $encoding);
return mb_strlen($string, $encoding); $remainder = mb_substr($value, $position, $length, $encoding);
};
$substr = function ($string, $offset, $length) use ($encoding) {
return mb_substr($string, $offset, $length, $encoding);
};
} else { } else {
$strlen = 'strlen'; $length = strlen($value);
$substr = 'substr'; $remainder = substr($value, $position, $length);
} }
$length = $strlen($value);
// After parsing, position holds the index of the character where the // After parsing, position holds the index of the character where the
// parsing stopped // parsing stopped
if ($position < $length) { if ($position < $length) {
// Check if there are unrecognized characters at the end of the // Check if there are unrecognized characters at the end of the
// number (excluding whitespace characters) // number (excluding whitespace characters)
$remainder = trim($substr($value, $position, $length), " \t\n\r\0\x0b\xc2\xa0"); $remainder = trim($remainder, " \t\n\r\0\x0b\xc2\xa0");
if ('' !== $remainder) { if ('' !== $remainder) {
throw new TransformationFailedException( throw new TransformationFailedException(

View File

@ -119,7 +119,7 @@ class NumberToLocalizedStringTransformerTest extends \PHPUnit_Framework_TestCase
// https://github.com/symfony/symfony/issues/7609 // https://github.com/symfony/symfony/issues/7609
public function testReverseTransformWithGroupingAndFixedSpaces() public function testReverseTransformWithGroupingAndFixedSpaces()
{ {
if (!extension_loaded('mbstring')) { if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('The "mbstring" extension is required for this test.'); $this->markTestSkipped('The "mbstring" extension is required for this test.');
} }
@ -335,7 +335,7 @@ class NumberToLocalizedStringTransformerTest extends \PHPUnit_Framework_TestCase
*/ */
public function testReverseTransformDisallowsCenteredExtraCharactersMultibyte() public function testReverseTransformDisallowsCenteredExtraCharactersMultibyte()
{ {
if (!extension_loaded('mbstring')) { if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('The "mbstring" extension is required for this test.'); $this->markTestSkipped('The "mbstring" extension is required for this test.');
} }
@ -352,7 +352,7 @@ class NumberToLocalizedStringTransformerTest extends \PHPUnit_Framework_TestCase
*/ */
public function testReverseTransformIgnoresTrailingSpacesInExceptionMessage() public function testReverseTransformIgnoresTrailingSpacesInExceptionMessage()
{ {
if (!extension_loaded('mbstring')) { if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('The "mbstring" extension is required for this test.'); $this->markTestSkipped('The "mbstring" extension is required for this test.');
} }
@ -380,7 +380,7 @@ class NumberToLocalizedStringTransformerTest extends \PHPUnit_Framework_TestCase
*/ */
public function testReverseTransformDisallowsTrailingExtraCharactersMultibyte() public function testReverseTransformDisallowsTrailingExtraCharactersMultibyte()
{ {
if (!extension_loaded('mbstring')) { if (!function_exists('mb_detect_encoding')) {
$this->markTestSkipped('The "mbstring" extension is required for this test.'); $this->markTestSkipped('The "mbstring" extension is required for this test.');
} }

View File

@ -52,8 +52,8 @@ class TrimListenerTest extends \PHPUnit_Framework_TestCase
*/ */
public function testTrimUtf8Separators($hex) public function testTrimUtf8Separators($hex)
{ {
if (!function_exists('mb_check_encoding')) { if (!function_exists('mb_convert_encoding')) {
$this->markTestSkipped('The "mb_check_encoding" function is not available'); $this->markTestSkipped('The "mb_convert_encoding" function is not available');
} }
// Convert hexadecimal representation into binary // Convert hexadecimal representation into binary

View File

@ -18,7 +18,7 @@ class IcuResFileDumperTest extends \PHPUnit_Framework_TestCase
{ {
public function testDump() public function testDump()
{ {
if (!extension_loaded('mbstring')) { if (!function_exists('mb_convert_encoding')) {
$this->markTestSkipped('This test requires mbstring to work.'); $this->markTestSkipped('This test requires mbstring to work.');
} }

View File

@ -55,7 +55,7 @@ class Parser
$this->currentLine = ''; $this->currentLine = '';
$this->lines = explode("\n", $this->cleanup($value)); $this->lines = explode("\n", $this->cleanup($value));
if (function_exists('mb_detect_encoding') && false === mb_detect_encoding($value, 'UTF-8', true)) { if (!preg_match('//u', $value)) {
throw new ParseException('The YAML value does not appear to be valid UTF-8.'); throw new ParseException('The YAML value does not appear to be valid UTF-8.');
} }

View File

@ -33,12 +33,6 @@ class ParserTest extends \PHPUnit_Framework_TestCase
*/ */
public function testSpecifications($file, $expected, $yaml, $comment) public function testSpecifications($file, $expected, $yaml, $comment)
{ {
if ('escapedCharacters' == $file) {
if (!function_exists('iconv') && !function_exists('mb_convert_encoding')) {
$this->markTestSkipped('The iconv and mbstring extensions are not available.');
}
}
$this->assertEquals($expected, var_export($this->parser->parse($yaml), true), $comment); $this->assertEquals($expected, var_export($this->parser->parse($yaml), true), $comment);
} }
@ -446,8 +440,8 @@ EOF;
public function testNonUtf8Exception() public function testNonUtf8Exception()
{ {
if (!function_exists('mb_detect_encoding') || !function_exists('iconv')) { if (!function_exists('iconv')) {
$this->markTestSkipped('Exceptions for non-utf8 charsets require the mb_detect_encoding() and iconv() functions.'); $this->markTestSkipped('Exceptions for non-utf8 charsets require the iconv() function.');
return; return;
} }

View File

@ -21,6 +21,7 @@ class Unescaper
{ {
// Parser and Inline assume UTF-8 encoding, so escaped Unicode characters // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
// must be converted to that encoding. // must be converted to that encoding.
// @deprecated since 2.5, to be removed in 3.0
const ENCODING = 'UTF-8'; const ENCODING = 'UTF-8';
// Regex fragment that matches an escaped character in a double quoted // Regex fragment that matches an escaped character in a double quoted
@ -80,13 +81,13 @@ class Unescaper
case 'n': case 'n':
return "\n"; return "\n";
case 'v': case 'v':
return "\xb"; return "\xB";
case 'f': case 'f':
return "\xc"; return "\xC";
case 'r': case 'r':
return "\xd"; return "\r";
case 'e': case 'e':
return "\x1b"; return "\x1B";
case ' ': case ' ':
return ' '; return ' ';
case '"': case '"':
@ -97,50 +98,44 @@ class Unescaper
return '\\'; return '\\';
case 'N': case 'N':
// U+0085 NEXT LINE // U+0085 NEXT LINE
return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE'); return "\xC2\x85";
case '_': case '_':
// U+00A0 NO-BREAK SPACE // U+00A0 NO-BREAK SPACE
return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE'); return "\xC2\xA0";
case 'L': case 'L':
// U+2028 LINE SEPARATOR // U+2028 LINE SEPARATOR
return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE'); return "\xE2\x80\xA8";
case 'P': case 'P':
// U+2029 PARAGRAPH SEPARATOR // U+2029 PARAGRAPH SEPARATOR
return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE'); return "\xE2\x80\xA9";
case 'x': case 'x':
$char = pack('n', hexdec(substr($value, 2, 2))); return self::utf8chr(hexdec(substr($value, 2, 2)));
return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
case 'u': case 'u':
$char = pack('n', hexdec(substr($value, 2, 4))); return self::utf8chr(hexdec(substr($value, 2, 4)));
return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
case 'U': case 'U':
$char = pack('N', hexdec(substr($value, 2, 8))); return self::utf8chr(hexdec(substr($value, 2, 8)));
return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
} }
} }
/** /**
* Convert a string from one encoding to another. * Get the UTF-8 character for the given code point.
* *
* @param string $value The string to convert * @param int $c The unicode code point
* @param string $to The input encoding
* @param string $from The output encoding
* *
* @return string The string with the new encoding * @return string The corresponding UTF-8 character
*
* @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
*/ */
private function convertEncoding($value, $to, $from) private static function utf8chr($c)
{ {
if (function_exists('mb_convert_encoding')) { if (0x80 > $c %= 0x200000) {
return mb_convert_encoding($value, $to, $from); return chr($c);
} elseif (function_exists('iconv')) { }
return iconv($from, $to, $value); if (0x800 > $c) {
return chr(0xC0 | $c>>6).chr(0x80 | $c & 0x3F);
}
if (0x10000 > $c) {
return chr(0xE0 | $c>>12).chr(0x80 | $c>>6 & 0x3F).chr(0x80 | $c & 0x3F);
} }
throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).'); return chr(0xF0 | $c>>18).chr(0x80 | $c>>12 & 0x3F).chr(0x80 | $c>>6 & 0x3F).chr(0x80 | $c & 0x3F);
} }
} }