From c9a1c091829e5791c743ee2ec05f8272fd95bfed Mon Sep 17 00:00:00 2001 From: Richard Bradley Date: Fri, 3 Feb 2017 16:27:28 +0000 Subject: [PATCH] #20411 fix Yaml parsing for very long quoted strings --- src/Symfony/Component/Yaml/Inline.php | 16 ++-- src/Symfony/Component/Yaml/Parser.php | 88 ++++++++++++------- .../Component/Yaml/Tests/ParserTest.php | 12 +++ 3 files changed, 77 insertions(+), 39 deletions(-) diff --git a/src/Symfony/Component/Yaml/Inline.php b/src/Symfony/Component/Yaml/Inline.php index aa1833745a..2255379a97 100644 --- a/src/Symfony/Component/Yaml/Inline.php +++ b/src/Symfony/Component/Yaml/Inline.php @@ -149,8 +149,8 @@ class Inline case Escaper::requiresDoubleQuoting($value): return Escaper::escapeWithDoubleQuotes($value); case Escaper::requiresSingleQuoting($value): - case preg_match(self::getHexRegex(), $value): - case preg_match(self::getTimestampRegex(), $value): + case Parser::preg_match(self::getHexRegex(), $value): + case Parser::preg_match(self::getTimestampRegex(), $value): return Escaper::escapeWithSingleQuotes($value); default: return $value; @@ -242,10 +242,10 @@ class Inline $i += strlen($output); // remove comments - if (preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) { + if (Parser::preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) { $output = substr($output, 0, $match[0][1]); } - } elseif (preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) { + } elseif (Parser::preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) { $output = $match[1]; $i += strlen($output); } else { @@ -272,7 +272,7 @@ class Inline */ private static function parseQuotedScalar($scalar, &$i) { - if (!preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) { + if (!Parser::preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) { throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i))); } @@ -520,16 +520,16 @@ class Inline return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw); case is_numeric($scalar): - case preg_match(self::getHexRegex(), $scalar): + case Parser::preg_match(self::getHexRegex(), $scalar): return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar; case '.inf' === $scalarLower: case '.nan' === $scalarLower: return -log(0); case '-.inf' === $scalarLower: return log(0); - case preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar): + case Parser::preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar): return (float) str_replace(',', '', $scalar); - case preg_match(self::getTimestampRegex(), $scalar): + case Parser::preg_match(self::getTimestampRegex(), $scalar): $timeZone = date_default_timezone_get(); date_default_timezone_set('UTC'); $time = strtotime($scalar); diff --git a/src/Symfony/Component/Yaml/Parser.php b/src/Symfony/Component/Yaml/Parser.php index f3528ba24f..c4f5144468 100644 --- a/src/Symfony/Component/Yaml/Parser.php +++ b/src/Symfony/Component/Yaml/Parser.php @@ -61,7 +61,7 @@ class Parser */ public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false) { - if (!preg_match('//u', $value)) { + if (false === preg_match('//u', $value)) { throw new ParseException('The YAML value does not appear to be valid UTF-8.'); } $this->currentLineNb = -1; @@ -92,13 +92,13 @@ class Parser } $isRef = $mergeNode = false; - if (preg_match('#^\-((?P\s+)(?P.+?))?\s*$#u', $this->currentLine, $values)) { + if (self::preg_match('#^\-((?P\s+)(?P.+))?$#u', rtrim($this->currentLine), $values)) { if ($context && 'mapping' == $context) { throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine); } $context = 'sequence'; - if (isset($values['value']) && preg_match('#^&(?P[^ ]+) *(?P.*)#u', $values['value'], $matches)) { + if (isset($values['value']) && self::preg_match('#^&(?P[^ ]+) *(?P.*)#u', $values['value'], $matches)) { $isRef = $matches['ref']; $values['value'] = $matches['value']; } @@ -108,7 +108,7 @@ class Parser $data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap); } else { if (isset($values['leadspaces']) - && preg_match('#^(?P'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P.+?))?\s*$#u', $values['value'], $matches) + && self::preg_match('#^(?P'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P.+))?$#u', rtrim($values['value']), $matches) ) { // this is a compact notation element, add to next block and parse $block = $values['value']; @@ -124,7 +124,10 @@ class Parser if ($isRef) { $this->refs[$isRef] = end($data); } - } elseif (preg_match('#^(?P'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) { + } elseif ( + self::preg_match('#^(?P'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P.+))?$#u', rtrim($this->currentLine), $values) + && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'"))) + ) { if ($context && 'sequence' == $context) { throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine); } @@ -203,7 +206,7 @@ class Parser } } } - } elseif (isset($values['value']) && preg_match('#^&(?P[^ ]+) *(?P.*)#u', $values['value'], $matches)) { + } elseif (isset($values['value']) && self::preg_match('#^&(?P[^ ]+) *(?P.*)#u', $values['value'], $matches)) { $isRef = $matches['ref']; $values['value'] = $matches['value']; } @@ -266,27 +269,7 @@ class Parser return $value; } - switch (preg_last_error()) { - case PREG_INTERNAL_ERROR: - $error = 'Internal PCRE error.'; - break; - case PREG_BACKTRACK_LIMIT_ERROR: - $error = 'pcre.backtrack_limit reached.'; - break; - case PREG_RECURSION_LIMIT_ERROR: - $error = 'pcre.recursion_limit reached.'; - break; - case PREG_BAD_UTF8_ERROR: - $error = 'Malformed UTF-8 data.'; - break; - case PREG_BAD_UTF8_OFFSET_ERROR: - $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.'; - break; - default: - $error = 'Unable to parse.'; - } - - throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine); + throw new ParseException('Unable to parse', $this->getRealCurrentLineNb() + 1, $this->currentLine); } } @@ -520,7 +503,7 @@ class Parser return $this->refs[$value]; } - if (preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) { + if (self::preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) { $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : ''; return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers)); @@ -566,7 +549,7 @@ class Parser // determine indentation if not specified if (0 === $indentation) { - if (preg_match('/^ +/', $this->currentLine, $matches)) { + if (self::preg_match('/^ +/', $this->currentLine, $matches)) { $indentation = strlen($matches[0]); } } @@ -577,7 +560,7 @@ class Parser while ( $notEOF && ( $isCurrentLineBlank || - preg_match($pattern, $this->currentLine, $matches) + self::preg_match($pattern, $this->currentLine, $matches) ) ) { if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) { @@ -800,6 +783,49 @@ class Parser */ private function isBlockScalarHeader() { - return (bool) preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine); + return (bool) self::preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine); + } + + /** + * A local wrapper for `preg_match` which will throw a ParseException if there + * is an internal error in the PCRE engine. + * + * This avoids us needing to check for "false" every time PCRE is used + * in the YAML engine + * + * @throws ParseException on a PCRE internal error + * + * @see preg_last_error() + * + * @internal + */ + public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0) + { + $ret = preg_match($pattern, $subject, $matches, $flags, $offset); + if ($ret === false) { + switch (preg_last_error()) { + case PREG_INTERNAL_ERROR: + $error = 'Internal PCRE error.'; + break; + case PREG_BACKTRACK_LIMIT_ERROR: + $error = 'pcre.backtrack_limit reached.'; + break; + case PREG_RECURSION_LIMIT_ERROR: + $error = 'pcre.recursion_limit reached.'; + break; + case PREG_BAD_UTF8_ERROR: + $error = 'Malformed UTF-8 data.'; + break; + case PREG_BAD_UTF8_OFFSET_ERROR: + $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.'; + break; + default: + $error = 'Error.'; + } + + throw new ParseException($error); + } + + return $ret; } } diff --git a/src/Symfony/Component/Yaml/Tests/ParserTest.php b/src/Symfony/Component/Yaml/Tests/ParserTest.php index e1e0d5a614..d7634556f3 100644 --- a/src/Symfony/Component/Yaml/Tests/ParserTest.php +++ b/src/Symfony/Component/Yaml/Tests/ParserTest.php @@ -16,6 +16,7 @@ use Symfony\Component\Yaml\Parser; class ParserTest extends \PHPUnit_Framework_TestCase { + /** @var Parser */ protected $parser; protected function setUp() @@ -1143,6 +1144,17 @@ YAML ), ); } + + public function testCanParseVeryLongValue() + { + $longStringWithSpaces = str_repeat('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', 20000); + $trickyVal = array('x' => $longStringWithSpaces); + + $yamlString = Yaml::dump($trickyVal); + $arrayFromYaml = $this->parser->parse($yamlString); + + $this->assertEquals($trickyVal, $arrayFromYaml); + } } class B