#20411 fix Yaml parsing for very long quoted strings

This commit is contained in:
Richard Bradley 2017-02-03 16:27:28 +00:00 committed by Fabien Potencier
parent 01a0250d49
commit c9a1c09182
3 changed files with 77 additions and 39 deletions

View File

@ -149,8 +149,8 @@ class Inline
case Escaper::requiresDoubleQuoting($value): case Escaper::requiresDoubleQuoting($value):
return Escaper::escapeWithDoubleQuotes($value); return Escaper::escapeWithDoubleQuotes($value);
case Escaper::requiresSingleQuoting($value): case Escaper::requiresSingleQuoting($value):
case preg_match(self::getHexRegex(), $value): case Parser::preg_match(self::getHexRegex(), $value):
case preg_match(self::getTimestampRegex(), $value): case Parser::preg_match(self::getTimestampRegex(), $value):
return Escaper::escapeWithSingleQuotes($value); return Escaper::escapeWithSingleQuotes($value);
default: default:
return $value; return $value;
@ -242,10 +242,10 @@ class Inline
$i += strlen($output); $i += strlen($output);
// remove comments // remove comments
if (preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) { if (Parser::preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
$output = substr($output, 0, $match[0][1]); $output = substr($output, 0, $match[0][1]);
} }
} elseif (preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) { } elseif (Parser::preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
$output = $match[1]; $output = $match[1];
$i += strlen($output); $i += strlen($output);
} else { } else {
@ -272,7 +272,7 @@ class Inline
*/ */
private static function parseQuotedScalar($scalar, &$i) private static function parseQuotedScalar($scalar, &$i)
{ {
if (!preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) { if (!Parser::preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i))); throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)));
} }
@ -520,16 +520,16 @@ class Inline
return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw); return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw);
case is_numeric($scalar): case is_numeric($scalar):
case preg_match(self::getHexRegex(), $scalar): case Parser::preg_match(self::getHexRegex(), $scalar):
return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar; return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar;
case '.inf' === $scalarLower: case '.inf' === $scalarLower:
case '.nan' === $scalarLower: case '.nan' === $scalarLower:
return -log(0); return -log(0);
case '-.inf' === $scalarLower: case '-.inf' === $scalarLower:
return log(0); return log(0);
case preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar): case Parser::preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
return (float) str_replace(',', '', $scalar); return (float) str_replace(',', '', $scalar);
case preg_match(self::getTimestampRegex(), $scalar): case Parser::preg_match(self::getTimestampRegex(), $scalar):
$timeZone = date_default_timezone_get(); $timeZone = date_default_timezone_get();
date_default_timezone_set('UTC'); date_default_timezone_set('UTC');
$time = strtotime($scalar); $time = strtotime($scalar);

View File

@ -61,7 +61,7 @@ class Parser
*/ */
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false) public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false)
{ {
if (!preg_match('//u', $value)) { if (false === preg_match('//u', $value)) {
throw new ParseException('The YAML value does not appear to be valid UTF-8.'); throw new ParseException('The YAML value does not appear to be valid UTF-8.');
} }
$this->currentLineNb = -1; $this->currentLineNb = -1;
@ -92,13 +92,13 @@ class Parser
} }
$isRef = $mergeNode = false; $isRef = $mergeNode = false;
if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) { if (self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
if ($context && 'mapping' == $context) { if ($context && 'mapping' == $context) {
throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine); throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine);
} }
$context = 'sequence'; $context = 'sequence';
if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) { if (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
$isRef = $matches['ref']; $isRef = $matches['ref'];
$values['value'] = $matches['value']; $values['value'] = $matches['value'];
} }
@ -108,7 +108,7 @@ class Parser
$data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap); $data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap);
} else { } else {
if (isset($values['leadspaces']) if (isset($values['leadspaces'])
&& preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches) && self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($values['value']), $matches)
) { ) {
// this is a compact notation element, add to next block and parse // this is a compact notation element, add to next block and parse
$block = $values['value']; $block = $values['value'];
@ -124,7 +124,10 @@ class Parser
if ($isRef) { if ($isRef) {
$this->refs[$isRef] = end($data); $this->refs[$isRef] = end($data);
} }
} elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) { } elseif (
self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
&& (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))
) {
if ($context && 'sequence' == $context) { if ($context && 'sequence' == $context) {
throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine); throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine);
} }
@ -203,7 +206,7 @@ class Parser
} }
} }
} }
} elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) { } elseif (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
$isRef = $matches['ref']; $isRef = $matches['ref'];
$values['value'] = $matches['value']; $values['value'] = $matches['value'];
} }
@ -266,27 +269,7 @@ class Parser
return $value; return $value;
} }
switch (preg_last_error()) { throw new ParseException('Unable to parse', $this->getRealCurrentLineNb() + 1, $this->currentLine);
case PREG_INTERNAL_ERROR:
$error = 'Internal PCRE error.';
break;
case PREG_BACKTRACK_LIMIT_ERROR:
$error = 'pcre.backtrack_limit reached.';
break;
case PREG_RECURSION_LIMIT_ERROR:
$error = 'pcre.recursion_limit reached.';
break;
case PREG_BAD_UTF8_ERROR:
$error = 'Malformed UTF-8 data.';
break;
case PREG_BAD_UTF8_OFFSET_ERROR:
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
break;
default:
$error = 'Unable to parse.';
}
throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
} }
} }
@ -520,7 +503,7 @@ class Parser
return $this->refs[$value]; return $this->refs[$value];
} }
if (preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) { if (self::preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
$modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : ''; $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';
return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers)); return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers));
@ -566,7 +549,7 @@ class Parser
// determine indentation if not specified // determine indentation if not specified
if (0 === $indentation) { if (0 === $indentation) {
if (preg_match('/^ +/', $this->currentLine, $matches)) { if (self::preg_match('/^ +/', $this->currentLine, $matches)) {
$indentation = strlen($matches[0]); $indentation = strlen($matches[0]);
} }
} }
@ -577,7 +560,7 @@ class Parser
while ( while (
$notEOF && ( $notEOF && (
$isCurrentLineBlank || $isCurrentLineBlank ||
preg_match($pattern, $this->currentLine, $matches) self::preg_match($pattern, $this->currentLine, $matches)
) )
) { ) {
if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) { if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) {
@ -800,6 +783,49 @@ class Parser
*/ */
private function isBlockScalarHeader() private function isBlockScalarHeader()
{ {
// Tests $this->currentLine against BLOCK_SCALAR_HEADER_PATTERN, anchored only at the
// end of the line ('$'), and casts the match result to a boolean. The right-hand
// (post-change) call goes through self::preg_match(), which throws a ParseException
// when the PCRE engine itself fails instead of yielding false.
return (bool) preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine); return (bool) self::preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
}
/**
* Local wrapper around PHP's native `preg_match()` that turns a PCRE engine
* failure into a ParseException.
*
* All arguments are forwarded unchanged to `preg_match()`. When the native call
* returns false (an engine error, as opposed to "no match"), the code reported
* by `preg_last_error()` is translated into a short message and thrown.
* This spares callers in the YAML engine from checking for `false` after every
* PCRE call.
*
* @param string     $pattern The regular expression, passed through as-is
* @param string     $subject The string to match against, passed through as-is
* @param array|null $matches Passed by reference to `preg_match()`, which fills it with the match results
* @param int        $flags   Flags forwarded to `preg_match()` (e.g. PREG_OFFSET_CAPTURE)
* @param int        $offset  Start offset forwarded to `preg_match()`
*
* @return int The value returned by `preg_match()`; never false, since that case throws
*
* @throws ParseException on a PCRE internal error
*
* @see preg_last_error()
*
* @internal
*/
public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0)
{
$ret = preg_match($pattern, $subject, $matches, $flags, $offset);
// Strict comparison: 0 ("no match") is a valid result and must not be treated as an error.
if ($ret === false) {
// Map the PCRE error code to a human-readable message.
switch (preg_last_error()) {
case PREG_INTERNAL_ERROR:
$error = 'Internal PCRE error.';
break;
case PREG_BACKTRACK_LIMIT_ERROR:
$error = 'pcre.backtrack_limit reached.';
break;
case PREG_RECURSION_LIMIT_ERROR:
$error = 'pcre.recursion_limit reached.';
break;
case PREG_BAD_UTF8_ERROR:
$error = 'Malformed UTF-8 data.';
break;
case PREG_BAD_UTF8_OFFSET_ERROR:
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
break;
default:
// Any error code not listed above falls back to a generic message.
$error = 'Error.';
}
// Static context: no line number or snippet is attached to the exception here.
throw new ParseException($error);
}
return $ret;
} }
} }

View File

@ -16,6 +16,7 @@ use Symfony\Component\Yaml\Parser;
class ParserTest extends \PHPUnit_Framework_TestCase class ParserTest extends \PHPUnit_Framework_TestCase
{ {
/** @var Parser */
protected $parser; protected $parser;
protected function setUp() protected function setUp()
@ -1143,6 +1144,17 @@ YAML
), ),
); );
} }
public function testCanParseVeryLongValue()
{
    // Round-trip check: a mapping holding one very long scalar (20000 repetitions
    // of a space-terminated run of x's) is dumped to YAML and parsed back, and the
    // parsed result must equal the input.
    $expected = array(
        'x' => str_repeat('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', 20000),
    );

    $parsed = $this->parser->parse(Yaml::dump($expected));

    $this->assertEquals($expected, $parsed);
}
} }
class B class B