#20411 fix Yaml parsing for very long quoted strings
This commit is contained in:
parent
01a0250d49
commit
c9a1c09182
@ -149,8 +149,8 @@ class Inline
|
|||||||
case Escaper::requiresDoubleQuoting($value):
|
case Escaper::requiresDoubleQuoting($value):
|
||||||
return Escaper::escapeWithDoubleQuotes($value);
|
return Escaper::escapeWithDoubleQuotes($value);
|
||||||
case Escaper::requiresSingleQuoting($value):
|
case Escaper::requiresSingleQuoting($value):
|
||||||
case preg_match(self::getHexRegex(), $value):
|
case Parser::preg_match(self::getHexRegex(), $value):
|
||||||
case preg_match(self::getTimestampRegex(), $value):
|
case Parser::preg_match(self::getTimestampRegex(), $value):
|
||||||
return Escaper::escapeWithSingleQuotes($value);
|
return Escaper::escapeWithSingleQuotes($value);
|
||||||
default:
|
default:
|
||||||
return $value;
|
return $value;
|
||||||
@ -242,10 +242,10 @@ class Inline
|
|||||||
$i += strlen($output);
|
$i += strlen($output);
|
||||||
|
|
||||||
// remove comments
|
// remove comments
|
||||||
if (preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
|
if (Parser::preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
|
||||||
$output = substr($output, 0, $match[0][1]);
|
$output = substr($output, 0, $match[0][1]);
|
||||||
}
|
}
|
||||||
} elseif (preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
|
} elseif (Parser::preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
|
||||||
$output = $match[1];
|
$output = $match[1];
|
||||||
$i += strlen($output);
|
$i += strlen($output);
|
||||||
} else {
|
} else {
|
||||||
@ -272,7 +272,7 @@ class Inline
|
|||||||
*/
|
*/
|
||||||
private static function parseQuotedScalar($scalar, &$i)
|
private static function parseQuotedScalar($scalar, &$i)
|
||||||
{
|
{
|
||||||
if (!preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
|
if (!Parser::preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
|
||||||
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)));
|
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -520,16 +520,16 @@ class Inline
|
|||||||
|
|
||||||
return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw);
|
return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw);
|
||||||
case is_numeric($scalar):
|
case is_numeric($scalar):
|
||||||
case preg_match(self::getHexRegex(), $scalar):
|
case Parser::preg_match(self::getHexRegex(), $scalar):
|
||||||
return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar;
|
return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar;
|
||||||
case '.inf' === $scalarLower:
|
case '.inf' === $scalarLower:
|
||||||
case '.nan' === $scalarLower:
|
case '.nan' === $scalarLower:
|
||||||
return -log(0);
|
return -log(0);
|
||||||
case '-.inf' === $scalarLower:
|
case '-.inf' === $scalarLower:
|
||||||
return log(0);
|
return log(0);
|
||||||
case preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
|
case Parser::preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
|
||||||
return (float) str_replace(',', '', $scalar);
|
return (float) str_replace(',', '', $scalar);
|
||||||
case preg_match(self::getTimestampRegex(), $scalar):
|
case Parser::preg_match(self::getTimestampRegex(), $scalar):
|
||||||
$timeZone = date_default_timezone_get();
|
$timeZone = date_default_timezone_get();
|
||||||
date_default_timezone_set('UTC');
|
date_default_timezone_set('UTC');
|
||||||
$time = strtotime($scalar);
|
$time = strtotime($scalar);
|
||||||
|
@ -61,7 +61,7 @@ class Parser
|
|||||||
*/
|
*/
|
||||||
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false)
|
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false)
|
||||||
{
|
{
|
||||||
if (!preg_match('//u', $value)) {
|
if (false === preg_match('//u', $value)) {
|
||||||
throw new ParseException('The YAML value does not appear to be valid UTF-8.');
|
throw new ParseException('The YAML value does not appear to be valid UTF-8.');
|
||||||
}
|
}
|
||||||
$this->currentLineNb = -1;
|
$this->currentLineNb = -1;
|
||||||
@ -92,13 +92,13 @@ class Parser
|
|||||||
}
|
}
|
||||||
|
|
||||||
$isRef = $mergeNode = false;
|
$isRef = $mergeNode = false;
|
||||||
if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
|
if (self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
|
||||||
if ($context && 'mapping' == $context) {
|
if ($context && 'mapping' == $context) {
|
||||||
throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine);
|
throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine);
|
||||||
}
|
}
|
||||||
$context = 'sequence';
|
$context = 'sequence';
|
||||||
|
|
||||||
if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
|
if (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
|
||||||
$isRef = $matches['ref'];
|
$isRef = $matches['ref'];
|
||||||
$values['value'] = $matches['value'];
|
$values['value'] = $matches['value'];
|
||||||
}
|
}
|
||||||
@ -108,7 +108,7 @@ class Parser
|
|||||||
$data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap);
|
$data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap);
|
||||||
} else {
|
} else {
|
||||||
if (isset($values['leadspaces'])
|
if (isset($values['leadspaces'])
|
||||||
&& preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
|
&& self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($values['value']), $matches)
|
||||||
) {
|
) {
|
||||||
// this is a compact notation element, add to next block and parse
|
// this is a compact notation element, add to next block and parse
|
||||||
$block = $values['value'];
|
$block = $values['value'];
|
||||||
@ -124,7 +124,10 @@ class Parser
|
|||||||
if ($isRef) {
|
if ($isRef) {
|
||||||
$this->refs[$isRef] = end($data);
|
$this->refs[$isRef] = end($data);
|
||||||
}
|
}
|
||||||
} elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) {
|
} elseif (
|
||||||
|
self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
|
||||||
|
&& (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))
|
||||||
|
) {
|
||||||
if ($context && 'sequence' == $context) {
|
if ($context && 'sequence' == $context) {
|
||||||
throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine);
|
throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine);
|
||||||
}
|
}
|
||||||
@ -203,7 +206,7 @@ class Parser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
|
} elseif (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
|
||||||
$isRef = $matches['ref'];
|
$isRef = $matches['ref'];
|
||||||
$values['value'] = $matches['value'];
|
$values['value'] = $matches['value'];
|
||||||
}
|
}
|
||||||
@ -266,27 +269,7 @@ class Parser
|
|||||||
return $value;
|
return $value;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (preg_last_error()) {
|
throw new ParseException('Unable to parse', $this->getRealCurrentLineNb() + 1, $this->currentLine);
|
||||||
case PREG_INTERNAL_ERROR:
|
|
||||||
$error = 'Internal PCRE error.';
|
|
||||||
break;
|
|
||||||
case PREG_BACKTRACK_LIMIT_ERROR:
|
|
||||||
$error = 'pcre.backtrack_limit reached.';
|
|
||||||
break;
|
|
||||||
case PREG_RECURSION_LIMIT_ERROR:
|
|
||||||
$error = 'pcre.recursion_limit reached.';
|
|
||||||
break;
|
|
||||||
case PREG_BAD_UTF8_ERROR:
|
|
||||||
$error = 'Malformed UTF-8 data.';
|
|
||||||
break;
|
|
||||||
case PREG_BAD_UTF8_OFFSET_ERROR:
|
|
||||||
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
$error = 'Unable to parse.';
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -520,7 +503,7 @@ class Parser
|
|||||||
return $this->refs[$value];
|
return $this->refs[$value];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
|
if (self::preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
|
||||||
$modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';
|
$modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';
|
||||||
|
|
||||||
return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers));
|
return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers));
|
||||||
@ -566,7 +549,7 @@ class Parser
|
|||||||
|
|
||||||
// determine indentation if not specified
|
// determine indentation if not specified
|
||||||
if (0 === $indentation) {
|
if (0 === $indentation) {
|
||||||
if (preg_match('/^ +/', $this->currentLine, $matches)) {
|
if (self::preg_match('/^ +/', $this->currentLine, $matches)) {
|
||||||
$indentation = strlen($matches[0]);
|
$indentation = strlen($matches[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -577,7 +560,7 @@ class Parser
|
|||||||
while (
|
while (
|
||||||
$notEOF && (
|
$notEOF && (
|
||||||
$isCurrentLineBlank ||
|
$isCurrentLineBlank ||
|
||||||
preg_match($pattern, $this->currentLine, $matches)
|
self::preg_match($pattern, $this->currentLine, $matches)
|
||||||
)
|
)
|
||||||
) {
|
) {
|
||||||
if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) {
|
if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) {
|
||||||
@ -800,6 +783,49 @@ class Parser
|
|||||||
*/
|
*/
|
||||||
private function isBlockScalarHeader()
{
    // Anchor the block scalar header pattern to the end of the current line.
    $headerAtLineEnd = '~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~';

    // The wrapper throws on a PCRE failure, so the result here is a plain match count.
    return (bool) self::preg_match($headerAtLineEnd, $this->currentLine);
}
|
||||||
|
|
||||||
|
/**
 * Runs `preg_match` and turns a PCRE engine failure into a ParseException.
 *
 * Callers can use the returned match count directly instead of comparing
 * the result against "false" after every match performed by the YAML engine.
 *
 * @param string     $pattern The regular expression to apply
 * @param string     $subject The string to match against
 * @param array|null $matches Filled with the match results, as with `preg_match`
 * @param int        $flags   Flags forwarded unchanged to `preg_match`
 * @param int        $offset  Start offset forwarded unchanged to `preg_match`
 *
 * @return int Whatever `preg_match` returned when it did not fail
 *
 * @throws ParseException on a PCRE internal error
 *
 * @see preg_last_error()
 *
 * @internal
 */
public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0)
{
    $result = preg_match($pattern, $subject, $matches, $flags, $offset);

    // Anything other than a strict "false" is a regular outcome: hand it back untouched.
    if (false !== $result) {
        return $result;
    }

    $code = preg_last_error();

    // Known PCRE error codes and the message reported for each of them.
    $messages = array(
        PREG_INTERNAL_ERROR => 'Internal PCRE error.',
        PREG_BACKTRACK_LIMIT_ERROR => 'pcre.backtrack_limit reached.',
        PREG_RECURSION_LIMIT_ERROR => 'pcre.recursion_limit reached.',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data.',
        PREG_BAD_UTF8_OFFSET_ERROR => 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.',
    );

    // Any code missing from the table falls back to the generic message.
    throw new ParseException(isset($messages[$code]) ? $messages[$code] : 'Error.');
}
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@ use Symfony\Component\Yaml\Parser;
|
|||||||
|
|
||||||
class ParserTest extends \PHPUnit_Framework_TestCase
|
class ParserTest extends \PHPUnit_Framework_TestCase
|
||||||
{
|
{
|
||||||
|
/** @var Parser */
|
||||||
protected $parser;
|
protected $parser;
|
||||||
|
|
||||||
protected function setUp()
|
protected function setUp()
|
||||||
@ -1143,6 +1144,17 @@ YAML
|
|||||||
),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testCanParseVeryLongValue()
{
    // A single mapping whose value is one very long string containing many spaces.
    $expected = array('x' => str_repeat('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', 20000));

    // Round trip: dump the array to YAML, then feed that YAML back to the parser.
    $parsed = $this->parser->parse(Yaml::dump($expected));

    $this->assertEquals($expected, $parsed);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class B
|
class B
|
||||||
|
Reference in New Issue
Block a user