fix lexing nested sequences/mappings

This commit is contained in:
Christian Flothmann 2019-09-24 00:08:17 +02:00
parent 1f4625053c
commit 4c15f80d84
2 changed files with 304 additions and 69 deletions

View File

@ -355,7 +355,7 @@ class Parser
}
try {
return Inline::parse($this->parseQuotedString($this->currentLine), $flags, $this->refs);
return Inline::parse($this->lexInlineQuotedString(), $flags, $this->refs);
} catch (ParseException $e) {
$e->setParsedLine($this->getRealCurrentLineNb() + 1);
$e->setSnippet($this->currentLine);
@ -368,7 +368,7 @@ class Parser
}
try {
$parsedMapping = Inline::parse($this->lexInlineMapping($this->currentLine), $flags, $this->refs);
$parsedMapping = Inline::parse($this->lexInlineMapping(), $flags, $this->refs);
while ($this->moveToNextLine()) {
if (!$this->isCurrentLineEmpty()) {
@ -389,7 +389,7 @@ class Parser
}
try {
$parsedSequence = Inline::parse($this->lexInlineSequence($this->currentLine), $flags, $this->refs);
$parsedSequence = Inline::parse($this->lexInlineSequence(), $flags, $this->refs);
while ($this->moveToNextLine()) {
if (!$this->isCurrentLineEmpty()) {
@ -659,6 +659,11 @@ class Parser
return implode("\n", $data);
}
/**
 * Tells whether at least one more line follows the line the parser is currently on.
 */
private function hasMoreLines(): bool
{
    $lastLineIndex = \count($this->lines) - 1;

    return $this->currentLineNb < $lastLineIndex;
}
/**
* Moves the parser to the next line.
*/
@ -736,9 +741,13 @@ class Parser
try {
if ('' !== $value && '{' === $value[0]) {
return Inline::parse($this->lexInlineMapping($value), $flags, $this->refs);
$cursor = \strlen($this->currentLine) - \strlen($value);
return Inline::parse($this->lexInlineMapping($cursor), $flags, $this->refs);
} elseif ('' !== $value && '[' === $value[0]) {
return Inline::parse($this->lexInlineSequence($value), $flags, $this->refs);
$cursor = \strlen($this->currentLine) - \strlen($value);
return Inline::parse($this->lexInlineSequence($cursor), $flags, $this->refs);
}
$quotation = '' !== $value && ('"' === $value[0] || "'" === $value[0]) ? $value[0] : null;
@ -1137,106 +1146,148 @@ class Parser
throw new ParseException(sprintf('Tags support is not enabled. You must use the flag "Yaml::PARSE_CUSTOM_TAGS" to use "%s".', $matches['tag']), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
}
/**
 * Lexes a single or double quoted string that starts at the given cursor position.
 *
 * The string may span several lines: a blank line contributes a newline, any other
 * line break contributes a single space unless the previous line ended with a
 * backslash. The returned token still carries its surrounding quotation characters
 * and its escape sequences.
 *
 * @param int $cursor Offset of the opening quotation character in the current line;
 *                    when the method returns it points right behind the closing one
 *
 * @throws ParseException when the last line is reached before the string is closed
 */
private function lexInlineQuotedString(int &$cursor = 0): string
{
    $quotation = $this->currentLine[$cursor];
    $value = $quotation;
    ++$cursor;

    $previousLineWasNewline = true;
    $previousLineWasTerminatedWithBackslash = false;
    $isContinuationLine = false;

    do {
        if ($isContinuationLine) {
            // the indentation of a continuation line is not part of the string
            $cursor += strspn($this->currentLine, ' ', $cursor);
        }
        $isContinuationLine = true;

        if ($this->isCurrentLineBlank()) {
            $value .= "\n";
        } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
            $value .= ' ';
        }

        for (; \strlen($this->currentLine) > $cursor; ++$cursor) {
            switch ($this->currentLine[$cursor]) {
                case '\\':
                    if ("'" === $quotation) {
                        // a backslash has no special meaning in single quoted strings,
                        // it must not hide the quotation character that follows it
                        $value .= '\\';
                    } elseif (isset($this->currentLine[++$cursor])) {
                        // keep the escape sequence as is and skip the escaped character
                        $value .= '\\'.$this->currentLine[$cursor];
                    }

                    break;
                case $quotation:
                    ++$cursor;

                    if ("'" === $quotation && isset($this->currentLine[$cursor]) && "'" === $this->currentLine[$cursor]) {
                        // two consecutive single quotes are an escaped single quote
                        $value .= "''";
                        break;
                    }

                    return $value.$quotation;
                default:
                    $value .= $this->currentLine[$cursor];
            }
        }

        // remember how this line ended to decide how the next one gets joined
        $previousLineWasNewline = $this->isCurrentLineBlank();
        $previousLineWasTerminatedWithBackslash = !$previousLineWasNewline && '\\' === $this->currentLine[-1];

        if ($this->hasMoreLines()) {
            $cursor = 0;
        }
    } while ($this->moveToNextLine());

    throw new ParseException('Malformed inline YAML string');
}
/**
 * Lexes an unquoted scalar that starts at the given cursor position.
 *
 * The token ends right before the first bracket, brace, comma, colon or space
 * character, or at the end of the current line.
 *
 * @param int $cursor Offset of the first character of the token in the current line;
 *                    when the method returns it points right behind the token
 *
 * @throws ParseException when no character can be consumed at the cursor position
 */
private function lexUnquotedString(int &$cursor): string
{
    $offset = $cursor;
    $cursor += strcspn($this->currentLine, '[]{},: ', $cursor);

    if ($cursor === $offset) {
        // A delimiter sits at the cursor (e.g. a closing bracket that does not match
        // the structure being lexed). Returning an empty token would leave the cursor
        // in place and make the calling loop spin forever.
        throw new ParseException('Malformed unquoted YAML string.');
    }

    return substr($this->currentLine, $offset, $cursor - $offset);
}
/**
 * Lexes an inline mapping by delegating to lexInlineStructure() with "}" as the closing character.
 *
 * @param int $cursor Offset of the opening character in the current line, passed on by reference
 */
private function lexInlineMapping(int &$cursor = 0): string
{
    return $this->lexInlineStructure($cursor, '}');
}
/**
 * Lexes an inline sequence by delegating to lexInlineStructure() with "]" as the closing character.
 *
 * @param int $cursor Offset of the opening character in the current line, passed on by reference
 */
private function lexInlineSequence(int &$cursor = 0): string
{
    return $this->lexInlineStructure($cursor, ']');
}
/**
 * Lexes an inline mapping or sequence, possibly spanning several lines, into one string.
 *
 * Quoted strings, nested mappings and nested sequences are handed over to their own
 * lexers so that bracket characters inside of them do not terminate the structure.
 * Runs of whitespace between tokens are collapsed into a single space and the rest of
 * a line is skipped once a token starts with "#".
 *
 * @param int    $cursor     Offset of the opening character in the current line;
 *                           when the method returns it points right behind $closingTag
 * @param string $closingTag The character that terminates the structure
 *
 * @throws ParseException when the last line is reached before $closingTag is found
 */
private function lexInlineStructure(int &$cursor, string $closingTag): string
{
    // the opening character is taken over as is
    $value = $this->currentLine[$cursor];
    ++$cursor;

    do {
        $this->consumeWhitespaces($cursor);

        while (isset($this->currentLine[$cursor])) {
            switch ($this->currentLine[$cursor]) {
                case '"':
                case "'":
                    $value .= $this->lexInlineQuotedString($cursor);
                    break;
                case ':':
                case ',':
                    $value .= $this->currentLine[$cursor];
                    ++$cursor;
                    break;
                case '{':
                    $value .= $this->lexInlineMapping($cursor);
                    break;
                case '[':
                    $value .= $this->lexInlineSequence($cursor);
                    break;
                case $closingTag:
                    $value .= $this->currentLine[$cursor];
                    ++$cursor;

                    return $value;
                case '#':
                    // leave the inner loop as well: the rest of the line is ignored
                    break 2;
                default:
                    $value .= $this->lexUnquotedString($cursor);
            }

            if ($this->consumeWhitespaces($cursor)) {
                $value .= ' ';
            }
        }

        if ($this->hasMoreLines()) {
            $cursor = 0;
        }
    } while ($this->moveToNextLine());

    throw new ParseException('Malformed inline YAML string');
}
/**
 * Advances the cursor past space characters, moving on to following lines when the
 * end of the current line is reached.
 *
 * @param int $cursor Offset in the current line; when the method returns it points at
 *                    the next character that is not a space, if there is one
 *
 * @return bool Whether at least one space character was skipped
 */
private function consumeWhitespaces(int &$cursor): bool
{
    $whitespacesConsumed = 0;

    do {
        $whitespaceOnlyTokenLength = strspn($this->currentLine, ' ', $cursor);
        $whitespacesConsumed += $whitespaceOnlyTokenLength;
        $cursor += $whitespaceOnlyTokenLength;

        if (isset($this->currentLine[$cursor])) {
            // something other than a space follows on this line
            return 0 < $whitespacesConsumed;
        }

        if ($this->hasMoreLines()) {
            $cursor = 0;
        }
    } while ($this->moveToNextLine());

    return 0 < $whitespacesConsumed;
}
}

View File

@ -1660,6 +1660,16 @@ EOF;
'foo': 'bar',
'bar': 'baz'
}
YAML
,
],
'mapping with unquoted strings and values' => [
['foo' => 'bar', 'bar' => 'baz'],
<<<YAML
{
foo: bar,
bar: baz
}
YAML
,
],
@ -1673,6 +1683,53 @@ YAML
YAML
,
],
'sequence with unquoted items' => [
['foo', 'bar'],
<<<YAML
[
foo,
bar
]
YAML
,
],
'nested mapping terminating at end of line' => [
[
'foo' => [
'bar' => 'foobar',
],
],
<<<YAML
{ foo: { bar: foobar }
}
YAML
,
],
'nested sequence terminating at end of line' => [
[
'foo',
[
'bar',
'baz',
],
],
<<<YAML
[ foo, [bar, baz]
]
YAML
],
'nested sequence spanning multiple lines' => [
[
['entry1', []],
['entry2'],
],
<<<YAML
[
['entry1', {}],
['entry2']
]
YAML
],
'sequence nested in mapping' => [
['foo' => ['bar', 'foobar'], 'bar' => ['baz']],
<<<YAML
@ -1699,6 +1756,22 @@ foobar: [foo,
YAML
,
],
'sequence spanning multiple lines nested in mapping with a following mapping' => [
[
'foobar' => [
'foo',
'bar',
],
'bar' => 'baz',
],
<<<YAML
foobar: [
foo,
bar,
]
bar: baz
YAML
],
'nested sequence nested in mapping starting on the same line' => [
[
'foo' => [
@ -1824,6 +1897,110 @@ YAML
foo: 'bar
baz'
YAML
],
'mixed mapping with inline notation having separated lines' => [
[
'map' => [
'key' => 'value',
'a' => 'b',
],
'param' => 'some',
],
<<<YAML
map: {
key: "value",
a: "b"
}
param: "some"
YAML
],
'mixed mapping with inline notation on one line' => [
[
'map' => [
'key' => 'value',
'a' => 'b',
],
'param' => 'some',
],
<<<YAML
map: {key: "value", a: "b"}
param: "some"
YAML
],
'mixed mapping with compact inline notation on one line' => [
[
'map' => [
'key' => 'value',
'a' => 'b',
],
'param' => 'some',
],
<<<YAML
map: {key: "value",
a: "b"}
param: "some"
YAML
],
'nested collections containing strings with bracket chars' => [
[
[']'],
['}'],
['ba[r'],
['[ba]r'],
['bar]'],
['foo' => 'bar{'],
['foo' => 'b{ar}'],
['foo' => 'bar}'],
],
<<<YAML
[
[
"]"
],
[
"}"
],
[
"ba[r"
],
[
'[ba]r'
],
[
"bar]"
],
{
foo: "bar{"
},
{
foo: "b{ar}"
},
{
foo: 'bar}'
}
]
YAML
],
'escaped characters in quoted strings' => [
[
['te"st'],
['test'],
["te'st"],
['te"st]'],
['te"st'],
['test'],
["te'st"],
['te"st]'],
],
<<<YAML
[
["te\"st"],["test"],['te''st'],["te\"st]"],
["te\"st"],
["test"],
['te''st'],
["te\"st]"]
]
YAML
],
];
@ -1847,6 +2024,13 @@ YAML;
$this->assertEquals(new TaggedValue('foo', ['foo' => 'bar']), $this->parser->parse('!foo {foo: bar}', Yaml::PARSE_CUSTOM_TAGS));
}
/**
 * The only closing quote of the string is escaped, so the sequence is never terminated.
 */
public function testInvalidInlineSequenceContainingStringWithEscapedQuotationCharacter()
{
    $malformedYaml = '["\\"]';

    $this->expectException(ParseException::class);

    $this->parser->parse($malformedYaml);
}
/**
* @dataProvider taggedValuesProvider
*/