bug #40514 [Yaml] Allow tabs as separators between tokens (bertramakers)

This PR was merged into the 4.4 branch.

Discussion
----------

[Yaml] Allow tabs as separators between tokens

| Q             | A
| ------------- | ---
| Branch?       | 4.4
| Bug fix?      | yes
| New feature?  | no
| Deprecations? | no
| Tickets       | Fix #40507
| License       | MIT
| Doc PR        | None

As described in #40507, the YAML spec allows tabs as whitespace characters between tokens (see https://yaml.org/spec/1.2/spec.html#id2778241). However, the YAML parser crashes on such input because it only expects spaces after the colon.

While I'm not a huge fan of this syntax personally, it becomes a problem when a different linter, run as an unrelated check, tells us that a YAML file whose content we have little control over has valid syntax, and our app then crashes because the file cannot be parsed after all.

Commits
-------

9a130ae93e Fix issue 40507: Tabs as separators between tokens
This commit is contained in:
Christian Flothmann 2021-04-23 09:45:19 +02:00
commit df6b1ebf78
2 changed files with 58 additions and 17 deletions

View File

@@ -209,7 +209,7 @@ class Parser
array_pop($this->refsBeingParsed);
}
} elseif (
self::preg_match('#^(?P<key>(?:![^\s]++\s++)?(?:'.Inline::REGEX_QUOTED_STRING.'|(?:!?!php/const:)?[^ \'"\[\{!].*?)) *\:( ++(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
self::preg_match('#^(?P<key>(?:![^\s]++\s++)?(?:'.Inline::REGEX_QUOTED_STRING.'|(?:!?!php/const:)?[^ \'"\[\{!].*?)) *\:(( |\t)++(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
&& (false === strpos($values['key'], ' #') || \in_array($values['key'][0], ['"', "'"]))
) {
if ($context && 'sequence' == $context) {

View File

@@ -52,26 +52,67 @@ class ParserTest extends TestCase
return $this->loadTestsFromFixtureFiles('nonStringKeys.yml');
}
public function testTabsInYaml()
/**
* @dataProvider invalidIndentation
*/
public function testTabsAsIndentationInYaml(string $given, string $expectedMessage)
{
// test tabs in YAML
$yamls = [
"foo:\n bar",
"foo:\n bar",
"foo:\n bar",
"foo:\n bar",
$this->expectException(ParseException::class);
$this->expectExceptionMessage($expectedMessage);
$this->parser->parse($given);
}
public function invalidIndentation(): array
{
return [
[
"foo:\n\tbar",
"A YAML file cannot contain tabs as indentation at line 2 (near \"\tbar\").",
],
[
"foo:\n \tbar",
"A YAML file cannot contain tabs as indentation at line 2 (near \"\tbar\").",
],
[
"foo:\n\t bar",
"A YAML file cannot contain tabs as indentation at line 2 (near \"\t bar\").",
],
[
"foo:\n \t bar",
"A YAML file cannot contain tabs as indentation at line 2 (near \"\t bar\").",
],
];
}
foreach ($yamls as $yaml) {
try {
$this->parser->parse($yaml);
/**
* @dataProvider validTokenSeparators
*/
public function testValidTokenSeparation(string $given, array $expected)
{
$actual = $this->parser->parse($given);
$this->assertEquals($expected, $actual);
}
$this->fail('YAML files must not contain tabs');
} catch (\Exception $e) {
$this->assertInstanceOf(\Exception::class, $e, 'YAML files must not contain tabs');
$this->assertEquals('A YAML file cannot contain tabs as indentation at line 2 (near "'.strpbrk($yaml, "\t").'").', $e->getMessage(), 'YAML files must not contain tabs');
}
}
public function validTokenSeparators(): array
{
return [
[
'foo: bar',
['foo' => 'bar'],
],
[
"foo:\tbar",
['foo' => 'bar'],
],
[
"foo: \tbar",
['foo' => 'bar'],
],
[
"foo:\t bar",
['foo' => 'bar'],
],
];
}
public function testEndOfTheDocumentMarker()