merged branch ezzatron/yaml-folded-scalar (PR #7455)

This PR was merged into the 2.1 branch.

Discussion
----------

[Yaml] fixed bugs with folded scalar parsing (2.1 branch)

| Q             | A
| ------------- | ---
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | n/a
| License       | MIT
| Doc PR        | n/a

This is a re-do of #7445, which was submitted against the 2.0 branch; that branch is no longer supported. Additionally, the previous PR broke some tests because literal tabs were accidentally changed to spaces. GitHub's diff engine seemingly ignored that change, although the differences showed up when using `git diff` directly. This PR leaves the literals untouched, and hence the tests should now pass.

**From the original PR:**

I found some more bugs with the parsing of folded scalars. I had also made some mistakes in the tests I introduced in #6785. This PR fixes the incorrect tests and introduces more related tests.

I had to completely rewrite the `Parser::parseFoldedScalar()` method. I think it came out simpler in the end, with fewer 'special cases'. I've done some basic profiling by running the parser tests on repeat and haven't noticed any difference in parsing performance.

Commits
-------

bbcdfe2 [Yaml] fixed bugs with folded scalar parsing
This commit is contained in:
Fabien Potencier 2013-03-23 07:49:58 +01:00
commit 996912158c
2 changed files with 154 additions and 61 deletions

View File

@ -414,64 +414,61 @@ class Parser
*/
private function parseFoldedScalar($separator, $indicator = '', $indentation = 0)
{
$separator = '|' == $separator ? "\n" : ' ';
$text = '';
$notEOF = $this->moveToNextLine();
while ($notEOF && $this->isCurrentLineBlank()) {
$text .= "\n";
$notEOF = $this->moveToNextLine();
}
if (!$notEOF) {
return '';
}
if (!preg_match('#^(?P<indent>'.($indentation ? str_repeat(' ', $indentation) : ' +').')(?P<text>.*)$#u', $this->currentLine, $matches)) {
$this->moveToPreviousLine();
return '';
}
$textIndent = $matches['indent'];
$previousIndent = 0;
$text .= $matches['text'].$separator;
while ($this->currentLineNb + 1 < count($this->lines)) {
$this->moveToNextLine();
if (preg_match('#^(?P<indent> {'.strlen($textIndent).',})(?P<text>.+)$#u', $this->currentLine, $matches)) {
if (' ' == $separator && $previousIndent != $matches['indent']) {
$text = substr($text, 0, -1)."\n";
}
$previousIndent = $matches['indent'];
$text .= str_repeat(' ', $diff = strlen($matches['indent']) - strlen($textIndent)).$matches['text'].($diff ? "\n" : $separator);
} elseif (preg_match('#^(?P<text> *)$#', $this->currentLine, $matches)) {
$text .= preg_replace('#^ {1,'.strlen($textIndent).'}#', '', $matches['text'])."\n";
} else {
$this->moveToPreviousLine();
break;
// determine indentation if not specified
if (0 === $indentation) {
if (preg_match('/^ +/', $this->currentLine, $matches)) {
$indentation = strlen($matches[0]);
}
}
if (' ' == $separator) {
// replace last separator by a newline
$text = preg_replace('/ (\n*)$/', "\n$1", $text);
$text = '';
if ($indentation > 0) {
$pattern = sprintf('/^ {%d}(.*)$/', $indentation);
$isCurrentLineBlank = $this->isCurrentLineBlank();
while (
$notEOF && (
$isCurrentLineBlank ||
preg_match($pattern, $this->currentLine, $matches)
)
) {
if ($isCurrentLineBlank) {
$text .= substr($this->currentLine, $indentation);
} else {
$text .= $matches[1];
}
// newline only if not EOF
if ($notEOF = $this->moveToNextLine()) {
$text .= "\n";
$isCurrentLineBlank = $this->isCurrentLineBlank();
}
}
} elseif ($notEOF) {
$text .= "\n";
}
switch ($indicator) {
case '':
$text = preg_replace('#\n+$#s', "\n", $text);
break;
case '+':
break;
case '-':
$text = preg_replace('#\n+$#s', '', $text);
break;
if ($notEOF) {
$this->moveToPreviousLine();
}
// replace all non-trailing single newlines with spaces in folded blocks
if ('>' === $separator) {
preg_match('/(\n*)$/', $text, $matches);
$text = preg_replace('/(?<!\n)\n(?!\n)/', ' ', rtrim($text, "\n"));
$text .= $matches[1];
}
// deal with trailing newlines as indicated
if ('' === $indicator) {
$text = preg_replace('/\n+$/s', "\n", $text);
} elseif ('-' === $indicator) {
$text = preg_replace('/\n+$/s', '', $text);
}
return $text;

View File

@ -113,7 +113,6 @@ EOF;
foo: |-
one
two
bar: |-
one
two
@ -123,7 +122,24 @@ EOF;
'foo' => "one\ntwo",
'bar' => "one\ntwo",
);
$tests['Literal block chomping strip with trailing newline'] = array($expected, $yaml);
$tests['Literal block chomping strip with single trailing newline'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: |-
one
two
bar: |-
one
two
EOF;
$expected = array(
'foo' => "one\ntwo",
'bar' => "one\ntwo",
);
$tests['Literal block chomping strip with multiple trailing newlines'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: |-
@ -143,7 +159,6 @@ EOF;
foo: |
one
two
bar: |
one
two
@ -153,7 +168,24 @@ EOF;
'foo' => "one\ntwo\n",
'bar' => "one\ntwo\n",
);
$tests['Literal block chomping clip with trailing newline'] = array($expected, $yaml);
$tests['Literal block chomping clip with single trailing newline'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: |
one
two
bar: |
one
two
EOF;
$expected = array(
'foo' => "one\ntwo\n",
'bar' => "one\ntwo\n",
);
$tests['Literal block chomping clip with multiple trailing newlines'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: |
@ -165,7 +197,7 @@ bar: |
EOF;
$expected = array(
'foo' => "one\ntwo\n",
'bar' => "one\ntwo\n",
'bar' => "one\ntwo",
);
$tests['Literal block chomping clip without trailing newline'] = array($expected, $yaml);
@ -173,17 +205,33 @@ EOF;
foo: |+
one
two
bar: |+
one
two
EOF;
$expected = array(
'foo' => "one\ntwo\n",
'bar' => "one\ntwo\n",
);
$tests['Literal block chomping keep with single trailing newline'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: |+
one
two
bar: |+
one
two
EOF;
$expected = array(
'foo' => "one\ntwo\n\n",
'bar' => "one\ntwo\n\n",
);
$tests['Literal block chomping keep with trailing newline'] = array($expected, $yaml);
$tests['Literal block chomping keep with multiple trailing newlines'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: |+
@ -195,7 +243,7 @@ bar: |+
EOF;
$expected = array(
'foo' => "one\ntwo\n",
'bar' => "one\ntwo\n",
'bar' => "one\ntwo",
);
$tests['Literal block chomping keep without trailing newline'] = array($expected, $yaml);
@ -203,17 +251,33 @@ EOF;
foo: >-
one
two
bar: >-
one
two
EOF;
$expected = array(
'foo' => "one two",
'bar' => "one two",
);
$tests['Folded block chomping strip with single trailing newline'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: >-
one
two
bar: >-
one
two
EOF;
$expected = array(
'foo' => "one two",
'bar' => "one two",
);
$tests['Folded block chomping strip with trailing newline'] = array($expected, $yaml);
$tests['Folded block chomping strip with multiple trailing newlines'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: >-
@ -233,7 +297,6 @@ EOF;
foo: >
one
two
bar: >
one
two
@ -243,7 +306,24 @@ EOF;
'foo' => "one two\n",
'bar' => "one two\n",
);
$tests['Folded block chomping clip with trailing newline'] = array($expected, $yaml);
$tests['Folded block chomping clip with single trailing newline'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: >
one
two
bar: >
one
two
EOF;
$expected = array(
'foo' => "one two\n",
'bar' => "one two\n",
);
$tests['Folded block chomping clip with multiple trailing newlines'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: >
@ -255,7 +335,7 @@ bar: >
EOF;
$expected = array(
'foo' => "one two\n",
'bar' => "one two\n",
'bar' => "one two",
);
$tests['Folded block chomping clip without trailing newline'] = array($expected, $yaml);
@ -263,17 +343,33 @@ EOF;
foo: >+
one
two
bar: >+
one
two
EOF;
$expected = array(
'foo' => "one two\n",
'bar' => "one two\n",
);
$tests['Folded block chomping keep with single trailing newline'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: >+
one
two
bar: >+
one
two
EOF;
$expected = array(
'foo' => "one two\n\n",
'bar' => "one two\n\n",
);
$tests['Folded block chomping keep with trailing newline'] = array($expected, $yaml);
$tests['Folded block chomping keep with multiple trailing newlines'] = array($expected, $yaml);
$yaml = <<<'EOF'
foo: >+
@ -285,7 +381,7 @@ bar: >+
EOF;
$expected = array(
'foo' => "one two\n",
'bar' => "one two\n",
'bar' => "one two",
);
$tests['Folded block chomping keep without trailing newline'] = array($expected, $yaml);