[YAML] Improve performance of YAML parser

Optimise various methods and conditions to use the
best-performing alternatives where possible. Roughly:

* Uses methods that do not copy memory, e.g. strncmp as an
  alternative to strpos for matching the beginning of a string.
* Reorders some conditions so that the cheapest
  checks are evaluated first.
* Checks input before calling trim() - even when the function
  returns the same string as its input, the call still costs
  memory and introduces unnecessary overhead.
* Extracts variables for repeated identical function calls.
* Uses negative substring offsets instead of strlen + substr.
* Replaces single-char substr usages with string offset access
  (e.g. $line[-1] instead of substr($line, -1)).
This commit is contained in:
Claus Due 2019-10-21 20:04:17 +02:00 committed by Nicolas Grekas
parent ecf37ddcbf
commit 7a7c9665da
2 changed files with 44 additions and 34 deletions

View File

@ -269,7 +269,7 @@ class Inline
*/ */
public static function parseScalar(string $scalar, int $flags = 0, array $delimiters = null, int &$i = 0, bool $evaluate = true, array $references = []) public static function parseScalar(string $scalar, int $flags = 0, array $delimiters = null, int &$i = 0, bool $evaluate = true, array $references = [])
{ {
if (\in_array($scalar[$i], ['"', "'"])) { if (\in_array($scalar[$i], ['"', "'"], true)) {
// quoted scalar // quoted scalar
$output = self::parseQuotedScalar($scalar, $i); $output = self::parseQuotedScalar($scalar, $i);
@ -324,7 +324,7 @@ class Inline
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)), self::$parsedLineNumber + 1, $scalar, self::$parsedFilename); throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)), self::$parsedLineNumber + 1, $scalar, self::$parsedFilename);
} }
$output = substr($match[0], 1, \strlen($match[0]) - 2); $output = substr($match[0], 1, -1);
$unescaper = new Unescaper(); $unescaper = new Unescaper();
if ('"' == $scalar[$i]) { if ('"' == $scalar[$i]) {
@ -371,7 +371,7 @@ class Inline
$value = self::parseMapping($sequence, $flags, $i, $references); $value = self::parseMapping($sequence, $flags, $i, $references);
break; break;
default: default:
$isQuoted = \in_array($sequence[$i], ['"', "'"]); $isQuoted = \in_array($sequence[$i], ['"', "'"], true);
$value = self::parseScalar($sequence, $flags, [',', ']'], $i, null === $tag, $references); $value = self::parseScalar($sequence, $flags, [',', ']'], $i, null === $tag, $references);
// the value can be an array if a reference has been resolved to an array var // the value can be an array if a reference has been resolved to an array var
@ -551,9 +551,8 @@ class Inline
private static function evaluateScalar(string $scalar, int $flags, array $references = []) private static function evaluateScalar(string $scalar, int $flags, array $references = [])
{ {
$scalar = trim($scalar); $scalar = trim($scalar);
$scalarLower = strtolower($scalar);
if (0 === strpos($scalar, '*')) { if ('*' === ($scalar[0] ?? '')) {
if (false !== $pos = strpos($scalar, '#')) { if (false !== $pos = strpos($scalar, '#')) {
$value = substr($scalar, 1, $pos - 2); $value = substr($scalar, 1, $pos - 2);
} else { } else {
@ -572,6 +571,8 @@ class Inline
return $references[$value]; return $references[$value];
} }
$scalarLower = strtolower($scalar);
switch (true) { switch (true) {
case 'null' === $scalarLower: case 'null' === $scalarLower:
case '' === $scalar: case '' === $scalar:
@ -583,11 +584,11 @@ class Inline
return false; return false;
case '!' === $scalar[0]: case '!' === $scalar[0]:
switch (true) { switch (true) {
case 0 === strpos($scalar, '!!str '): case 0 === strncmp($scalar, '!!str ', 6):
return (string) substr($scalar, 6); return (string) substr($scalar, 6);
case 0 === strpos($scalar, '! '): case 0 === strncmp($scalar, '! ', 2):
return substr($scalar, 2); return substr($scalar, 2);
case 0 === strpos($scalar, '!php/object'): case 0 === strncmp($scalar, '!php/object', 11):
if (self::$objectSupport) { if (self::$objectSupport) {
if (!isset($scalar[12])) { if (!isset($scalar[12])) {
@trigger_error('Using the !php/object tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED); @trigger_error('Using the !php/object tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
@ -603,7 +604,7 @@ class Inline
} }
return null; return null;
case 0 === strpos($scalar, '!php/const'): case 0 === strncmp($scalar, '!php/const', 10):
if (self::$constantSupport) { if (self::$constantSupport) {
if (!isset($scalar[11])) { if (!isset($scalar[11])) {
@trigger_error('Using the !php/const tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED); @trigger_error('Using the !php/const tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
@ -623,9 +624,9 @@ class Inline
} }
return null; return null;
case 0 === strpos($scalar, '!!float '): case 0 === strncmp($scalar, '!!float ', 8):
return (float) substr($scalar, 8); return (float) substr($scalar, 8);
case 0 === strpos($scalar, '!!binary '): case 0 === strncmp($scalar, '!!binary ', 9):
return self::evaluateBinaryScalar(substr($scalar, 9)); return self::evaluateBinaryScalar(substr($scalar, 9));
default: default:
throw new ParseException(sprintf('The string "%s" could not be parsed as it uses an unsupported built-in tag.', $scalar), self::$parsedLineNumber, $scalar, self::$parsedFilename); throw new ParseException(sprintf('The string "%s" could not be parsed as it uses an unsupported built-in tag.', $scalar), self::$parsedLineNumber, $scalar, self::$parsedFilename);
@ -633,7 +634,7 @@ class Inline
// Optimize for returning strings. // Optimize for returning strings.
// no break // no break
case '+' === $scalar[0] || '-' === $scalar[0] || '.' === $scalar[0] || is_numeric($scalar[0]): case \in_array($scalar[0], ['+', '-', '.'], true) || is_numeric($scalar[0]):
if (Parser::preg_match('{^[+-]?[0-9][0-9_]*$}', $scalar)) { if (Parser::preg_match('{^[+-]?[0-9][0-9_]*$}', $scalar)) {
$scalar = str_replace('_', '', (string) $scalar); $scalar = str_replace('_', '', (string) $scalar);
} }

View File

@ -28,6 +28,7 @@ class Parser
private $filename; private $filename;
private $offset = 0; private $offset = 0;
private $numberOfParsedLines = 0;
private $totalNumberOfLines; private $totalNumberOfLines;
private $lines = []; private $lines = [];
private $currentLineNb = -1; private $currentLineNb = -1;
@ -99,6 +100,7 @@ class Parser
} }
$this->lines = []; $this->lines = [];
$this->currentLine = ''; $this->currentLine = '';
$this->numberOfParsedLines = 0;
$this->refs = []; $this->refs = [];
$this->skippedLineNumbers = []; $this->skippedLineNumbers = [];
$this->locallySkippedLineNumbers = []; $this->locallySkippedLineNumbers = [];
@ -113,10 +115,11 @@ class Parser
$this->currentLine = ''; $this->currentLine = '';
$value = $this->cleanup($value); $value = $this->cleanup($value);
$this->lines = explode("\n", $value); $this->lines = explode("\n", $value);
$this->numberOfParsedLines = \count($this->lines);
$this->locallySkippedLineNumbers = []; $this->locallySkippedLineNumbers = [];
if (null === $this->totalNumberOfLines) { if (null === $this->totalNumberOfLines) {
$this->totalNumberOfLines = \count($this->lines); $this->totalNumberOfLines = $this->numberOfParsedLines;
} }
if (!$this->moveToNextLine()) { if (!$this->moveToNextLine()) {
@ -291,7 +294,7 @@ class Parser
$subTag = null; $subTag = null;
if ($mergeNode) { if ($mergeNode) {
// Merge keys // Merge keys
} elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) { } elseif (!isset($values['value']) || '' === $values['value'] || '#' === ($values['value'][0] ?? '') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
// hash // hash
// if next line is less indented or equal, then it means that the current value is null // if next line is less indented or equal, then it means that the current value is null
if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) { if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
@ -430,7 +433,8 @@ class Parser
$value = ''; $value = '';
foreach ($this->lines as $line) { foreach ($this->lines as $line) {
if ('' !== ltrim($line) && '#' === ltrim($line)[0]) { $trimmedLine = trim($line);
if ('#' === ($trimmedLine[0] ?? '')) {
continue; continue;
} }
// If the indentation is not consistent at offset 0, it is to be considered as a ParseError // If the indentation is not consistent at offset 0, it is to be considered as a ParseError
@ -442,22 +446,22 @@ class Parser
throw new ParseException('Mapping values are not allowed in multi-line blocks.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); throw new ParseException('Mapping values are not allowed in multi-line blocks.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
} }
if ('' === trim($line)) { if ('' === $trimmedLine) {
$value .= "\n"; $value .= "\n";
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) { } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
$value .= ' '; $value .= ' ';
} }
if ('' !== trim($line) && '\\' === substr($line, -1)) { if ('' !== $trimmedLine && '\\' === $line[-1]) {
$value .= ltrim(substr($line, 0, -1)); $value .= ltrim(substr($line, 0, -1));
} elseif ('' !== trim($line)) { } elseif ('' !== $trimmedLine) {
$value .= trim($line); $value .= $trimmedLine;
} }
if ('' === trim($line)) { if ('' === $trimmedLine) {
$previousLineWasNewline = true; $previousLineWasNewline = true;
$previousLineWasTerminatedWithBackslash = false; $previousLineWasTerminatedWithBackslash = false;
} elseif ('\\' === substr($line, -1)) { } elseif ('\\' === $line[-1]) {
$previousLineWasNewline = false; $previousLineWasNewline = false;
$previousLineWasTerminatedWithBackslash = true; $previousLineWasTerminatedWithBackslash = true;
} else { } else {
@ -481,7 +485,7 @@ class Parser
$data = new TaggedValue($tag, $data); $data = new TaggedValue($tag, $data);
} }
if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && !\is_object($data) && 'mapping' === $context) { if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && 'mapping' === $context && !\is_object($data)) {
$object = new \stdClass(); $object = new \stdClass();
foreach ($data as $key => $value) { foreach ($data as $key => $value) {
@ -545,6 +549,10 @@ class Parser
*/ */
private function getCurrentLineIndentation(): int private function getCurrentLineIndentation(): int
{ {
if (' ' !== ($this->currentLine[0] ?? '')) {
return 0;
}
return \strlen($this->currentLine) - \strlen(ltrim($this->currentLine, ' ')); return \strlen($this->currentLine) - \strlen(ltrim($this->currentLine, ' '));
} }
@ -653,7 +661,7 @@ class Parser
*/ */
private function moveToNextLine(): bool private function moveToNextLine(): bool
{ {
if ($this->currentLineNb >= \count($this->lines) - 1) { if ($this->currentLineNb >= $this->numberOfParsedLines - 1) {
return false; return false;
} }
@ -689,7 +697,7 @@ class Parser
*/ */
private function parseValue(string $value, int $flags, string $context) private function parseValue(string $value, int $flags, string $context)
{ {
if (0 === strpos($value, '*')) { if ('*' === ($value[0] ?? '')) {
if (false !== $pos = strpos($value, '#')) { if (false !== $pos = strpos($value, '#')) {
$value = substr($value, 1, $pos - 2); $value = substr($value, 1, $pos - 2);
} else { } else {
@ -750,7 +758,7 @@ class Parser
$lines[] = trim($this->currentLine); $lines[] = trim($this->currentLine);
// quoted string values end with a line that is terminated with the quotation character // quoted string values end with a line that is terminated with the quotation character
if ('' !== $this->currentLine && substr($this->currentLine, -1) === $quotation) { if ('' !== $this->currentLine && $this->currentLine[-1] === $quotation) {
break; break;
} }
} }
@ -944,7 +952,7 @@ class Parser
*/ */
private function isCurrentLineBlank(): bool private function isCurrentLineBlank(): bool
{ {
return '' == trim($this->currentLine, ' '); return '' === $this->currentLine || '' === trim($this->currentLine, ' ');
} }
/** /**
@ -955,7 +963,7 @@ class Parser
private function isCurrentLineComment(): bool private function isCurrentLineComment(): bool
{ {
//checking explicitly the first char of the trim is faster than loops or strpos //checking explicitly the first char of the trim is faster than loops or strpos
$ltrimmedLine = ltrim($this->currentLine, ' '); $ltrimmedLine = ' ' === $this->currentLine[0] ? ltrim($this->currentLine, ' ') : $this->currentLine;
return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0]; return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
} }
@ -1041,7 +1049,7 @@ class Parser
*/ */
private function isStringUnIndentedCollectionItem(): bool private function isStringUnIndentedCollectionItem(): bool
{ {
return '-' === rtrim($this->currentLine) || 0 === strpos($this->currentLine, '- '); return 0 === strncmp($this->currentLine, '- ', 2) || '-' === rtrim($this->currentLine);
} }
/** /**
@ -1144,22 +1152,23 @@ class Parser
$value = ''; $value = '';
for ($i = 0, $linesCount = \count($lines), $previousLineWasNewline = false, $previousLineWasTerminatedWithBackslash = false; $i < $linesCount; ++$i) { for ($i = 0, $linesCount = \count($lines), $previousLineWasNewline = false, $previousLineWasTerminatedWithBackslash = false; $i < $linesCount; ++$i) {
if ('' === trim($lines[$i])) { $trimmedLine = trim($lines[$i]);
if ('' === $trimmedLine) {
$value .= "\n"; $value .= "\n";
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) { } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
$value .= ' '; $value .= ' ';
} }
if ('' !== trim($lines[$i]) && '\\' === substr($lines[$i], -1)) { if ('' !== $trimmedLine && '\\' === $lines[$i][-1]) {
$value .= ltrim(substr($lines[$i], 0, -1)); $value .= ltrim(substr($lines[$i], 0, -1));
} elseif ('' !== trim($lines[$i])) { } elseif ('' !== $trimmedLine) {
$value .= trim($lines[$i]); $value .= $trimmedLine;
} }
if ('' === trim($lines[$i])) { if ('' === $trimmedLine) {
$previousLineWasNewline = true; $previousLineWasNewline = true;
$previousLineWasTerminatedWithBackslash = false; $previousLineWasTerminatedWithBackslash = false;
} elseif ('\\' === substr($lines[$i], -1)) { } elseif ('\\' === $lines[$i][-1]) {
$previousLineWasNewline = false; $previousLineWasNewline = false;
$previousLineWasTerminatedWithBackslash = true; $previousLineWasTerminatedWithBackslash = true;
} else { } else {