[YAML] Improve performance of YAML parser

Optimise various methods and conditions to use the
best-performing alternatives where possible. In summary:

* Uses methods that do not copy memory, e.g. strncmp as an
  alternative to strpos when matching the beginning of a string.
* Switches order of some conditions to put the cheapest
  checks first in order.
* Checks input before calling trim(): even when the function
  returns the same string as its input, the call still costs
  memory and introduces unnecessary overhead.
* Extracts variables for repeated identical function calls.
* Uses negative substring offsets instead of strlen + substr.
* Replaces single-character substr() usages with string offset access (e.g. $line[-1]).
This commit is contained in:
Claus Due 2019-10-21 20:04:17 +02:00 committed by Nicolas Grekas
parent ecf37ddcbf
commit 7a7c9665da
2 changed files with 44 additions and 34 deletions

View File

@ -269,7 +269,7 @@ class Inline
*/
public static function parseScalar(string $scalar, int $flags = 0, array $delimiters = null, int &$i = 0, bool $evaluate = true, array $references = [])
{
if (\in_array($scalar[$i], ['"', "'"])) {
if (\in_array($scalar[$i], ['"', "'"], true)) {
// quoted scalar
$output = self::parseQuotedScalar($scalar, $i);
@ -324,7 +324,7 @@ class Inline
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)), self::$parsedLineNumber + 1, $scalar, self::$parsedFilename);
}
$output = substr($match[0], 1, \strlen($match[0]) - 2);
$output = substr($match[0], 1, -1);
$unescaper = new Unescaper();
if ('"' == $scalar[$i]) {
@ -371,7 +371,7 @@ class Inline
$value = self::parseMapping($sequence, $flags, $i, $references);
break;
default:
$isQuoted = \in_array($sequence[$i], ['"', "'"]);
$isQuoted = \in_array($sequence[$i], ['"', "'"], true);
$value = self::parseScalar($sequence, $flags, [',', ']'], $i, null === $tag, $references);
// the value can be an array if a reference has been resolved to an array var
@ -551,9 +551,8 @@ class Inline
private static function evaluateScalar(string $scalar, int $flags, array $references = [])
{
$scalar = trim($scalar);
$scalarLower = strtolower($scalar);
if (0 === strpos($scalar, '*')) {
if ('*' === ($scalar[0] ?? '')) {
if (false !== $pos = strpos($scalar, '#')) {
$value = substr($scalar, 1, $pos - 2);
} else {
@ -572,6 +571,8 @@ class Inline
return $references[$value];
}
$scalarLower = strtolower($scalar);
switch (true) {
case 'null' === $scalarLower:
case '' === $scalar:
@ -583,11 +584,11 @@ class Inline
return false;
case '!' === $scalar[0]:
switch (true) {
case 0 === strpos($scalar, '!!str '):
case 0 === strncmp($scalar, '!!str ', 6):
return (string) substr($scalar, 6);
case 0 === strpos($scalar, '! '):
case 0 === strncmp($scalar, '! ', 2):
return substr($scalar, 2);
case 0 === strpos($scalar, '!php/object'):
case 0 === strncmp($scalar, '!php/object', 11):
if (self::$objectSupport) {
if (!isset($scalar[12])) {
@trigger_error('Using the !php/object tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
@ -603,7 +604,7 @@ class Inline
}
return null;
case 0 === strpos($scalar, '!php/const'):
case 0 === strncmp($scalar, '!php/const', 10):
if (self::$constantSupport) {
if (!isset($scalar[11])) {
@trigger_error('Using the !php/const tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
@ -623,9 +624,9 @@ class Inline
}
return null;
case 0 === strpos($scalar, '!!float '):
case 0 === strncmp($scalar, '!!float ', 8):
return (float) substr($scalar, 8);
case 0 === strpos($scalar, '!!binary '):
case 0 === strncmp($scalar, '!!binary ', 9):
return self::evaluateBinaryScalar(substr($scalar, 9));
default:
throw new ParseException(sprintf('The string "%s" could not be parsed as it uses an unsupported built-in tag.', $scalar), self::$parsedLineNumber, $scalar, self::$parsedFilename);
@ -633,7 +634,7 @@ class Inline
// Optimize for returning strings.
// no break
case '+' === $scalar[0] || '-' === $scalar[0] || '.' === $scalar[0] || is_numeric($scalar[0]):
case \in_array($scalar[0], ['+', '-', '.'], true) || is_numeric($scalar[0]):
if (Parser::preg_match('{^[+-]?[0-9][0-9_]*$}', $scalar)) {
$scalar = str_replace('_', '', (string) $scalar);
}

View File

@ -28,6 +28,7 @@ class Parser
private $filename;
private $offset = 0;
private $numberOfParsedLines = 0;
private $totalNumberOfLines;
private $lines = [];
private $currentLineNb = -1;
@ -99,6 +100,7 @@ class Parser
}
$this->lines = [];
$this->currentLine = '';
$this->numberOfParsedLines = 0;
$this->refs = [];
$this->skippedLineNumbers = [];
$this->locallySkippedLineNumbers = [];
@ -113,10 +115,11 @@ class Parser
$this->currentLine = '';
$value = $this->cleanup($value);
$this->lines = explode("\n", $value);
$this->numberOfParsedLines = \count($this->lines);
$this->locallySkippedLineNumbers = [];
if (null === $this->totalNumberOfLines) {
$this->totalNumberOfLines = \count($this->lines);
$this->totalNumberOfLines = $this->numberOfParsedLines;
}
if (!$this->moveToNextLine()) {
@ -291,7 +294,7 @@ class Parser
$subTag = null;
if ($mergeNode) {
// Merge keys
} elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
} elseif (!isset($values['value']) || '' === $values['value'] || '#' === ($values['value'][0] ?? '') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
// hash
// if next line is less indented or equal, then it means that the current value is null
if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
@ -430,7 +433,8 @@ class Parser
$value = '';
foreach ($this->lines as $line) {
if ('' !== ltrim($line) && '#' === ltrim($line)[0]) {
$trimmedLine = trim($line);
if ('#' === ($trimmedLine[0] ?? '')) {
continue;
}
// If the indentation is not consistent at offset 0, it is to be considered as a ParseError
@ -442,22 +446,22 @@ class Parser
throw new ParseException('Mapping values are not allowed in multi-line blocks.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
}
if ('' === trim($line)) {
if ('' === $trimmedLine) {
$value .= "\n";
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
$value .= ' ';
}
if ('' !== trim($line) && '\\' === substr($line, -1)) {
if ('' !== $trimmedLine && '\\' === $line[-1]) {
$value .= ltrim(substr($line, 0, -1));
} elseif ('' !== trim($line)) {
$value .= trim($line);
} elseif ('' !== $trimmedLine) {
$value .= $trimmedLine;
}
if ('' === trim($line)) {
if ('' === $trimmedLine) {
$previousLineWasNewline = true;
$previousLineWasTerminatedWithBackslash = false;
} elseif ('\\' === substr($line, -1)) {
} elseif ('\\' === $line[-1]) {
$previousLineWasNewline = false;
$previousLineWasTerminatedWithBackslash = true;
} else {
@ -481,7 +485,7 @@ class Parser
$data = new TaggedValue($tag, $data);
}
if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && !\is_object($data) && 'mapping' === $context) {
if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && 'mapping' === $context && !\is_object($data)) {
$object = new \stdClass();
foreach ($data as $key => $value) {
@ -545,6 +549,10 @@ class Parser
*/
private function getCurrentLineIndentation(): int
{
    // A line that is empty or starts with anything other than a space has
    // no indentation, so the string does not need to be scanned at all.
    $startsWithSpace = isset($this->currentLine[0]) && ' ' === $this->currentLine[0];

    // The indentation is the length of the leading run of spaces.
    return $startsWithSpace ? strspn($this->currentLine, ' ') : 0;
}
@ -653,7 +661,7 @@ class Parser
*/
private function moveToNextLine(): bool
{
if ($this->currentLineNb >= \count($this->lines) - 1) {
if ($this->currentLineNb >= $this->numberOfParsedLines - 1) {
return false;
}
@ -689,7 +697,7 @@ class Parser
*/
private function parseValue(string $value, int $flags, string $context)
{
if (0 === strpos($value, '*')) {
if ('*' === ($value[0] ?? '')) {
if (false !== $pos = strpos($value, '#')) {
$value = substr($value, 1, $pos - 2);
} else {
@ -750,7 +758,7 @@ class Parser
$lines[] = trim($this->currentLine);
// quoted string values end with a line that is terminated with the quotation character
if ('' !== $this->currentLine && substr($this->currentLine, -1) === $quotation) {
if ('' !== $this->currentLine && $this->currentLine[-1] === $quotation) {
break;
}
}
@ -944,7 +952,7 @@ class Parser
*/
private function isCurrentLineBlank(): bool
{
    // An empty line is blank by definition, so it is tested first to avoid
    // the trim() call; otherwise the line is blank when it holds only spaces.
    // Strict comparison is used because both operands are always strings.
    return '' === $this->currentLine || '' === trim($this->currentLine, ' ');
}
/**
@ -955,7 +963,7 @@ class Parser
private function isCurrentLineComment(): bool
{
    // An empty line has no first character to inspect and cannot be a comment.
    // Guarding here also avoids an "Uninitialized string offset" error below.
    if ('' === $this->currentLine) {
        return false;
    }

    // Checking the first character explicitly is faster than loops or strpos();
    // ltrim() is only called when there are leading spaces to strip.
    $ltrimmedLine = ' ' === $this->currentLine[0] ? ltrim($this->currentLine, ' ') : $this->currentLine;

    // A comment line is one whose first non-space character is "#".
    return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
}
@ -1041,7 +1049,7 @@ class Parser
*/
private function isStringUnIndentedCollectionItem(): bool
{
    // The fixed-length prefix comparison runs first because it is the cheaper
    // check; rtrim() is only reached when the line does not start with "- ",
    // and then matches a line consisting of a lone dash plus trailing whitespace.
    return 0 === strncmp($this->currentLine, '- ', 2) || '-' === rtrim($this->currentLine);
}
/**
@ -1144,22 +1152,23 @@ class Parser
$value = '';
for ($i = 0, $linesCount = \count($lines), $previousLineWasNewline = false, $previousLineWasTerminatedWithBackslash = false; $i < $linesCount; ++$i) {
if ('' === trim($lines[$i])) {
$trimmedLine = trim($lines[$i]);
if ('' === $trimmedLine) {
$value .= "\n";
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
$value .= ' ';
}
if ('' !== trim($lines[$i]) && '\\' === substr($lines[$i], -1)) {
if ('' !== $trimmedLine && '\\' === $lines[$i][-1]) {
$value .= ltrim(substr($lines[$i], 0, -1));
} elseif ('' !== trim($lines[$i])) {
$value .= trim($lines[$i]);
} elseif ('' !== $trimmedLine) {
$value .= $trimmedLine;
}
if ('' === trim($lines[$i])) {
if ('' === $trimmedLine) {
$previousLineWasNewline = true;
$previousLineWasTerminatedWithBackslash = false;
} elseif ('\\' === substr($lines[$i], -1)) {
} elseif ('\\' === $lines[$i][-1]) {
$previousLineWasNewline = false;
$previousLineWasTerminatedWithBackslash = true;
} else {