[YAML] Improve performance of YAML parser
Optimise various methods and conditions to use the best-performing alternatives where possible. Roughly:

* Uses functions that do not copy memory, e.g. strncmp as an alternative to strpos when matching the beginning of a string.
* Reorders some conditions so that the cheapest checks come first.
* Checks the input before calling trim(): even when the function returns the same string as its input, it still costs memory and introduces unnecessary overhead.
* Extracts variables for repeated identical function calls.
* Uses negative substring offsets instead of strlen + substr.
* Replaces single-character substr usages with string offset access.
This commit is contained in:
parent
ecf37ddcbf
commit
7a7c9665da
@ -269,7 +269,7 @@ class Inline
|
|||||||
*/
|
*/
|
||||||
public static function parseScalar(string $scalar, int $flags = 0, array $delimiters = null, int &$i = 0, bool $evaluate = true, array $references = [])
|
public static function parseScalar(string $scalar, int $flags = 0, array $delimiters = null, int &$i = 0, bool $evaluate = true, array $references = [])
|
||||||
{
|
{
|
||||||
if (\in_array($scalar[$i], ['"', "'"])) {
|
if (\in_array($scalar[$i], ['"', "'"], true)) {
|
||||||
// quoted scalar
|
// quoted scalar
|
||||||
$output = self::parseQuotedScalar($scalar, $i);
|
$output = self::parseQuotedScalar($scalar, $i);
|
||||||
|
|
||||||
@ -324,7 +324,7 @@ class Inline
|
|||||||
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)), self::$parsedLineNumber + 1, $scalar, self::$parsedFilename);
|
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)), self::$parsedLineNumber + 1, $scalar, self::$parsedFilename);
|
||||||
}
|
}
|
||||||
|
|
||||||
$output = substr($match[0], 1, \strlen($match[0]) - 2);
|
$output = substr($match[0], 1, -1);
|
||||||
|
|
||||||
$unescaper = new Unescaper();
|
$unescaper = new Unescaper();
|
||||||
if ('"' == $scalar[$i]) {
|
if ('"' == $scalar[$i]) {
|
||||||
@ -371,7 +371,7 @@ class Inline
|
|||||||
$value = self::parseMapping($sequence, $flags, $i, $references);
|
$value = self::parseMapping($sequence, $flags, $i, $references);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
$isQuoted = \in_array($sequence[$i], ['"', "'"]);
|
$isQuoted = \in_array($sequence[$i], ['"', "'"], true);
|
||||||
$value = self::parseScalar($sequence, $flags, [',', ']'], $i, null === $tag, $references);
|
$value = self::parseScalar($sequence, $flags, [',', ']'], $i, null === $tag, $references);
|
||||||
|
|
||||||
// the value can be an array if a reference has been resolved to an array var
|
// the value can be an array if a reference has been resolved to an array var
|
||||||
@ -551,9 +551,8 @@ class Inline
|
|||||||
private static function evaluateScalar(string $scalar, int $flags, array $references = [])
|
private static function evaluateScalar(string $scalar, int $flags, array $references = [])
|
||||||
{
|
{
|
||||||
$scalar = trim($scalar);
|
$scalar = trim($scalar);
|
||||||
$scalarLower = strtolower($scalar);
|
|
||||||
|
|
||||||
if (0 === strpos($scalar, '*')) {
|
if ('*' === ($scalar[0] ?? '')) {
|
||||||
if (false !== $pos = strpos($scalar, '#')) {
|
if (false !== $pos = strpos($scalar, '#')) {
|
||||||
$value = substr($scalar, 1, $pos - 2);
|
$value = substr($scalar, 1, $pos - 2);
|
||||||
} else {
|
} else {
|
||||||
@ -572,6 +571,8 @@ class Inline
|
|||||||
return $references[$value];
|
return $references[$value];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$scalarLower = strtolower($scalar);
|
||||||
|
|
||||||
switch (true) {
|
switch (true) {
|
||||||
case 'null' === $scalarLower:
|
case 'null' === $scalarLower:
|
||||||
case '' === $scalar:
|
case '' === $scalar:
|
||||||
@ -583,11 +584,11 @@ class Inline
|
|||||||
return false;
|
return false;
|
||||||
case '!' === $scalar[0]:
|
case '!' === $scalar[0]:
|
||||||
switch (true) {
|
switch (true) {
|
||||||
case 0 === strpos($scalar, '!!str '):
|
case 0 === strncmp($scalar, '!!str ', 6):
|
||||||
return (string) substr($scalar, 6);
|
return (string) substr($scalar, 6);
|
||||||
case 0 === strpos($scalar, '! '):
|
case 0 === strncmp($scalar, '! ', 2):
|
||||||
return substr($scalar, 2);
|
return substr($scalar, 2);
|
||||||
case 0 === strpos($scalar, '!php/object'):
|
case 0 === strncmp($scalar, '!php/object', 11):
|
||||||
if (self::$objectSupport) {
|
if (self::$objectSupport) {
|
||||||
if (!isset($scalar[12])) {
|
if (!isset($scalar[12])) {
|
||||||
@trigger_error('Using the !php/object tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
|
@trigger_error('Using the !php/object tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
|
||||||
@ -603,7 +604,7 @@ class Inline
|
|||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
case 0 === strpos($scalar, '!php/const'):
|
case 0 === strncmp($scalar, '!php/const', 10):
|
||||||
if (self::$constantSupport) {
|
if (self::$constantSupport) {
|
||||||
if (!isset($scalar[11])) {
|
if (!isset($scalar[11])) {
|
||||||
@trigger_error('Using the !php/const tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
|
@trigger_error('Using the !php/const tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
|
||||||
@ -623,9 +624,9 @@ class Inline
|
|||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
case 0 === strpos($scalar, '!!float '):
|
case 0 === strncmp($scalar, '!!float ', 8):
|
||||||
return (float) substr($scalar, 8);
|
return (float) substr($scalar, 8);
|
||||||
case 0 === strpos($scalar, '!!binary '):
|
case 0 === strncmp($scalar, '!!binary ', 9):
|
||||||
return self::evaluateBinaryScalar(substr($scalar, 9));
|
return self::evaluateBinaryScalar(substr($scalar, 9));
|
||||||
default:
|
default:
|
||||||
throw new ParseException(sprintf('The string "%s" could not be parsed as it uses an unsupported built-in tag.', $scalar), self::$parsedLineNumber, $scalar, self::$parsedFilename);
|
throw new ParseException(sprintf('The string "%s" could not be parsed as it uses an unsupported built-in tag.', $scalar), self::$parsedLineNumber, $scalar, self::$parsedFilename);
|
||||||
@ -633,7 +634,7 @@ class Inline
|
|||||||
|
|
||||||
// Optimize for returning strings.
|
// Optimize for returning strings.
|
||||||
// no break
|
// no break
|
||||||
case '+' === $scalar[0] || '-' === $scalar[0] || '.' === $scalar[0] || is_numeric($scalar[0]):
|
case \in_array($scalar[0], ['+', '-', '.'], true) || is_numeric($scalar[0]):
|
||||||
if (Parser::preg_match('{^[+-]?[0-9][0-9_]*$}', $scalar)) {
|
if (Parser::preg_match('{^[+-]?[0-9][0-9_]*$}', $scalar)) {
|
||||||
$scalar = str_replace('_', '', (string) $scalar);
|
$scalar = str_replace('_', '', (string) $scalar);
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,7 @@ class Parser
|
|||||||
|
|
||||||
private $filename;
|
private $filename;
|
||||||
private $offset = 0;
|
private $offset = 0;
|
||||||
|
private $numberOfParsedLines = 0;
|
||||||
private $totalNumberOfLines;
|
private $totalNumberOfLines;
|
||||||
private $lines = [];
|
private $lines = [];
|
||||||
private $currentLineNb = -1;
|
private $currentLineNb = -1;
|
||||||
@ -99,6 +100,7 @@ class Parser
|
|||||||
}
|
}
|
||||||
$this->lines = [];
|
$this->lines = [];
|
||||||
$this->currentLine = '';
|
$this->currentLine = '';
|
||||||
|
$this->numberOfParsedLines = 0;
|
||||||
$this->refs = [];
|
$this->refs = [];
|
||||||
$this->skippedLineNumbers = [];
|
$this->skippedLineNumbers = [];
|
||||||
$this->locallySkippedLineNumbers = [];
|
$this->locallySkippedLineNumbers = [];
|
||||||
@ -113,10 +115,11 @@ class Parser
|
|||||||
$this->currentLine = '';
|
$this->currentLine = '';
|
||||||
$value = $this->cleanup($value);
|
$value = $this->cleanup($value);
|
||||||
$this->lines = explode("\n", $value);
|
$this->lines = explode("\n", $value);
|
||||||
|
$this->numberOfParsedLines = \count($this->lines);
|
||||||
$this->locallySkippedLineNumbers = [];
|
$this->locallySkippedLineNumbers = [];
|
||||||
|
|
||||||
if (null === $this->totalNumberOfLines) {
|
if (null === $this->totalNumberOfLines) {
|
||||||
$this->totalNumberOfLines = \count($this->lines);
|
$this->totalNumberOfLines = $this->numberOfParsedLines;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!$this->moveToNextLine()) {
|
if (!$this->moveToNextLine()) {
|
||||||
@ -291,7 +294,7 @@ class Parser
|
|||||||
$subTag = null;
|
$subTag = null;
|
||||||
if ($mergeNode) {
|
if ($mergeNode) {
|
||||||
// Merge keys
|
// Merge keys
|
||||||
} elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
|
} elseif (!isset($values['value']) || '' === $values['value'] || '#' === ($values['value'][0] ?? '') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
|
||||||
// hash
|
// hash
|
||||||
// if next line is less indented or equal, then it means that the current value is null
|
// if next line is less indented or equal, then it means that the current value is null
|
||||||
if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
|
if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
|
||||||
@ -430,7 +433,8 @@ class Parser
|
|||||||
$value = '';
|
$value = '';
|
||||||
|
|
||||||
foreach ($this->lines as $line) {
|
foreach ($this->lines as $line) {
|
||||||
if ('' !== ltrim($line) && '#' === ltrim($line)[0]) {
|
$trimmedLine = trim($line);
|
||||||
|
if ('#' === ($trimmedLine[0] ?? '')) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// If the indentation is not consistent at offset 0, it is to be considered as a ParseError
|
// If the indentation is not consistent at offset 0, it is to be considered as a ParseError
|
||||||
@ -442,22 +446,22 @@ class Parser
|
|||||||
throw new ParseException('Mapping values are not allowed in multi-line blocks.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
|
throw new ParseException('Mapping values are not allowed in multi-line blocks.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('' === trim($line)) {
|
if ('' === $trimmedLine) {
|
||||||
$value .= "\n";
|
$value .= "\n";
|
||||||
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
|
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
|
||||||
$value .= ' ';
|
$value .= ' ';
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('' !== trim($line) && '\\' === substr($line, -1)) {
|
if ('' !== $trimmedLine && '\\' === $line[-1]) {
|
||||||
$value .= ltrim(substr($line, 0, -1));
|
$value .= ltrim(substr($line, 0, -1));
|
||||||
} elseif ('' !== trim($line)) {
|
} elseif ('' !== $trimmedLine) {
|
||||||
$value .= trim($line);
|
$value .= $trimmedLine;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('' === trim($line)) {
|
if ('' === $trimmedLine) {
|
||||||
$previousLineWasNewline = true;
|
$previousLineWasNewline = true;
|
||||||
$previousLineWasTerminatedWithBackslash = false;
|
$previousLineWasTerminatedWithBackslash = false;
|
||||||
} elseif ('\\' === substr($line, -1)) {
|
} elseif ('\\' === $line[-1]) {
|
||||||
$previousLineWasNewline = false;
|
$previousLineWasNewline = false;
|
||||||
$previousLineWasTerminatedWithBackslash = true;
|
$previousLineWasTerminatedWithBackslash = true;
|
||||||
} else {
|
} else {
|
||||||
@ -481,7 +485,7 @@ class Parser
|
|||||||
$data = new TaggedValue($tag, $data);
|
$data = new TaggedValue($tag, $data);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && !\is_object($data) && 'mapping' === $context) {
|
if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && 'mapping' === $context && !\is_object($data)) {
|
||||||
$object = new \stdClass();
|
$object = new \stdClass();
|
||||||
|
|
||||||
foreach ($data as $key => $value) {
|
foreach ($data as $key => $value) {
|
||||||
@ -545,6 +549,10 @@ class Parser
|
|||||||
*/
|
*/
|
||||||
private function getCurrentLineIndentation(): int
|
private function getCurrentLineIndentation(): int
|
||||||
{
|
{
|
||||||
|
if (' ' !== ($this->currentLine[0] ?? '')) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
return \strlen($this->currentLine) - \strlen(ltrim($this->currentLine, ' '));
|
return \strlen($this->currentLine) - \strlen(ltrim($this->currentLine, ' '));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -653,7 +661,7 @@ class Parser
|
|||||||
*/
|
*/
|
||||||
private function moveToNextLine(): bool
|
private function moveToNextLine(): bool
|
||||||
{
|
{
|
||||||
if ($this->currentLineNb >= \count($this->lines) - 1) {
|
if ($this->currentLineNb >= $this->numberOfParsedLines - 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -689,7 +697,7 @@ class Parser
|
|||||||
*/
|
*/
|
||||||
private function parseValue(string $value, int $flags, string $context)
|
private function parseValue(string $value, int $flags, string $context)
|
||||||
{
|
{
|
||||||
if (0 === strpos($value, '*')) {
|
if ('*' === ($value[0] ?? '')) {
|
||||||
if (false !== $pos = strpos($value, '#')) {
|
if (false !== $pos = strpos($value, '#')) {
|
||||||
$value = substr($value, 1, $pos - 2);
|
$value = substr($value, 1, $pos - 2);
|
||||||
} else {
|
} else {
|
||||||
@ -750,7 +758,7 @@ class Parser
|
|||||||
$lines[] = trim($this->currentLine);
|
$lines[] = trim($this->currentLine);
|
||||||
|
|
||||||
// quoted string values end with a line that is terminated with the quotation character
|
// quoted string values end with a line that is terminated with the quotation character
|
||||||
if ('' !== $this->currentLine && substr($this->currentLine, -1) === $quotation) {
|
if ('' !== $this->currentLine && $this->currentLine[-1] === $quotation) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -944,7 +952,7 @@ class Parser
|
|||||||
*/
|
*/
|
||||||
private function isCurrentLineBlank(): bool
|
private function isCurrentLineBlank(): bool
|
||||||
{
|
{
|
||||||
return '' == trim($this->currentLine, ' ');
|
return '' === $this->currentLine || '' === trim($this->currentLine, ' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -955,7 +963,7 @@ class Parser
|
|||||||
private function isCurrentLineComment(): bool
|
private function isCurrentLineComment(): bool
|
||||||
{
|
{
|
||||||
//checking explicitly the first char of the trim is faster than loops or strpos
|
//checking explicitly the first char of the trim is faster than loops or strpos
|
||||||
$ltrimmedLine = ltrim($this->currentLine, ' ');
|
$ltrimmedLine = ' ' === $this->currentLine[0] ? ltrim($this->currentLine, ' ') : $this->currentLine;
|
||||||
|
|
||||||
return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
|
return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
|
||||||
}
|
}
|
||||||
@ -1041,7 +1049,7 @@ class Parser
|
|||||||
*/
|
*/
|
||||||
private function isStringUnIndentedCollectionItem(): bool
|
private function isStringUnIndentedCollectionItem(): bool
|
||||||
{
|
{
|
||||||
return '-' === rtrim($this->currentLine) || 0 === strpos($this->currentLine, '- ');
|
return 0 === strncmp($this->currentLine, '- ', 2) || '-' === rtrim($this->currentLine);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1144,22 +1152,23 @@ class Parser
|
|||||||
$value = '';
|
$value = '';
|
||||||
|
|
||||||
for ($i = 0, $linesCount = \count($lines), $previousLineWasNewline = false, $previousLineWasTerminatedWithBackslash = false; $i < $linesCount; ++$i) {
|
for ($i = 0, $linesCount = \count($lines), $previousLineWasNewline = false, $previousLineWasTerminatedWithBackslash = false; $i < $linesCount; ++$i) {
|
||||||
if ('' === trim($lines[$i])) {
|
$trimmedLine = trim($lines[$i]);
|
||||||
|
if ('' === $trimmedLine) {
|
||||||
$value .= "\n";
|
$value .= "\n";
|
||||||
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
|
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
|
||||||
$value .= ' ';
|
$value .= ' ';
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('' !== trim($lines[$i]) && '\\' === substr($lines[$i], -1)) {
|
if ('' !== $trimmedLine && '\\' === $lines[$i][-1]) {
|
||||||
$value .= ltrim(substr($lines[$i], 0, -1));
|
$value .= ltrim(substr($lines[$i], 0, -1));
|
||||||
} elseif ('' !== trim($lines[$i])) {
|
} elseif ('' !== $trimmedLine) {
|
||||||
$value .= trim($lines[$i]);
|
$value .= $trimmedLine;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('' === trim($lines[$i])) {
|
if ('' === $trimmedLine) {
|
||||||
$previousLineWasNewline = true;
|
$previousLineWasNewline = true;
|
||||||
$previousLineWasTerminatedWithBackslash = false;
|
$previousLineWasTerminatedWithBackslash = false;
|
||||||
} elseif ('\\' === substr($lines[$i], -1)) {
|
} elseif ('\\' === $lines[$i][-1]) {
|
||||||
$previousLineWasNewline = false;
|
$previousLineWasNewline = false;
|
||||||
$previousLineWasTerminatedWithBackslash = true;
|
$previousLineWasTerminatedWithBackslash = true;
|
||||||
} else {
|
} else {
|
||||||
|
Reference in New Issue
Block a user