From 5b26e332610ed665137d8b63f43a61ec9d8432b6 Mon Sep 17 00:00:00 2001 From: Klaus Purer Date: Sun, 16 Oct 2016 22:10:53 +0200 Subject: [PATCH 1/3] [DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions --- src/Symfony/Component/DomCrawler/Crawler.php | 44 ++++++++++++++++++- .../DomCrawler/Tests/CrawlerTest.php | 5 ++- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 37822e53c2..a1ddffd797 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -856,13 +856,12 @@ class Crawler extends \SplObjectStorage { $expressions = array(); - $unionPattern = '/\|(?![^\[]*\])/'; // An expression which will never match to replace expressions which cannot match in the crawler // We cannot simply drop $nonMatchingExpression = 'a[name() = "b"]'; // Split any unions into individual expressions. - foreach (preg_split($unionPattern, $xpath) as $expression) { + foreach ($this->splitUnionParts($xpath) as $expression) { $expression = trim($expression); $parenthesis = ''; @@ -912,6 +911,47 @@ class Crawler extends \SplObjectStorage return implode(' | ', $expressions); } + /** + * Splits the XPath into parts that are separated by the union operator. + * + * @param string $xpath + * + * @return string[] + */ + private function splitUnionParts($xpath) + { + // Split any unions into individual expressions. We need to iterate + // through the string to correctly parse opening/closing quotes and + // braces which is not possible with regular expressions. 
+ $unionParts = array(); + $inSingleQuotedString = false; + $inDoubleQuotedString = false; + $openedBrackets = 0; + $lastUnion = 0; + $xpathLength = strlen($xpath); + for ($i = 0; $i < $xpathLength; ++$i) { + $char = $xpath[$i]; + + if ($char === "'" && !$inDoubleQuotedString) { + $inSingleQuotedString = !$inSingleQuotedString; + } elseif ($char === '"' && !$inSingleQuotedString) { + $inDoubleQuotedString = !$inDoubleQuotedString; + } elseif (!$inSingleQuotedString && !$inDoubleQuotedString) { + if ($char === '[') { + ++$openedBrackets; + } elseif ($char === ']') { + --$openedBrackets; + } elseif ($char === '|' && $openedBrackets === 0) { + $unionParts[] = substr($xpath, $lastUnion, $i - $lastUnion); + $lastUnion = $i + 1; + } + } + } + $unionParts[] = substr($xpath, $lastUnion); + + return $unionParts; + } + /** * @param int $position * diff --git a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php index 45bbb2f8e5..65e2a90e87 100755 --- a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php @@ -387,6 +387,7 @@ EOF $this->assertCount(5, $crawler->filterXPath('(//a | //div)//img')); $this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)')); $this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )')); + $this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]")); } public function testFilterXPath() @@ -548,7 +549,7 @@ EOF $this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name'); $this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name'); - $this->assertCount(9, $crawler->filterXPath('self::*/a')); + $this->assertCount(10, 
$crawler->filterXPath('self::*/a')); } public function testFilter() @@ -969,6 +970,8 @@ HTML; GetLink + Klausi|Claudiu +
From 3c216176e85c5a4f6055d67994439fd5eec95179 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Thu, 6 Oct 2016 15:54:54 +0200 Subject: [PATCH 2/3] [HttpKernel] Fix source links with latest Twig versions --- .../Component/HttpKernel/DataCollector/DumpDataCollector.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Symfony/Component/HttpKernel/DataCollector/DumpDataCollector.php b/src/Symfony/Component/HttpKernel/DataCollector/DumpDataCollector.php index 3a445f45bb..538d73c783 100644 --- a/src/Symfony/Component/HttpKernel/DataCollector/DumpDataCollector.php +++ b/src/Symfony/Component/HttpKernel/DataCollector/DumpDataCollector.php @@ -99,11 +99,11 @@ class DumpDataCollector extends DataCollector implements DataDumperInterface } elseif (isset($trace[$i]['object']) && $trace[$i]['object'] instanceof \Twig_Template) { $template = $trace[$i]['object']; $name = $template->getTemplateName(); - $file = method_exists($template, 'getSourceContext') ? $template->getSourceContext()->getPath() : false; $src = method_exists($template, 'getSourceContext') ? $template->getSourceContext()->getCode() : (method_exists($template, 'getSource') ? $template->getSource() : false); $info = $template->getDebugInfo(); - if (null !== $src && isset($info[$trace[$i - 1]['line']])) { + if (isset($info[$trace[$i - 1]['line']])) { $line = $info[$trace[$i - 1]['line']]; + $file = method_exists($template, 'getSourceContext') ? 
$template->getSourceContext()->getPath() : false; if ($src) { $src = explode("\n", $src); From 17757d8114e255283ab131071b46293f2ff75a2e Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Tue, 18 Oct 2016 09:12:23 +0200 Subject: [PATCH 3/3] [DomCrawler] Optimize DomCrawler::relativize() --- src/Symfony/Component/DomCrawler/Crawler.php | 96 +++++++++----------- 1 file changed, 44 insertions(+), 52 deletions(-) diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index a1ddffd797..6f329d3892 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -860,17 +860,43 @@ class Crawler extends \SplObjectStorage // We cannot simply drop $nonMatchingExpression = 'a[name() = "b"]'; - // Split any unions into individual expressions. - foreach ($this->splitUnionParts($xpath) as $expression) { - $expression = trim($expression); - $parenthesis = ''; + $xpathLen = strlen($xpath); + $openedBrackets = 0; + $startPosition = strspn($xpath, " \t\n\r\0\x0B"); - // If the union is inside some braces, we need to preserve the opening braces and apply - // the change only inside it. - if (preg_match('/^[\(\s*]+/', $expression, $matches)) { - $parenthesis = $matches[0]; - $expression = substr($expression, strlen($parenthesis)); + for ($i = $startPosition; $i <= $xpathLen; ++$i) { + $i += strcspn($xpath, '"\'[]|', $i); + + if ($i < $xpathLen) { + switch ($xpath[$i]) { + case '"': + case "'": + if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) { + return $xpath; // The XPath expression is invalid + } + continue 2; + case '[': + ++$openedBrackets; + continue 2; + case ']': + --$openedBrackets; + continue 2; + } } + if ($openedBrackets) { + continue; + } + + if ($startPosition < $xpathLen && '(' === $xpath[$startPosition]) { + // If the union is inside some braces, we need to preserve the opening braces and apply + // the change only inside it. 
+ $j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1); + $parenthesis = substr($xpath, $startPosition, $j); + $startPosition += $j; + } else { + $parenthesis = ''; + } + $expression = rtrim(substr($xpath, $startPosition, $i - $startPosition)); // BC for Symfony 2.4 and lower were elements were adding in a fake _root parent if (0 === strpos($expression, '/_root/')) { @@ -880,7 +906,7 @@ class Crawler extends \SplObjectStorage } // add prefix before absolute element selector - if (empty($expression)) { + if ('' === $expression) { $expression = $nonMatchingExpression; } elseif (0 === strpos($expression, '//')) { $expression = 'descendant-or-self::'.substr($expression, 2); @@ -898,7 +924,7 @@ class Crawler extends \SplObjectStorage // '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results $expression = $nonMatchingExpression; } elseif (0 === strpos($expression, 'descendant::')) { - $expression = 'descendant-or-self::'.substr($expression, strlen('descendant::')); + $expression = 'descendant-or-self::'.substr($expression, 12); } elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) { // the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes) $expression = $nonMatchingExpression; @@ -906,50 +932,16 @@ class Crawler extends \SplObjectStorage $expression = 'self::'.$expression; } $expressions[] = $parenthesis.$expression; - } - return implode(' | ', $expressions); - } - - /** - * Splits the XPath into parts that are separated by the union operator. - * - * @param string $xpath - * - * @return string[] - */ - private function splitUnionParts($xpath) - { - // Split any unions into individual expressions. We need to iterate - // through the string to correctly parse opening/closing quotes and - // braces which is not possible with regular expressions. 
- $unionParts = array(); - $inSingleQuotedString = false; - $inDoubleQuotedString = false; - $openedBrackets = 0; - $lastUnion = 0; - $xpathLength = strlen($xpath); - for ($i = 0; $i < $xpathLength; ++$i) { - $char = $xpath[$i]; - - if ($char === "'" && !$inDoubleQuotedString) { - $inSingleQuotedString = !$inSingleQuotedString; - } elseif ($char === '"' && !$inSingleQuotedString) { - $inDoubleQuotedString = !$inDoubleQuotedString; - } elseif (!$inSingleQuotedString && !$inDoubleQuotedString) { - if ($char === '[') { - ++$openedBrackets; - } elseif ($char === ']') { - --$openedBrackets; - } elseif ($char === '|' && $openedBrackets === 0) { - $unionParts[] = substr($xpath, $lastUnion, $i - $lastUnion); - $lastUnion = $i + 1; - } + if ($i === $xpathLen) { + return implode(' | ', $expressions); } - } - $unionParts[] = substr($xpath, $lastUnion); - return $unionParts; + $i += strspn($xpath, " \t\n\r\0\x0B", $i + 1); + $startPosition = $i + 1; + } + + return $xpath; // The XPath expression is invalid } /**