bug #20235 [DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions (klausi, nicolas-grekas)
This PR was merged into the 2.7 branch.

Discussion
----------

[DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #20229
| License       | MIT
| Doc PR        | -

@klausi could you please validate this patch? Is it an improvement over yours? (Sorry, I don't have the proper use case to test.)

Commits
-------

17757d8 [DomCrawler] Optimize DomCrawler::relativize()
5b26e33 [DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions
This commit is contained in:
commit
8dee4be6b7
|
@ -856,22 +856,47 @@ class Crawler extends \SplObjectStorage
|
|||
{
|
||||
$expressions = array();
|
||||
|
||||
$unionPattern = '/\|(?![^\[]*\])/';
|
||||
// An expression which will never match to replace expressions which cannot match in the crawler
|
||||
// We cannot simply drop such expressions, so they are replaced with this one instead
|
||||
$nonMatchingExpression = 'a[name() = "b"]';
|
||||
|
||||
// Split any unions into individual expressions.
|
||||
foreach (preg_split($unionPattern, $xpath) as $expression) {
|
||||
$expression = trim($expression);
|
||||
$parenthesis = '';
|
||||
$xpathLen = strlen($xpath);
|
||||
$openedBrackets = 0;
|
||||
$startPosition = strspn($xpath, " \t\n\r\0\x0B");
|
||||
|
||||
// If the union is inside parentheses, we need to preserve the opening parentheses and apply
|
||||
// the change only inside it.
|
||||
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
|
||||
$parenthesis = $matches[0];
|
||||
$expression = substr($expression, strlen($parenthesis));
|
||||
for ($i = $startPosition; $i <= $xpathLen; ++$i) {
|
||||
$i += strcspn($xpath, '"\'[]|', $i);
|
||||
|
||||
if ($i < $xpathLen) {
|
||||
switch ($xpath[$i]) {
|
||||
case '"':
|
||||
case "'":
|
||||
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
|
||||
return $xpath; // The XPath expression is invalid
|
||||
}
|
||||
continue 2;
|
||||
case '[':
|
||||
++$openedBrackets;
|
||||
continue 2;
|
||||
case ']':
|
||||
--$openedBrackets;
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
if ($openedBrackets) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($startPosition < $xpathLen && '(' === $xpath[$startPosition]) {
|
||||
// If the union is inside parentheses, we need to preserve the opening parentheses and apply
|
||||
// the change only inside it.
|
||||
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1);
|
||||
$parenthesis = substr($xpath, $startPosition, $j);
|
||||
$startPosition += $j;
|
||||
} else {
|
||||
$parenthesis = '';
|
||||
}
|
||||
$expression = rtrim(substr($xpath, $startPosition, $i - $startPosition));
|
||||
|
||||
// BC for Symfony 2.4 and lower, where elements were added in a fake _root parent
|
||||
if (0 === strpos($expression, '/_root/')) {
|
||||
|
@ -881,7 +906,7 @@ class Crawler extends \SplObjectStorage
|
|||
}
|
||||
|
||||
// add prefix before absolute element selector
|
||||
if (empty($expression)) {
|
||||
if ('' === $expression) {
|
||||
$expression = $nonMatchingExpression;
|
||||
} elseif (0 === strpos($expression, '//')) {
|
||||
$expression = 'descendant-or-self::'.substr($expression, 2);
|
||||
|
@ -899,7 +924,7 @@ class Crawler extends \SplObjectStorage
|
|||
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
|
||||
$expression = $nonMatchingExpression;
|
||||
} elseif (0 === strpos($expression, 'descendant::')) {
|
||||
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
|
||||
$expression = 'descendant-or-self::'.substr($expression, 12);
|
||||
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
|
||||
// the fake root has no parent, preceding or following nodes, and no attributes (not even namespace attributes)
|
||||
$expression = $nonMatchingExpression;
|
||||
|
@ -907,9 +932,16 @@ class Crawler extends \SplObjectStorage
|
|||
$expression = 'self::'.$expression;
|
||||
}
|
||||
$expressions[] = $parenthesis.$expression;
|
||||
|
||||
if ($i === $xpathLen) {
|
||||
return implode(' | ', $expressions);
|
||||
}
|
||||
|
||||
$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
|
||||
$startPosition = $i + 1;
|
||||
}
|
||||
|
||||
return implode(' | ', $expressions);
|
||||
return $xpath; // The XPath expression is invalid
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -387,6 +387,7 @@ EOF
|
|||
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
|
||||
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
|
||||
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
|
||||
$this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
|
||||
}
|
||||
|
||||
public function testFilterXPath()
|
||||
|
@ -548,7 +549,7 @@ EOF
|
|||
|
||||
$this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name');
|
||||
$this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name');
|
||||
$this->assertCount(9, $crawler->filterXPath('self::*/a'));
|
||||
$this->assertCount(10, $crawler->filterXPath('self::*/a'));
|
||||
}
|
||||
|
||||
public function testFilter()
|
||||
|
@ -969,6 +970,8 @@ HTML;
|
|||
|
||||
<a href="?get=param">GetLink</a>
|
||||
|
||||
<a href="/example">Klausi|Claudiu</a>
|
||||
|
||||
<form action="foo" id="FooFormId">
|
||||
<input type="text" value="TextValue" name="TextName" />
|
||||
<input type="submit" value="FooValue" name="FooName" id="FooId" />
|
||||
|
|
Reference in New Issue