bug #10958 [DomCrawler] Fixed filterXPath() chaining loosing the parent DOM nodes (stof, robbertkl)
This PR was merged into the 2.3 branch. Discussion ---------- [DomCrawler] Fixed filterXPath() chaining loosing the parent DOM nodes | Q | A | ------------- | --- | Bug fix? | yes | New feature? | no | BC breaks? | no | Deprecations? | no | Tests pass? | yes | Fixed tickets | #10206 | License | MIT | Doc PR | n/a This is a fixed version of #10207, preserving the BC for XPath queries. It is the rebased version of #10935 targetting 2.3 The example given in #10260 when reporting the regression in the previous attempt is covered by the new tests added in the first commit of the PR. I also added many tests ensuring that the behavior is the same than in the current implementation. Commits -------80438c2
Fixed the XPath filtering to have the same behavior than Symfony 2.4711ac32
[DomCrawler] Fixed filterXPath() chaining8f706c9
[DomCrawler] Added more tests for the XPath filtering
This commit is contained in:
commit
4eb4a6056c
|
@ -170,7 +170,7 @@ class Crawler extends \SplObjectStorage
|
|||
|
||||
$this->addDocument($dom);
|
||||
|
||||
$base = $this->filterXPath('descendant-or-self::base')->extract(array('href'));
|
||||
$base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href'));
|
||||
|
||||
$baseHref = current($base);
|
||||
if (count($base) && !empty($baseHref)) {
|
||||
|
@ -445,7 +445,7 @@ class Crawler extends \SplObjectStorage
|
|||
$nodes = array();
|
||||
|
||||
while ($node = $node->parentNode) {
|
||||
if (1 === $node->nodeType && '_root' !== $node->nodeName) {
|
||||
if (1 === $node->nodeType) {
|
||||
$nodes[] = $node;
|
||||
}
|
||||
}
|
||||
|
@ -580,6 +580,11 @@ class Crawler extends \SplObjectStorage
|
|||
/**
|
||||
* Filters the list of nodes with an XPath expression.
|
||||
*
|
||||
* The XPath expression is evaluated in the context of the crawler, which
|
||||
* is considered as a fake parent of the elements inside it.
|
||||
* This means that a child selector "div" or "./div" will match only
|
||||
* the div elements of the current crawler, not their children.
|
||||
*
|
||||
* @param string $xpath An XPath expression
|
||||
*
|
||||
* @return Crawler A new instance of Crawler with the filtered list of nodes
|
||||
|
@ -588,15 +593,14 @@ class Crawler extends \SplObjectStorage
|
|||
*/
|
||||
public function filterXPath($xpath)
|
||||
{
|
||||
$document = new \DOMDocument('1.0', 'UTF-8');
|
||||
$root = $document->appendChild($document->createElement('_root'));
|
||||
foreach ($this as $node) {
|
||||
$root->appendChild($document->importNode($node, true));
|
||||
$xpath = $this->relativize($xpath);
|
||||
|
||||
// If we dropped all expressions in the XPath while preparing it, there would be no match
|
||||
if ('' === $xpath) {
|
||||
return new static(null, $this->uri);
|
||||
}
|
||||
|
||||
$domxpath = new \DOMXPath($document);
|
||||
|
||||
return new static($domxpath->query($xpath), $this->uri);
|
||||
return $this->filterRelativeXPath($xpath);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -620,7 +624,8 @@ class Crawler extends \SplObjectStorage
|
|||
// @codeCoverageIgnoreEnd
|
||||
}
|
||||
|
||||
return $this->filterXPath(CssSelector::toXPath($selector));
|
||||
// The CssSelector already prefixes the selector with descendant-or-self::
|
||||
return $this->filterRelativeXPath(CssSelector::toXPath($selector));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -634,10 +639,10 @@ class Crawler extends \SplObjectStorage
|
|||
*/
|
||||
public function selectLink($value)
|
||||
{
|
||||
$xpath = sprintf('//a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s)] ', static::xpathLiteral(' '.$value.' ')).
|
||||
sprintf('| //a/img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]/ancestor::a', static::xpathLiteral(' '.$value.' '));
|
||||
$xpath = sprintf('descendant-or-self::a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) ', static::xpathLiteral(' '.$value.' ')).
|
||||
sprintf('or ./img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]]', static::xpathLiteral(' '.$value.' '));
|
||||
|
||||
return $this->filterXPath($xpath);
|
||||
return $this->filterRelativeXPath($xpath);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -652,11 +657,11 @@ class Crawler extends \SplObjectStorage
|
|||
public function selectButton($value)
|
||||
{
|
||||
$translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")';
|
||||
$xpath = sprintf('//input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
|
||||
$xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
|
||||
sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id="%s" or @name="%s"] ', $translate, static::xpathLiteral(' '.$value.' '), $value, $value).
|
||||
sprintf('| //button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);
|
||||
sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);
|
||||
|
||||
return $this->filterXPath($xpath);
|
||||
return $this->filterRelativeXPath($xpath);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -772,6 +777,88 @@ class Crawler extends \SplObjectStorage
|
|||
return sprintf("concat(%s)", implode($parts, ', '));
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters the list of nodes with an XPath expression.
|
||||
*
|
||||
* The XPath expression should already be processed to apply it in the context of each node.
|
||||
*
|
||||
* @param string $xpath
|
||||
*
|
||||
* @return Crawler
|
||||
*/
|
||||
private function filterRelativeXPath($xpath)
|
||||
{
|
||||
$crawler = new static(null, $this->uri);
|
||||
|
||||
foreach ($this as $node) {
|
||||
$domxpath = new \DOMXPath($node->ownerDocument);
|
||||
$crawler->add($domxpath->query($xpath, $node));
|
||||
}
|
||||
|
||||
return $crawler;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make the XPath relative to the current context.
|
||||
*
|
||||
* The returned XPath will match elements matching the XPath inside the current crawler
|
||||
* when running in the context of a node of the crawler.
|
||||
*
|
||||
* @param string $xpath
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function relativize($xpath)
|
||||
{
|
||||
$expressions = array();
|
||||
|
||||
$unionPattern = '/\|(?![^\[]*\])/';
|
||||
// An expression which will never match to replace expressions which cannot match in the crawler
|
||||
// We cannot simply drop
|
||||
$nonMatchingExpression = 'a[name() = "b"]';
|
||||
|
||||
// Split any unions into individual expressions.
|
||||
foreach (preg_split($unionPattern, $xpath) as $expression) {
|
||||
$expression = trim($expression);
|
||||
$parenthesis = '';
|
||||
|
||||
// If the union is inside some braces, we need to preserve the opening braces and apply
|
||||
// the change only inside it.
|
||||
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
|
||||
$parenthesis = $matches[0];
|
||||
$expression = substr($expression, strlen($parenthesis));
|
||||
}
|
||||
|
||||
// BC for Symfony 2.4 and lower were elements were adding in a fake _root parent
|
||||
if (0 === strpos($expression, '/_root/')) {
|
||||
$expression = './'.substr($expression, 7);
|
||||
}
|
||||
|
||||
// add prefix before absolute element selector
|
||||
if (empty($expression)) {
|
||||
$expression = $nonMatchingExpression;
|
||||
} elseif (0 === strpos($expression, '//')) {
|
||||
$expression = 'descendant-or-self::' . substr($expression, 2);
|
||||
} elseif (0 === strpos($expression, './')) {
|
||||
$expression = 'self::' . substr($expression, 2);
|
||||
} elseif ('/' === $expression[0]) {
|
||||
// the only direct child in Symfony 2.4 and lower is _root, which is already handled previously
|
||||
// so let's drop the expression entirely
|
||||
$expression = $nonMatchingExpression;
|
||||
} elseif ('.' === $expression[0]) {
|
||||
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
|
||||
$expression = $nonMatchingExpression;
|
||||
} elseif (0 === strpos($expression, 'descendant::')) {
|
||||
$expression = 'descendant-or-self::' . substr($expression, strlen('descendant::'));
|
||||
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
|
||||
$expression = 'self::' .$expression;
|
||||
}
|
||||
$expressions[] = $parenthesis.$expression;
|
||||
}
|
||||
|
||||
return implode(' | ', $expressions);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param int $position
|
||||
*
|
||||
|
|
|
@ -52,13 +52,7 @@ abstract class FormField
|
|||
{
|
||||
$this->node = $node;
|
||||
$this->name = $node->getAttribute('name');
|
||||
|
||||
$this->document = new \DOMDocument('1.0', 'UTF-8');
|
||||
$this->node = $this->document->importNode($this->node, true);
|
||||
|
||||
$root = $this->document->appendChild($this->document->createElement('_root'));
|
||||
$root->appendChild($this->node);
|
||||
$this->xpath = new \DOMXPath($this->document);
|
||||
$this->xpath = new \DOMXPath($node->ownerDocument);
|
||||
|
||||
$this->initialize();
|
||||
}
|
||||
|
|
|
@ -388,9 +388,7 @@ class Form extends Link implements \ArrayAccess
|
|||
{
|
||||
$this->fields = new FormFieldRegistry();
|
||||
|
||||
$document = new \DOMDocument('1.0', 'UTF-8');
|
||||
$xpath = new \DOMXPath($document);
|
||||
$root = $document->appendChild($document->createElement('_root'));
|
||||
$xpath = new \DOMXPath($this->node->ownerDocument);
|
||||
|
||||
// add submitted button if it has a valid name
|
||||
if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) {
|
||||
|
@ -400,38 +398,32 @@ class Form extends Link implements \ArrayAccess
|
|||
|
||||
// temporarily change the name of the input node for the x coordinate
|
||||
$this->button->setAttribute('name', $name.'.x');
|
||||
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
|
||||
$this->set(new Field\InputFormField($this->button));
|
||||
|
||||
// temporarily change the name of the input node for the y coordinate
|
||||
$this->button->setAttribute('name', $name.'.y');
|
||||
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
|
||||
$this->set(new Field\InputFormField($this->button));
|
||||
|
||||
// restore the original name of the input node
|
||||
$this->button->setAttribute('name', $name);
|
||||
} else {
|
||||
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
|
||||
$this->set(new Field\InputFormField($this->button));
|
||||
}
|
||||
}
|
||||
|
||||
// find form elements corresponding to the current form
|
||||
if ($this->node->hasAttribute('id')) {
|
||||
// traverse through the whole document
|
||||
$node = $document->importNode($this->node->ownerDocument->documentElement, true);
|
||||
$root->appendChild($node);
|
||||
|
||||
// corresponding elements are either descendants or have a matching HTML5 form attribute
|
||||
$formId = Crawler::xpathLiteral($this->node->getAttribute('id'));
|
||||
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId), $root);
|
||||
|
||||
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId));
|
||||
foreach ($fieldNodes as $node) {
|
||||
$this->addField($node);
|
||||
}
|
||||
} else {
|
||||
// parent form has no id, add descendant elements only
|
||||
$node = $document->importNode($this->node, true);
|
||||
$root->appendChild($node);
|
||||
|
||||
// descendant elements with form attribute are not part of this form
|
||||
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $root);
|
||||
// do the xpath query with $this->node as the context node, to only find descendant elements
|
||||
// however, descendant elements with form attribute are not part of this form
|
||||
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->node);
|
||||
foreach ($fieldNodes as $node) {
|
||||
$this->addField($node);
|
||||
}
|
||||
|
|
|
@ -368,6 +368,27 @@ EOF
|
|||
$this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->extract('_text'), '->extract() returns an empty array if the node list is empty');
|
||||
}
|
||||
|
||||
public function testFilterXpathComplexQueries()
|
||||
{
|
||||
$crawler = $this->createTestCrawler()->filterXPath('//body');
|
||||
|
||||
$this->assertCount(0, $crawler->filterXPath('/input'));
|
||||
$this->assertCount(0, $crawler->filterXPath('/body'));
|
||||
$this->assertCount(1, $crawler->filterXPath('/_root/body'));
|
||||
$this->assertCount(1, $crawler->filterXPath('./body'));
|
||||
$this->assertCount(4, $crawler->filterXPath('//form')->filterXPath('//button | //input'));
|
||||
$this->assertCount(1, $crawler->filterXPath('body'));
|
||||
$this->assertCount(6, $crawler->filterXPath('//button | //input'));
|
||||
$this->assertCount(1, $crawler->filterXPath('//body'));
|
||||
$this->assertCount(1, $crawler->filterXPath('descendant-or-self::body'));
|
||||
$this->assertCount(1, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('./div'), 'A child selection finds only the current div');
|
||||
$this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div'), 'A descendant selector matches the current div and its child');
|
||||
$this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('//div'), 'A descendant selector matches the current div and its child');
|
||||
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
|
||||
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
|
||||
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
|
||||
}
|
||||
|
||||
/**
|
||||
* @covers Symfony\Component\DomCrawler\Crawler::filterXPath
|
||||
*/
|
||||
|
@ -378,8 +399,10 @@ EOF
|
|||
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler');
|
||||
|
||||
$crawler = $this->createTestCrawler()->filterXPath('//ul');
|
||||
|
||||
$this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');
|
||||
|
||||
$crawler = $this->createTestCrawler();
|
||||
$this->assertCount(3, $crawler->filterXPath('//body')->filterXPath('//button')->parents(), '->filterXpath() preserves parents when chained');
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -455,6 +478,44 @@ EOF
|
|||
}
|
||||
}
|
||||
|
||||
public function testSelectLinkAndLinkFiltered()
|
||||
{
|
||||
$html = <<<HTML
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<body>
|
||||
<div id="action">
|
||||
<a href="/index.php?r=site/login">Login</a>
|
||||
</div>
|
||||
<form id="login-form" action="/index.php?r=site/login" method="post">
|
||||
<button type="submit">Submit</button>
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
||||
HTML;
|
||||
|
||||
$crawler = new Crawler($html);
|
||||
$filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'login-form']");
|
||||
|
||||
$this->assertCount(0, $filtered->selectLink('Login'));
|
||||
$this->assertCount(1, $filtered->selectButton('Submit'));
|
||||
|
||||
$filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'action']");
|
||||
|
||||
$this->assertCount(1, $filtered->selectLink('Login'));
|
||||
$this->assertCount(0, $filtered->selectButton('Submit'));
|
||||
|
||||
$this->assertCount(1, $crawler->selectLink('Login')->selectLink('Login'));
|
||||
$this->assertCount(1, $crawler->selectButton('Submit')->selectButton('Submit'));
|
||||
}
|
||||
|
||||
public function testChaining()
|
||||
{
|
||||
$crawler = new Crawler('<div name="a"><div name="b"><div name="c"></div></div></div>');
|
||||
|
||||
$this->assertEquals('a', $crawler->filterXPath('//div')->filterXPath('div')->filterXPath('div')->attr('name'));
|
||||
}
|
||||
|
||||
public function testLinks()
|
||||
{
|
||||
$crawler = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo');
|
||||
|
@ -665,6 +726,10 @@ EOF
|
|||
<li>Two Bis</li>
|
||||
<li>Three Bis</li>
|
||||
</ul>
|
||||
<div id="parent">
|
||||
<div id="child"></div>
|
||||
</div>
|
||||
<div id="sibling"><img /></div>
|
||||
</body>
|
||||
</html>
|
||||
');
|
||||
|
|
|
@ -85,6 +85,20 @@ class FormTest extends \PHPUnit_Framework_TestCase
|
|||
$form = new Form($nodes->item(1), 'http://example.com');
|
||||
}
|
||||
|
||||
public function testConstructorLoadsOnlyFieldsOfTheRightForm()
|
||||
{
|
||||
$dom = $this->createTestMultipleForm();
|
||||
|
||||
$nodes = $dom->getElementsByTagName('form');
|
||||
$buttonElements = $dom->getElementsByTagName('button');
|
||||
|
||||
$form = new Form($nodes->item(0), 'http://example.com');
|
||||
$this->assertCount(3, $form->all());
|
||||
|
||||
$form = new Form($buttonElements->item(1), 'http://example.com');
|
||||
$this->assertCount(5, $form->all());
|
||||
}
|
||||
|
||||
public function testConstructorHandlesFormAttribute()
|
||||
{
|
||||
$dom = $this->createTestHtml5Form();
|
||||
|
@ -840,6 +854,32 @@ class FormTest extends \PHPUnit_Framework_TestCase
|
|||
return $dom;
|
||||
}
|
||||
|
||||
protected function createTestMultipleForm()
|
||||
{
|
||||
$dom = new \DOMDocument();
|
||||
$dom->loadHTML('
|
||||
<html>
|
||||
<h1>Hello form</h1>
|
||||
<form action="" method="POST">
|
||||
<div><input type="checkbox" name="apples[]" value="1" checked /></div>
|
||||
<input type="checkbox" name="oranges[]" value="1" checked />
|
||||
<div><label></label><input type="hidden" name="form_name" value="form-1" /></div>
|
||||
<input type="submit" name="button_1" value="Capture fields" />
|
||||
<button type="submit" name="button_2">Submit form_2</button>
|
||||
</form>
|
||||
<form action="" method="POST">
|
||||
<div><div><input type="checkbox" name="oranges[]" value="2" checked />
|
||||
<input type="checkbox" name="oranges[]" value="3" checked /></div></div>
|
||||
<input type="hidden" name="form_name" value="form_2" />
|
||||
<input type="hidden" name="outer_field" value="success" />
|
||||
<button type="submit" name="button_3">Submit from outside the form</button>
|
||||
</form>
|
||||
<button />
|
||||
</html>');
|
||||
|
||||
return $dom;
|
||||
}
|
||||
|
||||
public function testgetPhpValuesWithEmptyTextarea()
|
||||
{
|
||||
$dom = new \DOMDocument();
|
||||
|
|
Reference in New Issue