bug #10958 [DomCrawler] Fixed filterXPath() chaining loosing the parent DOM nodes (stof, robbertkl)

This PR was merged into the 2.3 branch.

Discussion
----------

[DomCrawler] Fixed filterXPath() chaining loosing the parent DOM nodes

| Q             | A
| ------------- | ---
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #10206
| License       | MIT
| Doc PR        | n/a

This is a fixed version of #10207, preserving the BC for XPath queries. It is the rebased version of #10935 targetting 2.3

The example given in #10260 when reporting the regression in the previous attempt is covered by the new tests added in the first commit of the PR.
I also added many tests ensuring that the behavior is the same than in the current implementation.

Commits
-------

80438c2 Fixed the XPath filtering to have the same behavior than Symfony 2.4
711ac32 [DomCrawler] Fixed filterXPath() chaining
8f706c9 [DomCrawler] Added more tests for the XPath filtering
This commit is contained in:
Fabien Potencier 2014-05-21 17:44:46 +02:00
commit 4eb4a6056c
5 changed files with 219 additions and 41 deletions

View File

@ -170,7 +170,7 @@ class Crawler extends \SplObjectStorage
$this->addDocument($dom); $this->addDocument($dom);
$base = $this->filterXPath('descendant-or-self::base')->extract(array('href')); $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href'));
$baseHref = current($base); $baseHref = current($base);
if (count($base) && !empty($baseHref)) { if (count($base) && !empty($baseHref)) {
@ -445,7 +445,7 @@ class Crawler extends \SplObjectStorage
$nodes = array(); $nodes = array();
while ($node = $node->parentNode) { while ($node = $node->parentNode) {
if (1 === $node->nodeType && '_root' !== $node->nodeName) { if (1 === $node->nodeType) {
$nodes[] = $node; $nodes[] = $node;
} }
} }
@ -580,6 +580,11 @@ class Crawler extends \SplObjectStorage
/** /**
* Filters the list of nodes with an XPath expression. * Filters the list of nodes with an XPath expression.
* *
* The XPath expression is evaluated in the context of the crawler, which
* is considered as a fake parent of the elements inside it.
* This means that a child selector "div" or "./div" will match only
* the div elements of the current crawler, not their children.
*
* @param string $xpath An XPath expression * @param string $xpath An XPath expression
* *
* @return Crawler A new instance of Crawler with the filtered list of nodes * @return Crawler A new instance of Crawler with the filtered list of nodes
@ -588,15 +593,14 @@ class Crawler extends \SplObjectStorage
*/ */
public function filterXPath($xpath) public function filterXPath($xpath)
{ {
$document = new \DOMDocument('1.0', 'UTF-8'); $xpath = $this->relativize($xpath);
$root = $document->appendChild($document->createElement('_root'));
foreach ($this as $node) { // If we dropped all expressions in the XPath while preparing it, there would be no match
$root->appendChild($document->importNode($node, true)); if ('' === $xpath) {
return new static(null, $this->uri);
} }
$domxpath = new \DOMXPath($document); return $this->filterRelativeXPath($xpath);
return new static($domxpath->query($xpath), $this->uri);
} }
/** /**
@ -620,7 +624,8 @@ class Crawler extends \SplObjectStorage
// @codeCoverageIgnoreEnd // @codeCoverageIgnoreEnd
} }
return $this->filterXPath(CssSelector::toXPath($selector)); // The CssSelector already prefixes the selector with descendant-or-self::
return $this->filterRelativeXPath(CssSelector::toXPath($selector));
} }
/** /**
@ -634,10 +639,10 @@ class Crawler extends \SplObjectStorage
*/ */
public function selectLink($value) public function selectLink($value)
{ {
$xpath = sprintf('//a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s)] ', static::xpathLiteral(' '.$value.' ')). $xpath = sprintf('descendant-or-self::a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) ', static::xpathLiteral(' '.$value.' ')).
sprintf('| //a/img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]/ancestor::a', static::xpathLiteral(' '.$value.' ')); sprintf('or ./img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]]', static::xpathLiteral(' '.$value.' '));
return $this->filterXPath($xpath); return $this->filterRelativeXPath($xpath);
} }
/** /**
@ -652,11 +657,11 @@ class Crawler extends \SplObjectStorage
public function selectButton($value) public function selectButton($value)
{ {
$translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")'; $translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")';
$xpath = sprintf('//input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')). $xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id="%s" or @name="%s"] ', $translate, static::xpathLiteral(' '.$value.' '), $value, $value). sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id="%s" or @name="%s"] ', $translate, static::xpathLiteral(' '.$value.' '), $value, $value).
sprintf('| //button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value); sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);
return $this->filterXPath($xpath); return $this->filterRelativeXPath($xpath);
} }
/** /**
@ -772,6 +777,88 @@ class Crawler extends \SplObjectStorage
return sprintf("concat(%s)", implode($parts, ', ')); return sprintf("concat(%s)", implode($parts, ', '));
} }
/**
* Filters the list of nodes with an XPath expression.
*
* The XPath expression should already be processed to apply it in the context of each node.
*
* @param string $xpath
*
* @return Crawler
*/
private function filterRelativeXPath($xpath)
{
$crawler = new static(null, $this->uri);
foreach ($this as $node) {
$domxpath = new \DOMXPath($node->ownerDocument);
$crawler->add($domxpath->query($xpath, $node));
}
return $crawler;
}
/**
* Make the XPath relative to the current context.
*
* The returned XPath will match elements matching the XPath inside the current crawler
* when running in the context of a node of the crawler.
*
* @param string $xpath
*
* @return string
*/
private function relativize($xpath)
{
$expressions = array();
$unionPattern = '/\|(?![^\[]*\])/';
// An expression which will never match to replace expressions which cannot match in the crawler
// We cannot simply drop
$nonMatchingExpression = 'a[name() = "b"]';
// Split any unions into individual expressions.
foreach (preg_split($unionPattern, $xpath) as $expression) {
$expression = trim($expression);
$parenthesis = '';
// If the union is inside some braces, we need to preserve the opening braces and apply
// the change only inside it.
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
$parenthesis = $matches[0];
$expression = substr($expression, strlen($parenthesis));
}
// BC for Symfony 2.4 and lower were elements were adding in a fake _root parent
if (0 === strpos($expression, '/_root/')) {
$expression = './'.substr($expression, 7);
}
// add prefix before absolute element selector
if (empty($expression)) {
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, '//')) {
$expression = 'descendant-or-self::' . substr($expression, 2);
} elseif (0 === strpos($expression, './')) {
$expression = 'self::' . substr($expression, 2);
} elseif ('/' === $expression[0]) {
// the only direct child in Symfony 2.4 and lower is _root, which is already handled previously
// so let's drop the expression entirely
$expression = $nonMatchingExpression;
} elseif ('.' === $expression[0]) {
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, 'descendant::')) {
$expression = 'descendant-or-self::' . substr($expression, strlen('descendant::'));
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
$expression = 'self::' .$expression;
}
$expressions[] = $parenthesis.$expression;
}
return implode(' | ', $expressions);
}
/** /**
* @param int $position * @param int $position
* *

View File

@ -52,13 +52,7 @@ abstract class FormField
{ {
$this->node = $node; $this->node = $node;
$this->name = $node->getAttribute('name'); $this->name = $node->getAttribute('name');
$this->xpath = new \DOMXPath($node->ownerDocument);
$this->document = new \DOMDocument('1.0', 'UTF-8');
$this->node = $this->document->importNode($this->node, true);
$root = $this->document->appendChild($this->document->createElement('_root'));
$root->appendChild($this->node);
$this->xpath = new \DOMXPath($this->document);
$this->initialize(); $this->initialize();
} }

View File

@ -388,9 +388,7 @@ class Form extends Link implements \ArrayAccess
{ {
$this->fields = new FormFieldRegistry(); $this->fields = new FormFieldRegistry();
$document = new \DOMDocument('1.0', 'UTF-8'); $xpath = new \DOMXPath($this->node->ownerDocument);
$xpath = new \DOMXPath($document);
$root = $document->appendChild($document->createElement('_root'));
// add submitted button if it has a valid name // add submitted button if it has a valid name
if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) { if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) {
@ -400,38 +398,32 @@ class Form extends Link implements \ArrayAccess
// temporarily change the name of the input node for the x coordinate // temporarily change the name of the input node for the x coordinate
$this->button->setAttribute('name', $name.'.x'); $this->button->setAttribute('name', $name.'.x');
$this->set(new Field\InputFormField($document->importNode($this->button, true))); $this->set(new Field\InputFormField($this->button));
// temporarily change the name of the input node for the y coordinate // temporarily change the name of the input node for the y coordinate
$this->button->setAttribute('name', $name.'.y'); $this->button->setAttribute('name', $name.'.y');
$this->set(new Field\InputFormField($document->importNode($this->button, true))); $this->set(new Field\InputFormField($this->button));
// restore the original name of the input node // restore the original name of the input node
$this->button->setAttribute('name', $name); $this->button->setAttribute('name', $name);
} else { } else {
$this->set(new Field\InputFormField($document->importNode($this->button, true))); $this->set(new Field\InputFormField($this->button));
} }
} }
// find form elements corresponding to the current form // find form elements corresponding to the current form
if ($this->node->hasAttribute('id')) { if ($this->node->hasAttribute('id')) {
// traverse through the whole document
$node = $document->importNode($this->node->ownerDocument->documentElement, true);
$root->appendChild($node);
// corresponding elements are either descendants or have a matching HTML5 form attribute // corresponding elements are either descendants or have a matching HTML5 form attribute
$formId = Crawler::xpathLiteral($this->node->getAttribute('id')); $formId = Crawler::xpathLiteral($this->node->getAttribute('id'));
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId), $root);
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId));
foreach ($fieldNodes as $node) { foreach ($fieldNodes as $node) {
$this->addField($node); $this->addField($node);
} }
} else { } else {
// parent form has no id, add descendant elements only // do the xpath query with $this->node as the context node, to only find descendant elements
$node = $document->importNode($this->node, true); // however, descendant elements with form attribute are not part of this form
$root->appendChild($node); $fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->node);
// descendant elements with form attribute are not part of this form
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $root);
foreach ($fieldNodes as $node) { foreach ($fieldNodes as $node) {
$this->addField($node); $this->addField($node);
} }

View File

@ -368,6 +368,27 @@ EOF
$this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->extract('_text'), '->extract() returns an empty array if the node list is empty'); $this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->extract('_text'), '->extract() returns an empty array if the node list is empty');
} }
public function testFilterXpathComplexQueries()
{
$crawler = $this->createTestCrawler()->filterXPath('//body');
$this->assertCount(0, $crawler->filterXPath('/input'));
$this->assertCount(0, $crawler->filterXPath('/body'));
$this->assertCount(1, $crawler->filterXPath('/_root/body'));
$this->assertCount(1, $crawler->filterXPath('./body'));
$this->assertCount(4, $crawler->filterXPath('//form')->filterXPath('//button | //input'));
$this->assertCount(1, $crawler->filterXPath('body'));
$this->assertCount(6, $crawler->filterXPath('//button | //input'));
$this->assertCount(1, $crawler->filterXPath('//body'));
$this->assertCount(1, $crawler->filterXPath('descendant-or-self::body'));
$this->assertCount(1, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('./div'), 'A child selection finds only the current div');
$this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div'), 'A descendant selector matches the current div and its child');
$this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('//div'), 'A descendant selector matches the current div and its child');
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
}
/** /**
* @covers Symfony\Component\DomCrawler\Crawler::filterXPath * @covers Symfony\Component\DomCrawler\Crawler::filterXPath
*/ */
@ -378,8 +399,10 @@ EOF
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler'); $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler');
$crawler = $this->createTestCrawler()->filterXPath('//ul'); $crawler = $this->createTestCrawler()->filterXPath('//ul');
$this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression'); $this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');
$crawler = $this->createTestCrawler();
$this->assertCount(3, $crawler->filterXPath('//body')->filterXPath('//button')->parents(), '->filterXpath() preserves parents when chained');
} }
/** /**
@ -455,6 +478,44 @@ EOF
} }
} }
public function testSelectLinkAndLinkFiltered()
{
$html = <<<HTML
<!DOCTYPE html>
<html lang="en">
<body>
<div id="action">
<a href="/index.php?r=site/login">Login</a>
</div>
<form id="login-form" action="/index.php?r=site/login" method="post">
<button type="submit">Submit</button>
</form>
</body>
</html>
HTML;
$crawler = new Crawler($html);
$filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'login-form']");
$this->assertCount(0, $filtered->selectLink('Login'));
$this->assertCount(1, $filtered->selectButton('Submit'));
$filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'action']");
$this->assertCount(1, $filtered->selectLink('Login'));
$this->assertCount(0, $filtered->selectButton('Submit'));
$this->assertCount(1, $crawler->selectLink('Login')->selectLink('Login'));
$this->assertCount(1, $crawler->selectButton('Submit')->selectButton('Submit'));
}
public function testChaining()
{
$crawler = new Crawler('<div name="a"><div name="b"><div name="c"></div></div></div>');
$this->assertEquals('a', $crawler->filterXPath('//div')->filterXPath('div')->filterXPath('div')->attr('name'));
}
public function testLinks() public function testLinks()
{ {
$crawler = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo'); $crawler = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo');
@ -665,6 +726,10 @@ EOF
<li>Two Bis</li> <li>Two Bis</li>
<li>Three Bis</li> <li>Three Bis</li>
</ul> </ul>
<div id="parent">
<div id="child"></div>
</div>
<div id="sibling"><img /></div>
</body> </body>
</html> </html>
'); ');

View File

@ -85,6 +85,20 @@ class FormTest extends \PHPUnit_Framework_TestCase
$form = new Form($nodes->item(1), 'http://example.com'); $form = new Form($nodes->item(1), 'http://example.com');
} }
public function testConstructorLoadsOnlyFieldsOfTheRightForm()
{
$dom = $this->createTestMultipleForm();
$nodes = $dom->getElementsByTagName('form');
$buttonElements = $dom->getElementsByTagName('button');
$form = new Form($nodes->item(0), 'http://example.com');
$this->assertCount(3, $form->all());
$form = new Form($buttonElements->item(1), 'http://example.com');
$this->assertCount(5, $form->all());
}
public function testConstructorHandlesFormAttribute() public function testConstructorHandlesFormAttribute()
{ {
$dom = $this->createTestHtml5Form(); $dom = $this->createTestHtml5Form();
@ -840,6 +854,32 @@ class FormTest extends \PHPUnit_Framework_TestCase
return $dom; return $dom;
} }
protected function createTestMultipleForm()
{
$dom = new \DOMDocument();
$dom->loadHTML('
<html>
<h1>Hello form</h1>
<form action="" method="POST">
<div><input type="checkbox" name="apples[]" value="1" checked /></div>
<input type="checkbox" name="oranges[]" value="1" checked />
<div><label></label><input type="hidden" name="form_name" value="form-1" /></div>
<input type="submit" name="button_1" value="Capture fields" />
<button type="submit" name="button_2">Submit form_2</button>
</form>
<form action="" method="POST">
<div><div><input type="checkbox" name="oranges[]" value="2" checked />
<input type="checkbox" name="oranges[]" value="3" checked /></div></div>
<input type="hidden" name="form_name" value="form_2" />
<input type="hidden" name="outer_field" value="success" />
<button type="submit" name="button_3">Submit from outside the form</button>
</form>
<button />
</html>');
return $dom;
}
public function testgetPhpValuesWithEmptyTextarea() public function testgetPhpValuesWithEmptyTextarea()
{ {
$dom = new \DOMDocument(); $dom = new \DOMDocument();