bug #10958 [DomCrawler] Fixed filterXPath() chaining loosing the parent DOM nodes (stof, robbertkl)

This PR was merged into the 2.3 branch.

Discussion
----------

[DomCrawler] Fixed filterXPath() chaining loosing the parent DOM nodes

| Q             | A
| ------------- | ---
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #10206
| License       | MIT
| Doc PR        | n/a

This is a fixed version of #10207, preserving the BC for XPath queries. It is the rebased version of #10935 targetting 2.3

The example given in #10260 when reporting the regression in the previous attempt is covered by the new tests added in the first commit of the PR.
I also added many tests ensuring that the behavior is the same than in the current implementation.

Commits
-------

80438c2 Fixed the XPath filtering to have the same behavior than Symfony 2.4
711ac32 [DomCrawler] Fixed filterXPath() chaining
8f706c9 [DomCrawler] Added more tests for the XPath filtering
This commit is contained in:
Fabien Potencier 2014-05-21 17:44:46 +02:00
commit 4eb4a6056c
5 changed files with 219 additions and 41 deletions

View File

@ -170,7 +170,7 @@ class Crawler extends \SplObjectStorage
$this->addDocument($dom);
$base = $this->filterXPath('descendant-or-self::base')->extract(array('href'));
$base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href'));
$baseHref = current($base);
if (count($base) && !empty($baseHref)) {
@ -445,7 +445,7 @@ class Crawler extends \SplObjectStorage
$nodes = array();
while ($node = $node->parentNode) {
if (1 === $node->nodeType && '_root' !== $node->nodeName) {
if (1 === $node->nodeType) {
$nodes[] = $node;
}
}
@ -580,6 +580,11 @@ class Crawler extends \SplObjectStorage
/**
* Filters the list of nodes with an XPath expression.
*
* The XPath expression is evaluated in the context of the crawler, which
* is considered as a fake parent of the elements inside it.
* This means that a child selector "div" or "./div" will match only
* the div elements of the current crawler, not their children.
*
* @param string $xpath An XPath expression
*
* @return Crawler A new instance of Crawler with the filtered list of nodes
@ -588,15 +593,14 @@ class Crawler extends \SplObjectStorage
*/
public function filterXPath($xpath)
{
$document = new \DOMDocument('1.0', 'UTF-8');
$root = $document->appendChild($document->createElement('_root'));
foreach ($this as $node) {
$root->appendChild($document->importNode($node, true));
$xpath = $this->relativize($xpath);
// If we dropped all expressions in the XPath while preparing it, there would be no match
if ('' === $xpath) {
return new static(null, $this->uri);
}
$domxpath = new \DOMXPath($document);
return new static($domxpath->query($xpath), $this->uri);
return $this->filterRelativeXPath($xpath);
}
/**
@ -620,7 +624,8 @@ class Crawler extends \SplObjectStorage
// @codeCoverageIgnoreEnd
}
return $this->filterXPath(CssSelector::toXPath($selector));
// The CssSelector already prefixes the selector with descendant-or-self::
return $this->filterRelativeXPath(CssSelector::toXPath($selector));
}
/**
@ -634,10 +639,10 @@ class Crawler extends \SplObjectStorage
*/
public function selectLink($value)
{
$xpath = sprintf('//a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s)] ', static::xpathLiteral(' '.$value.' ')).
sprintf('| //a/img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]/ancestor::a', static::xpathLiteral(' '.$value.' '));
$xpath = sprintf('descendant-or-self::a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) ', static::xpathLiteral(' '.$value.' ')).
sprintf('or ./img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]]', static::xpathLiteral(' '.$value.' '));
return $this->filterXPath($xpath);
return $this->filterRelativeXPath($xpath);
}
/**
@ -652,11 +657,11 @@ class Crawler extends \SplObjectStorage
public function selectButton($value)
{
$translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")';
$xpath = sprintf('//input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
$xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id="%s" or @name="%s"] ', $translate, static::xpathLiteral(' '.$value.' '), $value, $value).
sprintf('| //button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);
sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);
return $this->filterXPath($xpath);
return $this->filterRelativeXPath($xpath);
}
/**
@ -772,6 +777,88 @@ class Crawler extends \SplObjectStorage
return sprintf("concat(%s)", implode($parts, ', '));
}
/**
* Filters the list of nodes with an XPath expression.
*
* The XPath expression should already be processed to apply it in the context of each node.
*
* @param string $xpath
*
* @return Crawler
*/
private function filterRelativeXPath($xpath)
{
$crawler = new static(null, $this->uri);
foreach ($this as $node) {
$domxpath = new \DOMXPath($node->ownerDocument);
$crawler->add($domxpath->query($xpath, $node));
}
return $crawler;
}
/**
* Make the XPath relative to the current context.
*
* The returned XPath will match elements matching the XPath inside the current crawler
* when running in the context of a node of the crawler.
*
* @param string $xpath
*
* @return string
*/
private function relativize($xpath)
{
$expressions = array();
$unionPattern = '/\|(?![^\[]*\])/';
// An expression which will never match to replace expressions which cannot match in the crawler
// We cannot simply drop
$nonMatchingExpression = 'a[name() = "b"]';
// Split any unions into individual expressions.
foreach (preg_split($unionPattern, $xpath) as $expression) {
$expression = trim($expression);
$parenthesis = '';
// If the union is inside some braces, we need to preserve the opening braces and apply
// the change only inside it.
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
$parenthesis = $matches[0];
$expression = substr($expression, strlen($parenthesis));
}
// BC for Symfony 2.4 and lower were elements were adding in a fake _root parent
if (0 === strpos($expression, '/_root/')) {
$expression = './'.substr($expression, 7);
}
// add prefix before absolute element selector
if (empty($expression)) {
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, '//')) {
$expression = 'descendant-or-self::' . substr($expression, 2);
} elseif (0 === strpos($expression, './')) {
$expression = 'self::' . substr($expression, 2);
} elseif ('/' === $expression[0]) {
// the only direct child in Symfony 2.4 and lower is _root, which is already handled previously
// so let's drop the expression entirely
$expression = $nonMatchingExpression;
} elseif ('.' === $expression[0]) {
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, 'descendant::')) {
$expression = 'descendant-or-self::' . substr($expression, strlen('descendant::'));
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
$expression = 'self::' .$expression;
}
$expressions[] = $parenthesis.$expression;
}
return implode(' | ', $expressions);
}
/**
* @param int $position
*

View File

@ -52,13 +52,7 @@ abstract class FormField
{
$this->node = $node;
$this->name = $node->getAttribute('name');
$this->document = new \DOMDocument('1.0', 'UTF-8');
$this->node = $this->document->importNode($this->node, true);
$root = $this->document->appendChild($this->document->createElement('_root'));
$root->appendChild($this->node);
$this->xpath = new \DOMXPath($this->document);
$this->xpath = new \DOMXPath($node->ownerDocument);
$this->initialize();
}

View File

@ -388,9 +388,7 @@ class Form extends Link implements \ArrayAccess
{
$this->fields = new FormFieldRegistry();
$document = new \DOMDocument('1.0', 'UTF-8');
$xpath = new \DOMXPath($document);
$root = $document->appendChild($document->createElement('_root'));
$xpath = new \DOMXPath($this->node->ownerDocument);
// add submitted button if it has a valid name
if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) {
@ -400,38 +398,32 @@ class Form extends Link implements \ArrayAccess
// temporarily change the name of the input node for the x coordinate
$this->button->setAttribute('name', $name.'.x');
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
$this->set(new Field\InputFormField($this->button));
// temporarily change the name of the input node for the y coordinate
$this->button->setAttribute('name', $name.'.y');
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
$this->set(new Field\InputFormField($this->button));
// restore the original name of the input node
$this->button->setAttribute('name', $name);
} else {
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
$this->set(new Field\InputFormField($this->button));
}
}
// find form elements corresponding to the current form
if ($this->node->hasAttribute('id')) {
// traverse through the whole document
$node = $document->importNode($this->node->ownerDocument->documentElement, true);
$root->appendChild($node);
// corresponding elements are either descendants or have a matching HTML5 form attribute
$formId = Crawler::xpathLiteral($this->node->getAttribute('id'));
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId), $root);
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId));
foreach ($fieldNodes as $node) {
$this->addField($node);
}
} else {
// parent form has no id, add descendant elements only
$node = $document->importNode($this->node, true);
$root->appendChild($node);
// descendant elements with form attribute are not part of this form
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $root);
// do the xpath query with $this->node as the context node, to only find descendant elements
// however, descendant elements with form attribute are not part of this form
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->node);
foreach ($fieldNodes as $node) {
$this->addField($node);
}

View File

@ -368,6 +368,27 @@ EOF
$this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->extract('_text'), '->extract() returns an empty array if the node list is empty');
}
public function testFilterXpathComplexQueries()
{
$crawler = $this->createTestCrawler()->filterXPath('//body');
$this->assertCount(0, $crawler->filterXPath('/input'));
$this->assertCount(0, $crawler->filterXPath('/body'));
$this->assertCount(1, $crawler->filterXPath('/_root/body'));
$this->assertCount(1, $crawler->filterXPath('./body'));
$this->assertCount(4, $crawler->filterXPath('//form')->filterXPath('//button | //input'));
$this->assertCount(1, $crawler->filterXPath('body'));
$this->assertCount(6, $crawler->filterXPath('//button | //input'));
$this->assertCount(1, $crawler->filterXPath('//body'));
$this->assertCount(1, $crawler->filterXPath('descendant-or-self::body'));
$this->assertCount(1, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('./div'), 'A child selection finds only the current div');
$this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div'), 'A descendant selector matches the current div and its child');
$this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('//div'), 'A descendant selector matches the current div and its child');
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::filterXPath
*/
@ -378,8 +399,10 @@ EOF
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler');
$crawler = $this->createTestCrawler()->filterXPath('//ul');
$this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');
$crawler = $this->createTestCrawler();
$this->assertCount(3, $crawler->filterXPath('//body')->filterXPath('//button')->parents(), '->filterXpath() preserves parents when chained');
}
/**
@ -455,6 +478,44 @@ EOF
}
}
public function testSelectLinkAndLinkFiltered()
{
$html = <<<HTML
<!DOCTYPE html>
<html lang="en">
<body>
<div id="action">
<a href="/index.php?r=site/login">Login</a>
</div>
<form id="login-form" action="/index.php?r=site/login" method="post">
<button type="submit">Submit</button>
</form>
</body>
</html>
HTML;
$crawler = new Crawler($html);
$filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'login-form']");
$this->assertCount(0, $filtered->selectLink('Login'));
$this->assertCount(1, $filtered->selectButton('Submit'));
$filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'action']");
$this->assertCount(1, $filtered->selectLink('Login'));
$this->assertCount(0, $filtered->selectButton('Submit'));
$this->assertCount(1, $crawler->selectLink('Login')->selectLink('Login'));
$this->assertCount(1, $crawler->selectButton('Submit')->selectButton('Submit'));
}
public function testChaining()
{
$crawler = new Crawler('<div name="a"><div name="b"><div name="c"></div></div></div>');
$this->assertEquals('a', $crawler->filterXPath('//div')->filterXPath('div')->filterXPath('div')->attr('name'));
}
public function testLinks()
{
$crawler = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo');
@ -665,6 +726,10 @@ EOF
<li>Two Bis</li>
<li>Three Bis</li>
</ul>
<div id="parent">
<div id="child"></div>
</div>
<div id="sibling"><img /></div>
</body>
</html>
');

View File

@ -85,6 +85,20 @@ class FormTest extends \PHPUnit_Framework_TestCase
$form = new Form($nodes->item(1), 'http://example.com');
}
public function testConstructorLoadsOnlyFieldsOfTheRightForm()
{
$dom = $this->createTestMultipleForm();
$nodes = $dom->getElementsByTagName('form');
$buttonElements = $dom->getElementsByTagName('button');
$form = new Form($nodes->item(0), 'http://example.com');
$this->assertCount(3, $form->all());
$form = new Form($buttonElements->item(1), 'http://example.com');
$this->assertCount(5, $form->all());
}
public function testConstructorHandlesFormAttribute()
{
$dom = $this->createTestHtml5Form();
@ -840,6 +854,32 @@ class FormTest extends \PHPUnit_Framework_TestCase
return $dom;
}
protected function createTestMultipleForm()
{
$dom = new \DOMDocument();
$dom->loadHTML('
<html>
<h1>Hello form</h1>
<form action="" method="POST">
<div><input type="checkbox" name="apples[]" value="1" checked /></div>
<input type="checkbox" name="oranges[]" value="1" checked />
<div><label></label><input type="hidden" name="form_name" value="form-1" /></div>
<input type="submit" name="button_1" value="Capture fields" />
<button type="submit" name="button_2">Submit form_2</button>
</form>
<form action="" method="POST">
<div><div><input type="checkbox" name="oranges[]" value="2" checked />
<input type="checkbox" name="oranges[]" value="3" checked /></div></div>
<input type="hidden" name="form_name" value="form_2" />
<input type="hidden" name="outer_field" value="success" />
<button type="submit" name="button_3">Submit from outside the form</button>
</form>
<button />
</html>');
return $dom;
}
public function testgetPhpValuesWithEmptyTextarea()
{
$dom = new \DOMDocument();