bug #10207 [DomCrawler] Fixed filterXPath() chaining (robbertkl)

This PR was merged into the 2.3 branch.

Discussion
----------

[DomCrawler] Fixed filterXPath() chaining

| Q             | A
| ------------- | ---
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | debatable (see below)
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #10206
| License       | MIT
| Doc PR        |

As @stof mentions in #10206, each node in the Crawler can belong to a different \DOMDocument. Therefore, I've made each node do its own XPath, relative to itself, and add all the results to a new Crawler. This way, all resulting nodes are still part of their original \DOMDocument and thus can reach all of their parent nodes.

No current tests break on this change. I've added a new test for this case, by checking if the number of parents is correct after obtaining a node through chaining of `filterXPath()`.

Now for BC: I can think of a number of cases where this change would give a different result. However, it's debatable/unclear if:
- the old behavior was a bug in the first place (which would validate this change), or
- the old behavior was intended (which would make this a BC breaking change)

As an example, consider the following HTML:
```html
<div name="a"><div name="b"><div name="c"></div></div></div>
```
What would happen if we run this:
```php
echo $crawler->filterXPath('//div')->filterXPath('div')->filterXPath('div')->attr('name');
```
Aside from breaking reachability of the parent nodes by chaining, with the original code it would echo 'a'.
With this patch it would echo 'c', which, to me, makes more sense.

Commits
-------

43a7716 [DomCrawler] Fixed filterXPath() chaining
This commit is contained in:
Fabien Potencier 2014-02-05 17:38:09 +01:00
commit c11c5888f3
4 changed files with 20 additions and 34 deletions

View File

@ -441,7 +441,7 @@ class Crawler extends \SplObjectStorage
$nodes = array();
while ($node = $node->parentNode) {
if (1 === $node->nodeType && '_root' !== $node->nodeName) {
if (1 === $node->nodeType) {
$nodes[] = $node;
}
}
@ -584,15 +584,13 @@ class Crawler extends \SplObjectStorage
*/
public function filterXPath($xpath)
{
$document = new \DOMDocument('1.0', 'UTF-8');
$root = $document->appendChild($document->createElement('_root'));
$crawler = new static(null, $this->uri);
foreach ($this as $node) {
$root->appendChild($document->importNode($node, true));
$domxpath = new \DOMXPath($node->ownerDocument);
$crawler->add($domxpath->query($xpath, $node));
}
$domxpath = new \DOMXPath($document);
return new static($domxpath->query($xpath), $this->uri);
return $crawler;
}
/**

View File

@ -52,13 +52,7 @@ abstract class FormField
{
$this->node = $node;
$this->name = $node->getAttribute('name');
$this->document = new \DOMDocument('1.0', 'UTF-8');
$this->node = $this->document->importNode($this->node, true);
$root = $this->document->appendChild($this->document->createElement('_root'));
$root->appendChild($this->node);
$this->xpath = new \DOMXPath($this->document);
$this->xpath = new \DOMXPath($node->ownerDocument);
$this->initialize();
}

View File

@ -378,9 +378,7 @@ class Form extends Link implements \ArrayAccess
{
$this->fields = new FormFieldRegistry();
$document = new \DOMDocument('1.0', 'UTF-8');
$xpath = new \DOMXPath($document);
$root = $document->appendChild($document->createElement('_root'));
$xpath = new \DOMXPath($this->node->ownerDocument);
// add submitted button if it has a valid name
if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) {
@ -390,39 +388,33 @@ class Form extends Link implements \ArrayAccess
// temporarily change the name of the input node for the x coordinate
$this->button->setAttribute('name', $name.'.x');
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
$this->set(new Field\InputFormField($this->button));
// temporarily change the name of the input node for the y coordinate
$this->button->setAttribute('name', $name.'.y');
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
$this->set(new Field\InputFormField($this->button));
// restore the original name of the input node
$this->button->setAttribute('name', $name);
}
else {
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
} else {
$this->set(new Field\InputFormField($this->button));
}
}
// find form elements corresponding to the current form
if ($this->node->hasAttribute('id')) {
// traverse through the whole document
$node = $document->importNode($this->node->ownerDocument->documentElement, true);
$root->appendChild($node);
// corresponding elements are either descendants or have a matching HTML5 form attribute
$formId = Crawler::xpathLiteral($this->node->getAttribute('id'));
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId), $root);
// do the xpath query without $this->node as the context node (i.e. traverse through the whole document)
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId));
foreach ($fieldNodes as $node) {
$this->addField($node);
}
} else {
// parent form has no id, add descendant elements only
$node = $document->importNode($this->node, true);
$root->appendChild($node);
// descendant elements with form attribute are not part of this form
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $root);
// do the xpath query with $this->node as the context node, to only find descendant elements
// however, descendant elements with form attribute are not part of this form
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->node);
foreach ($fieldNodes as $node) {
$this->addField($node);
}

View File

@ -378,8 +378,10 @@ EOF
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler');
$crawler = $this->createTestCrawler()->filterXPath('//ul');
$this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');
$crawler = $this->createTestCrawler();
$this->assertCount(3, $crawler->filterXPath('//body')->filterXPath('//button')->parents(), '->filterXpath() preserves parents when chained');
}
/**