diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 770e4026b8..de65978614 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -170,7 +170,7 @@ class Crawler extends \SplObjectStorage $this->addDocument($dom); - $base = $this->filterXPath('descendant-or-self::base')->extract(array('href')); + $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href')); $baseHref = current($base); if (count($base) && !empty($baseHref)) { @@ -445,7 +445,7 @@ class Crawler extends \SplObjectStorage $nodes = array(); while ($node = $node->parentNode) { - if (1 === $node->nodeType && '_root' !== $node->nodeName) { + if (1 === $node->nodeType) { $nodes[] = $node; } } @@ -580,6 +580,11 @@ class Crawler extends \SplObjectStorage /** * Filters the list of nodes with an XPath expression. * + * The XPath expression is evaluated in the context of the crawler, which + * is considered as a fake parent of the elements inside it. + * This means that a child selector "div" or "./div" will match only + * the div elements of the current crawler, not their children. 
+ * * @param string $xpath An XPath expression * * @return Crawler A new instance of Crawler with the filtered list of nodes @@ -588,15 +593,14 @@ class Crawler extends \SplObjectStorage */ public function filterXPath($xpath) { - $document = new \DOMDocument('1.0', 'UTF-8'); - $root = $document->appendChild($document->createElement('_root')); - foreach ($this as $node) { - $root->appendChild($document->importNode($node, true)); + $xpath = $this->relativize($xpath); + + // If we dropped all expressions in the XPath while preparing it, there would be no match + if ('' === $xpath) { + return new static(null, $this->uri); } - $domxpath = new \DOMXPath($document); - - return new static($domxpath->query($xpath), $this->uri); + return $this->filterRelativeXPath($xpath); } /** @@ -620,7 +624,8 @@ class Crawler extends \SplObjectStorage // @codeCoverageIgnoreEnd } - return $this->filterXPath(CssSelector::toXPath($selector)); + // The CssSelector already prefixes the selector with descendant-or-self:: + return $this->filterRelativeXPath(CssSelector::toXPath($selector)); } /** @@ -634,10 +639,10 @@ class Crawler extends \SplObjectStorage */ public function selectLink($value) { - $xpath = sprintf('//a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s)] ', static::xpathLiteral(' '.$value.' ')). - sprintf('| //a/img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]/ancestor::a', static::xpathLiteral(' '.$value.' ')); + $xpath = sprintf('descendant-or-self::a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) ', static::xpathLiteral(' '.$value.' ')). + sprintf('or ./img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]]', static::xpathLiteral(' '.$value.' 
')); - return $this->filterXPath($xpath); + return $this->filterRelativeXPath($xpath); } /** @@ -652,11 +657,11 @@ class Crawler extends \SplObjectStorage public function selectButton($value) { $translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")'; - $xpath = sprintf('//input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')). + $xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')). sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id="%s" or @name="%s"] ', $translate, static::xpathLiteral(' '.$value.' '), $value, $value). - sprintf('| //button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value); + sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value); - return $this->filterXPath($xpath); + return $this->filterRelativeXPath($xpath); } /** @@ -772,6 +777,88 @@ class Crawler extends \SplObjectStorage return sprintf("concat(%s)", implode($parts, ', ')); } + /** + * Filters the list of nodes with an XPath expression. + * + * The XPath expression should already be processed to apply it in the context of each node. 
+ * + * @param string $xpath + * + * @return Crawler + */ + private function filterRelativeXPath($xpath) + { + $crawler = new static(null, $this->uri); + + foreach ($this as $node) { + $domxpath = new \DOMXPath($node->ownerDocument); + $crawler->add($domxpath->query($xpath, $node)); + } + + return $crawler; + } + + /** + * Make the XPath relative to the current context. + * + * The returned XPath will match elements matching the XPath inside the current crawler + * when running in the context of a node of the crawler. + * + * @param string $xpath + * + * @return string + */ + private function relativize($xpath) + { + $expressions = array(); + + $unionPattern = '/\|(?![^\[]*\])/'; + // An expression which will never match to replace expressions which cannot match in the crawler + // We cannot simply drop such expressions: a piece may carry an opening parenthesis that the rest of the XPath still needs + $nonMatchingExpression = 'a[name() = "b"]'; + + // Split any unions into individual expressions. + foreach (preg_split($unionPattern, $xpath) as $expression) { + $expression = trim($expression); + $parenthesis = ''; + + // If the union is inside some parentheses, we need to preserve the opening parentheses and apply + // the change only inside them. + if (preg_match('/^[\(\s*]+/', $expression, $matches)) { + $parenthesis = $matches[0]; + $expression = substr($expression, strlen($parenthesis)); + } + + // BC for Symfony 2.4 and lower, where elements were added to a fake _root parent + if (0 === strpos($expression, '/_root/')) { + $expression = './'.substr($expression, 7); + } + + // add prefix before absolute element selector + if (empty($expression)) { + $expression = $nonMatchingExpression; + } elseif (0 === strpos($expression, '//')) { + $expression = 'descendant-or-self::' . substr($expression, 2); + } elseif (0 === strpos($expression, './')) { + $expression = 'self::' . 
substr($expression, 2); + } elseif ('/' === $expression[0]) { + // the only direct child in Symfony 2.4 and lower is _root, which is already handled previously + // so let's drop the expression entirely + $expression = $nonMatchingExpression; + } elseif ('.' === $expression[0]) { + // '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results + $expression = $nonMatchingExpression; + } elseif (0 === strpos($expression, 'descendant::')) { + $expression = 'descendant-or-self::' . substr($expression, strlen('descendant::')); + } elseif (0 !== strpos($expression, 'descendant-or-self::')) { + $expression = 'self::' .$expression; + } + $expressions[] = $parenthesis.$expression; + } + + return implode(' | ', $expressions); + } + /** * @param int $position * diff --git a/src/Symfony/Component/DomCrawler/Field/FormField.php b/src/Symfony/Component/DomCrawler/Field/FormField.php index 2114b4ed5d..9db433520e 100644 --- a/src/Symfony/Component/DomCrawler/Field/FormField.php +++ b/src/Symfony/Component/DomCrawler/Field/FormField.php @@ -52,13 +52,7 @@ abstract class FormField { $this->node = $node; $this->name = $node->getAttribute('name'); - - $this->document = new \DOMDocument('1.0', 'UTF-8'); - $this->node = $this->document->importNode($this->node, true); - - $root = $this->document->appendChild($this->document->createElement('_root')); - $root->appendChild($this->node); - $this->xpath = new \DOMXPath($this->document); + $this->xpath = new \DOMXPath($node->ownerDocument); $this->initialize(); } diff --git a/src/Symfony/Component/DomCrawler/Form.php b/src/Symfony/Component/DomCrawler/Form.php index 0f28d45d53..c228b56195 100644 --- a/src/Symfony/Component/DomCrawler/Form.php +++ b/src/Symfony/Component/DomCrawler/Form.php @@ -388,9 +388,7 @@ class Form extends Link implements \ArrayAccess { $this->fields = new FormFieldRegistry(); - $document = new \DOMDocument('1.0', 'UTF-8'); - $xpath = new \DOMXPath($document); - $root = 
$document->appendChild($document->createElement('_root')); + $xpath = new \DOMXPath($this->node->ownerDocument); // add submitted button if it has a valid name if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) { @@ -400,38 +398,32 @@ class Form extends Link implements \ArrayAccess // temporarily change the name of the input node for the x coordinate $this->button->setAttribute('name', $name.'.x'); - $this->set(new Field\InputFormField($document->importNode($this->button, true))); + $this->set(new Field\InputFormField($this->button)); // temporarily change the name of the input node for the y coordinate $this->button->setAttribute('name', $name.'.y'); - $this->set(new Field\InputFormField($document->importNode($this->button, true))); + $this->set(new Field\InputFormField($this->button)); // restore the original name of the input node $this->button->setAttribute('name', $name); } else { - $this->set(new Field\InputFormField($document->importNode($this->button, true))); + $this->set(new Field\InputFormField($this->button)); } } // find form elements corresponding to the current form if ($this->node->hasAttribute('id')) { - // traverse through the whole document - $node = $document->importNode($this->node->ownerDocument->documentElement, true); - $root->appendChild($node); - // corresponding elements are either descendants or have a matching HTML5 form attribute $formId = Crawler::xpathLiteral($this->node->getAttribute('id')); - $fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId), $root); + + $fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | 
descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId)); foreach ($fieldNodes as $node) { $this->addField($node); } } else { - // parent form has no id, add descendant elements only - $node = $document->importNode($this->node, true); - $root->appendChild($node); - - // descendant elements with form attribute are not part of this form - $fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $root); + // do the xpath query with $this->node as the context node, to only find descendant elements + // however, descendant elements with form attribute are not part of this form + $fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->node); foreach ($fieldNodes as $node) { $this->addField($node); } diff --git a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php index 07a08d0d04..04921f693c 100644 --- a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php @@ -368,6 +368,27 @@ EOF $this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->extract('_text'), '->extract() returns an empty array if the node list is empty'); } + public function testFilterXpathComplexQueries() + { + $crawler = $this->createTestCrawler()->filterXPath('//body'); + + $this->assertCount(0, $crawler->filterXPath('/input')); + $this->assertCount(0, $crawler->filterXPath('/body')); + $this->assertCount(1, $crawler->filterXPath('/_root/body')); + $this->assertCount(1, 
$crawler->filterXPath('./body')); + $this->assertCount(4, $crawler->filterXPath('//form')->filterXPath('//button | //input')); + $this->assertCount(1, $crawler->filterXPath('body')); + $this->assertCount(6, $crawler->filterXPath('//button | //input')); + $this->assertCount(1, $crawler->filterXPath('//body')); + $this->assertCount(1, $crawler->filterXPath('descendant-or-self::body')); + $this->assertCount(1, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('./div'), 'A child selection finds only the current div'); + $this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div'), 'A descendant selector matches the current div and its child'); + $this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('//div'), 'A descendant selector matches the current div and its child'); + $this->assertCount(5, $crawler->filterXPath('(//a | //div)//img')); + $this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)')); + $this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )')); + } + /** * @covers Symfony\Component\DomCrawler\Crawler::filterXPath */ @@ -378,8 +399,10 @@ EOF $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler'); $crawler = $this->createTestCrawler()->filterXPath('//ul'); - $this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression'); + + $crawler = $this->createTestCrawler(); + $this->assertCount(3, $crawler->filterXPath('//body')->filterXPath('//button')->parents(), '->filterXpath() preserves parents when chained'); } /** @@ -455,6 +478,44 @@ EOF } } + public function testSelectLinkAndLinkFiltered() + { + $html = << + + +
+ Login +
+
+ +
+ + +HTML; + + $crawler = new Crawler($html); + $filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'login-form']"); + + $this->assertCount(0, $filtered->selectLink('Login')); + $this->assertCount(1, $filtered->selectButton('Submit')); + + $filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'action']"); + + $this->assertCount(1, $filtered->selectLink('Login')); + $this->assertCount(0, $filtered->selectButton('Submit')); + + $this->assertCount(1, $crawler->selectLink('Login')->selectLink('Login')); + $this->assertCount(1, $crawler->selectButton('Submit')->selectButton('Submit')); + } + + public function testChaining() + { + $crawler = new Crawler('
'); + + $this->assertEquals('a', $crawler->filterXPath('//div')->filterXPath('div')->filterXPath('div')->attr('name')); + } + public function testLinks() { $crawler = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo'); @@ -665,6 +726,10 @@ EOF
  • Two Bis
  • Three Bis
  • +
    +
    +
    +
    '); diff --git a/src/Symfony/Component/DomCrawler/Tests/FormTest.php b/src/Symfony/Component/DomCrawler/Tests/FormTest.php index 02e7fbba4b..0c12b08dc8 100644 --- a/src/Symfony/Component/DomCrawler/Tests/FormTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/FormTest.php @@ -85,6 +85,20 @@ class FormTest extends \PHPUnit_Framework_TestCase $form = new Form($nodes->item(1), 'http://example.com'); } + public function testConstructorLoadsOnlyFieldsOfTheRightForm() + { + $dom = $this->createTestMultipleForm(); + + $nodes = $dom->getElementsByTagName('form'); + $buttonElements = $dom->getElementsByTagName('button'); + + $form = new Form($nodes->item(0), 'http://example.com'); + $this->assertCount(3, $form->all()); + + $form = new Form($buttonElements->item(1), 'http://example.com'); + $this->assertCount(5, $form->all()); + } + public function testConstructorHandlesFormAttribute() { $dom = $this->createTestHtml5Form(); @@ -840,6 +854,32 @@ class FormTest extends \PHPUnit_Framework_TestCase return $dom; } + protected function createTestMultipleForm() + { + $dom = new \DOMDocument(); + $dom->loadHTML(' + +

    Hello form

    +
    +
    + +
    + + +
    +
    +
    +
    + + + +
    +