feature #19430 [DomCrawler] Add support for XPath expression evaluation (jakzal)

This PR was merged into the 3.2-dev branch.

Discussion
----------

[DomCrawler] Add support for XPath expression evaluation

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | no
| New feature?  | yes
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #19162
| License       | MIT
| Doc PR        | TODO

Example usage:

```php
<?php

use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\VarDumper\VarDumper;

require_once __DIR__.'/vendor/autoload.php';

// Three <span> elements sharing the "article-" id prefix and the "article" class.
$markup = <<<'HTML'
<html>
<body>
    <span id="article-100" class="article">Article 1</span>
    <span id="article-101" class="article">Article 2</span>
    <span id="article-102" class="article">Article 3</span>
</body>
</html>
HTML;

$crawler = new Crawler();
$crawler->addHtmlContent($markup);

// On a filtered crawler the expression is evaluated once per selected node.
$spansByIdPrefix = $crawler->filterXPath('//span[contains(@id, "article-")]');
VarDumper::dump($spansByIdPrefix->evaluate('substring-after(@id, "-")'));
// array:3 [
//   0 => "100"
//   1 => "101"
//   2 => "102"
// ]

// On the unfiltered crawler the same kind of expression yields a single result.
$firstArticleNumber = $crawler->evaluate('substring-after(//span[contains(@id, "article-")]/@id, "-")');
VarDumper::dump($firstArticleNumber);
// array:1 [
//   0 => "100"
// ]

$spansByClass = $crawler->filterXPath('//span[@class="article"]');
VarDumper::dump($spansByClass->evaluate('count(@id)'));
// array:3 [
//   0 => 1.0
//   1 => 1.0
//   2 => 1.0
// ]

$articleCount = $crawler->evaluate('count(//span[@class="article"])');
VarDumper::dump($articleCount);
// array:1 [
//   0 => 3.0
// ]

// An expression that selects nodes produces a Crawler rather than an array.
$firstSpan = $crawler->evaluate('//span[1]');
VarDumper::dump($firstSpan);
// Symfony\Component\DomCrawler\Crawler { }
```

Commits
-------

3148fad [DomCrawler] Add support for XPath expression evaluation
This commit is contained in:
Fabien Potencier 2016-08-02 12:15:39 +02:00
commit a0b22f03cb
2 changed files with 75 additions and 0 deletions

View File

@ -592,6 +592,36 @@ class Crawler implements \Countable, \IteratorAggregate
return $html;
}
/**
 * Evaluates an XPath expression.
 *
 * Since an XPath expression might evaluate to either a simple type or a \DOMNodeList,
 * this method will return either an array of simple types or a new Crawler instance.
 *
 * @param string $xpath An XPath expression
 *
 * @return array|Crawler An array of evaluation results or a new Crawler instance
 *
 * @throws \LogicException When the crawler has no document to evaluate the expression on
 */
public function evaluate($xpath)
{
    if (null === $this->document) {
        throw new \LogicException('Cannot evaluate the expression on an uninitialized crawler.');
    }

    $data = array();
    $domxpath = $this->createDOMXPath($this->document, $this->findNamespacePrefixes($xpath));

    // The expression is evaluated once per node, with that node as the context node.
    foreach ($this->nodes as $node) {
        $data[] = $domxpath->evaluate($xpath, $node);
    }

    // Only the first result is inspected to decide whether the results are node lists.
    if (isset($data[0]) && $data[0] instanceof \DOMNodeList) {
        return $this->createSubCrawler($data);
    }

    return $data;
}
/**
* Extracts information from the list of nodes.
*

View File

@ -1061,6 +1061,51 @@ HTML;
$this->assertCount(1, $crawler->filter('li:contains("List item 1")'));
}
public function testEvaluateReturnsTypedResultOfXPathExpressionOnADocumentSubset()
{
    // Narrow the crawler to the form inputs, then evaluate relative to each of them.
    $inputs = $this->createTestCrawler()->filterXPath('//form/input');

    $this->assertSame(
        array('Text', 'Foo', 'Bar'),
        $inputs->evaluate('substring-before(@name, "Name")')
    );
}
public function testEvaluateReturnsTypedResultOfNamespacedXPathExpressionOnADocumentSubset()
{
    // Select the namespaced "action" attributes, then stringify each one.
    $actions = $this->createTestXmlCrawler()->filterXPath('//yt:accessControl/@action');

    $this->assertSame(array('comment', 'videoRespond'), $actions->evaluate('string(.)'));
}
public function testEvaluateReturnsTypedResultOfNamespacedXPathExpression()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    // Register a custom prefix for the namespace before using it in the expression.
    $xmlCrawler->registerNamespace('youtube', 'http://gdata.youtube.com/schemas/2007');

    $this->assertSame(
        array('comment'),
        $xmlCrawler->evaluate('string(//youtube:accessControl/@action)')
    );
}
public function testEvaluateReturnsACrawlerIfXPathExpressionEvaluatesToANode()
{
    $source = $this->createTestCrawler();
    $result = $source->evaluate('//form/input[1]');

    // A node-selecting expression must produce a Crawler wrapping the matched node.
    $this->assertInstanceOf(Crawler::class, $result);
    $this->assertCount(1, $result);

    $firstNode = $result->first();
    $this->assertSame('input', $firstNode->nodeName());
}
/**
 * Evaluating on a crawler that was never given any content must fail.
 *
 * @expectedException \LogicException
 */
public function testEvaluateThrowsAnExceptionIfDocumentIsEmpty()
{
    $emptyCrawler = new Crawler();
    $emptyCrawler->evaluate('//form/input[1]');
}
public function createTestCrawler($uri = null)
{
$dom = new \DOMDocument();