[DomCrawler] Added auto-discovery of namespaces in Crawler::filter() and Crawler::filterXPath().
Improved content type guessing.
This commit is contained in:
parent
b1542f0620
commit
e5b8abb564
@ -92,7 +92,7 @@ class Crawler extends \SplObjectStorage
|
||||
public function addContent($content, $type = null)
|
||||
{
|
||||
if (empty($type)) {
|
||||
$type = 'text/html';
|
||||
$type = 0 === strpos($content, '<?xml') ? 'application/xml' : 'text/html';
|
||||
}
|
||||
|
||||
// DOM only for HTML/XML content
|
||||
@ -580,6 +580,15 @@ class Crawler extends \SplObjectStorage
|
||||
}
|
||||
|
||||
$domxpath = new \DOMXPath($document);
|
||||
if (preg_match_all('/(?P<prefix>[a-zA-Z_][a-zA-Z_0-9\-\.]+):[^:]/', $xpath, $matches)) {
|
||||
foreach ($matches['prefix'] as $prefix) {
|
||||
// ask for one namespace, otherwise we'd get a collection with an item for each node
|
||||
$namespaces = $domxpath->query(sprintf('(//namespace::*[name()="%s"])[last()]', $prefix));
|
||||
foreach ($namespaces as $node) {
|
||||
$domxpath->registerNamespace($node->prefix, $node->nodeValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new static($domxpath->query($xpath), $this->uri);
|
||||
}
|
||||
|
@ -370,11 +370,31 @@ EOF
|
||||
$this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');
|
||||
}
|
||||
|
||||
/**
 * Runs the unprefixed XPath expression '//entry/id' against the XML fixture
 * crawler and expects exactly one matching node.
 */
public function testFilterXPathWithDefaultNamespace()
|
||||
{
|
||||
$crawler = $this->createTestXmlCrawler()->filterXPath('//entry/id');
|
||||
$this->assertCount(1, $crawler, '->filterXPath() automatically registers a namespace');
|
||||
}
|
||||
|
||||
/**
 * Runs the prefixed XPath expression '//yt:accessControl' against the XML
 * fixture crawler, without registering the "yt" prefix in the test itself,
 * and expects two matching nodes.
 */
public function testFilterXPathWithNamespace()
|
||||
{
|
||||
$crawler = $this->createTestXmlCrawler()->filterXPath('//yt:accessControl');
|
||||
$this->assertCount(2, $crawler, '->filterXPath() automatically registers a namespace');
|
||||
}
|
||||
|
||||
/**
 * Runs an XPath expression that uses two different prefixes ("media" and
 * "yt") in one path against the XML fixture crawler and expects exactly one
 * matching node.
 */
public function testFilterXPathWithMultipleNamespaces()
|
||||
{
|
||||
$crawler = $this->createTestXmlCrawler()->filterXPath('//media:group/yt:aspectRatio');
|
||||
$this->assertCount(1, $crawler, '->filterXPath() automatically registers multiple namespaces');
|
||||
}
|
||||
|
||||
/**
|
||||
* @covers Symfony\Component\DomCrawler\Crawler::filter
|
||||
*/
|
||||
public function testFilter()
|
||||
{
|
||||
$this->markSkippedIfCssSelectorNotPresent();
|
||||
|
||||
$crawler = $this->createTestCrawler();
|
||||
$this->assertNotSame($crawler, $crawler->filter('li'), '->filter() returns a new instance of a crawler');
|
||||
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filter() returns a new instance of a crawler');
|
||||
@ -384,6 +404,14 @@ EOF
|
||||
$this->assertCount(6, $crawler->filter('li'), '->filter() filters the node list with the CSS selector');
|
||||
}
|
||||
|
||||
/**
 * Applies the namespaced CSS selector 'yt|accessControl' to the XML fixture
 * crawler and expects two matching nodes.
 *
 * Skipped when the CssSelector component class is not available.
 */
public function testFilterWithNamespace()
|
||||
{
|
||||
$this->markSkippedIfCssSelectorNotPresent();
|
||||
|
||||
|
||||
$crawler = $this->createTestXmlCrawler()->filter('yt|accessControl');
|
||||
$this->assertCount(2, $crawler, '->filter() automatically registers namespaces');
|
||||
}
|
||||
|
||||
public function testSelectLink()
|
||||
{
|
||||
$crawler = $this->createTestCrawler();
|
||||
@ -656,6 +684,22 @@ EOF
|
||||
return new Crawler($dom, $uri);
|
||||
}
|
||||
|
||||
/**
 * Builds a Crawler from an inline XML fixture.
 *
 * The fixture's root <entry> element declares a default namespace plus the
 * "media" and "yt" prefixes. It contains one <id>, two <yt:accessControl>
 * elements, and one <media:group> holding a <media:title> and a
 * <yt:aspectRatio>.
 *
 * @param string|null $uri Passed through as the second Crawler constructor argument
 *
 * @return Crawler
 */
protected function createTestXmlCrawler($uri = null)
|
||||
{
|
||||
$xml = '<?xml version="1.0" encoding="UTF-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xmlns:yt="http://gdata.youtube.com/schemas/2007">
|
||||
<id>tag:youtube.com,2008:video:kgZRZmEc9j4</id>
|
||||
<yt:accessControl action="comment" permission="allowed"/>
|
||||
<yt:accessControl action="videoRespond" permission="moderated"/>
|
||||
<media:group>
|
||||
<media:title type="plain">Chordates - CrashCourse Biology #24</media:title>
|
||||
<yt:aspectRatio>widescreen</yt:aspectRatio>
|
||||
</media:group>
|
||||
</entry>';
|
||||
|
||||
return new Crawler($xml, $uri);
|
||||
}
|
||||
|
||||
protected function createDomDocument()
|
||||
{
|
||||
$dom = new \DOMDocument();
|
||||
@ -672,4 +716,11 @@ EOF
|
||||
|
||||
return $domxpath->query('//div');
|
||||
}
|
||||
|
||||
/**
 * Marks the current test as skipped when the class
 * Symfony\Component\CssSelector\CssSelector cannot be found.
 */
protected function markSkippedIfCssSelectorNotPresent()
|
||||
{
|
||||
if (!class_exists('Symfony\Component\CssSelector\CssSelector')) {
|
||||
$this->markTestSkipped('The "CssSelector" component is not available');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user