[DomCrawler] Enabled manual namespace registration.

This commit is contained in:
Jakub Zalas 2013-09-22 23:29:38 +01:00
parent be1e4e6585
commit 9110468e99
3 changed files with 55 additions and 10 deletions

View File

@ -4,9 +4,11 @@ CHANGELOG
2.4.0
-----
* added auto-registration of document namespaces for `Crawler::filterXPath()` and `Crawler::filter()`
* added support for automatic discovery and explicit registration of document
namespaces for `Crawler::filterXPath()` and `Crawler::filter()`
* improved content type guessing in `Crawler::addContent()`
* [BC BREAK] `Crawler::addXmlContent()` no longer removes the default document namespace
* [BC BREAK] `Crawler::addXmlContent()` no longer removes the default document
namespace
2.3.0
-----

View File

@ -32,6 +32,11 @@ class Crawler extends \SplObjectStorage
*/
private $defaultNamespacePrefix = 'default';
/**
 * @var array A map of manually registered namespaces, keyed by prefix
 *            (prefix => namespace), filled by registerNamespace()
 */
private $namespaces = array();
/**
* Constructor.
*
@ -723,6 +728,15 @@ class Crawler extends \SplObjectStorage
$this->defaultNamespacePrefix = $prefix;
}
/**
 * Registers a namespace under the given prefix.
 *
 * The pair is stored in the map of manually registered namespaces;
 * registering the same prefix again overwrites the earlier entry.
 *
 * @param string $prefix    The prefix to register
 * @param string $namespace The namespace to associate with the prefix
 */
public function registerNamespace($prefix, $namespace)
{
    $this->namespaces[$prefix] = $namespace;
}
/**
* Converts string for XPath expressions.
*
@ -820,18 +834,37 @@ class Crawler extends \SplObjectStorage
$domxpath = new \DOMXPath($document);
foreach ($prefixes as $prefix) {
// ask for one namespace, otherwise we'd get a collection with an item for each node
$namespaces = $domxpath->query(sprintf('(//namespace::*[name()="%s"])[last()]', $this->defaultNamespacePrefix === $prefix ? '' : $prefix));
if ($node = $namespaces->item(0)) {
$domxpath->registerNamespace($prefix, $node->nodeValue);
} else {
throw new \InvalidArgumentException(sprintf('Could not find a namespace for the prefix: "%s"', $prefix));
}
$namespace = $this->discoverNamespace($domxpath, $prefix);
$domxpath->registerNamespace($prefix, $namespace);
}
return $domxpath;
}
/**
 * Resolves the namespace that belongs to a prefix.
 *
 * A manually registered namespace is returned first; otherwise the
 * namespace is looked up in the document behind the given DOMXPath.
 *
 * @param \DOMXPath $domxpath The XPath object used to query the document
 * @param string    $prefix   The prefix to resolve
 *
 * @return string The namespace found for the prefix
 *
 * @throws \InvalidArgumentException When no namespace can be found for the prefix
 */
private function discoverNamespace(\DOMXPath $domxpath, $prefix)
{
    // manually registered namespaces take precedence over the document lookup
    if (isset($this->namespaces[$prefix])) {
        return $this->namespaces[$prefix];
    }

    // the configured default prefix is looked up as the unnamed (default) namespace
    $lookupName = $prefix;
    if ($this->defaultNamespacePrefix === $prefix) {
        $lookupName = '';
    }

    // ask for one namespace, otherwise we'd get a collection with an item for each node
    $expression = sprintf('(//namespace::*[name()="%s"])[last()]', $lookupName);
    $match = $domxpath->query($expression)->item(0);

    if (!$match) {
        throw new \InvalidArgumentException(sprintf('Could not find a namespace for the prefix: "%s"', $prefix));
    }

    return $match->nodeValue;
}
/**
* @param $xpath
*

View File

@ -384,7 +384,7 @@ EOF
$crawler->setDefaultNamespacePrefix('x');
$crawler = $crawler->filterXPath('//x:entry/x:id');
$this->assertCount(1, $crawler, '->filterXPath() automatically registers a namespace');
$this->assertCount(1, $crawler, '->filterXPath() lets to override the default namespace prefix');
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
}
@ -410,6 +410,16 @@ EOF
$this->createTestXmlCrawler()->filterXPath('//media:group/foo:aspectRatio');
}
/**
 * Checks that a namespace registered by hand under a custom prefix ("m")
 * can be used in an XPath expression next to the "yt" prefix, which is
 * not registered by this test.
 */
public function testFilterXPathWithManuallyRegisteredNamespace()
{
    // arrange: map the custom "m" prefix to the namespace by hand
    $document = $this->createTestXmlCrawler();
    $document->registerNamespace('m', 'http://search.yahoo.com/mrss/');

    // act
    $matches = $document->filterXPath('//m:group/yt:aspectRatio');

    // assert
    $this->assertCount(1, $matches, '->filterXPath() uses manually registered namespace');
    $this->assertSame('widescreen', $matches->text());
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::filter
*/