Crawler default namespace fix

This commit is contained in:
ChrisC 2013-12-14 15:13:38 +01:00 committed by Fabien Potencier
parent aff0594b75
commit cfff054e3b
3 changed files with 29 additions and 1 deletion

View File

@ -3,7 +3,8 @@ CHANGELOG
2.4.0
-----
* `Crawler::addXmlContent()` removes the default document namespace again if it's the only namespace.
* added support for automatic discovery and explicit registration of document
namespaces for `Crawler::filterXPath()` and `Crawler::filter()`
* improved content type guessing in `Crawler::addContent()`

View File

@ -200,6 +200,11 @@ class Crawler extends \SplObjectStorage
*/
public function addXmlContent($content, $charset = 'UTF-8')
{
// remove the default namespace if it's the only namespace to make XPath expressions simpler
if (!preg_match('/xmlns:/', $content)) {
$content = str_replace('xmlns', 'ns', $content);
}
$current = libxml_use_internal_errors(true);
$disableEntities = libxml_disable_entity_loader(true);

View File

@ -469,6 +469,28 @@ EOF
$this->assertSame('widescreen', $crawler->text());
}
/**
 * A document whose only namespace is the default one can be filtered
 * with plain, unprefixed element names.
 */
public function testFilterWithDefaultNamespaceOnly()
{
    // sitemap fixture: the root element declares a default namespace and no prefixed ones
    $sitemap = '<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://localhost/foo</loc>
<changefreq>weekly</changefreq>
<priority>0.5</priority>
<lastmod>2012-11-16</lastmod>
</url>
<url>
<loc>http://localhost/bar</loc>
<changefreq>weekly</changefreq>
<priority>0.5</priority>
<lastmod>2012-11-16</lastmod>
</url>
</urlset>
';

    $crawler = new Crawler($sitemap);
    $matchedUrls = $crawler->filter('url');

    // the fixture contains exactly two <url> entries
    $this->assertEquals(2, $matchedUrls->count());
}
public function testSelectLink()
{
$crawler = $this->createTestCrawler();