Crawler default namespace fix
This commit is contained in:
parent
aff0594b75
commit
cfff054e3b
@ -4,6 +4,7 @@ CHANGELOG
|
||||
2.4.0
|
||||
-----
|
||||
|
||||
* `Crawler::addXmlContent()` removes the default document namespace again if it's the only namespace.
|
||||
* added support for automatic discovery and explicit registration of document
|
||||
namespaces for `Crawler::filterXPath()` and `Crawler::filter()`
|
||||
* improved content type guessing in `Crawler::addContent()`
|
||||
|
@ -200,6 +200,11 @@ class Crawler extends \SplObjectStorage
|
||||
*/
|
||||
public function addXmlContent($content, $charset = 'UTF-8')
|
||||
{
|
||||
// remove the default namespace if it's the only namespace to make XPath expressions simpler
|
||||
if (!preg_match('/xmlns:/', $content)) {
|
||||
$content = str_replace('xmlns', 'ns', $content);
|
||||
}
|
||||
|
||||
$current = libxml_use_internal_errors(true);
|
||||
$disableEntities = libxml_disable_entity_loader(true);
|
||||
|
||||
|
@ -469,6 +469,28 @@ EOF
|
||||
$this->assertSame('widescreen', $crawler->text());
|
||||
}
|
||||
|
||||
/**
 * Checks filtering on an XML document whose only namespace declaration is
 * the default one (a plain xmlns attribute on the root <urlset> element).
 *
 * The fixture is a sitemap containing two <url> entries; the test expects
 * the unprefixed selector "url" to match both of them.
 */
public function testFilterWithDefaultNamespaceOnly()
|
||||
{
|
||||
$crawler = new Crawler('<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>http://localhost/foo</loc>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.5</priority>
|
||||
<lastmod>2012-11-16</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>http://localhost/bar</loc>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.5</priority>
|
||||
<lastmod>2012-11-16</lastmod>
|
||||
</url>
|
||||
</urlset>
|
||||
');
|
||||
|
||||
// The selector carries no namespace prefix, yet both <url> elements must be found.
$this->assertEquals(2, $crawler->filter('url')->count());
|
||||
}
|
||||
|
||||
public function testSelectLink()
|
||||
{
|
||||
$crawler = $this->createTestCrawler();
|
||||
|
Reference in New Issue
Block a user