From 3292163fadaafab0c81c5c81355bb3f67eeeca5b Mon Sep 17 00:00:00 2001 From: Jakub Zalas Date: Wed, 25 Sep 2013 23:07:33 +0100 Subject: [PATCH] [DomCrawler] Fixed an issue with namespace prefix matching being too greedy. The regexp matching prefixes is naive and matches most strings followed by a colon. It is also incomplete, as it does not match all the supported characters (such as the Unicode ones). It is simple, though, and sufficient in most situations. --- src/Symfony/Component/DomCrawler/Crawler.php | 8 ++++---- .../DomCrawler/Tests/CrawlerTest.php | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 2935a51f53..ddeb57f768 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -835,7 +835,9 @@ class Crawler extends \SplObjectStorage foreach ($prefixes as $prefix) { $namespace = $this->discoverNamespace($domxpath, $prefix); - $domxpath->registerNamespace($prefix, $namespace); + if (null !== $namespace) { + $domxpath->registerNamespace($prefix, $namespace); + } } return $domxpath; @@ -861,8 +863,6 @@ class Crawler extends \SplObjectStorage if ($node = $namespaces->item(0)) { return $node->nodeValue; } - - throw new \InvalidArgumentException(sprintf('Could not find a namespace for the prefix: "%s"', $prefix)); } /** @@ -872,7 +872,7 @@ class Crawler extends \SplObjectStorage */ private function findNamespacePrefixes($xpath) { - if (preg_match_all('/(?P[a-zA-Z_][a-zA-Z_0-9\-\.]*):[^:]/', $xpath, $matches)) { + if (preg_match_all('/(?P[a-z_][a-z_0-9\-\.]*):[^"\/]/i', $xpath, $matches)) { return array_unique($matches['prefix']); } diff --git a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php index 5c27451f6f..5629700054 100644 --- a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php +++ 
b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php @@ -401,15 +401,6 @@ EOF $this->assertSame('widescreen', $crawler->text()); } - /** - * @expectedException \InvalidArgumentException - * @expectedExceptionMessage Could not find a namespace for the prefix: "foo" - */ - public function testFilterXPathWithAnInvalidNamespace() - { - $this->createTestXmlCrawler()->filterXPath('//media:group/foo:aspectRatio'); - } - public function testFilterXPathWithManuallyRegisteredNamespace() { $crawler = $this->createTestXmlCrawler(); @@ -420,6 +411,15 @@ EOF $this->assertSame('widescreen', $crawler->text()); } + public function testFilterXPathWithAnUrl() + { + $crawler = $this->createTestXmlCrawler(); + + $crawler = $crawler->filterXPath('//media:category[@scheme="http://gdata.youtube.com/schemas/2007/categories.cat"]'); + $this->assertCount(1, $crawler); + $this->assertSame('Music', $crawler->text()); + } + /** * @covers Symfony\Component\DomCrawler\Crawler::filter */ @@ -741,6 +741,7 @@ EOF Chordates - CrashCourse Biology #24 widescreen + Music '; return new Crawler($xml, $uri);