[DomCrawler] Fixed an issue with namespace prefix matching being too greedy.

The regexp matching prefixes is naive and matches most strings followed by a colon. It is also incomplete, as it does not match all the supported characters (like the Unicode ones). It is simple, though, and sufficient in most situations.
This commit is contained in:
Jakub Zalas 2013-09-25 23:07:33 +01:00
parent 55de9d9746
commit 3292163fad
2 changed files with 14 additions and 13 deletions

View File

@ -835,7 +835,9 @@ class Crawler extends \SplObjectStorage
foreach ($prefixes as $prefix) {
$namespace = $this->discoverNamespace($domxpath, $prefix);
$domxpath->registerNamespace($prefix, $namespace);
if (null !== $namespace) {
$domxpath->registerNamespace($prefix, $namespace);
}
}
return $domxpath;
@ -861,8 +863,6 @@ class Crawler extends \SplObjectStorage
if ($node = $namespaces->item(0)) {
return $node->nodeValue;
}
throw new \InvalidArgumentException(sprintf('Could not find a namespace for the prefix: "%s"', $prefix));
}
/**
@ -872,7 +872,7 @@ class Crawler extends \SplObjectStorage
*/
private function findNamespacePrefixes($xpath)
{
if (preg_match_all('/(?P<prefix>[a-zA-Z_][a-zA-Z_0-9\-\.]*):[^:]/', $xpath, $matches)) {
if (preg_match_all('/(?P<prefix>[a-z_][a-z_0-9\-\.]*):[^"\/]/i', $xpath, $matches)) {
return array_unique($matches['prefix']);
}

View File

@ -401,15 +401,6 @@ EOF
$this->assertSame('widescreen', $crawler->text());
}
/**
 * Filtering with the "foo" prefix is expected to be rejected with an
 * exception naming that prefix.
 *
 * @expectedException \InvalidArgumentException
 * @expectedExceptionMessage Could not find a namespace for the prefix: "foo"
 */
public function testFilterXPathWithAnInvalidNamespace()
{
    $xmlCrawler = $this->createTestXmlCrawler();

    // The call itself is the assertion: the annotations above declare the expected failure.
    $xmlCrawler->filterXPath('//media:group/foo:aspectRatio');
}
public function testFilterXPathWithManuallyRegisteredNamespace()
{
$crawler = $this->createTestXmlCrawler();
@ -420,6 +411,15 @@ EOF
$this->assertSame('widescreen', $crawler->text());
}
/**
 * Filters on an attribute whose quoted value is a URL; the "http:" inside
 * that value must not stop the single matching node from being found.
 */
public function testFilterXPathWithAnUrl()
{
    $xpath = '//media:category[@scheme="http://gdata.youtube.com/schemas/2007/categories.cat"]';
    $matches = $this->createTestXmlCrawler()->filterXPath($xpath);

    $this->assertCount(1, $matches);
    $this->assertSame('Music', $matches->text());
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::filter
*/
@ -741,6 +741,7 @@ EOF
<media:title type="plain">Chordates - CrashCourse Biology #24</media:title>
<yt:aspectRatio>widescreen</yt:aspectRatio>
</media:group>
<media:category label="Music" scheme="http://gdata.youtube.com/schemas/2007/categories.cat">Music</media:category>
</entry>';
return new Crawler($xml, $uri);