[DomCrawler] Fixed an issue with namespace prefix matching being too greedy.
The regexp matching prefixes is naive and matches most strings followed by a colon. It is also incomplete, as it does not match all the supported characters (such as the Unicode ones). It is simple, though, and sufficient in most situations.
This commit is contained in:
parent
55de9d9746
commit
3292163fad
@ -835,8 +835,10 @@ class Crawler extends \SplObjectStorage
|
||||
|
||||
foreach ($prefixes as $prefix) {
|
||||
$namespace = $this->discoverNamespace($domxpath, $prefix);
|
||||
if (null !== $namespace) {
|
||||
$domxpath->registerNamespace($prefix, $namespace);
|
||||
}
|
||||
}
|
||||
|
||||
return $domxpath;
|
||||
}
|
||||
@ -861,8 +863,6 @@ class Crawler extends \SplObjectStorage
|
||||
if ($node = $namespaces->item(0)) {
|
||||
return $node->nodeValue;
|
||||
}
|
||||
|
||||
throw new \InvalidArgumentException(sprintf('Could not find a namespace for the prefix: "%s"', $prefix));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -872,7 +872,7 @@ class Crawler extends \SplObjectStorage
|
||||
*/
|
||||
private function findNamespacePrefixes($xpath)
|
||||
{
|
||||
if (preg_match_all('/(?P<prefix>[a-zA-Z_][a-zA-Z_0-9\-\.]*):[^:]/', $xpath, $matches)) {
|
||||
if (preg_match_all('/(?P<prefix>[a-z_][a-z_0-9\-\.]*):[^"\/]/i', $xpath, $matches)) {
|
||||
return array_unique($matches['prefix']);
|
||||
}
|
||||
|
||||
|
@ -401,15 +401,6 @@ EOF
|
||||
$this->assertSame('widescreen', $crawler->text());
|
||||
}
|
||||
|
||||
/**
|
||||
* @expectedException \InvalidArgumentException
|
||||
* @expectedExceptionMessage Could not find a namespace for the prefix: "foo"
|
||||
*/
|
||||
public function testFilterXPathWithAnInvalidNamespace()
|
||||
{
|
||||
$this->createTestXmlCrawler()->filterXPath('//media:group/foo:aspectRatio');
|
||||
}
|
||||
|
||||
public function testFilterXPathWithManuallyRegisteredNamespace()
|
||||
{
|
||||
$crawler = $this->createTestXmlCrawler();
|
||||
@ -420,6 +411,15 @@ EOF
|
||||
$this->assertSame('widescreen', $crawler->text());
|
||||
}
|
||||
|
||||
public function testFilterXPathWithAnUrl()
|
||||
{
|
||||
$crawler = $this->createTestXmlCrawler();
|
||||
|
||||
$crawler = $crawler->filterXPath('//media:category[@scheme="http://gdata.youtube.com/schemas/2007/categories.cat"]');
|
||||
$this->assertCount(1, $crawler);
|
||||
$this->assertSame('Music', $crawler->text());
|
||||
}
|
||||
|
||||
/**
|
||||
* @covers Symfony\Component\DomCrawler\Crawler::filter
|
||||
*/
|
||||
@ -741,6 +741,7 @@ EOF
|
||||
<media:title type="plain">Chordates - CrashCourse Biology #24</media:title>
|
||||
<yt:aspectRatio>widescreen</yt:aspectRatio>
|
||||
</media:group>
|
||||
<media:category label="Music" scheme="http://gdata.youtube.com/schemas/2007/categories.cat">Music</media:category>
|
||||
</entry>';
|
||||
|
||||
return new Crawler($xml, $uri);
|
||||
|
Reference in New Issue
Block a user