Cache discovered namespaces in DomCrawler
This commit is contained in:
parent
3ac26fcd67
commit
4c74dead48
@ -33,6 +33,11 @@ class Crawler implements \Countable, \IteratorAggregate
|
|||||||
*/
|
*/
|
||||||
private $namespaces = [];
|
private $namespaces = [];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var \ArrayIterator A map of cached namespaces
|
||||||
|
*/
|
||||||
|
private $cachedNamespaces;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @var string The base href value
|
* @var string The base href value
|
||||||
*/
|
*/
|
||||||
@ -68,6 +73,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
|||||||
$this->uri = $uri;
|
$this->uri = $uri;
|
||||||
$this->baseHref = $baseHref ?: $uri;
|
$this->baseHref = $baseHref ?: $uri;
|
||||||
$this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
|
$this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
|
||||||
|
$this->cachedNamespaces = new \ArrayIterator();
|
||||||
|
|
||||||
$this->add($node);
|
$this->add($node);
|
||||||
}
|
}
|
||||||
@ -99,6 +105,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
|||||||
{
|
{
|
||||||
$this->nodes = [];
|
$this->nodes = [];
|
||||||
$this->document = null;
|
$this->document = null;
|
||||||
|
$this->cachedNamespaces = new \ArrayIterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -967,12 +974,14 @@ class Crawler implements \Countable, \IteratorAggregate
|
|||||||
*/
|
*/
|
||||||
private function filterRelativeXPath(string $xpath): object
|
private function filterRelativeXPath(string $xpath): object
|
||||||
{
|
{
|
||||||
$prefixes = $this->findNamespacePrefixes($xpath);
|
|
||||||
|
|
||||||
$crawler = $this->createSubCrawler(null);
|
$crawler = $this->createSubCrawler(null);
|
||||||
|
if (null === $this->document) {
|
||||||
|
return $crawler;
|
||||||
|
}
|
||||||
|
|
||||||
|
$domxpath = $this->createDOMXPath($this->document, $this->findNamespacePrefixes($xpath));
|
||||||
|
|
||||||
foreach ($this->nodes as $node) {
|
foreach ($this->nodes as $node) {
|
||||||
$domxpath = $this->createDOMXPath($node->ownerDocument, $prefixes);
|
|
||||||
$crawler->add($domxpath->query($xpath, $node));
|
$crawler->add($domxpath->query($xpath, $node));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1189,10 +1198,17 @@ class Crawler implements \Countable, \IteratorAggregate
|
|||||||
return $this->namespaces[$prefix];
|
return $this->namespaces[$prefix];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($this->cachedNamespaces->offsetExists($prefix)) {
|
||||||
|
return $this->cachedNamespaces->offsetGet($prefix);
|
||||||
|
}
|
||||||
|
|
||||||
// ask for one namespace, otherwise we'd get a collection with an item for each node
|
// ask for one namespace, otherwise we'd get a collection with an item for each node
|
||||||
$namespaces = $domxpath->query(sprintf('(//namespace::*[name()="%s"])[last()]', $this->defaultNamespacePrefix === $prefix ? '' : $prefix));
|
$namespaces = $domxpath->query(sprintf('(//namespace::*[name()="%s"])[last()]', $this->defaultNamespacePrefix === $prefix ? '' : $prefix));
|
||||||
|
|
||||||
return ($node = $namespaces->item(0)) ? $node->nodeValue : null;
|
$namespace = ($node = $namespaces->item(0)) ? $node->nodeValue : null;
|
||||||
|
$this->cachedNamespaces->offsetSet($prefix, $namespace);
|
||||||
|
|
||||||
|
return $namespace;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function findNamespacePrefixes(string $xpath): array
|
private function findNamespacePrefixes(string $xpath): array
|
||||||
@ -1217,6 +1233,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
|||||||
$crawler->isHtml = $this->isHtml;
|
$crawler->isHtml = $this->isHtml;
|
||||||
$crawler->document = $this->document;
|
$crawler->document = $this->document;
|
||||||
$crawler->namespaces = $this->namespaces;
|
$crawler->namespaces = $this->namespaces;
|
||||||
|
$crawler->cachedNamespaces = $this->cachedNamespaces;
|
||||||
$crawler->html5Parser = $this->html5Parser;
|
$crawler->html5Parser = $this->html5Parser;
|
||||||
|
|
||||||
return $crawler;
|
return $crawler;
|
||||||
|
Reference in New Issue
Block a user