Cache discovered namespaces in DomCrawler
This commit is contained in:
parent
3ac26fcd67
commit
4c74dead48
@ -33,6 +33,11 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*/
|
||||
private $namespaces = [];
|
||||
|
||||
/**
|
||||
* @var \ArrayIterator A map of cached namespaces
|
||||
*/
|
||||
private $cachedNamespaces;
|
||||
|
||||
/**
|
||||
* @var string The base href value
|
||||
*/
|
||||
@ -68,6 +73,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
$this->uri = $uri;
|
||||
$this->baseHref = $baseHref ?: $uri;
|
||||
$this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
|
||||
$this->cachedNamespaces = new \ArrayIterator();
|
||||
|
||||
$this->add($node);
|
||||
}
|
||||
@ -99,6 +105,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
{
|
||||
$this->nodes = [];
|
||||
$this->document = null;
|
||||
$this->cachedNamespaces = new \ArrayIterator();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -967,12 +974,14 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*/
|
||||
private function filterRelativeXPath(string $xpath): object
|
||||
{
|
||||
$prefixes = $this->findNamespacePrefixes($xpath);
|
||||
|
||||
$crawler = $this->createSubCrawler(null);
|
||||
if (null === $this->document) {
|
||||
return $crawler;
|
||||
}
|
||||
|
||||
$domxpath = $this->createDOMXPath($this->document, $this->findNamespacePrefixes($xpath));
|
||||
|
||||
foreach ($this->nodes as $node) {
|
||||
$domxpath = $this->createDOMXPath($node->ownerDocument, $prefixes);
|
||||
$crawler->add($domxpath->query($xpath, $node));
|
||||
}
|
||||
|
||||
@ -1189,10 +1198,17 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
return $this->namespaces[$prefix];
|
||||
}
|
||||
|
||||
if ($this->cachedNamespaces->offsetExists($prefix)) {
|
||||
return $this->cachedNamespaces->offsetGet($prefix);
|
||||
}
|
||||
|
||||
// ask for one namespace, otherwise we'd get a collection with an item for each node
|
||||
$namespaces = $domxpath->query(sprintf('(//namespace::*[name()="%s"])[last()]', $this->defaultNamespacePrefix === $prefix ? '' : $prefix));
|
||||
|
||||
return ($node = $namespaces->item(0)) ? $node->nodeValue : null;
|
||||
$namespace = ($node = $namespaces->item(0)) ? $node->nodeValue : null;
|
||||
$this->cachedNamespaces->offsetSet($prefix, $namespace);
|
||||
|
||||
return $namespace;
|
||||
}
|
||||
|
||||
private function findNamespacePrefixes(string $xpath): array
|
||||
@ -1217,6 +1233,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
$crawler->isHtml = $this->isHtml;
|
||||
$crawler->document = $this->document;
|
||||
$crawler->namespaces = $this->namespaces;
|
||||
$crawler->cachedNamespaces = $this->cachedNamespaces;
|
||||
$crawler->html5Parser = $this->html5Parser;
|
||||
|
||||
return $crawler;
|
||||
|
Reference in New Issue
Block a user