Cache discovered namespaces in DomCrawler

This commit is contained in:
Simon Berger 2020-11-16 20:09:52 +01:00 committed by Fabien Potencier
parent 3ac26fcd67
commit 4c74dead48

View File

@ -33,6 +33,11 @@ class Crawler implements \Countable, \IteratorAggregate
*/ */
private $namespaces = []; private $namespaces = [];
/**
* @var \ArrayIterator A map of cached namespaces
*/
private $cachedNamespaces;
/** /**
* @var string The base href value * @var string The base href value
*/ */
@ -68,6 +73,7 @@ class Crawler implements \Countable, \IteratorAggregate
$this->uri = $uri; $this->uri = $uri;
$this->baseHref = $baseHref ?: $uri; $this->baseHref = $baseHref ?: $uri;
$this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null; $this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
$this->cachedNamespaces = new \ArrayIterator();
$this->add($node); $this->add($node);
} }
@ -99,6 +105,7 @@ class Crawler implements \Countable, \IteratorAggregate
{ {
$this->nodes = []; $this->nodes = [];
$this->document = null; $this->document = null;
$this->cachedNamespaces = new \ArrayIterator();
} }
/** /**
@ -967,12 +974,14 @@ class Crawler implements \Countable, \IteratorAggregate
*/ */
private function filterRelativeXPath(string $xpath): object private function filterRelativeXPath(string $xpath): object
{ {
$prefixes = $this->findNamespacePrefixes($xpath);
$crawler = $this->createSubCrawler(null); $crawler = $this->createSubCrawler(null);
if (null === $this->document) {
return $crawler;
}
$domxpath = $this->createDOMXPath($this->document, $this->findNamespacePrefixes($xpath));
foreach ($this->nodes as $node) { foreach ($this->nodes as $node) {
$domxpath = $this->createDOMXPath($node->ownerDocument, $prefixes);
$crawler->add($domxpath->query($xpath, $node)); $crawler->add($domxpath->query($xpath, $node));
} }
@ -1189,10 +1198,17 @@ class Crawler implements \Countable, \IteratorAggregate
return $this->namespaces[$prefix]; return $this->namespaces[$prefix];
} }
if ($this->cachedNamespaces->offsetExists($prefix)) {
return $this->cachedNamespaces->offsetGet($prefix);
}
// ask for one namespace, otherwise we'd get a collection with an item for each node // ask for one namespace, otherwise we'd get a collection with an item for each node
$namespaces = $domxpath->query(sprintf('(//namespace::*[name()="%s"])[last()]', $this->defaultNamespacePrefix === $prefix ? '' : $prefix)); $namespaces = $domxpath->query(sprintf('(//namespace::*[name()="%s"])[last()]', $this->defaultNamespacePrefix === $prefix ? '' : $prefix));
return ($node = $namespaces->item(0)) ? $node->nodeValue : null; $namespace = ($node = $namespaces->item(0)) ? $node->nodeValue : null;
$this->cachedNamespaces->offsetSet($prefix, $namespace);
return $namespace;
} }
private function findNamespacePrefixes(string $xpath): array private function findNamespacePrefixes(string $xpath): array
@ -1217,6 +1233,7 @@ class Crawler implements \Countable, \IteratorAggregate
$crawler->isHtml = $this->isHtml; $crawler->isHtml = $this->isHtml;
$crawler->document = $this->document; $crawler->document = $this->document;
$crawler->namespaces = $this->namespaces; $crawler->namespaces = $this->namespaces;
$crawler->cachedNamespaces = $this->cachedNamespaces;
$crawler->html5Parser = $this->html5Parser; $crawler->html5Parser = $this->html5Parser;
return $crawler; return $crawler;