From ba83bdadb115029b839a7751b4f894408ee152e2 Mon Sep 17 00:00:00 2001
From: Nicolas Grekas
Date: Fri, 26 Apr 2019 07:53:53 +0200
Subject: [PATCH] [DomCrawler] fix HTML5 parser integration

---
Reviewer notes (this section sits after the "---" separator, so it is not part
of the commit message):

 * __construct(): the HTML5 parser is now instantiated up front whenever the
   HTML5 class is available, instead of lazily inside addHtmlContent().
 * addHtmlContent(): the HTML5 parser is used only when the first
   non-whitespace bytes of $content are "<!doctype html>" (case-insensitive).
   strspn() yields the length of the leading " \t\r\n" run and stripos() the
   offset of the first doctype, so the two are identical only when the doctype
   immediately follows the leading whitespace. When there is no doctype,
   stripos() returns false, the strict comparison fails and parseXhtml() is
   used.
 * html(): child nodes are serialised through the HTML5 parser only when the
   first child of the node's owner document serialises to "<!DOCTYPE html>";
   otherwise the owner document's own saveHTML() is used.

 src/Symfony/Component/DomCrawler/Crawler.php | 23 +++++++-------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php
index 4efbbf9b2d..70a4b607dc 100644
--- a/src/Symfony/Component/DomCrawler/Crawler.php
+++ b/src/Symfony/Component/DomCrawler/Crawler.php
@@ -69,6 +69,7 @@ class Crawler implements \Countable, \IteratorAggregate
     {
         $this->uri = $uri;
         $this->baseHref = $baseHref ?: $uri;
+        $this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
 
         $this->add($node);
     }
@@ -190,13 +191,7 @@ class Crawler implements \Countable, \IteratorAggregate
     public function addHtmlContent($content, $charset = 'UTF-8')
     {
         // Use HTML5 parser if the content is HTML5 and the library is available
-        if (!$this->html5Parser
-            && class_exists(HTML5::class)
-            && '<!doctype html>' === strtolower(substr(ltrim($content), 0, 15))) {
-            $this->html5Parser = new HTML5(['disable_html_ns' => true]);
-        }
-
-        $dom = null !== $this->html5Parser ? $this->parseHtml5($content, $charset) : $this->parseXhtml($content, $charset);
+        $dom = null !== $this->html5Parser && strspn($content, " \t\r\n") === stripos($content, '<!doctype html>') ? $this->parseHtml5($content, $charset) : $this->parseXhtml($content, $charset);
         $this->addDocument($dom);
 
         $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(['href']);
@@ -599,18 +594,16 @@ class Crawler implements \Countable, \IteratorAggregate
             throw new \InvalidArgumentException('The current node list is empty.');
         }
 
-        if (null !== $this->html5Parser) {
-            $html = '';
-            foreach ($this->getNode(0)->childNodes as $child) {
-                $html .= $this->html5Parser->saveHTML($child);
-            }
+        $node = $this->getNode(0);
+        $owner = $node->ownerDocument;
 
-            return $html;
+        if (null !== $this->html5Parser && '<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
+            $owner = $this->html5Parser;
         }
 
         $html = '';
-        foreach ($this->getNode(0)->childNodes as $child) {
-            $html .= $child->ownerDocument->saveHTML($child);
+        foreach ($node->childNodes as $child) {
+            $html .= $owner->saveHTML($child);
         }
 
         return $html;