From 73eda66b99a668f891781ebf49a88ad23e237a36 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Fri, 29 Sep 2017 12:22:06 +0200 Subject: [PATCH] [DomCrawler] Default to UTF-8 when possible --- src/Symfony/Component/DomCrawler/Crawler.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 1ad80e933e..5d201dff1e 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -127,8 +127,8 @@ class Crawler implements \Countable, \IteratorAggregate /** * Adds HTML/XML content. * - * If the charset is not set via the content type, it is assumed - * to be ISO-8859-1, which is the default charset defined by the + * If the charset is not set via the content type, it is assumed to be UTF-8, + * or ISO-8859-1 as a fallback, which is the default charset defined by the * HTTP 1.1 specification. * * @param string $content A string to parse as HTML/XML @@ -161,7 +161,7 @@ class Crawler implements \Countable, \IteratorAggregate } if (null === $charset) { - $charset = 'ISO-8859-1'; + $charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1'; } if ('x' === $xmlMatches[1]) {