[DomCrawler] Default to UTF-8 when possible
This commit is contained in:
parent
a442e378e1
commit
73eda66b99
@@ -127,8 +127,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Adds HTML/XML content.
|
||||
*
|
||||
* If the charset is not set via the content type, it is assumed
|
||||
* to be ISO-8859-1, which is the default charset defined by the
|
||||
* If the charset is not set via the content type, it is assumed to be UTF-8,
|
||||
* or ISO-8859-1 as a fallback, which is the default charset defined by the
|
||||
* HTTP 1.1 specification.
|
||||
*
|
||||
* @param string $content A string to parse as HTML/XML
|
||||
@@ -161,7 +161,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
}
|
||||
|
||||
if (null === $charset) {
|
||||
$charset = 'ISO-8859-1';
|
||||
$charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1';
|
||||
}
|
||||
|
||||
if ('x' === $xmlMatches[1]) {
|
||||
|
Reference in New Issue
Block a user