Fix #9321: add GBK encoding support to Crawler::addHtmlContent

This commit is contained in:
bronze1man 2013-10-17 22:30:34 +08:00 committed by Fabien Potencier
parent 0285bfde5d
commit acb2df0842
2 changed files with 24 additions and 2 deletions

View File

@ -147,8 +147,18 @@ class Crawler extends \SplObjectStorage
$dom = new \DOMDocument('1.0', $charset);
$dom->validateOnParse = true;
if (function_exists('mb_convert_encoding') && in_array(strtolower($charset), array_map('strtolower', mb_list_encodings()))) {
$content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
if (function_exists('mb_convert_encoding')) {
$has_error = false;
$previous = set_error_handler(function()use(&$has_error){
$has_error = true;
});
$tmpContent = @mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
set_error_handler($previous);
if (!$has_error) {
$content = $tmpContent;
}
}
@$dom->loadHTML($content);

View File

@ -112,6 +112,18 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase
$this->assertEquals('Žťčýů', $crawler->filterXPath('//p')->text());
}
/**
 * Checks that GBK-encoded markup handed to addHtmlContent() with the 'gbk'
 * charset yields the expected paragraph text.
 *
 * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
 */
public function testAddHtmlContentCharsetGbk()
{
    // Base64 of the GBK-encoded bytes of <html><p>中文</p></html>
    $gbkMarkup = base64_decode('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+');

    $crawler = new Crawler();
    $crawler->addHtmlContent($gbkMarkup, 'gbk');

    $paragraphText = $crawler->filterXPath('//p')->text();

    $this->assertEquals('中文', $paragraphText);
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
*/