[DomCrawler] fixed encoding when using addHtmlContent() (fixes #3881)

This commit is contained in:
Fabien Potencier 2012-05-07 14:20:03 +02:00
parent f14961b747
commit c9ebe67d65
2 changed files with 26 additions and 0 deletions

View File

@ -129,6 +129,10 @@ class Crawler extends \SplObjectStorage
$dom = new \DOMDocument('1.0', $charset);
$dom->validateOnParse = true;
if (function_exists('mb_convert_encoding')) {
$content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
}
$current = libxml_use_internal_errors(true);
@$dom->loadHTML($content);
libxml_use_internal_errors($current);

View File

@ -69,6 +69,17 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase
$this->assertEquals('http://symfony.com/contact', $crawler->filterXPath('//a')->link()->getUri(), '->addHtmlContent() adds nodes from an HTML string');
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
*/
public function testAddHtmlContentCharset()
{
$crawler = new Crawler();
$crawler->addHtmlContent('<html><div class="foo">Tiếng Việt</html>', 'UTF-8');
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
*/
@ -108,6 +119,17 @@ EOF
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addXmlContent() adds nodes from an XML string');
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
*/
public function testAddXmlContentCharset()
{
$crawler = new Crawler();
$crawler->addXmlContent('<html><div class="foo">Tiếng Việt</div></html>', 'UTF-8');
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
*/