diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php
index c5fc198a34..3f8d9ae58c 100644
--- a/src/Symfony/Component/DomCrawler/Crawler.php
+++ b/src/Symfony/Component/DomCrawler/Crawler.php
@@ -92,11 +92,11 @@ class Crawler extends \SplObjectStorage
         }
 
         // DOM only for HTML/XML content
-        if (!preg_match('/(x|ht)ml/i', $type, $matches)) {
+        if (!preg_match('/(x|ht)ml/i', $type, $xmlMatches)) {
             return null;
         }
 
-        $charset = 'ISO-8859-1';
+        $charset = null;
         if (false !== $pos = strpos($type, 'charset=')) {
             $charset = substr($type, $pos + 8);
             if (false !== $pos = strpos($charset, ';')) {
@@ -104,7 +104,18 @@ class Crawler extends \SplObjectStorage
             }
         }
 
-        if ('x' === $matches[1]) {
+        // The content type carried no charset: look for one declared in a <meta> tag of the content.
+        if (null === $charset &&
+            preg_match('/\<meta[^\>]+charset *= *["\']?([a-zA-Z\-0-9]+)/i', $content, $matches)) {
+            $charset = $matches[1];
+        }
+
+        // Nothing declared a charset: keep the default this method used before.
+        if (null === $charset) {
+            $charset = 'ISO-8859-1';
+        }
+
+        if ('x' === $xmlMatches[1]) {
             $this->addXmlContent($content, $charset);
         } else {
             $this->addHtmlContent($content, $charset);
diff --git a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
index 4411c7d443..2cc4831771 100644
--- a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
+++ b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
@@ -207,6 +207,11 @@ EOF
         $crawler = new Crawler();
         $crawler->addContent('foo bar', 'text/plain');
         $this->assertCount(0, $crawler, '->addContent() does nothing if the type is not (x|ht)ml');
+
+        $crawler = new Crawler();
+        // No $type is given, so the charset can only come from the <meta> tag in the markup.
+        $crawler->addContent('<html><head><meta charset="utf-8"></head><body><span>中文</span></body></html>');
+        $this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset');
     }
 
     /**