From a57a4aff55ef29562d4a657221ae4d01e3c53b7f Mon Sep 17 00:00:00 2001 From: Fabien Potencier Date: Wed, 28 Sep 2011 09:53:50 +0200 Subject: [PATCH] [DomCrawler] added a way to get parsing errors for Crawler::addHtmlContent() and Crawler::addXmlContent() via libxml functions --- CHANGELOG-2.1.md | 1 + src/Symfony/Component/DomCrawler/Crawler.php | 20 +++++++ .../Component/DomCrawler/CrawlerTest.php | 54 +++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/CHANGELOG-2.1.md b/CHANGELOG-2.1.md index ce6297cab3..35e7e12b2e 100644 --- a/CHANGELOG-2.1.md +++ b/CHANGELOG-2.1.md @@ -35,6 +35,7 @@ To get the diff between two versions, go to https://github.com/symfony/symfony/c ### DomCrawler + * added a way to get parsing errors for Crawler::addHtmlContent() and Crawler::addXmlContent() via libxml functions * added support for submitting a form without a submit button ### Finder diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 7e42b507be..24a9352495 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -107,6 +107,13 @@ class Crawler extends \SplObjectStorage /** * Adds an HTML content to the list of nodes. * + * The libxml errors are disabled when the content is parsed. + * + * If you want to get parsing errors, be sure to enable + * internal errors via libxml_use_internal_errors(true) + * and then, get the errors via libxml_get_errors(). Be + * sure to clear errors with libxml_clear_errors() afterward. 
+ * * @param string $content The HTML content * @param string $charset The charset * @@ -117,7 +124,10 @@ class Crawler extends \SplObjectStorage $dom = new \DOMDocument('1.0', $charset); $dom->validateOnParse = true; + $current = libxml_use_internal_errors(true); @$dom->loadHTML($content); + libxml_use_internal_errors($current); + $this->addDocument($dom); $base = $this->filter('base')->extract(array('href')); @@ -130,6 +140,13 @@ class Crawler extends \SplObjectStorage /** * Adds an XML content to the list of nodes. * + * The libxml errors are disabled when the content is parsed. + * + * If you want to get parsing errors, be sure to enable + * internal errors via libxml_use_internal_errors(true) + * and then, get the errors via libxml_get_errors(). Be + * sure to clear errors with libxml_clear_errors() afterward. + * * @param string $content The XML content * @param string $charset The charset * @@ -141,7 +158,10 @@ class Crawler extends \SplObjectStorage $dom->validateOnParse = true; // remove the default namespace to make XPath expressions simpler + $current = libxml_use_internal_errors(true); @$dom->loadXML(str_replace('xmlns', 'ns', $content)); + libxml_use_internal_errors($current); + $this->addDocument($dom); } diff --git a/tests/Symfony/Tests/Component/DomCrawler/CrawlerTest.php b/tests/Symfony/Tests/Component/DomCrawler/CrawlerTest.php index 53bc9b6925..78bb9c38ed 100644 --- a/tests/Symfony/Tests/Component/DomCrawler/CrawlerTest.php +++ b/tests/Symfony/Tests/Component/DomCrawler/CrawlerTest.php @@ -69,6 +69,34 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase $this->assertEquals('http://symfony.com/contact', $crawler->filter('a')->link()->getUri(), '->addHtmlContent() adds nodes from an HTML string'); } + /** + * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent + */ + public function testAddHtmlContentWithErrors() + { + libxml_use_internal_errors(true); + + $crawler = new Crawler(); + $crawler->addHtmlContent(<<<EOF +<!DOCTYPE html> +<html> + <head> + </head> + <body> + <nav><a href="#"><a href="#"></nav> + </body> +</html> +EOF + , 
'UTF-8'); + + $errors = libxml_get_errors(); + $this->assertEquals(1, count($errors)); + $this->assertEquals("Tag nav invalid\n", $errors[0]->message); + + libxml_clear_errors(); + libxml_use_internal_errors(false); + } + /** * @covers Symfony\Component\DomCrawler\Crawler::addXmlContent */ @@ -80,6 +108,32 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addXmlContent() adds nodes from an XML string'); } + /** + * @covers Symfony\Component\DomCrawler\Crawler::addXmlContent + */ + public function testAddXmlContentWithErrors() + { + libxml_use_internal_errors(true); + + $crawler = new Crawler(); + $crawler->addXmlContent(<<<EOF +<!DOCTYPE html> +<html> + <head> + </head> + <body> + <nav><a href="#"><a href="#"></nav> + </body> +</html> +EOF + , 'UTF-8'); + + $this->assertTrue(count(libxml_get_errors()) > 1); + + libxml_clear_errors(); + libxml_use_internal_errors(false); + } + /** * @covers Symfony\Component\DomCrawler\Crawler::addContent */