[DomCrawler] added a way to get parsing errors for Crawler::addHtmlContent() and Crawler::addXmlContent() via libxml functions

This commit is contained in:
Fabien Potencier 2011-09-28 09:53:50 +02:00
parent 382a421d5d
commit a57a4aff55
3 changed files with 75 additions and 0 deletions

View File

@ -35,6 +35,7 @@ To get the diff between two versions, go to https://github.com/symfony/symfony/c
### DomCrawler
* added a way to get parsing errors for Crawler::addHtmlContent() and Crawler::addXmlContent() via libxml functions
* added support for submitting a form without a submit button
### Finder

View File

@ -107,6 +107,13 @@ class Crawler extends \SplObjectStorage
/**
* Adds an HTML content to the list of nodes.
*
* While the content is parsed, libxml errors are collected internally
* instead of being emitted as PHP warnings.
*
* If you want to get the parsing errors, enable internal errors
* via libxml_use_internal_errors(true) before calling this method,
* and then read them via libxml_get_errors(). Be sure to clear
* the errors with libxml_clear_errors() afterward.
*
* @param string $content The HTML content
* @param string $charset The charset
*
@ -117,7 +124,10 @@ class Crawler extends \SplObjectStorage
$dom = new \DOMDocument('1.0', $charset);
$dom->validateOnParse = true;
$current = libxml_use_internal_errors(true);
@$dom->loadHTML($content);
libxml_use_internal_errors($current);
$this->addDocument($dom);
$base = $this->filter('base')->extract(array('href'));
@ -130,6 +140,13 @@ class Crawler extends \SplObjectStorage
/**
* Adds an XML content to the list of nodes.
*
* While the content is parsed, libxml errors are collected internally
* instead of being emitted as PHP warnings.
*
* If you want to get the parsing errors, enable internal errors
* via libxml_use_internal_errors(true) before calling this method,
* and then read them via libxml_get_errors(). Be sure to clear
* the errors with libxml_clear_errors() afterward.
*
* @param string $content The XML content
* @param string $charset The charset
*
@ -141,7 +158,10 @@ class Crawler extends \SplObjectStorage
$dom->validateOnParse = true;
// remove the default namespace to make XPath expressions simpler
$current = libxml_use_internal_errors(true);
@$dom->loadXML(str_replace('xmlns', 'ns', $content));
libxml_use_internal_errors($current);
$this->addDocument($dom);
}

View File

@ -69,6 +69,34 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase
$this->assertEquals('http://symfony.com/contact', $crawler->filter('a')->link()->getUri(), '->addHtmlContent() adds nodes from an HTML string');
}
/**
 * Checks that HTML parsing errors raised inside addHtmlContent() can be
 * read through libxml_get_errors() when the caller enabled libxml internal
 * error handling before the call.
 *
 * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
 */
public function testAddHtmlContentWithErrors()
{
    // Remember the previous libxml setting so it can be restored afterwards
    // instead of being unconditionally forced to false.
    $internalErrors = libxml_use_internal_errors(true);
    // Drop errors left over from earlier parsing so the exact count below is reliable.
    libxml_clear_errors();

    $crawler = new Crawler();
    $crawler->addHtmlContent(<<<EOF
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<nav><a href="#"><a href="#"></nav>
</body>
</html>
EOF
    , 'UTF-8');

    // Collect the errors and restore the global libxml state BEFORE asserting:
    // a failing assertion throws, and must not leak libxml state into other tests.
    $errors = libxml_get_errors();
    libxml_clear_errors();
    libxml_use_internal_errors($internalErrors);

    $this->assertEquals(1, count($errors));
    $this->assertEquals("Tag nav invalid\n", $errors[0]->message);
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
*/
@ -80,6 +108,32 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase
$this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addXmlContent() adds nodes from an XML string');
}
/**
 * Checks that XML parsing errors raised inside addXmlContent() can be
 * read through libxml_get_errors() when the caller enabled libxml internal
 * error handling before the call.
 *
 * @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
 */
public function testAddXmlContentWithErrors()
{
    // Remember the previous libxml setting so it can be restored afterwards
    // instead of being unconditionally forced to false.
    $internalErrors = libxml_use_internal_errors(true);
    // Drop errors left over from earlier parsing so only this parse is measured.
    libxml_clear_errors();

    $crawler = new Crawler();
    $crawler->addXmlContent(<<<EOF
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<nav><a href="#"><a href="#"></nav>
</body>
</html>
EOF
    , 'UTF-8');

    // Collect the errors and restore the global libxml state BEFORE asserting:
    // a failing assertion throws, and must not leak libxml state into other tests.
    $errors = libxml_get_errors();
    libxml_clear_errors();
    libxml_use_internal_errors($internalErrors);

    // The markup is not well-formed XML, so more than one error is expected.
    $this->assertTrue(count($errors) > 1);
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::addContent
*/