feature #16057 Deprecate loading multiple documents in the same crawler (stof)

This PR was merged into the 2.8 branch.

Discussion
----------

Deprecate loading multiple documents in the same crawler

| Q             | A
| ------------- | ---
| Bug fix?      | no
| New feature?  | no
| BC breaks?    | no
| Deprecations? | yes
| Tests pass?   | yes
| Fixed tickets | #15849
| License       | MIT
| Doc PR        | n/a

Note that loading multiple documents in the same crawler already produces weird results when working with namespaces (the mapping of aliases to namespaces is shared between documents, which is flawed).

As said in the issue, this opens the door to optimizations in the future (for example, sharing the DOMXPath instance, including with sub-crawlers).

Commits
-------

0d1cb3b Deprecate loading multiple documents in the same crawler
This commit is contained in:
Fabien Potencier 2015-10-02 12:39:30 +02:00
commit 4af8a5545a
2 changed files with 32 additions and 2 deletions

View File

@ -40,6 +40,11 @@ class Crawler extends \SplObjectStorage
*/
private $baseHref;
/**
* @var \DOMDocument|null
*/
private $document;
/**
* Whether the Crawler contains HTML or XML content (used when converting CSS to XPath).
*
@ -68,6 +73,7 @@ class Crawler extends \SplObjectStorage
public function clear()
{
// Passing the storage to its own removeAll() detaches every node it holds.
parent::removeAll($this);
// Forget the tracked owner document so that nodes added afterwards are not
// compared against the previous document by the check in addNode().
$this->document = null;
}
/**
@ -307,6 +313,14 @@ class Crawler extends \SplObjectStorage
*/
public function addNode(\DOMNode $node)
{
if (null !== $this->document && $this->document !== $node->ownerDocument) {
@trigger_error('Attaching DOM nodes from multiple documents in a Crawler is deprecated as of 2.8 and will be forbidden in 3.0.', E_USER_DEPRECATED);
}
if (null === $this->document) {
$this->document = $node->ownerDocument;
}
if ($node instanceof \DOMDocument) {
parent::attach($node->documentElement);
} else {
@ -1152,6 +1166,7 @@ class Crawler extends \SplObjectStorage
{
$crawler = new static($nodes, $this->uri, $this->baseHref);
$crawler->isHtml = $this->isHtml;
$crawler->document = $this->document;
return $crawler;
}

View File

@ -20,7 +20,10 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase
$crawler = new Crawler();
$this->assertCount(0, $crawler, '__construct() returns an empty crawler');
$crawler = new Crawler(new \DOMNode());
$doc = new \DOMDocument();
$node = $doc->createElement('test');
$crawler = new Crawler($node);
$this->assertCount(1, $crawler, '__construct() takes a node as a first argument');
}
@ -71,6 +74,14 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase
$crawler->addHtmlContent('<html><div class="foo"></html>', 'UTF-8');
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addHtmlContent() adds nodes from an HTML string');
}
/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
*/
public function testAddHtmlContentWithBaseTag()
{
$crawler = new Crawler();
$crawler->addHtmlContent('<html><head><base href="http://symfony.com"></head><a href="/contact"></a></html>', 'UTF-8');
@ -267,6 +278,7 @@ EOF
*/
public function testAddNodes()
{
$list = array();
foreach ($this->createNodeList() as $node) {
$list[] = $node;
}
@ -290,7 +302,10 @@ EOF
public function testClear()
{
$crawler = new Crawler(new \DOMNode());
$doc = new \DOMDocument();
$node = $doc->createElement('test');
$crawler = new Crawler($node);
$crawler->clear();
$this->assertCount(0, $crawler, '->clear() removes all the nodes from the crawler');
}