feature #16057 Deprecate loading multiple documents in the same crawler (stof)
This PR was merged into the 2.8 branch.
Discussion
----------
Deprecate loading multiple documents in the same crawler
| Q | A
| ------------- | ---
| Bug fix? | no
| New feature? | no
| BC breaks? | no
| Deprecations? | yes
| Tests pass? | yes
| Fixed tickets | #15849
| License | MIT
| Doc PR | n/a
Note that loading multiple documents in the same crawler already creates weird behavior when working with namespaces (the mapping of aliases to namespaces is shared between documents, which was flawed).
As said in the issue, this opens the door to future optimizations (for instance, sharing the DOMXPath instance, including with subcrawlers).
Commits
-------
0d1cb3b
Deprecate loading multiple documents in the same crawler
This commit is contained in:
commit
4af8a5545a
@ -40,6 +40,11 @@ class Crawler extends \SplObjectStorage
|
||||
*/
|
||||
private $baseHref;
|
||||
|
||||
/**
|
||||
* @var \DOMDocument|null
|
||||
*/
|
||||
private $document;
|
||||
|
||||
/**
|
||||
* Whether the Crawler contains HTML or XML content (used when converting CSS to XPath).
|
||||
*
|
||||
@ -68,6 +73,7 @@ class Crawler extends \SplObjectStorage
|
||||
public function clear()
|
||||
{
|
||||
parent::removeAll($this);
|
||||
$this->document = null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -307,6 +313,14 @@ class Crawler extends \SplObjectStorage
|
||||
*/
|
||||
public function addNode(\DOMNode $node)
|
||||
{
|
||||
if (null !== $this->document && $this->document !== $node->ownerDocument) {
|
||||
@trigger_error('Attaching DOM nodes from multiple documents in a Crawler is deprecated as of 2.8 and will be forbidden in 3.0.', E_USER_DEPRECATED);
|
||||
}
|
||||
|
||||
if (null === $this->document) {
|
||||
$this->document = $node->ownerDocument;
|
||||
}
|
||||
|
||||
if ($node instanceof \DOMDocument) {
|
||||
parent::attach($node->documentElement);
|
||||
} else {
|
||||
@ -1152,6 +1166,7 @@ class Crawler extends \SplObjectStorage
|
||||
{
|
||||
$crawler = new static($nodes, $this->uri, $this->baseHref);
|
||||
$crawler->isHtml = $this->isHtml;
|
||||
$crawler->document = $this->document;
|
||||
|
||||
return $crawler;
|
||||
}
|
||||
|
@ -20,7 +20,10 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase
|
||||
$crawler = new Crawler();
|
||||
$this->assertCount(0, $crawler, '__construct() returns an empty crawler');
|
||||
|
||||
$crawler = new Crawler(new \DOMNode());
|
||||
$doc = new \DOMDocument();
|
||||
$node = $doc->createElement('test');
|
||||
|
||||
$crawler = new Crawler($node);
|
||||
$this->assertCount(1, $crawler, '__construct() takes a node as a first argument');
|
||||
}
|
||||
|
||||
@ -71,6 +74,14 @@ class CrawlerTest extends \PHPUnit_Framework_TestCase
|
||||
$crawler->addHtmlContent('<html><div class="foo"></html>', 'UTF-8');
|
||||
|
||||
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addHtmlContent() adds nodes from an HTML string');
|
||||
}
|
||||
|
||||
/**
|
||||
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
|
||||
*/
|
||||
public function testAddHtmlContentWithBaseTag()
|
||||
{
|
||||
$crawler = new Crawler();
|
||||
|
||||
$crawler->addHtmlContent('<html><head><base href="http://symfony.com"></head><a href="/contact"></a></html>', 'UTF-8');
|
||||
|
||||
@ -267,6 +278,7 @@ EOF
|
||||
*/
|
||||
public function testAddNodes()
|
||||
{
|
||||
$list = array();
|
||||
foreach ($this->createNodeList() as $node) {
|
||||
$list[] = $node;
|
||||
}
|
||||
@ -290,7 +302,10 @@ EOF
|
||||
|
||||
public function testClear()
|
||||
{
|
||||
$crawler = new Crawler(new \DOMNode());
|
||||
$doc = new \DOMDocument();
|
||||
$node = $doc->createElement('test');
|
||||
|
||||
$crawler = new Crawler($node);
|
||||
$crawler->clear();
|
||||
$this->assertCount(0, $crawler, '->clear() removes all the nodes from the crawler');
|
||||
}
|
||||
|
Reference in New Issue
Block a user