[DomCrawler] fixed bug #12143
This commit is contained in:
parent
8d18c98de0
commit
61f22d7ffb
@ -23,21 +23,27 @@ use Symfony\Component\CssSelector\CssSelector;
|
|||||||
class Crawler extends \SplObjectStorage
|
class Crawler extends \SplObjectStorage
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* @var string The current URI or the base href value
|
* @var string The current URI
|
||||||
*/
|
*/
|
||||||
protected $uri;
|
protected $uri;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var string The base href value
|
||||||
|
*/
|
||||||
|
private $baseHref;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor.
|
* Constructor.
|
||||||
*
|
*
|
||||||
* @param mixed $node A Node to use as the base for the crawling
|
* @param mixed $node A Node to use as the base for the crawling
|
||||||
* @param string $uri The current URI or the base href value
|
* @param string $currentUri The current URI
|
||||||
*
|
* @param string $baseHref The base href value
|
||||||
* @api
|
* @api
|
||||||
*/
|
*/
|
||||||
public function __construct($node = null, $uri = null)
|
public function __construct($node = null, $currentUri = null, $baseHref = null)
|
||||||
{
|
{
|
||||||
$this->uri = $uri;
|
$this->uri = $currentUri;
|
||||||
|
$this->baseHref = $baseHref ?: $currentUri;
|
||||||
|
|
||||||
$this->add($node);
|
$this->add($node);
|
||||||
}
|
}
|
||||||
@ -176,13 +182,13 @@ class Crawler extends \SplObjectStorage
|
|||||||
|
|
||||||
$baseHref = current($base);
|
$baseHref = current($base);
|
||||||
if (count($base) && !empty($baseHref)) {
|
if (count($base) && !empty($baseHref)) {
|
||||||
if ($this->uri) {
|
if ($this->baseHref) {
|
||||||
$linkNode = $dom->createElement('a');
|
$linkNode = $dom->createElement('a');
|
||||||
$linkNode->setAttribute('href', $baseHref);
|
$linkNode->setAttribute('href', $baseHref);
|
||||||
$link = new Link($linkNode, $this->uri);
|
$link = new Link($linkNode, $this->baseHref);
|
||||||
$this->uri = $link->getUri();
|
$this->baseHref = $link->getUri();
|
||||||
} else {
|
} else {
|
||||||
$this->uri = $baseHref;
|
$this->baseHref = $baseHref;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -294,11 +300,11 @@ class Crawler extends \SplObjectStorage
|
|||||||
{
|
{
|
||||||
foreach ($this as $i => $node) {
|
foreach ($this as $i => $node) {
|
||||||
if ($i == $position) {
|
if ($i == $position) {
|
||||||
return new static($node, $this->uri);
|
return new static($node, $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new static(null, $this->uri);
|
return new static(null, $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -323,7 +329,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
{
|
{
|
||||||
$data = array();
|
$data = array();
|
||||||
foreach ($this as $i => $node) {
|
foreach ($this as $i => $node) {
|
||||||
$data[] = $closure(new static($node, $this->uri), $i);
|
$data[] = $closure(new static($node, $this->uri, $this->baseHref), $i);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $data;
|
return $data;
|
||||||
@ -344,12 +350,12 @@ class Crawler extends \SplObjectStorage
|
|||||||
{
|
{
|
||||||
$nodes = array();
|
$nodes = array();
|
||||||
foreach ($this as $i => $node) {
|
foreach ($this as $i => $node) {
|
||||||
if (false !== $closure(new static($node, $this->uri), $i)) {
|
if (false !== $closure(new static($node, $this->uri, $this->baseHref), $i)) {
|
||||||
$nodes[] = $node;
|
$nodes[] = $node;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new static($nodes, $this->uri);
|
return new static($nodes, $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -391,7 +397,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
throw new \InvalidArgumentException('The current node list is empty.');
|
throw new \InvalidArgumentException('The current node list is empty.');
|
||||||
}
|
}
|
||||||
|
|
||||||
return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri);
|
return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -409,7 +415,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
throw new \InvalidArgumentException('The current node list is empty.');
|
throw new \InvalidArgumentException('The current node list is empty.');
|
||||||
}
|
}
|
||||||
|
|
||||||
return new static($this->sibling($this->getNode(0)), $this->uri);
|
return new static($this->sibling($this->getNode(0)), $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -427,7 +433,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
throw new \InvalidArgumentException('The current node list is empty.');
|
throw new \InvalidArgumentException('The current node list is empty.');
|
||||||
}
|
}
|
||||||
|
|
||||||
return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri);
|
return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -454,7 +460,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new static($nodes, $this->uri);
|
return new static($nodes, $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -474,7 +480,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
|
|
||||||
$node = $this->getNode(0)->firstChild;
|
$node = $this->getNode(0)->firstChild;
|
||||||
|
|
||||||
return new static($node ? $this->sibling($node) : array(), $this->uri);
|
return new static($node ? $this->sibling($node) : array(), $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -601,7 +607,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
|
|
||||||
// If we dropped all expressions in the XPath while preparing it, there would be no match
|
// If we dropped all expressions in the XPath while preparing it, there would be no match
|
||||||
if ('' === $xpath) {
|
if ('' === $xpath) {
|
||||||
return new static(null, $this->uri);
|
return new static(null, $this->uri, $this->baseHref);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this->filterRelativeXPath($xpath);
|
return $this->filterRelativeXPath($xpath);
|
||||||
@ -687,7 +693,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
|
|
||||||
$node = $this->getNode(0);
|
$node = $this->getNode(0);
|
||||||
|
|
||||||
return new Link($node, $this->uri, $method);
|
return new Link($node, $this->baseHref, $method);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -701,7 +707,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
{
|
{
|
||||||
$links = array();
|
$links = array();
|
||||||
foreach ($this as $node) {
|
foreach ($this as $node) {
|
||||||
$links[] = new Link($node, $this->uri, 'get');
|
$links[] = new Link($node, $this->baseHref, 'get');
|
||||||
}
|
}
|
||||||
|
|
||||||
return $links;
|
return $links;
|
||||||
@ -792,7 +798,7 @@ class Crawler extends \SplObjectStorage
|
|||||||
*/
|
*/
|
||||||
private function filterRelativeXPath($xpath)
|
private function filterRelativeXPath($xpath)
|
||||||
{
|
{
|
||||||
$crawler = new static(null, $this->uri);
|
$crawler = new static(null, $this->uri, $this->baseHref);
|
||||||
|
|
||||||
foreach ($this as $node) {
|
foreach ($this as $node) {
|
||||||
$domxpath = new \DOMXPath($node->ownerDocument);
|
$domxpath = new \DOMXPath($node->ownerDocument);
|
||||||
|
@ -824,16 +824,33 @@ HTML;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testBaseTag()
|
/**
|
||||||
|
* @dataProvider getBaseTagData
|
||||||
|
*/
|
||||||
|
public function testBaseTag($baseValue, $linkValue, $expectedUri, $currentUri = null, $description = null)
|
||||||
{
|
{
|
||||||
$crawler = new Crawler('<html><base href="http://base.com"><a href="link"></a></html>');
|
$crawler = new Crawler('<html><base href="'.$baseValue.'"><a href="'.$linkValue.'"></a></html>', $currentUri);
|
||||||
$this->assertEquals('http://base.com/link', $crawler->filterXPath('//a')->link()->getUri());
|
$this->assertEquals($expectedUri, $crawler->filterXPath('//a')->link()->getUri(), $description);
|
||||||
|
}
|
||||||
|
|
||||||
$crawler = new Crawler('<html><base href="//base.com"><a href="link"></a></html>', 'https://domain.com');
|
public function getBaseTagData()
|
||||||
$this->assertEquals('https://base.com/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can use a schema-less URL');
|
{
|
||||||
|
return array(
|
||||||
|
array('http://base.com', 'link', 'http://base.com/link'),
|
||||||
|
array('//base.com', 'link', 'https://base.com/link', 'https://domain.com', '<base> tag can use a schema-less URL'),
|
||||||
|
array('path/', 'link', 'https://domain.com/path/link', 'https://domain.com', '<base> tag can set a path'),
|
||||||
|
array('http://base.com', '#', 'http://base.com#', 'http://domain.com/path/link', '<base> tag does work with links to an anchor'),
|
||||||
|
array('http://base.com', '', 'http://base.com', 'http://domain.com/path/link', '<base> tag does work with empty links'),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
$crawler = new Crawler('<html><base href="path/"><a href="link"></a></html>', 'https://domain.com');
|
public function testBaseTagWithForm()
|
||||||
$this->assertEquals('https://domain.com/path/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can set a path');
|
{
|
||||||
|
$crawler = new Crawler('<html><base href="/basepath"><form method="post" action="/registration"><button type="submit" name="submit"/></form></html>', 'http://example.com/registration');
|
||||||
|
$this->assertEquals('http://example.com/registration', $crawler->filterXPath('//button')->form()->getUri());
|
||||||
|
|
||||||
|
$crawler = new Crawler('<html><base href="/basepath"><form method="post"><button type="submit" name="submit"/></form></html>', 'http://example.com/registration');
|
||||||
|
$this->assertEquals('http://example.com/registration', $crawler->filterXPath('//button')->form()->getUri());
|
||||||
}
|
}
|
||||||
|
|
||||||
public function createTestCrawler($uri = null)
|
public function createTestCrawler($uri = null)
|
||||||
|
Reference in New Issue
Block a user