2010-04-15 13:41:42 +01:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
|
|
|
|
|
2011-01-15 13:29:43 +00:00
|
|
|
namespace Symfony\Component\DomCrawler;
|
|
|
|
|
2010-04-15 13:41:42 +01:00
|
|
|
/**
 * Link represents an HTML link (an HTML a or area tag).
 *
 * @author Fabien Potencier <fabien@symfony.com>
 *
 * @api
 */
|
|
|
|
class Link
{
    /**
     * @var \DOMNode A \DOMNode instance
     */
    protected $node;

    /**
     * @var string The method to use for the link
     */
    protected $method;

    /**
     * @var string The URI of the page where the link is embedded (or the base href)
     */
    protected $currentUri;

    /**
     * Constructor.
     *
     * @param \DOMNode $node       A \DOMNode instance
     * @param string   $currentUri The URI of the page where the link is embedded (or the base href)
     * @param string   $method     The method to use for the link (get by default)
     *
     * @throws \InvalidArgumentException if the current URI does not start with "http" or "file"
     * @throws \LogicException           if setNode() rejects the given node
     *
     * @api
     */
    public function __construct(\DOMNode $node, $currentUri, $method = 'GET')
    {
        // Only the first four characters are inspected, case-insensitively.
        if (!in_array(strtolower(substr($currentUri, 0, 4)), array('http', 'file'), true)) {
            throw new \InvalidArgumentException(sprintf('Current URI must be an absolute URL ("%s").', $currentUri));
        }

        $this->setNode($node);
        $this->method = $method ? strtoupper($method) : null;
        $this->currentUri = $currentUri;
    }

    /**
     * Gets the node associated with this link.
     *
     * @return \DOMNode A \DOMNode instance
     */
    public function getNode()
    {
        return $this->node;
    }

    /**
     * Gets the method associated with this link.
     *
     * @return string The method
     *
     * @api
     */
    public function getMethod()
    {
        return $this->method;
    }

    /**
     * Gets the URI associated with this link.
     *
     * The raw URI is trimmed and, unless it already carries a scheme,
     * resolved against the current URI.
     *
     * @return string The URI
     *
     * @api
     */
    public function getUri()
    {
        $uri = trim($this->getRawUri());

        // absolute URL?
        if (null !== parse_url($uri, PHP_URL_SCHEME)) {
            return $uri;
        }

        // empty URI (compared strictly so that a reference such as "0" is still resolved)
        if ('' === $uri) {
            return $this->currentUri;
        }

        // an anchor
        if ('#' === $uri[0]) {
            return $this->cleanupAnchor($this->currentUri).$uri;
        }

        $baseUri = $this->cleanupUri($this->currentUri);

        // a query string only: it replaces the query string of the current URI
        if ('?' === $uri[0]) {
            return $baseUri.$uri;
        }

        // absolute URL with relative schema
        if (0 === strpos($uri, '//')) {
            return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri;
        }

        // keep only the "scheme://authority" part of the current URI
        $baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri);

        // absolute path
        if ('/' === $uri[0]) {
            return $baseUri.$uri;
        }

        // relative path: merge it with the directory of the current URI's path
        $currentPath = (string) parse_url((string) substr($this->currentUri, strlen($baseUri)), PHP_URL_PATH);
        $lastSlash = strrpos($currentPath, '/');
        $directory = false === $lastSlash ? '' : (string) substr($currentPath, 0, $lastSlash);

        // only the path part of the reference is canonicalized;
        // its query string and fragment are appended untouched
        $pathLength = strcspn($uri, '?#');
        $path = $this->canonicalizePath($directory.'/'.substr($uri, 0, $pathLength));
        $suffix = (string) substr($uri, $pathLength);

        return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path.$suffix;
    }

    /**
     * Returns raw URI data.
     *
     * @return string
     */
    protected function getRawUri()
    {
        return $this->node->getAttribute('href');
    }

    /**
     * Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
     *
     * "." segments are dropped and ".." segments remove the preceding segment.
     * When the last segment is "." or "..", the result keeps a trailing slash.
     *
     * @param string $path URI path
     *
     * @return string
     */
    protected function canonicalizePath($path)
    {
        if ('' === $path || '/' === $path) {
            return $path;
        }

        $segments = explode('/', $path);

        // a trailing dot segment designates a directory: add an empty segment
        // so that the joined result ends with a slash
        $lastSegment = end($segments);
        if ('.' === $lastSegment || '..' === $lastSegment) {
            $segments[] = '';
        }

        $output = array();

        foreach ($segments as $segment) {
            if ('..' === $segment) {
                array_pop($output);
            } elseif ('.' !== $segment) {
                $output[] = $segment;
            }
        }

        return implode('/', $output);
    }

    /**
     * Sets current \DOMNode instance.
     *
     * @param \DOMNode $node A \DOMNode instance
     *
     * @throws \LogicException If given node is not an anchor
     */
    protected function setNode(\DOMNode $node)
    {
        if ('a' !== $node->nodeName && 'area' !== $node->nodeName) {
            throw new \LogicException(sprintf('Unable to click on a "%s" tag.', $node->nodeName));
        }

        $this->node = $node;
    }

    /**
     * Removes the query string and the anchor from the given uri.
     *
     * @param string $uri The uri to clean
     *
     * @return string
     */
    private function cleanupUri($uri)
    {
        return $this->cleanupQuery($this->cleanupAnchor($uri));
    }

    /**
     * Remove the query string from the uri.
     *
     * @param string $uri
     *
     * @return string The uri up to (excluding) its first "?", or the uri unchanged if it has none
     */
    private function cleanupQuery($uri)
    {
        if (false !== $pos = strpos($uri, '?')) {
            return substr($uri, 0, $pos);
        }

        return $uri;
    }

    /**
     * Remove the anchor from the uri.
     *
     * @param string $uri
     *
     * @return string The uri up to (excluding) its first "#", or the uri unchanged if it has none
     */
    private function cleanupAnchor($uri)
    {
        if (false !== $pos = strpos($uri, '#')) {
            return substr($uri, 0, $pos);
        }

        return $uri;
    }
}
|