From 8f8ba380d6d81fe347ef6957cb87f8a977287e95 Mon Sep 17 00:00:00 2001 From: Matthijs van den Bos Date: Thu, 28 Feb 2013 12:27:10 +0100 Subject: [PATCH] [DomCrawler] fix handling of schemes by Link::getUri() A link (anchor tag with an href attr) crawled by the Crawler can contain any valid URI, including mailto: links. Currently this is not correctly supported by Link::getUri. URIs that do not start with 'http' are treated as relative URIs and appended to the base URI. This leads to strange URIs like this: http://foo.com/mailto:foo@bar.com Fixed Link::getUri to treat any URI with a scheme part as an absolute URL. Updated the unit tests to test for this. --- src/Symfony/Component/DomCrawler/Link.php | 2 +- src/Symfony/Component/DomCrawler/Tests/LinkTest.php | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Symfony/Component/DomCrawler/Link.php b/src/Symfony/Component/DomCrawler/Link.php index 31e8ba12f5..dfd8fceaf8 100644 --- a/src/Symfony/Component/DomCrawler/Link.php +++ b/src/Symfony/Component/DomCrawler/Link.php @@ -89,7 +89,7 @@ class Link $uri = trim($this->getRawUri()); // absolute URL? 
- if (0 === strpos($uri, 'http')) { + if (null !== parse_url($uri, PHP_URL_SCHEME)) { return $uri; } diff --git a/src/Symfony/Component/DomCrawler/Tests/LinkTest.php b/src/Symfony/Component/DomCrawler/Tests/LinkTest.php index 5d40179a53..976082d3ee 100644 --- a/src/Symfony/Component/DomCrawler/Tests/LinkTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/LinkTest.php @@ -93,6 +93,8 @@ class LinkTest extends \PHPUnit_Framework_TestCase array('?a=b', 'http://localhost/bar/', 'http://localhost/bar/?a=b'), array('http://login.foo.com/foo', 'http://localhost/bar/', 'http://login.foo.com/foo'), + array('https://login.foo.com/foo', 'https://localhost/bar/', 'https://login.foo.com/foo'), + array('mailto:foo@bar.com', 'http://localhost/foo', 'mailto:foo@bar.com'), array('?foo=2', 'http://localhost?foo=1', 'http://localhost?foo=2'), array('?foo=2', 'http://localhost/?foo=1', 'http://localhost/?foo=2'),