From 17467185533158b987c02988361a7ec054832b6e Mon Sep 17 00:00:00 2001 From: Amrouche Hamza Date: Mon, 8 Jul 2019 18:46:19 +0200 Subject: [PATCH 1/3] [DomCrawler] add an argument to text() to normalize whitespace --- src/Symfony/Component/DomCrawler/CHANGELOG.md | 1 + src/Symfony/Component/DomCrawler/Crawler.php | 14 +++++++++++--- .../DomCrawler/Tests/AbstractCrawlerTest.php | 12 ++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/Symfony/Component/DomCrawler/CHANGELOG.md b/src/Symfony/Component/DomCrawler/CHANGELOG.md index ad8c47982e..56863b7c73 100644 --- a/src/Symfony/Component/DomCrawler/CHANGELOG.md +++ b/src/Symfony/Component/DomCrawler/CHANGELOG.md @@ -8,6 +8,7 @@ CHANGELOG * Added `Crawler::matches()` method. * Added `Crawler::closest()` method. * Added `Crawler::outerHtml()` method. +* Added an argument to the `Crawler::text()` method to opt in to whitespace normalization. 4.3.0 ----- diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 169b056e4a..a1975644ee 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -591,7 +591,9 @@ class Crawler implements \Countable, \IteratorAggregate } /** - * Returns the node value of the first node of the list. + * Returns the text of the first node of the list. + * + * Pass true as the 2nd argument to normalize whitespaces. 
* * @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown * @@ -599,7 +601,7 @@ class Crawler implements \Countable, \IteratorAggregate * * @throws \InvalidArgumentException When current node is empty */ - public function text(/* $default = null */) + public function text(/* $default = null, $normalizeWhitespace = true */) { if (!$this->nodes) { if (0 < \func_num_args()) { @@ -609,7 +611,13 @@ class Crawler implements \Countable, \IteratorAggregate throw new \InvalidArgumentException('The current node list is empty.'); } - return $this->getNode(0)->nodeValue; + $text = $this->getNode(0)->nodeValue; + + if (\func_num_args() > 1 && func_get_arg(1)) { + return trim(preg_replace('/(?:\s{2,}+|[^\S ])/', ' ', $text)); + } + + return $text; } /** diff --git a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php index 8de52465a8..4a60ca83b0 100644 --- a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php @@ -253,6 +253,14 @@ abstract class AbstractCrawlerTest extends TestCase $this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist'); } + public function testNormalizeWhiteSpace() + { + $crawler = $this->createTestCrawler()->filterXPath('//p'); + $this->assertSame('Elsa <3', $crawler->text(null, true), '->text(null, true) returns the text with normalized whitespace'); + $this->assertNotSame('Elsa <3', $crawler->text(null, false)); + $this->assertNotSame('Elsa <3', $crawler->text()); + } + public function testEach() { $data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) { @@ -1291,6 +1299,10 @@ HTML;
  • Two Bis
  • Three Bis
  • +

    + Elsa + <3 +

    From 7a3a664ea01874d7a7eb35cd139b6244b562afba Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Fri, 27 Sep 2019 19:09:14 +0200 Subject: [PATCH 2/3] cs fix --- src/Symfony/Component/DomCrawler/Crawler.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index a1975644ee..3a59f9e405 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -595,7 +595,8 @@ class Crawler implements \Countable, \IteratorAggregate * * Pass true as the 2nd argument to normalize whitespaces. * - * @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown + * @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown + * @param mixed $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces * * @return string The node value * From f5d3d5fe1755994416e3a91f348fb81eb0e175b4 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Fri, 27 Sep 2019 19:11:11 +0200 Subject: [PATCH 3/3] cs fix bis --- src/Symfony/Component/DomCrawler/Crawler.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 3a59f9e405..57331af77a 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -596,13 +596,13 @@ class Crawler implements \Countable, \IteratorAggregate * Pass true as the 2nd argument to normalize whitespaces. 
* * @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown - * @param mixed $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces + * @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces * * @return string The node value * * @throws \InvalidArgumentException When current node is empty */ - public function text(/* $default = null, $normalizeWhitespace = true */) + public function text(/* $default = null, bool $normalizeWhitespace = false */) { if (!$this->nodes) { if (0 < \func_num_args()) {