From 17467185533158b987c02988361a7ec054832b6e Mon Sep 17 00:00:00 2001 From: Amrouche Hamza Date: Mon, 8 Jul 2019 18:46:19 +0200 Subject: [PATCH] [DomCrawler] add an argument to text() to normalize whitespaces --- src/Symfony/Component/DomCrawler/CHANGELOG.md | 1 + src/Symfony/Component/DomCrawler/Crawler.php | 14 +++++++++++--- .../DomCrawler/Tests/AbstractCrawlerTest.php | 12 ++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/Symfony/Component/DomCrawler/CHANGELOG.md b/src/Symfony/Component/DomCrawler/CHANGELOG.md index ad8c47982e..56863b7c73 100644 --- a/src/Symfony/Component/DomCrawler/CHANGELOG.md +++ b/src/Symfony/Component/DomCrawler/CHANGELOG.md @@ -8,6 +8,7 @@ CHANGELOG * Added `Crawler::matches()` method. * Added `Crawler::closest()` method. * Added `Crawler::outerHtml()` method. +* Added an argument to the `Crawler::text()` method to opt in to normalizing whitespace. 4.3.0 ----- diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 169b056e4a..a1975644ee 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -591,7 +591,9 @@ class Crawler implements \Countable, \IteratorAggregate } /** - * Returns the node value of the first node of the list. + * Returns the text of the first node of the list. + * + * Pass true as the 2nd argument to normalize whitespaces. 
* * @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown * @@ -599,7 +601,7 @@ class Crawler implements \Countable, \IteratorAggregate * * @throws \InvalidArgumentException When current node is empty */ - public function text(/* $default = null */) + public function text(/* $default = null, $normalizeWhitespace = true */) { if (!$this->nodes) { if (0 < \func_num_args()) { @@ -609,7 +611,13 @@ class Crawler implements \Countable, \IteratorAggregate throw new \InvalidArgumentException('The current node list is empty.'); } - return $this->getNode(0)->nodeValue; + $text = $this->getNode(0)->nodeValue; + + if (\func_num_args() > 1 && func_get_arg(1)) { + return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $text), " \n\r\t\x0C"); /* collapse and trim only HTML "ASCII whitespace" (TAB, LF, FF, CR, SPACE); byte-based \s and default trim() also touch other bytes */ + } + + return $text; } /** diff --git a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php index 8de52465a8..4a60ca83b0 100644 --- a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php @@ -253,6 +253,14 @@ abstract class AbstractCrawlerTest extends TestCase $this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist'); } + public function testNormalizeWhiteSpace() + { + $crawler = $this->createTestCrawler()->filterXPath('//p'); + $this->assertSame('Elsa <3', $crawler->text(null, true), '->text(null, true) returns the text with normalized whitespace'); + $this->assertNotSame('Elsa <3', $crawler->text(null, false)); + $this->assertNotSame('Elsa <3', $crawler->text()); + } + public function testEach() { $data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) { @@ -1291,6 +1299,10 @@ HTML;
  • Two Bis
  • Three Bis
  • +

    + Elsa + <3 +