diff --git a/src/Symfony/Component/DomCrawler/CHANGELOG.md b/src/Symfony/Component/DomCrawler/CHANGELOG.md index 155c317002..fa043cfd1e 100644 --- a/src/Symfony/Component/DomCrawler/CHANGELOG.md +++ b/src/Symfony/Component/DomCrawler/CHANGELOG.md @@ -14,6 +14,7 @@ CHANGELOG * Added `Crawler::matches()` method. * Added `Crawler::closest()` method. * Added `Crawler::outerHtml()` method. +* Added an optional argument to the `Crawler::text()` method to opt in to whitespace normalization. 4.3.0 ----- diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 0fc61a7683..ac14bedf7b 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -569,15 +569,16 @@ class Crawler implements \Countable, \IteratorAggregate } /** - * Returns the node value of the first node of the list. + * Returns the text of the first node of the list. * - * @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown + * @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown + * @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces * * @return string The node value * * @throws \InvalidArgumentException When current node is empty */ - public function text($default = null) + public function text($default = null, bool $normalizeWhitespace = false) { if (!$this->nodes) { if (0 < \func_num_args()) { @@ -587,7 +588,13 @@ class Crawler implements \Countable, \IteratorAggregate throw new \InvalidArgumentException('The current node list is empty.'); } - return $this->getNode(0)->nodeValue; + $text = $this->getNode(0)->nodeValue; + + if (\func_num_args() > 1 && func_get_arg(1)) { + return trim(preg_replace('/(?:\s{2,}+|[^\S ])/', ' ', $text)); + } + + return $text; } /** diff --git a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php 
b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php index 92388a5ca2..ee45649a77 100644 --- a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php @@ -253,6 +253,14 @@ abstract class AbstractCrawlerTest extends TestCase $this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist'); } + public function testNormalizeWhiteSpace() + { + $crawler = $this->createTestCrawler()->filterXPath('//p'); + $this->assertSame('Elsa <3', $crawler->text(null, true), '->text(null, true) returns the text with normalized whitespace'); + $this->assertNotSame('Elsa <3', $crawler->text(null, false)); + $this->assertNotSame('Elsa <3', $crawler->text()); + } + public function testEach() { $data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) { @@ -1235,6 +1243,10 @@ HTML;
  • Two Bis
  • Three Bis
  • +

    + Elsa + <3 +