[DomCrawler] add a value() method, normalize whitespaces

This commit is contained in:
Amrouche Hamza 2019-07-08 18:46:19 +02:00 committed by Nicolas Grekas
parent 50da16bb6e
commit 1746718553
3 changed files with 24 additions and 3 deletions

View File

@ -8,6 +8,7 @@ CHANGELOG
* Added `Crawler::matches()` method. * Added `Crawler::matches()` method.
* Added `Crawler::closest()` method. * Added `Crawler::closest()` method.
* Added `Crawler::outerHtml()` method. * Added `Crawler::outerHtml()` method.
* Added an argument to the `Crawler::text()` method to opt in to whitespace normalization.
4.3.0 4.3.0
----- -----

View File

@ -591,7 +591,9 @@ class Crawler implements \Countable, \IteratorAggregate
} }
/** /**
* Returns the node value of the first node of the list. * Returns the text of the first node of the list.
*
* Pass true as the 2nd argument to normalize whitespace.
* *
* @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown * @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown
* *
@ -599,7 +601,7 @@ class Crawler implements \Countable, \IteratorAggregate
* *
* @throws \InvalidArgumentException When current node is empty * @throws \InvalidArgumentException When current node is empty
*/ */
public function text(/* $default = null */) public function text(/* $default = null, $normalizeWhitespace = true */)
{ {
if (!$this->nodes) { if (!$this->nodes) {
if (0 < \func_num_args()) { if (0 < \func_num_args()) {
@ -609,7 +611,13 @@ class Crawler implements \Countable, \IteratorAggregate
throw new \InvalidArgumentException('The current node list is empty.'); throw new \InvalidArgumentException('The current node list is empty.');
} }
return $this->getNode(0)->nodeValue; $text = $this->getNode(0)->nodeValue;
if (\func_num_args() > 1 && func_get_arg(1)) {
return trim(preg_replace('/(?:\s{2,}+|[^\S ])/', ' ', $text));
}
return $text;
} }
/** /**

View File

@ -253,6 +253,14 @@ abstract class AbstractCrawlerTest extends TestCase
$this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist'); $this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist');
} }
/**
 * Checks that text() only collapses whitespace when its second argument is true.
 *
 * The fixture paragraph spreads "Elsa" and "&lt;3" over several lines, so the
 * raw node value differs from the single-spaced, trimmed string.
 */
public function testNormalizeWhiteSpace()
{
    $normalized = 'Elsa <3';
    $paragraph = $this->createTestCrawler()->filterXPath('//p');

    // Explicit opt-in: whitespace is collapsed and the result is trimmed.
    $this->assertSame($normalized, $paragraph->text(null, true), '->text(null, true) returns the text with normalized whitespace');

    // Explicit opt-out and the no-argument call both keep the raw node value.
    $this->assertNotSame($normalized, $paragraph->text(null, false));
    $this->assertNotSame($normalized, $paragraph->text());
}
public function testEach() public function testEach()
{ {
$data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) { $data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) {
@ -1291,6 +1299,10 @@ HTML;
<li>Two Bis</li> <li>Two Bis</li>
<li>Three Bis</li> <li>Three Bis</li>
</ul> </ul>
<p class="whitespace">
Elsa
&lt;3
</p>
<div id="parent"> <div id="parent">
<div id="child"></div> <div id="child"></div>
<div id="child2" xmlns:foo="http://example.com"></div> <div id="child2" xmlns:foo="http://example.com"></div>