gnu-social/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php

744 lines
24 KiB
PHP
Raw Normal View History

<?php
/**
* @file
* Test the Tree Builder.
*/
namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
use Masterminds\HTML5\Parser\DOMTreeBuilder;
/**
 * Functional tests for the DOM tree builder.
 *
 * These tests are functional, not necessarily unit tests: each one runs a
 * real Scanner and Tokenizer over an HTML string and then inspects the DOM
 * (or the collected parse errors) produced by the DOMTreeBuilder.
 */
class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
{
    /**
     * Errors returned by DOMTreeBuilder::getErrors() for the most recent
     * parse performed through one of the helpers below.
     */
    protected $errors = array();

    /**
     * Convenience function for parsing a complete document.
     *
     * @param string $string  The HTML to parse.
     * @param array  $options Options forwarded to the DOMTreeBuilder constructor.
     *
     * @return mixed Whatever DOMTreeBuilder::document() returns (the tests
     *               below assert that it is a \DOMDocument).
     */
    protected function parse($string, array $options = array())
    {
        $treeBuilder = new DOMTreeBuilder(false, $options);
        $this->runTokenizer($treeBuilder, $string);

        return $treeBuilder->document();
    }

    /**
     * Utility function for parsing a fragment of HTML5.
     *
     * @param string $string The HTML fragment to parse.
     *
     * @return mixed Whatever DOMTreeBuilder::fragment() returns (the tests
     *               below assert that it is a \DOMDocumentFragment).
     */
    protected function parseFragment($string)
    {
        $treeBuilder = new DOMTreeBuilder(true);
        $this->runTokenizer($treeBuilder, $string);

        return $treeBuilder->fragment();
    }

    /**
     * Feeds $string through a Tokenizer wired to $treeBuilder and records
     * the builder's errors in $this->errors.
     *
     * @param DOMTreeBuilder $treeBuilder The event handler receiving tokens.
     * @param string         $string      The markup to tokenize.
     */
    private function runTokenizer(DOMTreeBuilder $treeBuilder, $string)
    {
        $scanner = new Scanner($string);
        $tokenizer = new Tokenizer($scanner, $treeBuilder);
        $tokenizer->parse();

        $this->errors = $treeBuilder->getErrors();
    }

    /**
     * A minimal document yields a UTF-8 DOMDocument whose root is an
     * XHTML-namespaced <html> element.
     */
    public function testDocument()
    {
        $html = '<!DOCTYPE html><html></html>';
        $doc = $this->parse($html);

        $this->assertEquals('UTF-8', $doc->encoding);
        $this->assertInstanceOf('\DOMDocument', $doc);
        $this->assertEquals('html', $doc->documentElement->tagName);
        $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI);
    }

    /**
     * These bare ampersands inside attribute values parse without errors
     * and are serialized as &amp;.
     */
    public function testBareAmpersand()
    {
        $html = "<!doctype html>
<html>
<body>
<img src='a&b' />
<img src='a&=' />
<img src='a&=c' />
<img src='a&=9' />
</body>
</html>";
        $doc = $this->parse($html);

        $this->assertEmpty($this->errors);
        $this->assertXmlStringEqualsXmlString('
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
<img src="a&amp;b"/>
<img src="a&amp;="/>
<img src="a&amp;=c"/>
<img src="a&amp;=9"/>
</body>
</html>', $doc->saveXML());
    }

    /**
     * These ampersand forms inside attribute values each report one error
     * but are still serialized as &amp;.
     */
    public function testBareAmpersandNotAllowedInAttributes()
    {
        $html = "<!doctype html>
<html>
<body>
<img src='a&' />
<img src='a&+' />
</body>
</html>";
        $doc = $this->parse($html);

        $this->assertCount(2, $this->errors);
        $this->assertXmlStringEqualsXmlString('
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
<img src="a&amp;"/>
<img src="a&amp;+"/>
</body>
</html>', $doc->saveXML());
    }

    /**
     * Bare ampersands in body text: five of the six lines report an error
     * (the one followed by a space is marked as valid in the fixture), and
     * all of them are serialized as &amp;.
     */
    public function testBareAmpersandNotAllowedInBody()
    {
        $html = '<!doctype html>
<html>
<body>
a&b
a&=
a&=c
a&=9
a&+
a& -- valid
</body>
</html>';
        $doc = $this->parse($html);

        $this->assertCount(5, $this->errors);
        $this->assertXmlStringEqualsXmlString('
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
a&amp;b
a&amp;=
a&amp;=c
a&amp;=9
a&amp;+
a&amp; -- valid
</body>
</html>', $doc->saveXML());
    }

    /**
     * An unterminated numeric entity at end of input is kept as literal
     * text and reported as an error.
     */
    public function testEntityAtEndOfFile()
    {
        $fragment = $this->parseFragment('&#');

        $this->assertInstanceOf('DOMDocumentFragment', $fragment);
        $this->assertSame('&#', $fragment->textContent);
        $this->assertEquals('Line 1, Col 2: Expected &#DEC; &#HEX;, got EOF', $this->errors[0]);
    }

    /**
     * Mixed-case tag names (<Title>...</TitlE>) are found under their
     * lower-case names in the XHTML namespace.
     */
    public function testStrangeCapitalization()
    {
        $html = '<!doctype html>
<html>
<head>
<Title>Hello, world!</TitlE>
</head>
<body>TheBody<script>foo</script></body>
</html>';
        $doc = $this->parse($html);

        $this->assertInstanceOf('\DOMDocument', $doc);
        $this->assertEquals('html', $doc->documentElement->tagName);

        $xpath = new \DOMXPath($doc);
        $xpath->registerNamespace('x', 'http://www.w3.org/1999/xhtml');

        $this->assertEquals('Hello, world!', $xpath->query('//x:title')->item(0)->nodeValue);
        $this->assertEquals('foo', $xpath->query('//x:script')->item(0)->nodeValue);
    }

    /**
     * With the disable_html_ns option the root element has no namespace.
     */
    public function testDocumentWithDisabledNamespaces()
    {
        $html = '<!DOCTYPE html><html></html>';
        $doc = $this->parse($html, array('disable_html_ns' => true));

        $this->assertInstanceOf('\DOMDocument', $doc);
        $this->assertEquals('html', $doc->documentElement->tagName);
        $this->assertNull($doc->documentElement->namespaceURI);
    }

    /**
     * With the target_document option the builder returns the very same
     * DOMDocument instance that was passed in.
     */
    public function testDocumentWithATargetDocument()
    {
        $targetDom = new \DOMDocument();
        $html = '<!DOCTYPE html><html></html>';
        $doc = $this->parse($html, array('target_document' => $targetDom));

        $this->assertInstanceOf('\DOMDocument', $doc);
        $this->assertSame($doc, $targetDom);
        $this->assertEquals('html', $doc->documentElement->tagName);
    }

    /**
     * With xmlNamespaces enabled, no html5-php-fake-id-attribute attribute
     * is left anywhere in the resulting document.
     */
    public function testDocumentFakeAttrAbsence()
    {
        $html = '<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><body>foo</body></html>';
        $doc = $this->parse($html, array('xmlNamespaces' => true));

        $xp = new \DOMXPath($doc);
        $this->assertEquals(0, $xp->query('//@html5-php-fake-id-attribute')->length);
    }

    /**
     * Fragment parsing yields a DOMDocumentFragment holding the top-level
     * elements in source order.
     */
    public function testFragment()
    {
        $html = '<div>test</div><span>test2</span>';
        $fragment = $this->parseFragment($html);

        $this->assertInstanceOf('\DOMDocumentFragment', $fragment);
        $this->assertTrue($fragment->hasChildNodes());

        $first = $fragment->childNodes->item(0);
        $this->assertEquals('div', $first->tagName);
        $this->assertEquals('test', $first->textContent);

        $second = $fragment->childNodes->item(1);
        $this->assertEquals('span', $second->tagName);
        $this->assertEquals('test2', $second->textContent);
    }

    /**
     * The basic html/head/title/body structure is mirrored in the DOM.
     */
    public function testElements()
    {
        $html = '<!DOCTYPE html><html><head><title></title></head><body></body></html>';
        $doc = $this->parse($html);
        $root = $doc->documentElement;

        $this->assertEquals('html', $root->tagName);
        $this->assertEquals('html', $root->localName);
        $this->assertEquals('html', $root->nodeName);
        $this->assertEquals(2, $root->childNodes->length);

        $kids = $root->childNodes;
        $this->assertEquals('head', $kids->item(0)->tagName);
        $this->assertEquals('body', $kids->item(1)->tagName);

        $head = $kids->item(0);
        $this->assertEquals(1, $head->childNodes->length);
        $this->assertEquals('title', $head->childNodes->item(0)->tagName);
    }

    /**
     * The xlink: and xml: attribute prefixes resolve to their namespaces
     * without any declaration in the markup.
     */
    public function testImplicitNamespaces()
    {
        $dom = $this->parse('<!DOCTYPE html><html><body><a xlink:href="bar">foo</a></body></html>');
        $a = $dom->getElementsByTagName('a')->item(0);
        $attr = $a->getAttributeNode('xlink:href');
        $this->assertEquals('http://www.w3.org/1999/xlink', $attr->namespaceURI);

        $dom = $this->parse('<!DOCTYPE html><html><body><a xml:base="bar">foo</a></body></html>');
        $a = $dom->getElementsByTagName('a')->item(0);
        $attr = $a->getAttributeNode('xml:base');
        $this->assertEquals('http://www.w3.org/XML/1998/namespace', $attr->namespaceURI);
    }

    /**
     * Prefixes supplied through the implicitNamespaces option apply to both
     * attributes and elements.
     */
    public function testCustomImplicitNamespaces()
    {
        // The same option set is used for the attribute and the element case.
        $options = array(
            'implicitNamespaces' => array(
                't' => 'http://www.example.com',
            ),
        );

        $dom = $this->parse('<!DOCTYPE html><html><body><a t:href="bar">foo</a></body></html>', $options);
        $a = $dom->getElementsByTagName('a')->item(0);
        $attr = $a->getAttributeNode('t:href');
        $this->assertEquals('http://www.example.com', $attr->namespaceURI);

        $dom = $this->parse('<!DOCTYPE html><html><body><t:a>foo</t:a></body></html>', $options);
        $list = $dom->getElementsByTagNameNS('http://www.example.com', 'a');
        $this->assertEquals(1, $list->length);
    }

    /**
     * With xmlNamespaces enabled, an xmlns:t declaration in the markup is
     * honoured for prefixed attributes and elements.
     */
    public function testXmlNamespaces()
    {
        $dom = $this->parse(
            '<!DOCTYPE html><html>
<t:body xmlns:t="http://www.example.com">
<a t:href="bar">foo</a>
</body>
<div>foo</div>
</html>', array(
                'xmlNamespaces' => true,
            ));

        $a = $dom->getElementsByTagName('a')->item(0);
        $attr = $a->getAttributeNode('t:href');
        $this->assertEquals('http://www.example.com', $attr->namespaceURI);

        $list = $dom->getElementsByTagNameNS('http://www.example.com', 'body');
        $this->assertEquals(1, $list->length);
    }

    /**
     * With xmlNamespaces enabled, default and prefixed namespace
     * declarations are scoped to the element that declares them.
     */
    public function testXmlNamespaceNesting()
    {
        $dom = $this->parse(
            '<!DOCTYPE html><html>
<body xmlns:x="http://www.prefixed.com" id="body">
<a id="bar1" xmlns="http://www.prefixed.com/bar1">
<b id="bar4" xmlns="http://www.prefixed.com/bar4"><x:prefixed id="prefixed"/></b>
</a>
<svg id="svg"></svg>
<c id="bar2" xmlns="http://www.prefixed.com/bar2"></c>
<div id="div"></div>
<d id="bar3"></d>
<xn:d xmlns:xn="http://www.prefixed.com/xn" xmlns="http://www.prefixed.com/bar5_x" id="bar5"><x id="bar5_x"/></xn:d>
</body>
</html>', array(
                'xmlNamespaces' => true,
            ));

        $this->assertEmpty($this->errors);

        // Element id => namespace URI the element is expected to end up in.
        $expectedNamespaces = array(
            'div' => 'http://www.w3.org/1999/xhtml',
            'body' => 'http://www.w3.org/1999/xhtml',
            'bar1' => 'http://www.prefixed.com/bar1',
            'bar2' => 'http://www.prefixed.com/bar2',
            'bar3' => 'http://www.w3.org/1999/xhtml',
            'bar4' => 'http://www.prefixed.com/bar4',
            'svg' => 'http://www.w3.org/2000/svg',
            'prefixed' => 'http://www.prefixed.com',
            'bar5' => 'http://www.prefixed.com/xn',
            'bar5_x' => 'http://www.prefixed.com/bar5_x',
        );

        foreach ($expectedNamespaces as $id => $namespace) {
            $element = $dom->getElementById($id);

            // Fail with a readable message instead of a fatal error on null.
            $this->assertNotNull($element, sprintf('Element with id "%s" was not found.', $id));
            $this->assertEquals(
                $namespace,
                $element->namespaceURI,
                sprintf('Unexpected namespace for element with id "%s".', $id)
            );
        }
    }

    /**
     * <hr> and <div> appearing inside a <p> close the paragraph and end up
     * as its siblings.
     */
    public function testMoveNonInlineElements()
    {
        $message = 'Move non-inline elements outside of inline containers.';

        $doc = $this->parse('<p>line1<br/><hr/>line2</p>');
        $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1<br/></p><hr/>line2</html>', $doc->saveXML($doc->documentElement), $message);

        $doc = $this->parse('<p>line1<div>line2</div></p>');
        $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1</p><div>line2</div></html>', $doc->saveXML($doc->documentElement), $message);
    }

    /**
     * Attributes are preserved on the element, and the id attribute makes
     * the element reachable through getElementById().
     */
    public function testAttributes()
    {
        $html = "<!DOCTYPE html>
<html>
<head><title></title></head>
<body id='a' class='b c'></body>
</html>";
        $doc = $this->parse($html);
        $root = $doc->documentElement;

        $body = $root->getElementsByTagName('body')->item(0);
        $this->assertEquals('body', $body->tagName);
        $this->assertTrue($body->hasAttributes());
        $this->assertEquals('a', $body->getAttribute('id'));
        $this->assertEquals('b c', $body->getAttribute('class'));

        $body2 = $doc->getElementById('a');
        $this->assertEquals('body', $body2->tagName);
        $this->assertEquals('a', $body2->getAttribute('id'));
    }

    /**
     * Lower-cased SVG attribute and element names are restored to their
     * camelCase spelling (viewBox, textLength, animateColor).
     */
    public function testSVGAttributes()
    {
        $html = "<!DOCTYPE html>
<html><body>
<svg width='150' viewbox='2'>
<rect textlength='2'/>
<animatecolor>foo</animatecolor>
</svg>
</body></html>";
        $doc = $this->parse($html);
        $root = $doc->documentElement;

        $svg = $root->getElementsByTagName('svg')->item(0);
        $this->assertTrue($svg->hasAttribute('viewBox'));

        $rect = $root->getElementsByTagName('rect')->item(0);
        $this->assertTrue($rect->hasAttribute('textLength'));

        $ac = $root->getElementsByTagName('animateColor');
        $this->assertEquals(1, $ac->length);
    }

    /**
     * The lower-cased MathML attribute definitionurl is restored to
     * definitionURL.
     */
    public function testMathMLAttribute()
    {
        $html = '<!doctype html>
<html lang="en">
<body>
<math>
<mi>x</mi>
<csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
<mo>&PlusMinus;</mo>
</csymbol>
<mi>y</mi>
</math>
</body>
</html>';
        $doc = $this->parse($html);
        $root = $doc->documentElement;

        $csymbol = $root->getElementsByTagName('csymbol')->item(0);
        $this->assertTrue($csymbol->hasAttribute('definitionURL'));
    }

    /**
     * A document without an <html> tag still gets an html root element
     * containing the parsed content.
     */
    public function testMissingHtmlTag()
    {
        $html = '<!DOCTYPE html><title>test</title>';
        $doc = $this->parse($html);

        $this->assertEquals('html', $doc->documentElement->tagName);
        $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName);
    }

    /**
     * Comments become comment nodes, both inside <html> and ahead of it.
     */
    public function testComment()
    {
        $html = '<html><!--Hello World.--></html>';
        $doc = $this->parse($html);

        $comment = $doc->documentElement->childNodes->item(0);
        $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
        $this->assertEquals('Hello World.', $comment->data);

        $html = '<!--Hello World.--><html></html>';
        $doc = $this->parse($html);

        // NOTE(review): item(0) is presumably a doctype node added by the
        // builder; only positions 1 and 2 are pinned here - confirm.
        $comment = $doc->childNodes->item(1);
        $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
        $this->assertEquals('Hello World.', $comment->data);

        $root = $doc->childNodes->item(2);
        $this->assertEquals(XML_ELEMENT_NODE, $root->nodeType);
        $this->assertEquals('html', $root->tagName);
    }

    /**
     * A CDATA section inside <math> becomes a single CDATA section node.
     */
    public function testCDATA()
    {
        $html = '<!DOCTYPE html><html><math><![CDATA[test]]></math></html>';
        $doc = $this->parse($html);

        $wrapper = $doc->getElementsByTagName('math')->item(0);
        $this->assertEquals(1, $wrapper->childNodes->length);

        $cdata = $wrapper->childNodes->item(0);
        $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType);
        $this->assertEquals('test', $cdata->data);
    }

    /**
     * Character data becomes a text node; text appearing before <head> is
     * dropped and reported as an error.
     */
    public function testText()
    {
        $html = '<!DOCTYPE html><html><head></head><body><math>test</math></body></html>';
        $doc = $this->parse($html);

        $wrapper = $doc->getElementsByTagName('math')->item(0);
        $this->assertEquals(1, $wrapper->childNodes->length);

        $data = $wrapper->childNodes->item(0);
        $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
        $this->assertEquals('test', $data->data);

        // The DomTreeBuilder has special handling for text when in before head mode.
        $html = '<!DOCTYPE html><html>
Foo<head></head><body></body></html>';
        $doc = $this->parse($html);

        $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]);

        $headElement = $doc->documentElement->firstChild;
        $this->assertEquals('head', $headElement->tagName);
    }

    /**
     * Parse errors are exposed as a non-empty list of strings.
     */
    public function testParseErrors()
    {
        $html = '<!DOCTYPE html><html><math><![CDATA[test';
        // Only the recorded errors matter here, so the document is discarded.
        $this->parse($html);

        // We're JUST testing that we can access errors. Actual testing of
        // error messages happen in the Tokenizer's tests.
        $this->assertGreaterThan(0, count($this->errors));
        $this->assertTrue(is_string($this->errors[0]));
    }

    /**
     * Processing instructions are inserted into the DOM, except for a
     * leading <?xml ...?> declaration.
     */
    public function testProcessingInstruction()
    {
        // Test the simple case, which is where PIs are inserted into the DOM.
        $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>');
        $this->assertEquals(1, $doc->documentElement->childNodes->length);

        $pi = $doc->documentElement->firstChild;
        $this->assertInstanceOf('\DOMProcessingInstruction', $pi);
        $this->assertEquals('foo', $pi->nodeName);
        $this->assertEquals('bar', $pi->data);

        // Leading xml PIs should be ignored.
        $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');
        $this->assertEquals(2, $doc->childNodes->length);
        $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0));
        $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1));
    }

    /**
     * An open <p> is closed automatically when <figure> starts.
     */
    public function testAutocloseP()
    {
        $html = '<!DOCTYPE html><html><body><p><figure></body></html>';
        $doc = $this->parse($html);

        $p = $doc->getElementsByTagName('p')->item(0);
        $this->assertEquals(0, $p->childNodes->length);
        $this->assertEquals('figure', $p->nextSibling->tagName);
    }

    /**
     * Unclosed <li> elements are closed by the next <li>, leaving three
     * child nodes under the <ul>.
     */
    public function testAutocloseLI()
    {
        $html = '<!doctype html>
<html lang="en">
<body>
<ul><li>Foo<li>Bar<li>Baz</ul>
</body>
</html>';
        $doc = $this->parse($html);

        $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length;
        $this->assertEquals(3, $length);
    }

    /**
     * A <math> element is placed in the MathML namespace.
     */
    public function testMathML()
    {
        $html = '<!doctype html>
<html lang="en">
<body>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mi>x</mi>
<csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
<mo>&PlusMinus;</mo>
</csymbol>
<mi>y</mi>
</math>
</body>
</html>';
        $doc = $this->parse($html);

        $math = $doc->getElementsByTagName('math')->item(0);
        $this->assertEquals('math', $math->tagName);
        $this->assertEquals('math', $math->nodeName);
        $this->assertEquals('math', $math->localName);
        $this->assertEquals('http://www.w3.org/1998/Math/MathML', $math->namespaceURI);
    }

    /**
     * An <svg> element is placed in the SVG namespace and the lower-cased
     * <textpath> tag is restored to textPath.
     */
    public function testSVG()
    {
        $html = '<!doctype html>
<html lang="en">
<body>
<svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg">
<rect width="1" height="2" x="2" fill="#d2232c" />
<text font-family="Verdana" font-size="32">
<textpath xlink:href="#Foo">
Test Text.
</textPath>
</text>
</svg>
</body>
</html>';
        $doc = $this->parse($html);

        $svg = $doc->getElementsByTagName('svg')->item(0);
        $this->assertEquals('svg', $svg->tagName);
        $this->assertEquals('svg', $svg->nodeName);
        $this->assertEquals('svg', $svg->localName);
        $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);

        $textPath = $doc->getElementsByTagName('textPath')->item(0);
        $this->assertEquals('textPath', $textPath->tagName);
    }

    /**
     * <noscript> parses without errors both in <head> and in <body>.
     */
    public function testNoScript()
    {
        $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
        $doc = $this->parse($html);
        $this->assertEmpty($this->errors);

        $noscript = $doc->getElementsByTagName('noscript')->item(0);
        $this->assertEquals('noscript', $noscript->tagName);

        $html = '<!DOCTYPE html><html><body><noscript><p>No JS</p></noscript></body></html>';
        $doc = $this->parse($html);
        $this->assertEmpty($this->errors);

        $p = $doc->getElementsByTagName('p')->item(0);
        $this->assertEquals('p', $p->tagName);
    }

    /**
     * Regression for issue #13.
     *
     * Content placed directly inside <html>, with no <body>, parses
     * without errors.
     */
    public function testRegressionHTMLNoBody()
    {
        $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
        $doc = $this->parse($html);
        $span = $doc->getElementById('test');

        $this->assertEmpty($this->errors);
        $this->assertEquals('span', $span->tagName);
        $this->assertEquals('Test', $span->textContent);
    }

    /**
     * A registered instruction processor receives each processing
     * instruction's name and data.
     */
    public function testInstructionProcessor()
    {
        $string = '<!DOCTYPE html><html><?foo bar ?></html>';

        // parse() cannot be used here: the processor has to be attached to
        // the builder before tokenizing starts.
        $treeBuilder = new DOMTreeBuilder();
        $is = new InstructionProcessorMock();
        $treeBuilder->setInstructionProcessor($is);
        $this->runTokenizer($treeBuilder, $string);

        $dom = $treeBuilder->document();
        // NOTE(review): the markup has no <div>; presumably the mock inserts
        // one containing "foo" - confirm against InstructionProcessorMock.
        $div = $dom->getElementsByTagName('div')->item(0);

        $this->assertEquals(1, $is->count);
        $this->assertEquals('foo', $is->name);
        $this->assertEquals('bar ', $is->data);
        $this->assertEquals('div', $div->tagName);
        $this->assertEquals('foo', $div->textContent);
    }

    /**
     * <option> elements stay inside the <optgroup> that contains them.
     */
    public function testSelectGroupedOptions()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testSelectGroupedOptions</title>
</head>
<body>
<select>
<optgroup id="first" label="first">
<option value="foo">foo</option>
<option value="bar">bar</option>
<option value="baz">baz</option>
</optgroup>
<optgroup id="second" label="second">
<option value="lorem">lorem</option>
<option value="ipsum">ipsum</option>
</optgroup>
</select>
</body>
</html>
EOM;
        $dom = $this->parse($html);

        $this->assertSame(3, $dom->getElementById('first')->getElementsByTagName('option')->length);
        $this->assertSame(2, $dom->getElementById('second')->getElementsByTagName('option')->length);
    }

    /**
     * Consecutive unclosed <meta> tags produce two sibling elements with
     * no children.
     */
    public function testVoidTag()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testVoidTag</title>
<meta>
<meta>
</head>
<body></body>
</html>
EOM;
        $dom = $this->parse($html);
        $metas = $dom->getElementsByTagName('meta');

        $this->assertSame(2, $metas->length);
        $this->assertSame(0, $metas->item(0)->childNodes->length);
        $this->assertSame(0, $metas->item(1)->childNodes->length);
    }

    /**
     * The self-closing slash on <div /> is ignored, so the following
     * <span> becomes the div's only child.
     */
    public function testIgnoreSelfClosingTag()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testIllegalSelfClosingTag</title>
</head>
<body>
<div /><span>Hello, World!</span></div>
</body>
</html>
EOM;
        $dom = $this->parse($html);

        $this->assertSame(1, $dom->getElementsByTagName('div')->item(0)->childNodes->length);
    }

    /**
     * An <audio> element stays inside its enclosing <p> and keeps its
     * three child nodes.
     */
    public function testIAudioInParagraph()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testIllegalSelfClosingTag</title>
</head>
<body>
<p>
<audio preload="none" controls="controls">
<source src="https://example.com/test.mp3" type="audio/mpeg" />
Your browser does not support the audio element.
</audio>
</p>
</body>
</html>>
</html>
EOM;
        $dom = $this->parse($html);
        $audio = $dom->getElementsByTagName('audio')->item(0);

        $this->assertSame('p', $audio->parentNode->nodeName);
        $this->assertSame(3, $audio->childNodes->length);
    }

    /**
     * A closing </br> tag yields a <br> element like an opening one does,
     * but without the attributes written on it.
     */
    public function testClosingBr()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testClosingBr</title>
</head>
<body>
<p>
This line ends with a normal line break <br class="attribute-should-be-retained">
This line ends with a line break marked up as a closing tag </br class="attribute-should-be-discarded">
</p>
</body>
</html>>
</html>
EOM;
        $dom = $this->parse($html);
        $breaks = $dom->getElementsByTagName('br');

        $this->assertSame(2, $breaks->length);
        $this->assertSame(1, $breaks->item(0)->attributes->length);
        $this->assertSame(0, $breaks->item(1)->attributes->length);
    }
}