html5 = $this->getInstance();
/**
 * Parse and serialize a string.
 */
protected function cycle($html)
// Round-trips markup: the given string is passed to loadHTML() and the
// resulting DOM is handed to saveHTML(), whose output is returned.
// NOTE(review): the literals concatenated around $html look truncated in this
// copy (presumably a document wrapper) - confirm against the upstream file.
$dom = $this->html5->loadHTML('
' . $html . '');
$out = $this->html5->saveHTML($dom);
return $out;
/**
 * Parse a fragment and serialize it straight back.
 *
 * @param string $fragment Markup handed to loadHTMLFragment().
 *
 * @return mixed The value saveHTML() produces for the parsed fragment.
 */
protected function cycleFragment($fragment)
{
    $parsed = $this->html5->loadHTMLFragment($fragment);

    return $this->html5->saveHTML($parsed);
}
/**
 * Loads the markup in $html and expects the first element returned by
 * getElementsByTagName("image") to be a DOMElement.
 *
 * NOTE(review): the markup assigned to $html is missing from this copy (the
 * literal is opened but never closed); presumably it held an svg element
 * containing an image tag - restore it from the upstream file.
 */
public function testImageTagsInSvg()
$html = '
$doc = $this->html5->loadHTML($html);
$this->assertInstanceOf('DOMElement', $doc->getElementsByTagName('image')->item(0));
/**
 * Calls loadHTML() and loadHTMLFragment() with the implicitNamespaces and
 * xmlNamespaces options, then registers the "t" prefix on a DOMXPath and
 * expects the query //t:tag to match exactly one node in each result.
 *
 * NOTE(review): the markup literals, the namespace URIs (empty strings here)
 * and the closing of both array( calls appear to have been lost in this
 * copy - confirm against the upstream file.
 */
public function testLoadOptions()
// doc
$dom = $this->html5->loadHTML($this->wrap(' '), array(
'implicitNamespaces' => array('t' => ''),
'xmlNamespaces' => true,
$this->assertInstanceOf('\DOMDocument', $dom);
$xpath = new \DOMXPath($dom);
$xpath->registerNamespace('t', '');
$this->assertEquals(1, $xpath->query('//t:tag')->length);
// doc fragment
$frag = $this->html5->loadHTMLFragment(' ', array(
'implicitNamespaces' => array('t' => ''),
'xmlNamespaces' => true,
$this->assertInstanceOf('\DOMDocumentFragment', $frag);
$xpath = new \DOMXPath($frag->ownerDocument);
$xpath->registerNamespace('t', '');
$this->assertEquals(1, $xpath->query('//t:tag', $frag)->length);
/**
 * The UTF-8 fixture must load as a DOMDocument and its accented characters
 * must still be present in the output of DOMDocument::saveHTML().
 */
public function testEncodingUtf8()
{
    $fixture = __DIR__ . '/Fixtures/encoding/utf-8.html';
    $document = $this->html5->load($fixture);

    $this->assertInstanceOf('\DOMDocument', $document);
    $this->assertContains('Žťčýů', $document->saveHTML());
}
/**
 * The Windows-1252 fixture is loaded with an explicit "encoding" option; the
 * dump is then converted to UTF-8 and searched for each expected character.
 */
public function testEncodingWindows1252()
{
    $options = array(
        'encoding' => 'Windows-1252',
    );
    $document = $this->html5->load(__DIR__ . '/Fixtures/encoding/windows-1252.html', $options);

    $this->assertInstanceOf('\DOMDocument', $document);

    $dumpedAsUtf8 = mb_convert_encoding($document->saveHTML(), 'UTF-8', 'Windows-1252');

    // mb_strpos() yields false only when the character is absent; a match at
    // offset 0 still satisfies assertNotFalse().
    foreach (array('Ž', 'è', 'ý', 'ù') as $character) {
        $this->assertNotFalse(mb_strpos($dumpedAsUtf8, $character));
    }
}
/**
 * loadHTML() is expected to hand back a DOMDocument for this input.
 *
 * NOTE(review): the input literal is an empty string in this copy; given the
 * test name it presumably held malformed markup - confirm upstream.
 */
public function testErrors()
{
    $document = $this->html5->loadHTML('');

    $this->assertInstanceOf('\DOMDocument', $document);
}
/**
 * load() must accept both a file path and an open stream resource, and
 * loadHTMLFile() a file path; every call is expected to return a DOMDocument.
 */
public function testLoad()
{
    $path = __DIR__ . '/Html5Test.html';

    // Load by file name.
    $dom = $this->html5->load($path);
    $this->assertInstanceOf('\DOMDocument', $dom);

    // Load from an already opened stream.
    $file = fopen($path, 'r');
    $dom = $this->html5->load($file);
    // Release the handle so the test does not leak an open file. The guard
    // keeps this safe if the stream is no longer a live resource.
    if (is_resource($file)) {
        fclose($file);
    }
    $this->assertInstanceOf('\DOMDocument', $dom);

    // Load through the dedicated file helper.
    $dom = $this->html5->loadHTMLFile($path);
    $this->assertInstanceOf('\DOMDocument', $dom);
}
/**
 * loadHTML() must turn the contents of Html5Test.html into a DOMDocument.
 */
public function testLoadHTML()
{
    $markup = file_get_contents(__DIR__ . '/Html5Test.html');
    $document = $this->html5->loadHTML($markup);

    $this->assertInstanceOf('\DOMDocument', $document);
}
/**
 * Loads the markup held in $contents, expects a DOMDocument, and compares the
 * saveHTML() output with $expected.
 *
 * NOTE(review): both string literals are opened but their content and closing
 * quotes are missing from this copy; judging by the test name the markup
 * presumably contained comments - restore it from the upstream file.
 */
public function testLoadHTMLWithComments()
$contents = '
$dom = $this->html5->loadHTML($contents);
$this->assertInstanceOf('\DOMDocument', $dom);
$expected = '
$this->assertEquals($expected, $this->html5->saveHTML($dom));
/**
 * loadHTMLFragment() is expected to return a DOMDocumentFragment.
 *
 * NOTE(review): the fragment literal is empty in this copy; it presumably
 * held markup originally - confirm upstream.
 */
public function testLoadHTMLFragment()
{
    $fragment = '';
    $parsed = $this->html5->loadHTMLFragment($fragment);

    $this->assertInstanceOf('\DOMDocumentFragment', $parsed);
}
/**
 * Loads Html5Test.html, serializes the document with saveHTML() and matches
 * the resulting string against a regular expression.
 *
 * NOTE(review): the regex literal contains a raw line break and looks
 * truncated in this copy (markup around the text seems to be missing) -
 * confirm against the upstream file.
 */
public function testSaveHTML()
$dom = $this->html5->load(__DIR__ . '/Html5Test.html');
$this->assertInstanceOf('\DOMDocument', $dom);
$saved = $this->html5->saveHTML($dom);
$this->assertRegExp('|This is a test.
|', $saved);
/**
 * A fragment that is parsed and then serialized must come back unchanged.
 *
 * NOTE(review): the fragment literal is empty in this copy; it presumably
 * held markup originally - confirm upstream.
 */
public function testSaveHTMLFragment()
{
    $fragment = '';
    $parsed = $this->html5->loadHTMLFragment($fragment);
    $serialized = $this->html5->saveHTML($parsed);

    $this->assertEquals($fragment, $serialized);
}
/**
 * save() must write the serialized document both to an open stream resource
 * and to a file identified by its path.
 *
 * NOTE(review): the two regex literals below contain a raw line break and
 * look truncated in this copy (markup around the text seems to be missing);
 * they are kept byte-for-byte - confirm against the upstream file.
 */
public function testSave()
{
    $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
    $this->assertInstanceOf('\DOMDocument', $dom);

    // Test resource
    $file = fopen('php://temp', 'w');
    $this->html5->save($dom, $file);
    // Read everything back from offset 0, then release the stream so the
    // test does not leak an open handle.
    $content = stream_get_contents($file, -1, 0);
    fclose($file);
    $this->assertRegExp('|This is a test.
|', $content);

    // Test file
    $tmpfname = tempnam(sys_get_temp_dir(), 'html5-php');
    $this->html5->save($dom, $tmpfname);
    $content = file_get_contents($tmpfname);
    // Remove the temporary file before asserting, so a failing assertion
    // cannot leave it behind in the temp directory.
    unlink($tmpfname);
    $this->assertRegExp('|This is a test.
|', $content);
}
// This test reads a document into a dom, turn the dom into a document,
// then tries to read that document again. This makes sure we are reading,
// and generating a document that works at a high level.
public function testItWorks()
{
    // First pass: file on disk to DOM.
    $first = $this->html5->load(__DIR__ . '/Html5Test.html');
    $this->assertInstanceOf('\DOMDocument', $first);

    // Second pass: feed the serialized output straight back into the parser.
    $serialized = $this->html5->saveHTML($first);
    $second = $this->html5->loadHTML($serialized);
    $this->assertInstanceOf('\DOMDocument', $second);
}
/**
 * getOptions() must report encode_entities as false when getInstance() is
 * called without arguments, and must echo back the options passed to it.
 */
public function testConfig()
{
    $defaults = $this->getInstance()->getOptions();
    $this->assertEquals(false, $defaults['encode_entities']);

    $custom = array(
        'foo' => 'bar',
        'encode_entities' => true,
    );
    $configured = $this->getInstance($custom)->getOptions();
    $this->assertEquals('bar', $configured['foo']);
    $this->assertEquals(true, $configured['encode_entities']);

    // Need to reset to original so future tests pass as expected.
    // $this->getInstance()->setOption('encode_entities', false);
}
/**
 * Parses a document and checks mixed-case handling: the first svg element
 * must expose its viewBox attribute as "0 0 3 2", the first textPath element
 * must report the tag name "textPath" (not "textpath"), and the serialized
 * output is matched against two regular expressions.
 *
 * NOTE(review): the markup argument of loadHTML( is unterminated and both
 * regex patterns look emptied in this copy (the markup seems to have been
 * stripped) - restore them from the upstream file.
 */
public function testSvg()
$dom = $this->html5->loadHTML(
foo bar baz
Test Text.
// Test a mixed case attribute.
$list = $dom->getElementsByTagName('svg');
$svg = $list->item(0);
$this->assertEquals('0 0 3 2', $svg->getAttribute('viewBox'));
// Test a mixed case tag.
// Note: getElementsByTagName is not case sensitive.
$list = $dom->getElementsByTagName('textPath');
$textPath = $list->item(0);
$this->assertEquals('textPath', $textPath->tagName);
$this->assertNotEquals('textpath', $textPath->tagName);
$html = $this->html5->saveHTML($dom);
$this->assertRegExp('||', $html);
$this->assertRegExp('| |', $html);
/**
 * Parses a document and compares the definitionurl attribute of the first
 * div and the definitionURL attribute of the first csymbol with expected
 * values, then matches the serialized output against two regular expressions.
 * The result of the "math" lookup is overwritten by the "div" lookup on the
 * next statement and is never read.
 *
 * NOTE(review): the markup argument of loadHTML( is unterminated, and the
 * expected attribute values and regex patterns are empty or near-empty in
 * this copy (markup and URLs seem to have been stripped) - restore them from
 * the upstream file.
 */
public function testMathMl()
$dom = $this->html5->loadHTML(
foo bar baz
$list = $dom->getElementsByTagName('math');
$list = $dom->getElementsByTagName('div');
$div = $list->item(0);
$this->assertEquals('', $div->getAttribute('definitionurl'));
$list = $dom->getElementsByTagName('csymbol');
$csymbol = $list->item(0);
$this->assertEquals('', $csymbol->getAttribute('definitionURL'));
$html = $this->html5->saveHTML($dom);
$this->assertRegExp('||', $html);
$this->assertRegExp('|y |', $html);
/**
 * Parses a fragment with loadHTMLFragment(), serializes it with saveHTML()
 * and matches the markup against two regular expressions.
 *
 * NOTE(review): the fragment literal has lost its opening quote and its
 * markup in this copy, and the regex patterns look stripped as well -
 * restore them from the upstream file.
 */
public function testUnknownElements()
// The : should not have special handling according to section 2.9 of the
// spec. This is different than XML. Since we do not know these elements
// they are handled as normal elements. Note, to do this is really
// an invalid example and you should not embed prefixed xml in html5.
$dom = $this->html5->loadHTMLFragment(
Big rectangle thing
um, yeah ');
$markup = $this->html5->saveHTML($dom);
$this->assertRegExp('|Big rectangle thing |', $markup);
$this->assertRegExp('|um, yeah |', $markup);
public function testElements()
// Should have content.
$res = $this->cycle('FOO
|', $res);
// Should be empty
$res = $this->cycle(' ');
$this->assertRegExp('| |', $res);
// Should have content.
$res = $this->cycleFragment('FOO
|', $res);
// Should be empty
$res = $this->cycleFragment(' ');
$this->assertRegExp('| |', $res);
// Elements with dashes and underscores
$res = $this->cycleFragment(' ');
$this->assertRegExp('| |', $res);
$res = $this->cycleFragment(' ');
$this->assertRegExp('| |', $res);
// Should have no closing tag.
$res = $this->cycle(' ');
$this->assertRegExp('| Error