forked from GNUsocial/gnu-social
		
	
		
			
				
	
	
		
			982 lines
		
	
	
		
			31 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			982 lines
		
	
	
		
			31 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
 | 
						|
namespace Masterminds\HTML5\Tests\Parser;
 | 
						|
 | 
						|
use Masterminds\HTML5\Parser\UTF8Utils;
 | 
						|
use Masterminds\HTML5\Parser\Scanner;
 | 
						|
use Masterminds\HTML5\Parser\Tokenizer;
 | 
						|
 | 
						|
class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
 | 
						|
{
 | 
						|
    // ================================================================
 | 
						|
    // Additional assertions.
 | 
						|
    // ================================================================
 | 
						|
 | 
						|
    /**
     * Asserts that an event carries the expected name and the expected payload.
     *
     * @param string            $type    Expected event name, compared with $event['name']
     * @param string|array|null $expects When an array, it is compared with the whole of
     *                                   $event['data']; otherwise it is compared with
     *                                   $event['data'][0], or with null when
     *                                   $event['data'] is not an array
     * @param array             $event   Event record holding 'name' and 'data' entries
     */
    public function assertEventEquals($type, $expects, $event)
    {
        $dump = print_r($event, true);

        $this->assertEquals($type, $event['name'], "Event $type for " . $dump);

        if (!is_array($expects)) {
            // Scalar expectation: only the first data element is of interest.
            $first = null;
            if (is_array($event['data'])) {
                $first = $event['data'][0];
            }

            $this->assertEquals($expects, $first, "Event $type should equal $expects: " . $dump);

            return;
        }

        // Array expectation: the complete data payload has to match.
        $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, true) . ': ' . $dump);
    }
 | 
						|
 | 
						|
    /**
     * Asserts that the given event is named 'error'.
     *
     * @param array $event Event record; its 'name' entry must equal 'error'
     */
    public function assertEventError($event)
    {
        $message = 'Expected error for event: ' . print_r($event, true);

        $this->assertEquals('error', $event['name'], $message);
    }
 | 
						|
 | 
						|
    /**
     * Runs a map of input => expectation pairs through the parser.
     *
     * For every entry the key is handed to $this->parse(). When $depth is
     * greater than zero, the value returned by depth() must equal it. The
     * event at index 0 is then verified with assertEventEquals().
     *
     * @param string $name  Expected name of the event at index 0
     * @param int    $depth Expected depth() value; the check is skipped unless > 0
     * @param array  $tests Map of input string => expected data (see assertEventEquals())
     * @param bool   $debug When true, each input and its expectation is printed to STDOUT
     */
    protected function isAllGood($name, $depth, $tests, $debug = false)
    {
        $checkDepth = $depth > 0;

        foreach ($tests as $input => $expected) {
            if ($debug) {
                fprintf(STDOUT, "%s expects %s\n", $input, print_r($expected, true));
            }

            $stack = $this->parse($input);

            if ($checkDepth) {
                $this->assertEquals($depth, $stack->depth(), "Expected depth $depth for test $input." . print_r($stack, true));
            }

            $this->assertEventEquals($name, $expected, $stack->get(0));
        }
    }
 | 
						|
 | 
						|
    // ================================================================
 | 
						|
    // Test cases.
 | 
						|
    // ================================================================
 | 
						|
    /**
     * Parsing an empty string yields a single event, and that event is 'eof'.
     */
    public function testParse()
    {
        $pair = $this->createTokenizer('');
        $tokenizer = $pair[0];
        $stack = $pair[1];

        $tokenizer->parse();
        $first = $stack->get(0);

        $this->assertEquals(1, $stack->depth());
        $this->assertEquals('eof', $first['name']);
    }
 | 
						|
 | 
						|
    /**
     * Whitespace-only input is reported as a 'text' event holding the
     * original spaces; depth() is expected to be 2 afterwards.
     */
    public function testWhitespace()
    {
        $input = '    ';

        $pair = $this->createTokenizer($input);
        $tokenizer = $pair[0];
        $stack = $pair[1];

        $tokenizer->parse();

        $this->assertEquals(2, $stack->depth());

        $first = $stack->get(0);

        $this->assertEquals('text', $first['name']);
        $this->assertEquals($input, $first['data'][0]);
    }
 | 
						|
 | 
						|
    /**
     * Character references in text are decoded, and malformed ones raise an error.
     *
     * The inputs are written as raw entity text on purpose. If they are stored
     * pre-decoded they collapse into duplicate '&' array keys and a bare '<',
     * and the test stops exercising character references at all.
     */
    public function testCharacterReference()
    {
        // Input markup => text expected in the first event.
        $good = array(
            '&amp;' => '&',      // named reference
            '&#x0003c;' => '<',  // hexadecimal numeric reference
            '&#38;' => '&',      // decimal numeric reference
            '&' => '&',          // a lone ampersand is passed through as text
        );
        $this->isAllGood('text', 2, $good);

        // Test with broken charref: unknown name, bad hex digits, bad decimal digits.
        $broken = array(
            '&foo',
            '&#xfoo',
            '&#foo',
        );
        foreach ($broken as $str) {
            $events = $this->parse($str);
            $e1 = $events->get(0);
            $this->assertEquals('error', $e1['name'], "Expected error as first event for '$str'");
        }

        // FIXME: Once the text processor is done, need to verify that the
        // tokens are transformed correctly into text.
    }
 | 
						|
 | 
						|
    /**
     * Malformed markup declarations and processing-instruction-like input
     * must raise an error first and then be emitted verbatim as a comment.
     */
    public function testBogusComment()
    {
        $samples = array(
            '</+this is a bogus comment. +>',
            '<!+this is a bogus comment. !>',
            '<!D OCTYPE foo bar>',
            '<!DOCTYEP foo bar>',
            '<![CADATA[ TEST ]]>',
            '<![CDATA Hello ]]>',
            '<![CDATA[ Hello [[>',
            '<!CDATA[[ test ]]>',
            '<![CDATA[',
            '<![CDATA[hellooooo hello',
            '<? Hello World ?>',
            '<? Hello World',
        );

        foreach ($samples as $markup) {
            $stack = $this->parse($markup);

            // Event 0 is the parse error, event 1 the comment with the raw input.
            $this->assertEventError($stack->get(0));
            $this->assertEventEquals('comment', $markup, $stack->get(1));
        }
    }
 | 
						|
 | 
						|
    /**
     * End tags: well-formed ones, recoverable failures, and inputs that
     * degrade to bogus comments.
     */
    public function testEndTag()
    {
        // Well-formed end tags: the first event is 'endTag' with the lower-cased name.
        $this->isAllGood('endTag', 2, array(
            '</a>' => 'a',
            '</test>' => 'test',
            "</test\n      >" => 'test',
            '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
            // See 8.2.4.10, which requires this and does not say error.
            '</a<b>' => 'a<b',
        ));

        // Both groups below must report an error first and then still
        // produce the named event; only the event name differs.
        $recovered = array(
            // Recoverable failures
            'endTag' => array(
                '</a class="monkey">' => 'a',
                '</a <b>' => 'a',
                '</a <b <c>' => 'a',
                '</a is the loneliest letter>' => 'a',
                '</a' => 'a',
            ),
            // BogoComments
            'comment' => array(
                '</>' => '</>',
                '</ >' => '</ >',
                '</ a>' => '</ a>',
            ),
        );

        foreach ($recovered as $eventName => $samples) {
            foreach ($samples as $markup => $expected) {
                $stack = $this->parse($markup);
                $this->assertEquals(3, $stack->depth());

                // Should have triggered an error.
                $this->assertEventError($stack->get(0));

                // Should have tried to parse anyway.
                $this->assertEventEquals($eventName, $expected, $stack->get(1));
            }
        }
    }
 | 
						|
 | 
						|
    /**
     * Comments: well-formed ones yield a 'comment' event with the inner text;
     * malformed or unterminated ones raise an error before the comment.
     */
    public function testComment()
    {
        // A depth of 0 makes isAllGood() skip the depth check, so only the
        // first event is verified for these inputs.
        $this->isAllGood('comment', 0, array(
            '<!--easy-->' => 'easy',
            '<!-- 1 > 0 -->' => ' 1 > 0 ',
            '<!-- --$i -->' => ' --$i ',
            '<!----$i-->' => '--$i',
            "<!--\nHello World.\na-->" => "\nHello World.\na",
            '<!-- <!-- -->' => ' <!-- ',
        ));

        $broken = array(
            '<!-->' => '',
            '<!--Hello' => 'Hello',
            "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
            '<!--' => '',
        );

        foreach ($broken as $markup => $text) {
            $stack = $this->parse($markup);

            $this->assertEquals(3, $stack->depth());
            $this->assertEventError($stack->get(0));
            $this->assertEventEquals('comment', $text, $stack->get(1));
        }
    }
 | 
						|
 | 
						|
    /**
     * CDATA sections yield a 'cdata' event carrying the enclosed text unchanged.
     */
    public function testCDATASection()
    {
        $this->isAllGood('cdata', 2, array(
            '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
            '<![CDATA[CDATA]]>' => 'CDATA',
            '<![CDATA[ ]] > ]]>' => ' ]] > ',
            '<![CDATA[ ]]>' => ' ',
        ));
    }
 | 
						|
 | 
						|
    /**
     * DOCTYPE declarations, both well-formed and malformed.
     *
     * Every expectation is the four-element data array of the 'doctype'
     * event. The elements appear to be (name, ID type, ID value, flag);
     * the last element is false for all well-formed inputs and true for all
     * malformed ones below.
     */
    public function testDoctype()
    {
        // Expectations shared by several well-formed inputs.
        $plainHtml = array('html', 0, null, false);
        $publicFooBar = array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false);
        $systemFooBar = array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false);

        $this->isAllGood('doctype', 2, array(
            '<!DOCTYPE html>' => $plainHtml,
            '<!doctype html>' => $plainHtml,
            '<!DocType html>' => $plainHtml,
            "<!DOCTYPE\nhtml>" => $plainHtml,
            "<!DOCTYPE\fhtml>" => $plainHtml,
            '<!DOCTYPE html PUBLIC "foo bar">' => $publicFooBar,
            "<!DOCTYPE html PUBLIC 'foo bar'>" => $publicFooBar,
            '<!DOCTYPE      html      PUBLIC     "foo bar"    >' => $publicFooBar,
            "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => $publicFooBar,
            '<!DOCTYPE html SYSTEM "foo bar">' => $systemFooBar,
            "<!DOCTYPE html SYSTEM 'foo bar'>" => $systemFooBar,
            '<!DOCTYPE      html      SYSTEM "foo/bar"    >' => array(
                'html',
                EventStack::DOCTYPE_SYSTEM,
                'foo/bar',
                false,
            ),
            "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => $systemFooBar,
        ));

        // Expectations shared by several malformed inputs.
        $nameless = array(null, EventStack::DOCTYPE_NONE, null, true);
        $fooOnly = array('foo', EventStack::DOCTYPE_NONE, null, true);
        $brokenSystem = array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true);

        $malformed = array(
            '<!DOCTYPE>' => $nameless,
            '<!DOCTYPE    >' => $nameless,
            '<!DOCTYPE  foo' => $fooOnly,
            '<!DOCTYPE foo PUB' => $fooOnly,
            '<!DOCTYPE foo PUB>' => $fooOnly,
            '<!DOCTYPE  foo PUB "Looks good">' => $fooOnly,
            '<!DOCTYPE  foo SYSTME "Looks good"' => $fooOnly,

            // Can't tell whether these are ids or ID types, since the context is chopped.
            '<!DOCTYPE foo PUBLIC' => $fooOnly,
            '<!DOCTYPE  foo PUBLIC>' => $fooOnly,
            '<!DOCTYPE foo SYSTEM' => $fooOnly,
            '<!DOCTYPE  foo SYSTEM>' => $fooOnly,

            '<!DOCTYPE html SYSTEM "foo bar"' => $brokenSystem,
            '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => $brokenSystem,
        );

        foreach ($malformed as $markup => $expected) {
            $stack = $this->parse($markup);

            $this->assertEquals(3, $stack->depth(), "Counting events for '$markup': " . print_r($stack, true));
            // The error comes first; the doctype event is still emitted after it.
            $this->assertEventError($stack->get(0));
            $this->assertEventEquals('doctype', $expected, $stack->get(1));
        }
    }
 | 
						|
 | 
						|
    /**
     * Processing instructions yield a 'pi' event; the expectation is either
     * the first data element alone or the full data array.
     */
    public function testProcessorInstruction()
    {
        $this->isAllGood('pi', 2, array(
            '<?hph ?>' => 'hph',
            '<?hph echo "Hello World"; ?>' => array(
                'hph',
                'echo "Hello World"; ',
            ),
            "<?hph \necho 'Hello World';\n?>" => array(
                'hph',
                "echo 'Hello World';\n",
            ),
        ));
    }
 | 
						|
 | 
						|
    /**
     * Covers attribute-less tags: plain start tags, self-closing tags, and
     * start tags that are cut off before the closing '>'.
     */
    public function testSimpleTags()
    {
        // Plain start tags; names are expected lower-cased.
        $this->isAllGood('startTag', 2, array(
            '<foo>' => 'foo',
            '<FOO>' => 'foo',
            '<fOO>' => 'foo',
            '<foo >' => 'foo',
            "<foo\n\n\n\n>" => 'foo',
            '<foo:bar>' => 'foo:bar',
        ));

        $selfClosing = array(
            '<foo/>' => 'foo',
            '<FOO/>' => 'foo',
            '<foo />' => 'foo',
            "<foo\n\n\n\n/>" => 'foo',
            '<foo:bar/>' => 'foo:bar',
        );
        foreach ($selfClosing as $markup => $tagName) {
            $stack = $this->parse($markup);
            $this->assertEquals(2, $stack->depth(), "Counting events for '$markup'" . print_r($stack, true));

            $startTag = $stack->get(0);
            $this->assertEventEquals('startTag', $tagName, $startTag);
            // The third data element must be true for these inputs.
            $this->assertTrue($startTag['data'][2]);
        }

        $truncated = array(
            '<foo' => 'foo',
            '<foo ' => 'foo',
            '<foo/' => 'foo',
            '<foo /' => 'foo',
        );
        foreach ($truncated as $markup => $tagName) {
            $stack = $this->parse($markup);
            $this->assertEquals(3, $stack->depth(), "Counting events for '$markup': " . print_r($stack, true));
            $this->assertEventError($stack->get(0));
            $this->assertEventEquals('startTag', $tagName, $stack->get(1));
        }
    }
 | 
						|
 | 
						|
    /**
     * Markup of the form <attr="value"> is expected to produce three error
     * events, then a start tag named after the attribute, then 'eof'.
     */
    public function testTagsWithAttributeAndMissingName()
    {
        $samples = array(
            '<id="top_featured">' => 'id',
            '<color="white">' => 'color',
            "<class='neaktivni_stranka'>" => 'class',
            '<bgcolor="white">' => 'bgcolor',
            '<class="nom">' => 'class',
        );

        foreach ($samples as $markup => $tagName) {
            $stack = $this->parse($markup);

            // Events 0 through 2 are all errors.
            for ($index = 0; $index < 3; ++$index) {
                $this->assertEventError($stack->get($index));
            }

            $this->assertEventEquals('startTag', $tagName, $stack->get(3));
            $this->assertEventEquals('eof', null, $stack->get(4));
        }
    }
 | 
						|
 | 
						|
    /**
     * Start tags that are not closed before the next '<' or other stray input.
     *
     * Each input maps to the exact sequence of events expected from index 0
     * onwards. A step is array('error') for an error event, or
     * array(name, value) for a step checked with assertEventEquals().
     */
    public function testTagNotClosedAfterTagName()
    {
        $scenarios = array(
            '<noscript<img>' => array(
                array('error'),
                array('startTag', 'noscript'),
                array('startTag', 'img'),
                array('eof', null),
            ),
            '<center<a>' => array(
                array('error'),
                array('startTag', 'center'),
                array('startTag', 'a'),
                array('eof', null),
            ),
            '<br<br>' => array(
                array('error'),
                array('startTag', 'br'),
                array('startTag', 'br'),
                array('eof', null),
            ),
            '<span<>02</span>' => array(
                array('error'),
                array('startTag', 'span'),
                array('error'),
                array('text', '>02'),
                array('endTag', 'span'),
                array('eof', null),
            ),
            '<p</p>' => array(
                array('error'),
                array('startTag', 'p'),
                array('endTag', 'p'),
                array('eof', null),
            ),
            '<strong><WordPress</strong>' => array(
                array('startTag', 'strong'),
                array('error'),
                array('startTag', 'wordpress'),
                array('endTag', 'strong'),
                array('eof', null),
            ),
            '<src=<a>' => array(
                array('error'),
                array('error'),
                array('error'),
                array('startTag', 'src'),
                array('startTag', 'a'),
                array('eof', null),
            ),
            '<br...<a>' => array(
                array('error'),
                array('startTag', 'br'),
                array('eof', null),
            ),
        );

        foreach ($scenarios as $markup => $sequence) {
            $stack = $this->parse($markup);

            foreach ($sequence as $index => $step) {
                $event = $stack->get($index);

                if ('error' === $step[0]) {
                    $this->assertEventError($event);
                    continue;
                }

                $this->assertEventEquals($step[0], $step[1], $event);
            }
        }
    }
 | 
						|
 | 
						|
    /**
     * Tag names followed by an illegal character must raise an error and
     * still yield a start tag named by the legal prefix.
     *
     * The '&nbsp;' case is deliberately spelled as a literal entity: written
     * with plain whitespace ('<b  >') the tag would be legal and no error
     * event would precede the start tag.
     */
    public function testIllegalTagNames()
    {
        // Input markup => expected (lower-cased) tag name.
        $cases = array(
            '<li">' => 'li',
            '<p">' => 'p',
            '<b&nbsp; >' => 'b',
            '<static*all>' => 'static',
            '<h*0720/>' => 'h',
            '<st*ATTRIBUTE />' => 'st',
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            // The error is reported first, then the recovered start tag.
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expected, $events->get(1));
        }
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * @depends testCharacterReference
 | 
						|
     */
 | 
						|
    public function testTagAttributes()
 | 
						|
    {
 | 
						|
        // Opening tags.
 | 
						|
        $good = array(
 | 
						|
            '<foo bar="baz">' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'baz',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo bar=" baz ">' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => ' baz ',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo bar=\"\nbaz\n\">" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => "\nbaz\n",
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo bar='baz'>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'baz',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo bar="A full sentence.">' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'A full sentence.',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo a='1' b=\"2\">" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'a' => '1',
 | 
						|
                    'b' => '2',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo ns:bar='baz'>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'ns:bar' => 'baz',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo a='blue&red'>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'a' => 'blue&red',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo a='blue&red'>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'a' => 'blue&red',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo a='blue&&&red'>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'a' => 'blue&&&red',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo a='blue&&red'>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'a' => 'blue&&red',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            "<foo\nbar='baz'\n>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'baz',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<doe a deer>' => array(
 | 
						|
                'doe',
 | 
						|
                array(
 | 
						|
                    'a' => null,
 | 
						|
                    'deer' => null,
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo bar=baz>' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'baz',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
 | 
						|
            // Updated for 8.1.2.3
 | 
						|
            '<foo    bar   =   "baz"      >' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'baz',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
 | 
						|
            // The spec allows an unquoted value '/'. This will not be a closing
 | 
						|
            // tag.
 | 
						|
            '<foo bar=/>' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => '/',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo bar=baz/>' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'baz/',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
        );
 | 
						|
        $this->isAllGood('startTag', 2, $good);
 | 
						|
 | 
						|
        // Self-closing tags.
 | 
						|
        $withEnd = array(
 | 
						|
            '<foo bar="baz"/>' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'baz',
 | 
						|
                ),
 | 
						|
                true,
 | 
						|
            ),
 | 
						|
            '<foo BAR="baz"/>' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'baz',
 | 
						|
                ),
 | 
						|
                true,
 | 
						|
            ),
 | 
						|
            '<foo BAR="BAZ"/>' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'BAZ',
 | 
						|
                ),
 | 
						|
                true,
 | 
						|
            ),
 | 
						|
            "<foo a='1' b=\"2\" c=3 d/>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'a' => '1',
 | 
						|
                    'b' => '2',
 | 
						|
                    'c' => '3',
 | 
						|
                    'd' => null,
 | 
						|
                ),
 | 
						|
                true,
 | 
						|
            ),
 | 
						|
        );
 | 
						|
        $this->isAllGood('startTag', 2, $withEnd);
 | 
						|
 | 
						|
        // Cause a parse error.
 | 
						|
        $bad = array(
 | 
						|
            // This will emit an entity lookup failure for &+dark.
 | 
						|
            "<foo a='blue&+dark'>" => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'a' => 'blue&+dark',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo bar=>' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => null,
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo bar="oh' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'oh',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo bar=oh">' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'oh"',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
 | 
						|
            // these attributes are ignored because of current implementation
 | 
						|
            // of method "DOMElement::setAttribute"
 | 
						|
            // see issue #23: https://github.com/Masterminds/html5-php/issues/23
 | 
						|
            '<foo b"="baz">' => array(
 | 
						|
                'foo',
 | 
						|
                array(),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo 2abc="baz">' => array(
 | 
						|
                'foo',
 | 
						|
                array(),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo ?="baz">' => array(
 | 
						|
                'foo',
 | 
						|
                array(),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo foo?bar="baz">' => array(
 | 
						|
                'foo',
 | 
						|
                array(),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
        )
 | 
						|
        ;
 | 
						|
        foreach ($bad as $test => $expects) {
 | 
						|
            $events = $this->parse($test);
 | 
						|
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
 | 
						|
            $this->assertEventError($events->get(0));
 | 
						|
            $this->assertEventEquals('startTag', $expects, $events->get(1));
 | 
						|
        }
 | 
						|
 | 
						|
        // Cause multiple parse errors.
 | 
						|
        $reallyBad = array(
 | 
						|
            '<foo ="bar">' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    '=' => null,
 | 
						|
                    '"bar"' => null,
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
            '<foo////>' => array(
 | 
						|
                'foo',
 | 
						|
                array(),
 | 
						|
                true,
 | 
						|
            ),
 | 
						|
            // character "&" in unquoted attribute shouldn't cause an infinite loop
 | 
						|
            '<foo bar=index.php?str=1&id=29>' => array(
 | 
						|
                'foo',
 | 
						|
                array(
 | 
						|
                    'bar' => 'index.php?str=1&id=29',
 | 
						|
                ),
 | 
						|
                false,
 | 
						|
            ),
 | 
						|
        );
 | 
						|
        foreach ($reallyBad as $test => $expects) {
 | 
						|
            $events = $this->parse($test);
 | 
						|
            // fprintf(STDOUT, $test . print_r($events, true));
 | 
						|
            $this->assertEventError($events->get(0));
 | 
						|
            $this->assertEventError($events->get(1));
 | 
						|
            // $this->assertEventEquals('startTag', $expects, $events->get(1));
 | 
						|
        }
 | 
						|
 | 
						|
        // Regression: Malformed elements should be detected.
 | 
						|
        // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), false),
 | 
						|
        $events = $this->parse('<foo baz="1" <bar></foo>');
 | 
						|
        $this->assertEventError($events->get(0));
 | 
						|
        $this->assertEventEquals('startTag', array(
 | 
						|
            'foo',
 | 
						|
            array(
 | 
						|
                'baz' => '1',
 | 
						|
            ),
 | 
						|
            false,
 | 
						|
        ), $events->get(1));
 | 
						|
        $this->assertEventEquals('startTag', array(
 | 
						|
            'bar',
 | 
						|
            array(),
 | 
						|
            false,
 | 
						|
        ), $events->get(2));
 | 
						|
        $this->assertEventEquals('endTag', array(
 | 
						|
            'foo',
 | 
						|
        ), $events->get(3));
 | 
						|
    }
 | 
						|
 | 
						|
    /**
     * Checks how <script> and <title> content is tokenized.
     *
     * Three groups of inputs are covered: closed <script> elements, whose
     * text event must equal the content between the tags; <script> elements
     * that are never closed, where an error event is expected before the
     * text; and <title> elements written in upper case or with whitespace
     * inside the end tag.
     */
    public function testRawText()
    {
        // Closed <script> element => text expected between its tags.
        $closed = array(
            '<script>abcd efg hijk lmnop</script>     ' => 'abcd efg hijk lmnop',
            '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
            '<script><<<<<<<<</script>' => '<<<<<<<<',
            '<script>hello</script</script>' => 'hello</script',
            "<script>\nhello</script\n</script>" => "\nhello</script\n",
            '<script>&</script>' => '&',
            '<script><!--not a comment--></script>' => '<!--not a comment-->',
            '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>',
        );
        foreach ($closed as $markup => $content) {
            $stack = $this->parse($markup);
            $this->assertEventEquals('startTag', 'script', $stack->get(0));
            $this->assertEventEquals('text', $content, $stack->get(1));
            $this->assertEventEquals('endTag', 'script', $stack->get(2));
        }

        // <script> without a complete end tag: four events in total, of which
        // the first three are checked (start tag, error, remaining text).
        $unclosed = array(
            '<script>&</script' => '&</script',
            '<script>Hello world' => 'Hello world',
        );
        foreach ($unclosed as $markup => $content) {
            $stack = $this->parse($markup);
            $this->assertEquals(4, $stack->depth(), "Counting events for '{$markup}': " . print_r($stack, true));
            $this->assertEventEquals('startTag', 'script', $stack->get(0));
            $this->assertEventError($stack->get(1));
            $this->assertEventEquals('text', $content, $stack->get(2));
        }

        // <title>: an upper-case tag name is reported in lower case, and
        // whitespace before the closing ">" of the end tag is accepted.
        $titles = array(
            '<TITLE>a test</TITLE>' => 'a test',
            '<title>Whitespaces are tasty</title >' => 'Whitespaces are tasty',
        );
        foreach ($titles as $markup => $content) {
            $stack = $this->parse($markup);
            $this->assertEventEquals('startTag', 'title', $stack->get(0));
            $this->assertEventEquals('text', $content, $stack->get(1));
            $this->assertEventEquals('endTag', 'title', $stack->get(2));
        }
    }
 | 
						|
 | 
						|
    /**
     * Checks RCDATA text mode on a <title> element.
     *
     * The tokenizer is switched to RCDATA for "title" before parsing. The
     * text event is then expected to contain the decoded apostrophe followed
     * by the comment-like markup as literal characters.
     */
    public function testRcdata()
    {
        // The apostrophe is spelled as the character reference &#x27;. A raw
        // apostrophe here would terminate the single-quoted PHP string, and
        // the reference also exercises decoding inside RCDATA.
        list($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
        $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
        $tok->parse();

        // Event 1 is the text event; "<!-- ... -->" must not become a comment.
        $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
    }
 | 
						|
 | 
						|
    /**
     * Checks text events in several contexts: around a void-style tag,
     * inside an element, across sibling elements, next to CDATA and comment
     * sections, and with an ampersand or a non-ASCII character in the text.
     *
     * Each depth() assertion counts every event on the stack; only the
     * leading events are then checked individually.
     */
    public function testText()
    {
        // Text on both sides of a start tag.
        $events = $this->parse('a<br>b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('startTag', 'br', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        // Text wrapped in a single element.
        $events = $this->parse('<a>Test</a>');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('startTag', 'a', $events->get(0));
        $this->assertEventEquals('text', 'Test', $events->get(1));
        $this->assertEventEquals('endTag', 'a', $events->get(2));

        // Two sibling elements; the texts '0' and '1' must both be reported.
        $events = $this->parse('<p>0</p><p>1</p>');
        $this->assertEquals(7, $events->depth(), 'Events: ' . print_r($events, true));

        $this->assertEventEquals('startTag', 'p', $events->get(0));
        $this->assertEventEquals('text', '0', $events->get(1));
        $this->assertEventEquals('endTag', 'p', $events->get(2));

        $this->assertEventEquals('startTag', 'p', $events->get(3));
        $this->assertEventEquals('text', '1', $events->get(4));
        $this->assertEventEquals('endTag', 'p', $events->get(5));

        // A CDATA section splits the surrounding text into two text events.
        $events = $this->parse('a<![CDATA[test]]>b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('cdata', 'test', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        // A comment splits the surrounding text the same way.
        $events = $this->parse('a<!--test-->b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('comment', 'test', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        // The ampersand is written as the well-formed reference &amp; so the
        // input cannot add an error event to the two events counted below;
        // the decoded "&" must stay inside one text event.
        $events = $this->parse('a&amp;b');
        $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a&b', $events->get(0));

        // A non-ASCII character passes through the text event unchanged.
        $events = $this->parse('a²b');
        $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a²b', $events->get(0));
    }
 | 
						|
 | 
						|
    // ================================================================
    // Utility functions.
    // ================================================================
 | 
						|
    /**
     * Builds a tokenizer over the given markup together with the event
     * stack that it reports to.
     *
     * @param string $string
     *                       Markup handed to the Scanner
     * @param mixed  $debug
     *                       Value assigned to the scanner's debug property
     *
     * @return array
     *               Two elements: the Tokenizer, then its EventStack
     */
    protected function createTokenizer($string, $debug = false)
    {
        $recorder = new EventStack();

        $input = new Scanner($string);
        $input->debug = $debug;

        $tokenizer = new Tokenizer($input, $recorder);

        return array($tokenizer, $recorder);
    }
 | 
						|
 | 
						|
    /**
     * Tokenizes the given markup and hands back the recorded events.
     *
     * @param string $string
     *                       Markup to tokenize
     * @param mixed  $debug
     *                       Passed through to createTokenizer()
     *
     * @return mixed
     *               The event handler returned by createTokenizer(), after
     *               the tokenizer has run
     */
    public function parse($string, $debug = false)
    {
        $pair = $this->createTokenizer($string, $debug);

        // Index 0 is the tokenizer, index 1 the event handler it fills.
        $pair[0]->parse();

        return $pair[1];
    }
 | 
						|
}
 |