array(
                'finally' => Parser::T_FINALLY,
                'yield'   => Parser::T_YIELD,
            ),
            self::PHP_5_4 => array(
                'callable'  => Parser::T_CALLABLE,
                'insteadof' => Parser::T_INSTEADOF,
                'trait'     => Parser::T_TRAIT,
                '__trait__' => Parser::T_TRAIT_C,
            ),
        );

        // Collect keyword maps until reaching a version the running PHP
        // already satisfies; everything from that entry on is skipped.
        $this->newKeywords = array();
        foreach ($newKeywordsPerVersion as $version => $newKeywords) {
            if (version_compare(PHP_VERSION, $version, '>=')) {
                break;
            }

            $this->newKeywords += $newKeywords;
        }

        // Token ids below are only registered when the running PHP is older
        // than the version checked right before them.
        if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
            return;
        }
        $this->tokenMap[self::T_COALESCE]   = Parser::T_COALESCE;
        $this->tokenMap[self::T_SPACESHIP]  = Parser::T_SPACESHIP;
        $this->tokenMap[self::T_YIELD_FROM] = Parser::T_YIELD_FROM;

        if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
            return;
        }
        $this->tokenMap[self::T_ELLIPSIS]  = Parser::T_ELLIPSIS;
        $this->tokenMap[self::T_POW]       = Parser::T_POW;
        $this->tokenMap[self::T_POW_EQUAL] = Parser::T_POW_EQUAL;
    }

    /**
     * Starts lexing: the code is run through preprocessCode(), handed to the
     * parent lexer, and the resulting tokens are postprocessed if the
     * preprocessing changed anything.
     *
     * @param string $code The source code to lex
     */
    public function startLexing($code)
    {
        $this->inObjectAccess = false;

        $emulatedCode = $this->preprocessCode($code);
        parent::startLexing($emulatedCode);

        // Tokens only need fixing up when preprocessing actually rewrote something.
        $wasRewritten = $emulatedCode !== $code;
        if ($wasRewritten) {
            $this->postprocessTokens();
        }

        // Set code property back to the original code, so __halt_compiler()
        // handling and (start|end)FilePos attributes use the correct offsets
        $this->code = $code;
    }

    /*
     * Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences.
     * ~LABEL~ is never valid PHP code, that's why we can (to some degree) safely
     * use it here.
     * Later, when postprocessing the tokens, these sequences will either be replaced
     * by real tokens or replaced with their original content (e.g. if they occurred
     * inside a string, i.e. a place where they don't have a special meaning).
*/
    /**
     * @param string $code Original source code
     *
     * @return string Code in which constructs unknown to the running PHP
     *                version are replaced by ~__EMU__...~ placeholders
     */
    protected function preprocessCode($code)
    {
        // Each version check returns early: the rewrites following it are
        // only applied when the running PHP is older than that version.
        if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
            return $code;
        }

        $code = str_replace('??', '~__EMU__COALESCE__~', $code);
        $code = str_replace('<=>', '~__EMU__SPACESHIP__~', $code);
        $code = preg_replace_callback('(yield[ \n\r\t]+from)', function($matches) {
            // Encoding $0 in order to preserve exact whitespace
            return '~__EMU__YIELDFROM__' . bin2hex($matches[0]) . '__~';
        }, $code);

        if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
            return $code;
        }

        $code = str_replace('...', '~__EMU__ELLIPSIS__~', $code);
        // The lookarounds skip "**" directly adjacent to "/", so the comment
        // delimiters "/**" and "**/" are not rewritten. "**=" has to be
        // handled before "**", otherwise its "=" would be left behind.
        $code = preg_replace('((?<!/)\*\*=)', '~__EMU__POWEQUAL__~', $code);
        $code = preg_replace('((?<!/)\*\*(?!/))', '~__EMU__POW__~', $code);

        if (version_compare(PHP_VERSION, self::PHP_5_4, '>=')) {
            return $code;
        }

        // binary notation (0b010101101001...)
        return preg_replace('(\b0b[01]+\b)', '~__EMU__BINARY__$0__~', $code);
    }

    /*
     * Replaces the ~__EMU__...~ sequences with real tokens or their original
     * value.
     *
     * Throws a \RuntimeException if a ~__EMU__{NAME}__~ token triple carries
     * a name that is not one of the known sequences.
     */
    protected function postprocessTokens()
    {
        // Sequences that always map to the same token id and source text
        $fixedTokens = array(
            'ELLIPSIS'  => array(self::T_ELLIPSIS, '...'),
            'POW'       => array(self::T_POW, '**'),
            'POWEQUAL'  => array(self::T_POW_EQUAL, '**='),
            'COALESCE'  => array(self::T_COALESCE, '??'),
            'SPACESHIP' => array(self::T_SPACESHIP, '<=>'),
        );

        // we need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
            // first check that the following tokens are of form ~LABEL~,
            // then match the __EMU__... sequence.
            if ('~' === $this->tokens[$i]
                && isset($this->tokens[$i + 2])
                && '~' === $this->tokens[$i + 2]
                && T_STRING === $this->tokens[$i + 1][0]
                && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
            ) {
                $name = $matches[1];
                // line number recorded on the T_STRING token in the middle
                $line = $this->tokens[$i + 1][2];

                if (isset($fixedTokens[$name])) {
                    $replace = array(
                        array($fixedTokens[$name][0], $fixedTokens[$name][1], $line)
                    );
                } elseif ('BINARY' === $name) {
                    // the binary number can either be an integer or a double, so return a LNUMBER
                    // or DNUMBER respectively
                    $isInt = is_int(bindec($matches[2]));
                    $replace = array(
                        array($isInt ? T_LNUMBER : T_DNUMBER, $matches[2], $line)
                    );
                } elseif ('YIELDFROM' === $name) {
                    // The decoded text may contain newlines; the line number
                    // is moved back by that many lines.
                    $content = $this->hex2bin($matches[2]);
                    $replace = array(
                        array(self::T_YIELD_FROM, $content, $line - substr_count($content, "\n"))
                    );
                } else {
                    throw new \RuntimeException('Invalid __EMU__ sequence');
                }

                array_splice($this->tokens, $i, 3, $replace);
                $c -= 3 - count($replace);
            // for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences
            // in their content with the original character sequence
            } elseif (is_array($this->tokens[$i])
                      // strpos() returns false when the needle is absent and 0
                      // when the text starts with it, so compare against false:
                      // only tokens that contain the marker need the regex pass.
                      && false !== strpos($this->tokens[$i][1], '__EMU__')
            ) {
                $this->tokens[$i][1] = preg_replace_callback(
                    '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
                    array($this, 'restoreContentCallback'),
                    $this->tokens[$i][1]
                );
            }
        }
    }

    /*
     * This method is a callback for restoring EMU sequences in
     * multichar tokens (like strings) to their original value.
*/
    /**
     * @param array $matches Regex match: [0] full sequence, [1] sequence name,
     *                       [2] optional data part
     *
     * @return string Original text for known sequences, the unmodified match otherwise
     */
    public function restoreContentCallback(array $matches)
    {
        $name = $matches[1];

        // Sequences whose original text is carried in the data part
        if ('BINARY' === $name) {
            return $matches[2];
        }
        if ('YIELDFROM' === $name) {
            return $this->hex2bin($matches[2]);
        }

        // Sequences that always stand for the same text
        $originals = array(
            'ELLIPSIS'  => '...',
            'POW'       => '**',
            'POWEQUAL'  => '**=',
            'COALESCE'  => '??',
            'SPACESHIP' => '<=>',
        );

        // Unknown names are left untouched
        return isset($originals[$name]) ? $originals[$name] : $matches[0];
    }

    /**
     * Decodes a hexadecimal string into its binary representation.
     *
     * @param string $str Hex-encoded data
     *
     * @return string Decoded data
     */
    private function hex2bin($str)
    {
        // TODO Drop when removing support for PHP 5.3
        return pack('H*', $str);
    }

    /**
     * Fetches the next token from the parent lexer and turns T_STRING tokens
     * that spell an emulated keyword into that keyword's token id.
     *
     * @param mixed $value           Receives the token's text
     * @param mixed $startAttributes Passed through to the parent lexer
     * @param mixed $endAttributes   Passed through to the parent lexer
     *
     * @return int Token id
     */
    public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null)
    {
        $token = parent::getNextToken($value, $startAttributes, $endAttributes);

        // Keywords are not substituted while in an object access (e.g. in
        // $obj->namespace "namespace" stays a T_STRING token and isn't
        // converted to T_NAMESPACE). For every token that is not a candidate,
        // just keep track of whether it is the "->" operator.
        if (Parser::T_STRING !== $token || $this->inObjectAccess) {
            $this->inObjectAccess = Parser::T_OBJECT_OPERATOR === $token;

            return $token;
        }

        // replace new keywords by their respective tokens
        $keyword = strtolower($value);

        return isset($this->newKeywords[$keyword]) ? $this->newKeywords[$keyword] : $token;
    }
}