diff --git a/lib/PhpParser/Lexer/Emulative.php b/lib/PhpParser/Lexer/Emulative.php index d55e2d4..2bd6aa1 100644 --- a/lib/PhpParser/Lexer/Emulative.php +++ b/lib/PhpParser/Lexer/Emulative.php @@ -4,9 +4,6 @@ namespace PhpParser\Lexer; use PhpParser\Parser\Tokens; -/** - * ATTENTION: This code is WRITE-ONLY. Do not try to read it. - */ class Emulative extends \PhpParser\Lexer { protected $newKeywords; @@ -56,128 +53,103 @@ class Emulative extends \PhpParser\Lexer public function startLexing($code) { $this->inObjectAccess = false; - $preprocessedCode = $this->preprocessCode($code); - parent::startLexing($preprocessedCode); - if ($preprocessedCode !== $code) { - $this->postprocessTokens(); + parent::startLexing($code); + if ($this->requiresEmulation($code)) { + $this->emulateTokens(); } - - // Set code property back to the original code, so __halt_compiler() - // handling and (start|end)FilePos attributes use the correct offsets - $this->code = $code; } /* - * Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences. - * ~LABEL~ is never valid PHP code, that's why we can (to some degree) safely - * use it here. - * Later when preprocessing the tokens these sequences will either be replaced - * by real tokens or replaced with their original content (e.g. if they occurred - * inside a string, i.e. a place where they don't have a special meaning). + * Checks if the code is potentially using features that require emulation. */ - protected function preprocessCode($code) { + protected function requiresEmulation($code) { if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) { - return $code; + return false; } - $code = str_replace('??', '~__EMU__COALESCE__~', $code); - $code = str_replace('<=>', '~__EMU__SPACESHIP__~', $code); - $code = preg_replace_callback('(yield[ \n\r\t]+from)', function($matches) { - // Encoding $0 in order to preserve exact whitespace - return '~__EMU__YIELDFROM__' . bin2hex($matches[0]) . '__~'; - }, $code); + if (preg_match('/\?\?|<=>|yield[ \n\r\t]+from/', $code)) { + return true; + } if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) { - return $code; + return false; } - $code = str_replace('...', '~__EMU__ELLIPSIS__~', $code); - $code = preg_replace('((?tokens); $i < $c; ++$i) { - // first check that the following tokens are of form ~LABEL~, - // then match the __EMU__... sequence. - if ('~' === $this->tokens[$i] - && isset($this->tokens[$i + 2]) - && '~' === $this->tokens[$i + 2] - && T_STRING === $this->tokens[$i + 1][0] - && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches) - ) { - if ('ELLIPSIS' === $matches[1]) { - $replace = array( - array(self::T_ELLIPSIS, '...', $this->tokens[$i + 1][2]) - ); - } else if ('POW' === $matches[1]) { - $replace = array( - array(self::T_POW, '**', $this->tokens[$i + 1][2]) - ); - } else if ('POWEQUAL' === $matches[1]) { - $replace = array( - array(self::T_POW_EQUAL, '**=', $this->tokens[$i + 1][2]) - ); - } else if ('COALESCE' === $matches[1]) { - $replace = array( - array(self::T_COALESCE, '??', $this->tokens[$i + 1][2]) - ); - } else if ('SPACESHIP' === $matches[1]) { - $replace = array( - array(self::T_SPACESHIP, '<=>', $this->tokens[$i + 1][2]), - ); - } else if ('YIELDFROM' === $matches[1]) { - $content = hex2bin($matches[2]); - $replace = array( - array(self::T_YIELD_FROM, $content, $this->tokens[$i + 1][2] - substr_count($content, "\n")) - ); - } else { - throw new \RuntimeException('Invalid __EMU__ sequence'); + $replace = null; + if (isset($this->tokens[$i + 1])) { + if ($this->tokens[$i] === '?' && $this->tokens[$i + 1] === '?') { + array_splice($this->tokens, $i, 2, array( + array(self::T_COALESCE, '??', $line) + )); + $c--; + continue; + } + if ($this->tokens[$i][0] === T_IS_SMALLER_OR_EQUAL + && $this->tokens[$i + 1] === '>' + ) { + array_splice($this->tokens, $i, 2, array( + array(self::T_SPACESHIP, '<=>', $line) + )); + $c--; + continue; + } + if ($this->tokens[$i] === '*' && $this->tokens[$i + 1] === '*') { + array_splice($this->tokens, $i, 2, array( + array(self::T_POW, '**', $line) + )); + $c--; + continue; + } + if ($this->tokens[$i] === '*' && $this->tokens[$i + 1][0] === T_MUL_EQUAL) { + array_splice($this->tokens, $i, 2, array( + array(self::T_POW_EQUAL, '**=', $line) + )); + $c--; + continue; } - - array_splice($this->tokens, $i, 3, $replace); - $c -= 3 - count($replace); - // for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences - // in their content with the original character sequence - } elseif (is_array($this->tokens[$i]) - && 0 !== strpos($this->tokens[$i][1], '__EMU__') - ) { - $this->tokens[$i][1] = preg_replace_callback( - '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)', - array($this, 'restoreContentCallback'), - $this->tokens[$i][1] - ); } - } - } - /* - * This method is a callback for restoring EMU sequences in - * multichar tokens (like strings) to their original value. - */ - public function restoreContentCallback(array $matches) { - if ('ELLIPSIS' === $matches[1]) { - return '...'; - } else if ('POW' === $matches[1]) { - return '**'; - } else if ('POWEQUAL' === $matches[1]) { - return '**='; - } else if ('COALESCE' === $matches[1]) { - return '??'; - } else if ('SPACESHIP' === $matches[1]) { - return '<=>'; - } else if ('YIELDFROM' === $matches[1]) { - return hex2bin($matches[2]); - } else { - return $matches[0]; + if (isset($this->tokens[$i + 2])) { + if ($this->tokens[$i][0] === T_YIELD && $this->tokens[$i + 1][0] === T_WHITESPACE + && $this->tokens[$i + 2][0] === T_STRING + && !strcasecmp($this->tokens[$i + 2][1], 'from') + ) { + array_splice($this->tokens, $i, 3, array( + array( + self::T_YIELD_FROM, + $this->tokens[$i][1] . $this->tokens[$i + 1][1] . $this->tokens[$i + 2][1], + $line + ) + )); + $c -= 2; + $line += substr_count($this->tokens[$i][1], "\n"); + continue; + } + if ($this->tokens[$i] === '.' && $this->tokens[$i + 1] === '.' + && $this->tokens[$i + 2] === '.' + ) { + array_splice($this->tokens, $i, 3, array( + array(self::T_ELLIPSIS, '...', $line) + )); + $c -= 2; + continue; + } + } + + if (\is_array($this->tokens[$i])) { + $line += substr_count($this->tokens[$i][1], "\n"); + } } }