diff --git a/lib/PhpParser/Lexer.php b/lib/PhpParser/Lexer.php index c87811f..f12fd89 100644 --- a/lib/PhpParser/Lexer.php +++ b/lib/PhpParser/Lexer.php @@ -34,14 +34,25 @@ class Lexer * first three. For more info see getNextToken() docs. */ public function __construct(array $options = []) { - // map from internal tokens to PhpParser tokens - $this->tokenMap = $this->createTokenMap(); - - // Compatibility define for PHP < 7.4 + // Compatibility define for PHP < 7.4. if (!defined('T_BAD_CHARACTER')) { \define('T_BAD_CHARACTER', -1); } + // Compatibility defines for PHP < 8.0. + if (!defined('T_NAME_QUALIFIED')) { + \define('T_NAME_QUALIFIED', -2); + } + if (!defined('T_NAME_FULLY_QUALIFIED')) { + \define('T_NAME_FULLY_QUALIFIED', -3); + } + if (!defined('T_NAME_RELATIVE')) { + \define('T_NAME_RELATIVE', -4); + } + + // Create Map from internal tokens to PhpParser tokens. + $this->tokenMap = $this->createTokenMap(); + // map of tokens to drop while lexing (the map is only used for isset lookup, // that's why the value is simply set to 1; the value is never actually used.) $this->dropTokens = array_fill_keys( @@ -138,7 +149,9 @@ class Lexer // by checking if a trailing comment has a "*/" at the end. // // Additionally, we canonicalize to the PHP 8 comment format here, which does not include - // the trailing whitespace anymore + // the trailing whitespace anymore. + // + // We also canonicalize to the PHP 8 T_NAME_* tokens. $filePos = 0; $line = 1; @@ -170,6 +183,46 @@ class Lexer } } + // Emulate PHP 8 T_NAME_* tokens, by combining sequences of T_NS_SEPARATOR and T_STRING + // into a single token. + // TODO: Also handle reserved keywords in namespaced names. 
+ if (\is_array($token) + && ($token[0] === \T_NS_SEPARATOR || $token[0] === \T_STRING || $token[0] === \T_NAMESPACE)) { + $lastWasSeparator = $token[0] === \T_NS_SEPARATOR; + $text = $token[1]; + for ($j = $i + 1; isset($this->tokens[$j]); $j++) { + if ($lastWasSeparator) { + if ($this->tokens[$j][0] !== \T_STRING) { + break; + } + $lastWasSeparator = false; + } else { + if ($this->tokens[$j][0] !== \T_NS_SEPARATOR) { + break; + } + $lastWasSeparator = true; + } + $text .= $this->tokens[$j][1]; + } + if ($lastWasSeparator) { + // Trailing separator is not part of the name. + $j--; + $text = substr($text, 0, -1); + } + if ($j > $i + 1) { + if ($token[0] === \T_NS_SEPARATOR) { + $type = \T_NAME_FULLY_QUALIFIED; + } else if ($token[0] === \T_NAMESPACE) { + $type = \T_NAME_RELATIVE; + } else { + $type = \T_NAME_QUALIFIED; + } + $token = [$type, $text, $line]; + array_splice($this->tokens, $i, $j - $i, [$token]); + $numTokens -= $j - $i - 1; + } + } + $tokenValue = \is_string($token) ? $token : $token[1]; $tokenLen = \strlen($tokenValue); @@ -409,6 +462,11 @@ class Lexer $tokenMap[\T_COMPILER_HALT_OFFSET] = Tokens::T_STRING; } + // Assign tokens for which we define compatibility constants, as token_name() does not know them. 
+ $tokenMap[\T_NAME_QUALIFIED] = Tokens::T_NAME_QUALIFIED; + $tokenMap[\T_NAME_FULLY_QUALIFIED] = Tokens::T_NAME_FULLY_QUALIFIED; + $tokenMap[\T_NAME_RELATIVE] = Tokens::T_NAME_RELATIVE; + return $tokenMap; } } diff --git a/test/PhpParser/LexerTest.php b/test/PhpParser/LexerTest.php index 79aea44..bbb13e0 100644 --- a/test/PhpParser/LexerTest.php +++ b/test/PhpParser/LexerTest.php @@ -215,6 +215,18 @@ class LexerTest extends \PHPUnit\Framework\TestCase [], [] ], + // tests PHP 8 T_NAME_* emulation + [ + '<?php Foo\Bar \Foo\Bar namespace\Foo\Bar Foo\Bar\\', + ['usedAttributes' => []], + [ + [Tokens::T_NAME_QUALIFIED, 'Foo\Bar', [], []], + [Tokens::T_NAME_FULLY_QUALIFIED, '\Foo\Bar', [], []], + [Tokens::T_NAME_RELATIVE, 'namespace\Foo\Bar', [], []], + [Tokens::T_NAME_QUALIFIED, 'Foo\Bar', [], []], + [Tokens::T_NS_SEPARATOR, '\\', [], []], + ] + ], ]; }