From 6f74784e16288f07435e68bff5ca20a4e58342ed Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 30 Jun 2019 13:10:12 +0200 Subject: [PATCH] Switch to a normalized token representation Each token is now represented by a Token object. --- lib/PhpParser/Internal/TokenStream.php | 60 +++--- lib/PhpParser/Lexer.php | 191 ++++++++---------- lib/PhpParser/Lexer/Emulative.php | 26 ++- .../CoaleseEqualTokenEmulator.php | 10 +- .../Lexer/TokenEmulator/FnTokenEmulator.php | 13 +- .../NumericLiteralSeparatorEmulator.php | 25 ++- .../TokenEmulator/TokenEmulatorInterface.php | 5 +- lib/PhpParser/PrettyPrinterAbstract.php | 38 ++-- lib/PhpParser/Token.php | 21 ++ test/PhpParser/LexerTest.php | 17 +- 10 files changed, 198 insertions(+), 208 deletions(-) create mode 100644 lib/PhpParser/Token.php diff --git a/lib/PhpParser/Internal/TokenStream.php b/lib/PhpParser/Internal/TokenStream.php index cf9e00a..26ea92b 100644 --- a/lib/PhpParser/Internal/TokenStream.php +++ b/lib/PhpParser/Internal/TokenStream.php @@ -2,6 +2,8 @@ namespace PhpParser\Internal; +use PhpParser\Token; + /** * Provides operations on token streams, for use by pretty printer. * @@ -9,7 +11,7 @@ namespace PhpParser\Internal; */ class TokenStream { - /** @var array Tokens (in token_get_all format) */ + /** @var Token[] */ private $tokens; /** @var int[] Map from position to indentation */ private $indentMap; @@ -17,7 +19,7 @@ class TokenStream /** * Create token stream instance. 
* - * @param array $tokens Tokens in token_get_all() format + * @param Token[] $tokens Tokens */ public function __construct(array $tokens) { $this->tokens = $tokens; @@ -33,8 +35,8 @@ class TokenStream * @return bool */ public function haveParens(int $startPos, int $endPos) : bool { - return $this->haveTokenImmediativelyBefore($startPos, '(') - && $this->haveTokenImmediatelyAfter($endPos, ')'); + return $this->haveTokenImmediativelyBefore($startPos, \ord('(')) + && $this->haveTokenImmediatelyAfter($endPos, \ord(')')); } /** @@ -46,8 +48,8 @@ class TokenStream * @return bool */ public function haveBraces(int $startPos, int $endPos) : bool { - return $this->haveTokenImmediativelyBefore($startPos, '{') - && $this->haveTokenImmediatelyAfter($endPos, '}'); + return $this->haveTokenImmediativelyBefore($startPos, \ord('{')) + && $this->haveTokenImmediatelyAfter($endPos, \ord('}')); } /** @@ -64,7 +66,7 @@ class TokenStream $tokens = $this->tokens; $pos--; for (; $pos >= 0; $pos--) { - $tokenType = $tokens[$pos][0]; + $tokenType = $tokens[$pos]->id; if ($tokenType === $expectedTokenType) { return true; } @@ -90,7 +92,7 @@ class TokenStream $tokens = $this->tokens; $pos++; for (; $pos < \count($tokens); $pos++) { - $tokenType = $tokens[$pos][0]; + $tokenType = $tokens[$pos]->id; if ($tokenType === $expectedTokenType) { return true; } @@ -110,7 +112,7 @@ class TokenStream return $pos; } - if ($tokens[$pos][0] !== $skipTokenType) { + if ($tokens[$pos]->id !== $skipTokenType) { // Shouldn't happen. The skip token MUST be there throw new \Exception('Encountered unexpected token'); } @@ -127,7 +129,7 @@ class TokenStream return $pos; } - if ($tokens[$pos][0] !== $skipTokenType) { + if ($tokens[$pos]->id !== $skipTokenType) { // Shouldn't happen. 
The skip token MUST be there throw new \Exception('Encountered unexpected token'); } @@ -145,7 +147,7 @@ class TokenStream public function skipLeftWhitespace(int $pos) { $tokens = $this->tokens; for (; $pos >= 0; $pos--) { - $type = $tokens[$pos][0]; + $type = $tokens[$pos]->id; if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) { break; } @@ -162,7 +164,7 @@ class TokenStream public function skipRightWhitespace(int $pos) { $tokens = $this->tokens; for ($count = \count($tokens); $pos < $count; $pos++) { - $type = $tokens[$pos][0]; + $type = $tokens[$pos]->id; if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) { break; } @@ -173,7 +175,7 @@ class TokenStream public function findRight($pos, $findTokenType) { $tokens = $this->tokens; for ($count = \count($tokens); $pos < $count; $pos++) { - $type = $tokens[$pos][0]; + $type = $tokens[$pos]->id; if ($type === $findTokenType) { return $pos; } @@ -206,23 +208,19 @@ class TokenStream $result = ''; for ($pos = $from; $pos < $to; $pos++) { $token = $tokens[$pos]; - if (\is_array($token)) { - $type = $token[0]; - $content = $token[1]; - if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) { - $result .= $content; - } else { - // TODO Handle non-space indentation - if ($indent < 0) { - $result .= str_replace("\n" . str_repeat(" ", -$indent), "\n", $content); - } elseif ($indent > 0) { - $result .= str_replace("\n", "\n" . str_repeat(" ", $indent), $content); - } else { - $result .= $content; - } - } + $type = $token->id; + $content = $token->value; + if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) { + $result .= $content; } else { - $result .= $token; + // TODO Handle non-space indentation + if ($indent < 0) { + $result .= str_replace("\n" . str_repeat(" ", -$indent), "\n", $content); + } elseif ($indent > 0) { + $result .= str_replace("\n", "\n" . 
str_repeat(" ", $indent), $content); + } else { + $result .= $content; + } } } return $result; @@ -239,8 +237,8 @@ class TokenStream foreach ($this->tokens as $token) { $indentMap[] = $indent; - if ($token[0] === \T_WHITESPACE) { - $content = $token[1]; + if ($token->id === \T_WHITESPACE) { + $content = $token->value; $newlinePos = \strrpos($content, "\n"); if (false !== $newlinePos) { $indent = \strlen($content) - $newlinePos - 1; diff --git a/lib/PhpParser/Lexer.php b/lib/PhpParser/Lexer.php index 51ffd1e..23ca80c 100644 --- a/lib/PhpParser/Lexer.php +++ b/lib/PhpParser/Lexer.php @@ -11,11 +11,11 @@ class Lexer * file positions from going out of sync. */ const T_BAD_CHARACTER = -1; + /** @var string */ protected $code; + /** @var Token[] */ protected $tokens; protected $pos; - protected $line; - protected $filePos; protected $prevCloseTagHasNewline; protected $tokenMap; @@ -87,9 +87,7 @@ class Lexer $scream = ini_set('xdebug.scream', '0'); - error_clear_last(); - $this->tokens = @token_get_all($code); - $this->handleErrors($errorHandler); + $this->tokens = $this->createNormalizedTokens($code, $errorHandler); if (false !== $scream) { ini_set('xdebug.scream', $scream); @@ -109,7 +107,7 @@ class Lexer ); } - $tokens[] = [self::T_BAD_CHARACTER, $chr, $line]; + $tokens[] = new Token(self::T_BAD_CHARACTER, $chr, $line, $i); $errorHandler->handleError(new Error($errorMsg, [ 'startLine' => $line, 'endLine' => $line, @@ -125,10 +123,10 @@ class Lexer * * @return bool */ - private function isUnterminatedComment($token) : bool { - return ($token[0] === \T_COMMENT || $token[0] === \T_DOC_COMMENT) - && substr($token[1], 0, 2) === '/*' - && substr($token[1], -2) !== '*/'; + private function isUnterminatedComment(Token $token) : bool { + return ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT) + && substr($token->value, 0, 2) === '/*' + && substr($token->value, -2) !== '*/'; } /** @@ -146,75 +144,65 @@ class Lexer return null !== error_get_last(); } - protected 
function handleErrors(ErrorHandler $errorHandler) { - if (!$this->errorMayHaveOccurred()) { - return; - } - - // PHP's error handling for token_get_all() is rather bad, so if we want detailed - // error information we need to compute it ourselves. Invalid character errors are - // detected by finding "gaps" in the token array. Unterminated comments are detected - // by checking if a trailing comment has a "*/" at the end. + private function createNormalizedTokens(string $code, ErrorHandler $errorHandler) { + error_clear_last(); + $rawTokens = @token_get_all($code); + $checkForMissingTokens = $this->errorMayHaveOccurred(); + $tokens = []; $filePos = 0; $line = 1; - $numTokens = \count($this->tokens); - for ($i = 0; $i < $numTokens; $i++) { - $token = $this->tokens[$i]; - $tokenValue = \is_string($token) ? $token : $token[1]; - $tokenLen = \strlen($tokenValue); + foreach ($rawTokens as $rawToken) { + if (\is_array($rawToken)) { + $token = new Token($rawToken[0], $rawToken[1], $line, $filePos); + } elseif (\strlen($rawToken) == 2) { + // Bug in token_get_all() when lexing b". 
+ $token = new Token(\ord('"'), $rawToken, $line, $filePos); + } else { + $token = new Token(\ord($rawToken), $rawToken, $line, $filePos); + } - if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) { + $value = $token->value; + $tokenLen = \strlen($value); + if ($checkForMissingTokens && substr($code, $filePos, $tokenLen) !== $value) { // Something is missing, must be an invalid character - $nextFilePos = strpos($this->code, $tokenValue, $filePos); + $nextFilePos = strpos($code, $value, $filePos); $badCharTokens = $this->handleInvalidCharacterRange( $filePos, $nextFilePos, $line, $errorHandler); + $tokens = array_merge($tokens, $badCharTokens); $filePos = (int) $nextFilePos; - - array_splice($this->tokens, $i, 0, $badCharTokens); - $numTokens += \count($badCharTokens); - $i += \count($badCharTokens); } + $tokens[] = $token; $filePos += $tokenLen; - $line += substr_count($tokenValue, "\n"); + $line += substr_count($value, "\n"); } - if ($filePos !== \strlen($this->code)) { - if (substr($this->code, $filePos, 2) === '/*') { - // Unlike PHP, HHVM will drop unterminated comments entirely - $comment = substr($this->code, $filePos); - $errorHandler->handleError(new Error('Unterminated comment', [ - 'startLine' => $line, - 'endLine' => $line + substr_count($comment, "\n"), - 'startFilePos' => $filePos, - 'endFilePos' => $filePos + \strlen($comment), - ])); - - // Emulate the PHP behavior - $isDocComment = isset($comment[3]) && $comment[3] === '*'; - $this->tokens[] = [$isDocComment ? 
\T_DOC_COMMENT : \T_COMMENT, $comment, $line]; - } else { - // Invalid characters at the end of the input - $badCharTokens = $this->handleInvalidCharacterRange( - $filePos, \strlen($this->code), $line, $errorHandler); - $this->tokens = array_merge($this->tokens, $badCharTokens); - } - return; + if ($filePos !== \strlen($code)) { + // Invalid characters at the end of the input + $badCharTokens = $this->handleInvalidCharacterRange( + $filePos, \strlen($code), $line, $errorHandler); + $tokens = array_merge($tokens, $badCharTokens); } - if (count($this->tokens) > 0) { + if (\count($tokens) > 0) { // Check for unterminated comment - $lastToken = $this->tokens[count($this->tokens) - 1]; + $lastToken = $tokens[\count($tokens) - 1]; if ($this->isUnterminatedComment($lastToken)) { $errorHandler->handleError(new Error('Unterminated comment', [ - 'startLine' => $line - substr_count($lastToken[1], "\n"), + 'startLine' => $line - substr_count($lastToken->value, "\n"), 'endLine' => $line, - 'startFilePos' => $filePos - \strlen($lastToken[1]), + 'startFilePos' => $filePos - \strlen($lastToken->value), 'endFilePos' => $filePos, ])); } } + + // Add an EOF sentinel token + // TODO: Should the value be an empty string instead? 
+ $tokens[] = new Token(0, "\0", $line, \strlen($code)); + + return $tokens; } /** @@ -244,70 +232,49 @@ class Lexer $endAttributes = []; while (1) { - if (isset($this->tokens[++$this->pos])) { - $token = $this->tokens[$this->pos]; - } else { - // EOF token with ID 0 - $token = "\0"; - } + $token = $this->tokens[++$this->pos]; if ($this->attributeStartLineUsed) { - $startAttributes['startLine'] = $this->line; + $startAttributes['startLine'] = $token->line; } if ($this->attributeStartTokenPosUsed) { $startAttributes['startTokenPos'] = $this->pos; } if ($this->attributeStartFilePosUsed) { - $startAttributes['startFilePos'] = $this->filePos; + $startAttributes['startFilePos'] = $token->filePos; } - if (\is_string($token)) { - $value = $token; - if (isset($token[1])) { - // bug in token_get_all - $this->filePos += 2; - $id = ord('"'); - } else { - $this->filePos += 1; - $id = ord($token); - } - } elseif (!isset($this->dropTokens[$token[0]])) { - $value = $token[1]; - $id = $this->tokenMap[$token[0]]; - if (\T_CLOSE_TAG === $token[0]) { - $this->prevCloseTagHasNewline = false !== strpos($token[1], "\n"); - } elseif (\T_INLINE_HTML === $token[0]) { + $phpId = $token->id; + $value = $token->value; + if (!isset($this->dropTokens[$phpId])) { + $id = $this->tokenMap[$phpId]; + if (\T_CLOSE_TAG === $phpId) { + $this->prevCloseTagHasNewline = false !== strpos($value, "\n"); + } elseif (\T_INLINE_HTML === $phpId) { $startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline; } - $this->line += substr_count($value, "\n"); - $this->filePos += \strlen($value); - } else { - if (\T_COMMENT === $token[0] || \T_DOC_COMMENT === $token[0]) { - if ($this->attributeCommentsUsed) { - $comment = \T_DOC_COMMENT === $token[0] - ? 
new Comment\Doc($token[1], $this->line, $this->filePos, $this->pos) - : new Comment($token[1], $this->line, $this->filePos, $this->pos); - $startAttributes['comments'][] = $comment; - } + if ($this->attributeEndLineUsed) { + $endAttributes['endLine'] = $token->line + substr_count($value, "\n"); + } + if ($this->attributeEndTokenPosUsed) { + $endAttributes['endTokenPos'] = $this->pos; + } + if ($this->attributeEndFilePosUsed) { + $endAttributes['endFilePos'] = $token->filePos + \strlen($value) - 1; } - $this->line += substr_count($token[1], "\n"); - $this->filePos += \strlen($token[1]); - continue; + return $id; } - if ($this->attributeEndLineUsed) { - $endAttributes['endLine'] = $this->line; + if (\T_COMMENT === $phpId || \T_DOC_COMMENT === $phpId) { + if ($this->attributeCommentsUsed) { + $comment = \T_DOC_COMMENT === $phpId + ? new Comment\Doc($value, $token->line, $token->filePos, $this->pos) + : new Comment($value, $token->line, $token->filePos, $this->pos); + $startAttributes['comments'][] = $comment; + } } - if ($this->attributeEndTokenPosUsed) { - $endAttributes['endTokenPos'] = $this->pos; - } - if ($this->attributeEndFilePosUsed) { - $endAttributes['endFilePos'] = $this->filePos - 1; - } - - return $id; } throw new \RuntimeException('Reached end of lexer loop'); @@ -333,8 +300,9 @@ class Lexer * @return string Remaining text */ public function handleHaltCompiler() : string { - // text after T_HALT_COMPILER, still including (); - $textAfter = substr($this->code, $this->filePos); + // Text after T_HALT_COMPILER, still including (); + $tokenAfter = $this->tokens[$this->pos + 1]; + $textAfter = substr($this->code, $tokenAfter->filePos); // ensure that it is followed by (); // this simplifies the situation, by not allowing any comments @@ -343,8 +311,8 @@ class Lexer throw new Error('__HALT_COMPILER must be followed by "();"'); } - // prevent the lexer from returning any further tokens - $this->pos = count($this->tokens); + // Point to one before EOF token, so 
it will be returned on the getNextToken() call + $this->pos = count($this->tokens) - 2; // return with (); removed return substr($textAfter, strlen($matches[0])); @@ -362,9 +330,12 @@ class Lexer protected function createTokenMap() : array { $tokenMap = []; - // 256 is the minimum possible token number, as everything below - // it is an ASCII value - for ($i = 256; $i < 1000; ++$i) { + // ASCII values map to themselves. + for ($i = 0; $i < 256; ++$i) { + $tokenMap[$i] = $i; + } + + for (; $i < 1000; ++$i) { if (\T_DOUBLE_COLON === $i) { // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM $tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM; diff --git a/lib/PhpParser/Lexer/Emulative.php b/lib/PhpParser/Lexer/Emulative.php index 864bffa..fab0d0c 100644 --- a/lib/PhpParser/Lexer/Emulative.php +++ b/lib/PhpParser/Lexer/Emulative.php @@ -159,13 +159,7 @@ REGEX; $pos = 0; for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) { $token = $this->tokens[$i]; - if (\is_string($token)) { - // We assume that patches don't apply to string tokens - $pos += \strlen($token); - continue; - } - - $len = \strlen($token[1]); + $len = \strlen($token->value); $posDelta = 0; while ($patchPos >= $pos && $patchPos < $pos + $len) { $patchTextLen = \strlen($patchText); @@ -177,15 +171,15 @@ REGEX; $c--; } else { // Remove from token string - $this->tokens[$i][1] = substr_replace( - $token[1], '', $patchPos - $pos + $posDelta, $patchTextLen + $this->tokens[$i]->value = substr_replace( + $token->value, '', $patchPos - $pos + $posDelta, $patchTextLen ); $posDelta -= $patchTextLen; } } elseif ($patchType === 'add') { // Insert into the token string - $this->tokens[$i][1] = substr_replace( - $token[1], $patchText, $patchPos - $pos + $posDelta, 0 + $this->tokens[$i]->value = substr_replace( + $token->value, $patchText, $patchPos - $pos + $posDelta, 0 ); $posDelta += $patchTextLen; } else { @@ -196,7 +190,7 @@ REGEX; $patchIdx++; if ($patchIdx >= \count($this->patches)) { // No more patches, 
we're done - return; + break 2; } list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx]; @@ -209,8 +203,12 @@ REGEX; $pos += $len; } - // A patch did not apply - assert(false); + // To retain a minimum amount of sanity, recompute token offsets in a separate loop... + $pos = 0; + foreach ($this->tokens as $token) { + $token->filePos = $pos; + $pos += \strlen($token->value); + } } /** diff --git a/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php index 10f7e1e..fba12fa 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php +++ b/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php @@ -3,6 +3,7 @@ namespace PhpParser\Lexer\TokenEmulator; use PhpParser\Lexer\Emulative; +use PhpParser\Token; final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface { @@ -20,20 +21,17 @@ final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface { // We need to manually iterate and manage a count because we'll change // the tokens array on the way - $line = 1; for ($i = 0, $c = count($tokens); $i < $c; ++$i) { if (isset($tokens[$i + 1])) { - if ($tokens[$i][0] === T_COALESCE && $tokens[$i + 1] === '=') { + $token = $tokens[$i]; + if ($token->id === \T_COALESCE && $tokens[$i + 1]->value === '=') { array_splice($tokens, $i, 2, [ - [Emulative::T_COALESCE_EQUAL, '??=', $line] + new Token(Emulative::T_COALESCE_EQUAL, '??=', $token->line, $token->filePos), ]); $c--; continue; } } - if (\is_array($tokens[$i])) { - $line += substr_count($tokens[$i][1], "\n"); - } } return $tokens; diff --git a/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php index 0939dd4..66e977c 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php +++ b/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php @@ -3,6 +3,7 @@ namespace PhpParser\Lexer\TokenEmulator; use PhpParser\Lexer\Emulative; +use 
PhpParser\Token; final class FnTokenEmulator implements TokenEmulatorInterface { @@ -21,13 +22,13 @@ final class FnTokenEmulator implements TokenEmulatorInterface // We need to manually iterate and manage a count because we'll change // the tokens array on the way foreach ($tokens as $i => $token) { - if ($token[0] === T_STRING && $token[1] === 'fn') { + if ($token->id === \T_STRING && $token->value === 'fn') { $previousNonSpaceToken = $this->getPreviousNonSpaceToken($tokens, $i); - if ($previousNonSpaceToken !== null && $previousNonSpaceToken[0] === T_OBJECT_OPERATOR) { + if ($previousNonSpaceToken !== null && $previousNonSpaceToken->id === T_OBJECT_OPERATOR) { continue; } - $tokens[$i][0] = Emulative::T_FN; + $token->id = Emulative::T_FN; } } @@ -35,13 +36,13 @@ final class FnTokenEmulator implements TokenEmulatorInterface } /** - * @param mixed[] $tokens - * @return mixed[]|null + * @param Token[] $tokens + * @return Token|null */ private function getPreviousNonSpaceToken(array $tokens, int $start) { for ($i = $start - 1; $i >= 0; --$i) { - if ($tokens[$i][0] === T_WHITESPACE) { + if ($tokens[$i]->id === \T_WHITESPACE) { continue; } diff --git a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php index f564c59..fc8c959 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php +++ b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php @@ -3,6 +3,7 @@ namespace PhpParser\Lexer\TokenEmulator; use PhpParser\Lexer\Emulative; +use PhpParser\Token; final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface { @@ -24,47 +25,46 @@ final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface return preg_match('~[0-9a-f]_[0-9a-f]~i', $code) !== false; } + /** + * @param Token[] $tokens + * @return Token[] + */ public function emulate(string $code, array $tokens): array { // We need to manually iterate and 
manage a count because we'll change // the tokens array on the way - $codeOffset = 0; for ($i = 0, $c = count($tokens); $i < $c; ++$i) { $token = $tokens[$i]; - $tokenLen = \strlen(\is_array($token) ? $token[1] : $token); + $tokenLen = \strlen($token->value); - if ($token[0] !== T_LNUMBER && $token[0] !== T_DNUMBER) { - $codeOffset += $tokenLen; + if ($token->id !== T_LNUMBER && $token->id !== T_DNUMBER) { continue; } - $res = preg_match(self::NUMBER, $code, $matches, 0, $codeOffset); + $res = preg_match(self::NUMBER, $code, $matches, 0, $token->filePos); assert($res, "No number at number token position"); $match = $matches[0]; $matchLen = \strlen($match); if ($matchLen === $tokenLen) { // Original token already holds the full number. - $codeOffset += $tokenLen; continue; } $tokenKind = $this->resolveIntegerOrFloatToken($match); - $newTokens = [[$tokenKind, $match, $token[2]]]; + $newTokens = [new Token($tokenKind, $match, $token->line, $token->filePos)]; $numTokens = 1; $len = $tokenLen; while ($matchLen > $len) { $nextToken = $tokens[$i + $numTokens]; - $nextTokenText = \is_array($nextToken) ? $nextToken[1] : $nextToken; - $nextTokenLen = \strlen($nextTokenText); + $nextTokenLen = \strlen($nextToken->value); $numTokens++; if ($matchLen < $len + $nextTokenLen) { // Split trailing characters into a partial token. 
- assert(is_array($nextToken), "Partial token should be an array token"); - $partialText = substr($nextTokenText, $matchLen - $len); - $newTokens[] = [$nextToken[0], $partialText, $nextToken[2]]; + $partialText = substr($nextToken->value, $matchLen - $len); + $newTokens[] = new Token($nextToken->id, $partialText, $nextToken->line, $nextToken->filePos); break; } @@ -73,7 +73,6 @@ final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface array_splice($tokens, $i, $numTokens, $newTokens); $c -= $numTokens - \count($newTokens); - $codeOffset += $matchLen; } return $tokens; diff --git a/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php b/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php index e03ccc6..b2fd997 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php +++ b/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php @@ -2,13 +2,16 @@ namespace PhpParser\Lexer\TokenEmulator; +use PhpParser\Token; + /** @internal */ interface TokenEmulatorInterface { public function isEmulationNeeded(string $code): bool; /** - * @return array Modified Tokens + * @param Token[] $tokens + * @return Token[] */ public function emulate(string $code, array $tokens): array; } diff --git a/lib/PhpParser/PrettyPrinterAbstract.php b/lib/PhpParser/PrettyPrinterAbstract.php index 90f10c4..df95c78 100644 --- a/lib/PhpParser/PrettyPrinterAbstract.php +++ b/lib/PhpParser/PrettyPrinterAbstract.php @@ -491,7 +491,7 @@ abstract class PrettyPrinterAbstract $pos = 0; $result = $this->pArray($stmts, $origStmts, $pos, 0, 'File', 'stmts', null); if (null !== $result) { - $result .= $this->origTokens->getTokenCode($pos, count($origTokens), 0); + $result .= $this->origTokens->getTokenCode($pos, count($origTokens) - 1, 0); } else { // Fallback // TODO Add \T_WHITESPACE]; $stripRight = ['right' => \T_WHITESPACE]; $stripDoubleArrow = ['right' => \T_DOUBLE_ARROW]; - $stripColon = ['left' => ':']; - $stripEquals = ['left' => '=']; + $stripColon 
= ['left' => \ord(':')]; + $stripEquals = ['left' => \ord('=')]; $this->removalMap = [ 'Expr_ArrayDimFetch->dim' => $stripBoth, 'Expr_ArrayItem->key' => $stripDoubleArrow, @@ -1254,22 +1254,22 @@ abstract class PrettyPrinterAbstract // TODO: "yield" where both key and value are inserted doesn't work // [$find, $beforeToken, $extraLeft, $extraRight] $this->insertionMap = [ - 'Expr_ArrayDimFetch->dim' => ['[', false, null, null], + 'Expr_ArrayDimFetch->dim' => [\ord('['), false, null, null], 'Expr_ArrayItem->key' => [null, false, null, ' => '], - 'Expr_ArrowFunction->returnType' => [')', false, ' : ', null], - 'Expr_Closure->returnType' => [')', false, ' : ', null], - 'Expr_Ternary->if' => ['?', false, ' ', ' '], + 'Expr_ArrowFunction->returnType' => [\ord(')'), false, ' : ', null], + 'Expr_Closure->returnType' => [\ord(')'), false, ' : ', null], + 'Expr_Ternary->if' => [\ord('?'), false, ' ', ' '], 'Expr_Yield->key' => [\T_YIELD, false, null, ' => '], 'Expr_Yield->value' => [\T_YIELD, false, ' ', null], 'Param->type' => [null, false, null, ' '], 'Param->default' => [null, false, ' = ', null], 'Stmt_Break->num' => [\T_BREAK, false, ' ', null], - 'Stmt_ClassMethod->returnType' => [')', false, ' : ', null], + 'Stmt_ClassMethod->returnType' => [\ord(')'), false, ' : ', null], 'Stmt_Class->extends' => [null, false, ' extends ', null], 'Expr_PrintableNewAnonClass->extends' => [null, ' extends ', null], 'Stmt_Continue->num' => [\T_CONTINUE, false, ' ', null], 'Stmt_Foreach->keyVar' => [\T_AS, false, null, ' => '], - 'Stmt_Function->returnType' => [')', false, ' : ', null], + 'Stmt_Function->returnType' => [\ord(')'), false, ' : ', null], 'Stmt_If->else' => [null, false, ' ', null], 'Stmt_Namespace->name' => [\T_NAMESPACE, false, ' ', null], 'Stmt_Property->type' => [\T_VARIABLE, true, null, ' '], @@ -1367,19 +1367,19 @@ abstract class PrettyPrinterAbstract // [$find, $extraLeft, $extraRight] $this->emptyListInsertionMap = [ - 'Expr_ArrowFunction->params' => ['(', '', ''], 
- 'Expr_Closure->uses' => [')', ' use(', ')'], - 'Expr_Closure->params' => ['(', '', ''], - 'Expr_FuncCall->args' => ['(', '', ''], - 'Expr_MethodCall->args' => ['(', '', ''], - 'Expr_New->args' => ['(', '', ''], - 'Expr_PrintableNewAnonClass->args' => ['(', '', ''], + 'Expr_ArrowFunction->params' => [\ord('('), '', ''], + 'Expr_Closure->uses' => [\ord(')'), ' use(', ')'], + 'Expr_Closure->params' => [\ord('('), '', ''], + 'Expr_FuncCall->args' => [\ord('('), '', ''], + 'Expr_MethodCall->args' => [\ord('('), '', ''], + 'Expr_New->args' => [\ord('('), '', ''], + 'Expr_PrintableNewAnonClass->args' => [\ord('('), '', ''], 'Expr_PrintableNewAnonClass->implements' => [null, ' implements ', ''], - 'Expr_StaticCall->args' => ['(', '', ''], + 'Expr_StaticCall->args' => [\ord('('), '', ''], 'Stmt_Class->implements' => [null, ' implements ', ''], - 'Stmt_ClassMethod->params' => ['(', '', ''], + 'Stmt_ClassMethod->params' => [\ord('('), '', ''], 'Stmt_Interface->extends' => [null, ' extends ', ''], - 'Stmt_Function->params' => ['(', '', ''], + 'Stmt_Function->params' => [\ord('('), '', ''], /* These cannot be empty to start with: * Expr_Isset->vars diff --git a/lib/PhpParser/Token.php b/lib/PhpParser/Token.php new file mode 100644 index 0000000..d058cbf --- /dev/null +++ b/lib/PhpParser/Token.php @@ -0,0 +1,21 @@ +<?php declare(strict_types=1); + +namespace PhpParser; + +class Token { + /** @var int */ + public $id; + /** @var string */ + public $value; + /** @var int */ + public $line; + /** @var int */ + public $filePos; + + public function __construct(int $id, string $value, int $line, int $filePos) { + $this->id = $id; + $this->value = $value; + $this->line = $line; + $this->filePos = $filePos; + } +} \ No newline at end of file diff --git a/test/PhpParser/LexerTest.php b/test/PhpParser/LexerTest.php index f24c64d..632384f 100644 --- a/test/PhpParser/LexerTest.php +++ b/test/PhpParser/LexerTest.php @@ -247,17 +247,18 @@ class LexerTest extends \PHPUnit\Framework\TestCase public function testGetTokens() { $code = 'getLexer(); $lexer->startLexing($code); - $this->assertSame($expectedTokens, $lexer->getTokens()); + $this->assertEquals($expectedTokens, $lexer->getTokens()); } }