From 6f74784e16288f07435e68bff5ca20a4e58342ed Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 30 Jun 2019 13:10:12 +0200
Subject: [PATCH] Switch to a normalized token representation

Each token is now represented by a Token object.
---
 lib/PhpParser/Internal/TokenStream.php        |  60 +++---
 lib/PhpParser/Lexer.php                       | 191 ++++++++----------
 lib/PhpParser/Lexer/Emulative.php             |  26 ++-
 .../CoaleseEqualTokenEmulator.php             |  10 +-
 .../Lexer/TokenEmulator/FnTokenEmulator.php   |  13 +-
 .../NumericLiteralSeparatorEmulator.php       |  25 ++-
 .../TokenEmulator/TokenEmulatorInterface.php  |   5 +-
 lib/PhpParser/PrettyPrinterAbstract.php       |  38 ++--
 lib/PhpParser/Token.php                       |  21 ++
 test/PhpParser/LexerTest.php                  |  17 +-
 10 files changed, 198 insertions(+), 208 deletions(-)
 create mode 100644 lib/PhpParser/Token.php

diff --git a/lib/PhpParser/Internal/TokenStream.php b/lib/PhpParser/Internal/TokenStream.php
index cf9e00a..26ea92b 100644
--- a/lib/PhpParser/Internal/TokenStream.php
+++ b/lib/PhpParser/Internal/TokenStream.php
@@ -2,6 +2,8 @@
 
 namespace PhpParser\Internal;
 
+use PhpParser\Token;
+
 /**
  * Provides operations on token streams, for use by pretty printer.
  *
@@ -9,7 +11,7 @@ namespace PhpParser\Internal;
  */
 class TokenStream
 {
-    /** @var array Tokens (in token_get_all format) */
+    /** @var Token[] */
     private $tokens;
     /** @var int[] Map from position to indentation */
     private $indentMap;
@@ -17,7 +19,7 @@ class TokenStream
     /**
      * Create token stream instance.
      *
-     * @param array $tokens Tokens in token_get_all() format
+     * @param Token[] $tokens Tokens
      */
     public function __construct(array $tokens) {
         $this->tokens = $tokens;
@@ -33,8 +35,8 @@ class TokenStream
      * @return bool
      */
     public function haveParens(int $startPos, int $endPos) : bool {
-        return $this->haveTokenImmediativelyBefore($startPos, '(')
-            && $this->haveTokenImmediatelyAfter($endPos, ')');
+        return $this->haveTokenImmediativelyBefore($startPos, \ord('('))
+            && $this->haveTokenImmediatelyAfter($endPos, \ord(')'));
     }
 
     /**
@@ -46,8 +48,8 @@ class TokenStream
      * @return bool
      */
     public function haveBraces(int $startPos, int $endPos) : bool {
-        return $this->haveTokenImmediativelyBefore($startPos, '{')
-            && $this->haveTokenImmediatelyAfter($endPos, '}');
+        return $this->haveTokenImmediativelyBefore($startPos, \ord('{'))
+            && $this->haveTokenImmediatelyAfter($endPos, \ord('}'));
     }
 
     /**
@@ -64,7 +66,7 @@ class TokenStream
         $tokens = $this->tokens;
         $pos--;
         for (; $pos >= 0; $pos--) {
-            $tokenType = $tokens[$pos][0];
+            $tokenType = $tokens[$pos]->id;
             if ($tokenType === $expectedTokenType) {
                 return true;
             }
@@ -90,7 +92,7 @@ class TokenStream
         $tokens = $this->tokens;
         $pos++;
         for (; $pos < \count($tokens); $pos++) {
-            $tokenType = $tokens[$pos][0];
+            $tokenType = $tokens[$pos]->id;
             if ($tokenType === $expectedTokenType) {
                 return true;
             }
@@ -110,7 +112,7 @@ class TokenStream
             return $pos;
         }
 
-        if ($tokens[$pos][0] !== $skipTokenType) {
+        if ($tokens[$pos]->id !== $skipTokenType) {
             // Shouldn't happen. The skip token MUST be there
             throw new \Exception('Encountered unexpected token');
         }
@@ -127,7 +129,7 @@ class TokenStream
             return $pos;
         }
 
-        if ($tokens[$pos][0] !== $skipTokenType) {
+        if ($tokens[$pos]->id !== $skipTokenType) {
             // Shouldn't happen. The skip token MUST be there
             throw new \Exception('Encountered unexpected token');
         }
@@ -145,7 +147,7 @@ class TokenStream
     public function skipLeftWhitespace(int $pos) {
         $tokens = $this->tokens;
         for (; $pos >= 0; $pos--) {
-            $type = $tokens[$pos][0];
+            $type = $tokens[$pos]->id;
             if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) {
                 break;
             }
@@ -162,7 +164,7 @@ class TokenStream
     public function skipRightWhitespace(int $pos) {
         $tokens = $this->tokens;
         for ($count = \count($tokens); $pos < $count; $pos++) {
-            $type = $tokens[$pos][0];
+            $type = $tokens[$pos]->id;
             if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) {
                 break;
             }
@@ -173,7 +175,7 @@ class TokenStream
     public function findRight($pos, $findTokenType) {
         $tokens = $this->tokens;
         for ($count = \count($tokens); $pos < $count; $pos++) {
-            $type = $tokens[$pos][0];
+            $type = $tokens[$pos]->id;
             if ($type === $findTokenType) {
                 return $pos;
             }
@@ -206,23 +208,19 @@ class TokenStream
         $result = '';
         for ($pos = $from; $pos < $to; $pos++) {
             $token = $tokens[$pos];
-            if (\is_array($token)) {
-                $type = $token[0];
-                $content = $token[1];
-                if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) {
-                    $result .= $content;
-                } else {
-                    // TODO Handle non-space indentation
-                    if ($indent < 0) {
-                        $result .= str_replace("\n" . str_repeat(" ", -$indent), "\n", $content);
-                    } elseif ($indent > 0) {
-                        $result .= str_replace("\n", "\n" . str_repeat(" ", $indent), $content);
-                    } else {
-                        $result .= $content;
-                    }
-                }
+            $type = $token->id;
+            $content = $token->value;
+            if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) {
+                $result .= $content;
             } else {
-                $result .= $token;
+                // TODO Handle non-space indentation
+                if ($indent < 0) {
+                    $result .= str_replace("\n" . str_repeat(" ", -$indent), "\n", $content);
+                } elseif ($indent > 0) {
+                    $result .= str_replace("\n", "\n" . str_repeat(" ", $indent), $content);
+                } else {
+                    $result .= $content;
+                }
             }
         }
         return $result;
@@ -239,8 +237,8 @@ class TokenStream
         foreach ($this->tokens as $token) {
             $indentMap[] = $indent;
 
-            if ($token[0] === \T_WHITESPACE) {
-                $content = $token[1];
+            if ($token->id === \T_WHITESPACE) {
+                $content = $token->value;
                 $newlinePos = \strrpos($content, "\n");
                 if (false !== $newlinePos) {
                     $indent = \strlen($content) - $newlinePos - 1;
diff --git a/lib/PhpParser/Lexer.php b/lib/PhpParser/Lexer.php
index 51ffd1e..23ca80c 100644
--- a/lib/PhpParser/Lexer.php
+++ b/lib/PhpParser/Lexer.php
@@ -11,11 +11,11 @@ class Lexer
      * file positions from going out of sync. */
     const T_BAD_CHARACTER = -1;
 
+    /** @var string */
     protected $code;
+    /** @var Token[] */
     protected $tokens;
     protected $pos;
-    protected $line;
-    protected $filePos;
     protected $prevCloseTagHasNewline;
 
     protected $tokenMap;
@@ -87,9 +87,7 @@ class Lexer
 
         $scream = ini_set('xdebug.scream', '0');
 
-        error_clear_last();
-        $this->tokens = @token_get_all($code);
-        $this->handleErrors($errorHandler);
+        $this->tokens = $this->createNormalizedTokens($code, $errorHandler);
 
         if (false !== $scream) {
             ini_set('xdebug.scream', $scream);
@@ -109,7 +107,7 @@ class Lexer
                 );
             }
 
-            $tokens[] = [self::T_BAD_CHARACTER, $chr, $line];
+            $tokens[] = new Token(self::T_BAD_CHARACTER, $chr, $line, $i);
             $errorHandler->handleError(new Error($errorMsg, [
                 'startLine' => $line,
                 'endLine' => $line,
@@ -125,10 +123,10 @@ class Lexer
      *
      * @return bool
      */
-    private function isUnterminatedComment($token) : bool {
-        return ($token[0] === \T_COMMENT || $token[0] === \T_DOC_COMMENT)
-            && substr($token[1], 0, 2) === '/*'
-            && substr($token[1], -2) !== '*/';
+    private function isUnterminatedComment(Token $token) : bool {
+        return ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT)
+            && substr($token->value, 0, 2) === '/*'
+            && substr($token->value, -2) !== '*/';
     }
 
     /**
@@ -146,75 +144,65 @@ class Lexer
         return null !== error_get_last();
     }
 
-    protected function handleErrors(ErrorHandler $errorHandler) {
-        if (!$this->errorMayHaveOccurred()) {
-            return;
-        }
-
-        // PHP's error handling for token_get_all() is rather bad, so if we want detailed
-        // error information we need to compute it ourselves. Invalid character errors are
-        // detected by finding "gaps" in the token array. Unterminated comments are detected
-        // by checking if a trailing comment has a "*/" at the end.
+    private function createNormalizedTokens(string $code, ErrorHandler $errorHandler) {
+        error_clear_last();
+        $rawTokens = @token_get_all($code);
+        $checkForMissingTokens = $this->errorMayHaveOccurred();
 
+        $tokens = [];
         $filePos = 0;
         $line = 1;
-        $numTokens = \count($this->tokens);
-        for ($i = 0; $i < $numTokens; $i++) {
-            $token = $this->tokens[$i];
-            $tokenValue = \is_string($token) ? $token : $token[1];
-            $tokenLen = \strlen($tokenValue);
+        foreach ($rawTokens as $rawToken) {
+            if (\is_array($rawToken)) {
+                $token = new Token($rawToken[0], $rawToken[1], $line, $filePos);
+            } elseif (\strlen($rawToken) === 2) {
+                // token_get_all() bug: b" comes back as one two-character string; treat it as '"'.
+                $token = new Token(\ord('"'), $rawToken, $line, $filePos);
+            } else {
+                $token = new Token(\ord($rawToken), $rawToken, $line, $filePos);
+            }
 
-            if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) {
+            $value = $token->value;
+            $tokenLen = \strlen($value);
+            if ($checkForMissingTokens && substr($code, $filePos, $tokenLen) !== $value) {
                 // Something is missing, must be an invalid character
-                $nextFilePos = strpos($this->code, $tokenValue, $filePos);
+                $nextFilePos = strpos($code, $value, $filePos);
                 $badCharTokens = $this->handleInvalidCharacterRange(
                     $filePos, $nextFilePos, $line, $errorHandler);
+                $tokens = array_merge($tokens, $badCharTokens);
-                $filePos = (int) $nextFilePos;
-
-                array_splice($this->tokens, $i, 0, $badCharTokens);
-                $numTokens += \count($badCharTokens);
-                $i += \count($badCharTokens);
+                $filePos = $token->filePos = (int) $nextFilePos; // Token was created before the gap was skipped.
             }
 
+            $tokens[] = $token;
             $filePos += $tokenLen;
-            $line += substr_count($tokenValue, "\n");
+            $line += substr_count($value, "\n");
         }
 
-        if ($filePos !== \strlen($this->code)) {
-            if (substr($this->code, $filePos, 2) === '/*') {
-                // Unlike PHP, HHVM will drop unterminated comments entirely
-                $comment = substr($this->code, $filePos);
-                $errorHandler->handleError(new Error('Unterminated comment', [
-                    'startLine' => $line,
-                    'endLine' => $line + substr_count($comment, "\n"),
-                    'startFilePos' => $filePos,
-                    'endFilePos' => $filePos + \strlen($comment),
-                ]));
-
-                // Emulate the PHP behavior
-                $isDocComment = isset($comment[3]) && $comment[3] === '*';
-                $this->tokens[] = [$isDocComment ? \T_DOC_COMMENT : \T_COMMENT, $comment, $line];
-            } else {
-                // Invalid characters at the end of the input
-                $badCharTokens = $this->handleInvalidCharacterRange(
-                    $filePos, \strlen($this->code), $line, $errorHandler);
-                $this->tokens = array_merge($this->tokens, $badCharTokens);
-            }
-            return;
+        if ($filePos !== \strlen($code)) {
+            // Invalid characters at the end of the input
+            $badCharTokens = $this->handleInvalidCharacterRange(
+                $filePos, \strlen($code), $line, $errorHandler);
+            $tokens = array_merge($tokens, $badCharTokens);
         }
 
-        if (count($this->tokens) > 0) {
+        if (\count($tokens) > 0) {
             // Check for unterminated comment
-            $lastToken = $this->tokens[count($this->tokens) - 1];
+            $lastToken = $tokens[\count($tokens) - 1];
             if ($this->isUnterminatedComment($lastToken)) {
                 $errorHandler->handleError(new Error('Unterminated comment', [
-                    'startLine' => $line - substr_count($lastToken[1], "\n"),
+                    'startLine' => $line - substr_count($lastToken->value, "\n"),
                     'endLine' => $line,
-                    'startFilePos' => $filePos - \strlen($lastToken[1]),
+                    'startFilePos' => $filePos - \strlen($lastToken->value),
                     'endFilePos' => $filePos,
                 ]));
             }
         }
+
+        // Add an EOF sentinel token
+        // TODO: Should the value be an empty string instead?
+        $tokens[] = new Token(0, "\0", $line, \strlen($code));
+
+        return $tokens;
     }
 
     /**
@@ -244,70 +232,49 @@ class Lexer
         $endAttributes   = [];
 
         while (1) {
-            if (isset($this->tokens[++$this->pos])) {
-                $token = $this->tokens[$this->pos];
-            } else {
-                // EOF token with ID 0
-                $token = "\0";
-            }
+            $token = $this->tokens[++$this->pos];
 
             if ($this->attributeStartLineUsed) {
-                $startAttributes['startLine'] = $this->line;
+                $startAttributes['startLine'] = $token->line;
             }
             if ($this->attributeStartTokenPosUsed) {
                 $startAttributes['startTokenPos'] = $this->pos;
             }
             if ($this->attributeStartFilePosUsed) {
-                $startAttributes['startFilePos'] = $this->filePos;
+                $startAttributes['startFilePos'] = $token->filePos;
             }
 
-            if (\is_string($token)) {
-                $value = $token;
-                if (isset($token[1])) {
-                    // bug in token_get_all
-                    $this->filePos += 2;
-                    $id = ord('"');
-                } else {
-                    $this->filePos += 1;
-                    $id = ord($token);
-                }
-            } elseif (!isset($this->dropTokens[$token[0]])) {
-                $value = $token[1];
-                $id = $this->tokenMap[$token[0]];
-                if (\T_CLOSE_TAG === $token[0]) {
-                    $this->prevCloseTagHasNewline = false !== strpos($token[1], "\n");
-                } elseif (\T_INLINE_HTML === $token[0]) {
+            $phpId = $token->id;
+            $value = $token->value;
+            if (!isset($this->dropTokens[$phpId])) {
+                $id = $this->tokenMap[$phpId];
+                if (\T_CLOSE_TAG === $phpId) {
+                    $this->prevCloseTagHasNewline = false !== strpos($value, "\n");
+                } elseif (\T_INLINE_HTML === $phpId) {
                     $startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline;
                 }
 
-                $this->line += substr_count($value, "\n");
-                $this->filePos += \strlen($value);
-            } else {
-                if (\T_COMMENT === $token[0] || \T_DOC_COMMENT === $token[0]) {
-                    if ($this->attributeCommentsUsed) {
-                        $comment = \T_DOC_COMMENT === $token[0]
-                            ? new Comment\Doc($token[1], $this->line, $this->filePos, $this->pos)
-                            : new Comment($token[1], $this->line, $this->filePos, $this->pos);
-                        $startAttributes['comments'][] = $comment;
-                    }
+                if ($this->attributeEndLineUsed) {
+                    $endAttributes['endLine'] = $token->line + substr_count($value, "\n");
+                }
+                if ($this->attributeEndTokenPosUsed) {
+                    $endAttributes['endTokenPos'] = $this->pos;
+                }
+                if ($this->attributeEndFilePosUsed) {
+                    $endAttributes['endFilePos'] = $token->filePos + \strlen($value) - 1;
                 }
 
-                $this->line += substr_count($token[1], "\n");
-                $this->filePos += \strlen($token[1]);
-                continue;
+                return $id;
             }
 
-            if ($this->attributeEndLineUsed) {
-                $endAttributes['endLine'] = $this->line;
+            if (\T_COMMENT === $phpId || \T_DOC_COMMENT === $phpId) {
+                if ($this->attributeCommentsUsed) {
+                    $comment = \T_DOC_COMMENT === $phpId
+                        ? new Comment\Doc($value, $token->line, $token->filePos, $this->pos)
+                        : new Comment($value, $token->line, $token->filePos, $this->pos);
+                    $startAttributes['comments'][] = $comment;
+                }
             }
-            if ($this->attributeEndTokenPosUsed) {
-                $endAttributes['endTokenPos'] = $this->pos;
-            }
-            if ($this->attributeEndFilePosUsed) {
-                $endAttributes['endFilePos'] = $this->filePos - 1;
-            }
-
-            return $id;
         }
 
         throw new \RuntimeException('Reached end of lexer loop');
@@ -333,8 +300,9 @@ class Lexer
      * @return string Remaining text
      */
     public function handleHaltCompiler() : string {
-        // text after T_HALT_COMPILER, still including ();
-        $textAfter = substr($this->code, $this->filePos);
+        // Text after T_HALT_COMPILER, still including ();
+        $tokenAfter = $this->tokens[$this->pos + 1];
+        $textAfter = substr($this->code, $tokenAfter->filePos);
 
         // ensure that it is followed by ();
         // this simplifies the situation, by not allowing any comments
@@ -343,8 +311,8 @@ class Lexer
             throw new Error('__HALT_COMPILER must be followed by "();"');
         }
 
-        // prevent the lexer from returning any further tokens
-        $this->pos = count($this->tokens);
+        // Point to one before EOF token, so it will be returned on the getNextToken() call
+        $this->pos = count($this->tokens) - 2;
 
         // return with (); removed
         return substr($textAfter, strlen($matches[0]));
@@ -362,9 +330,12 @@ class Lexer
     protected function createTokenMap() : array {
         $tokenMap = [];
 
-        // 256 is the minimum possible token number, as everything below
-        // it is an ASCII value
-        for ($i = 256; $i < 1000; ++$i) {
+        // ASCII values map to themselves.
+        for ($i = 0; $i < 256; ++$i) {
+            $tokenMap[$i] = $i;
+        }
+
+        for (; $i < 1000; ++$i) {
             if (\T_DOUBLE_COLON === $i) {
                 // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
                 $tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM;
diff --git a/lib/PhpParser/Lexer/Emulative.php b/lib/PhpParser/Lexer/Emulative.php
index 864bffa..fab0d0c 100644
--- a/lib/PhpParser/Lexer/Emulative.php
+++ b/lib/PhpParser/Lexer/Emulative.php
@@ -159,13 +159,7 @@ REGEX;
         $pos = 0;
         for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
             $token = $this->tokens[$i];
-            if (\is_string($token)) {
-                // We assume that patches don't apply to string tokens
-                $pos += \strlen($token);
-                continue;
-            }
-
-            $len = \strlen($token[1]);
+            $len = \strlen($token->value);
             $posDelta = 0;
             while ($patchPos >= $pos && $patchPos < $pos + $len) {
                 $patchTextLen = \strlen($patchText);
@@ -177,15 +171,15 @@ REGEX;
                         $c--;
                     } else {
                         // Remove from token string
-                        $this->tokens[$i][1] = substr_replace(
-                            $token[1], '', $patchPos - $pos + $posDelta, $patchTextLen
+                        $this->tokens[$i]->value = substr_replace(
+                            $token->value, '', $patchPos - $pos + $posDelta, $patchTextLen
                         );
                         $posDelta -= $patchTextLen;
                     }
                 } elseif ($patchType === 'add') {
                     // Insert into the token string
-                    $this->tokens[$i][1] = substr_replace(
-                        $token[1], $patchText, $patchPos - $pos + $posDelta, 0
+                    $this->tokens[$i]->value = substr_replace(
+                        $token->value, $patchText, $patchPos - $pos + $posDelta, 0
                     );
                     $posDelta += $patchTextLen;
                 } else {
@@ -196,7 +190,7 @@ REGEX;
                 $patchIdx++;
                 if ($patchIdx >= \count($this->patches)) {
                     // No more patches, we're done
-                    return;
+                    break 2;
                 }
 
                 list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];
@@ -209,8 +203,12 @@ REGEX;
             $pos += $len;
         }
 
-        // A patch did not apply
-        assert(false);
+        // To retain a minimum amount of sanity, recompute token offsets in a separate loop...
+        $pos = 0;
+        foreach ($this->tokens as $token) {
+            $token->filePos = $pos;
+            $pos += \strlen($token->value);
+        }
     }
 
     /**
diff --git a/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php
index 10f7e1e..fba12fa 100644
--- a/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php
@@ -3,6 +3,7 @@
 namespace PhpParser\Lexer\TokenEmulator;
 
 use PhpParser\Lexer\Emulative;
+use PhpParser\Token;
 
 final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
 {
@@ -20,20 +21,17 @@ final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
     {
         // We need to manually iterate and manage a count because we'll change
         // the tokens array on the way
-        $line = 1;
         for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
             if (isset($tokens[$i + 1])) {
-                if ($tokens[$i][0] === T_COALESCE && $tokens[$i + 1] === '=') {
+                $token = $tokens[$i];
+                if ($token->id === \T_COALESCE && $tokens[$i + 1]->value === '=') {
                     array_splice($tokens, $i, 2, [
-                        [Emulative::T_COALESCE_EQUAL, '??=', $line]
+                        new Token(Emulative::T_COALESCE_EQUAL, '??=', $token->line, $token->filePos),
                     ]);
                     $c--;
                     continue;
                 }
             }
-            if (\is_array($tokens[$i])) {
-                $line += substr_count($tokens[$i][1], "\n");
-            }
         }
 
         return $tokens;
diff --git a/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php
index 0939dd4..66e977c 100644
--- a/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php
@@ -3,6 +3,7 @@
 namespace PhpParser\Lexer\TokenEmulator;
 
 use PhpParser\Lexer\Emulative;
+use PhpParser\Token;
 
 final class FnTokenEmulator implements TokenEmulatorInterface
 {
@@ -21,13 +22,13 @@ final class FnTokenEmulator implements TokenEmulatorInterface
         // We need to manually iterate and manage a count because we'll change
         // the tokens array on the way
         foreach ($tokens as $i => $token) {
-            if ($token[0] === T_STRING && $token[1] === 'fn') {
+            if ($token->id === \T_STRING && $token->value === 'fn') {
                 $previousNonSpaceToken = $this->getPreviousNonSpaceToken($tokens, $i);
-                if ($previousNonSpaceToken !== null && $previousNonSpaceToken[0] === T_OBJECT_OPERATOR) {
+                if ($previousNonSpaceToken !== null && $previousNonSpaceToken->id === T_OBJECT_OPERATOR) {
                     continue;
                 }
 
-                $tokens[$i][0] = Emulative::T_FN;
+                $token->id = Emulative::T_FN;
             }
         }
 
@@ -35,13 +36,13 @@ final class FnTokenEmulator implements TokenEmulatorInterface
     }
 
     /**
-     * @param mixed[] $tokens
-     * @return mixed[]|null
+     * @param Token[] $tokens
+     * @return Token|null
      */
     private function getPreviousNonSpaceToken(array $tokens, int $start)
     {
         for ($i = $start - 1; $i >= 0; --$i) {
-            if ($tokens[$i][0] === T_WHITESPACE) {
+            if ($tokens[$i]->id === \T_WHITESPACE) {
                 continue;
             }
 
diff --git a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php
index f564c59..fc8c959 100644
--- a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php
@@ -3,6 +3,7 @@
 namespace PhpParser\Lexer\TokenEmulator;
 
 use PhpParser\Lexer\Emulative;
+use PhpParser\Token;
 
 final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
 {
@@ -24,47 +25,46 @@ final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
         return preg_match('~[0-9a-f]_[0-9a-f]~i', $code) !== false;
     }
 
+    /**
+     * @param Token[] $tokens
+     * @return Token[]
+     */
     public function emulate(string $code, array $tokens): array
     {
         // We need to manually iterate and manage a count because we'll change
         // the tokens array on the way
-        $codeOffset = 0;
         for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
             $token = $tokens[$i];
-            $tokenLen = \strlen(\is_array($token) ? $token[1] : $token);
+            $tokenLen = \strlen($token->value);
 
-            if ($token[0] !== T_LNUMBER && $token[0] !== T_DNUMBER) {
-                $codeOffset += $tokenLen;
+            if ($token->id !== T_LNUMBER && $token->id !== T_DNUMBER) {
                 continue;
             }
 
-            $res = preg_match(self::NUMBER, $code, $matches, 0, $codeOffset);
+            $res = preg_match(self::NUMBER, $code, $matches, 0, $token->filePos);
             assert($res, "No number at number token position");
 
             $match = $matches[0];
             $matchLen = \strlen($match);
             if ($matchLen === $tokenLen) {
                 // Original token already holds the full number.
-                $codeOffset += $tokenLen;
                 continue;
             }
 
             $tokenKind = $this->resolveIntegerOrFloatToken($match);
-            $newTokens = [[$tokenKind, $match, $token[2]]];
+            $newTokens = [new Token($tokenKind, $match, $token->line, $token->filePos)];
 
             $numTokens = 1;
             $len = $tokenLen;
             while ($matchLen > $len) {
                 $nextToken = $tokens[$i + $numTokens];
-                $nextTokenText = \is_array($nextToken) ? $nextToken[1] : $nextToken;
-                $nextTokenLen = \strlen($nextTokenText);
+                $nextTokenLen = \strlen($nextToken->value);
 
                 $numTokens++;
                 if ($matchLen < $len + $nextTokenLen) {
                     // Split trailing characters into a partial token.
-                    assert(is_array($nextToken), "Partial token should be an array token");
-                    $partialText = substr($nextTokenText, $matchLen - $len);
-                    $newTokens[] = [$nextToken[0], $partialText, $nextToken[2]];
+                    $partialText = substr($nextToken->value, $matchLen - $len);
+                    $newTokens[] = new Token($nextToken->id, $partialText, $nextToken->line, $nextToken->filePos + $matchLen - $len);
                     break;
                 }
 
@@ -73,7 +73,6 @@ final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
 
             array_splice($tokens, $i, $numTokens, $newTokens);
             $c -= $numTokens - \count($newTokens);
-            $codeOffset += $matchLen;
         }
 
         return $tokens;
diff --git a/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php b/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php
index e03ccc6..b2fd997 100644
--- a/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php
@@ -2,13 +2,16 @@
 
 namespace PhpParser\Lexer\TokenEmulator;
 
+use PhpParser\Token;
+
 /** @internal */
 interface TokenEmulatorInterface
 {
     public function isEmulationNeeded(string $code): bool;
 
     /**
-     * @return array Modified Tokens
+     * @param Token[] $tokens
+     * @return Token[]
      */
     public function emulate(string $code, array $tokens): array;
 }
diff --git a/lib/PhpParser/PrettyPrinterAbstract.php b/lib/PhpParser/PrettyPrinterAbstract.php
index 90f10c4..df95c78 100644
--- a/lib/PhpParser/PrettyPrinterAbstract.php
+++ b/lib/PhpParser/PrettyPrinterAbstract.php
@@ -491,7 +491,7 @@ abstract class PrettyPrinterAbstract
         $pos = 0;
         $result = $this->pArray($stmts, $origStmts, $pos, 0, 'File', 'stmts', null);
         if (null !== $result) {
-            $result .= $this->origTokens->getTokenCode($pos, count($origTokens), 0);
+            $result .= $this->origTokens->getTokenCode($pos, count($origTokens) - 1, 0);
         } else {
             // Fallback
             // TODO Add <?php properly
@@ -1213,8 +1213,8 @@ abstract class PrettyPrinterAbstract
         $stripLeft = ['left' => \T_WHITESPACE];
         $stripRight = ['right' => \T_WHITESPACE];
         $stripDoubleArrow = ['right' => \T_DOUBLE_ARROW];
-        $stripColon = ['left' => ':'];
-        $stripEquals = ['left' => '='];
+        $stripColon = ['left' => \ord(':')];
+        $stripEquals = ['left' => \ord('=')];
         $this->removalMap = [
             'Expr_ArrayDimFetch->dim' => $stripBoth,
             'Expr_ArrayItem->key' => $stripDoubleArrow,
@@ -1254,22 +1254,22 @@ abstract class PrettyPrinterAbstract
         // TODO: "yield" where both key and value are inserted doesn't work
         // [$find, $beforeToken, $extraLeft, $extraRight]
         $this->insertionMap = [
-            'Expr_ArrayDimFetch->dim' => ['[', false, null, null],
+            'Expr_ArrayDimFetch->dim' => [\ord('['), false, null, null],
             'Expr_ArrayItem->key' => [null, false, null, ' => '],
-            'Expr_ArrowFunction->returnType' => [')', false, ' : ', null],
-            'Expr_Closure->returnType' => [')', false, ' : ', null],
-            'Expr_Ternary->if' => ['?', false, ' ', ' '],
+            'Expr_ArrowFunction->returnType' => [\ord(')'), false, ' : ', null],
+            'Expr_Closure->returnType' => [\ord(')'), false, ' : ', null],
+            'Expr_Ternary->if' => [\ord('?'), false, ' ', ' '],
             'Expr_Yield->key' => [\T_YIELD, false, null, ' => '],
             'Expr_Yield->value' => [\T_YIELD, false, ' ', null],
             'Param->type' => [null, false, null, ' '],
             'Param->default' => [null, false, ' = ', null],
             'Stmt_Break->num' => [\T_BREAK, false, ' ', null],
-            'Stmt_ClassMethod->returnType' => [')', false, ' : ', null],
+            'Stmt_ClassMethod->returnType' => [\ord(')'), false, ' : ', null],
             'Stmt_Class->extends' => [null, false, ' extends ', null],
             'Expr_PrintableNewAnonClass->extends' => [null, ' extends ', null],
             'Stmt_Continue->num' => [\T_CONTINUE, false, ' ', null],
             'Stmt_Foreach->keyVar' => [\T_AS, false, null, ' => '],
-            'Stmt_Function->returnType' => [')', false, ' : ', null],
+            'Stmt_Function->returnType' => [\ord(')'), false, ' : ', null],
             'Stmt_If->else' => [null, false, ' ', null],
             'Stmt_Namespace->name' => [\T_NAMESPACE, false, ' ', null],
             'Stmt_Property->type' => [\T_VARIABLE, true, null, ' '],
@@ -1367,19 +1367,19 @@ abstract class PrettyPrinterAbstract
 
         // [$find, $extraLeft, $extraRight]
         $this->emptyListInsertionMap = [
-            'Expr_ArrowFunction->params' => ['(', '', ''],
-            'Expr_Closure->uses' => [')', ' use(', ')'],
-            'Expr_Closure->params' => ['(', '', ''],
-            'Expr_FuncCall->args' => ['(', '', ''],
-            'Expr_MethodCall->args' => ['(', '', ''],
-            'Expr_New->args' => ['(', '', ''],
-            'Expr_PrintableNewAnonClass->args' => ['(', '', ''],
+            'Expr_ArrowFunction->params' => [\ord('('), '', ''],
+            'Expr_Closure->uses' => [\ord(')'), ' use(', ')'],
+            'Expr_Closure->params' => [\ord('('), '', ''],
+            'Expr_FuncCall->args' => [\ord('('), '', ''],
+            'Expr_MethodCall->args' => [\ord('('), '', ''],
+            'Expr_New->args' => [\ord('('), '', ''],
+            'Expr_PrintableNewAnonClass->args' => [\ord('('), '', ''],
             'Expr_PrintableNewAnonClass->implements' => [null, ' implements ', ''],
-            'Expr_StaticCall->args' => ['(', '', ''],
+            'Expr_StaticCall->args' => [\ord('('), '', ''],
             'Stmt_Class->implements' => [null, ' implements ', ''],
-            'Stmt_ClassMethod->params' => ['(', '', ''],
+            'Stmt_ClassMethod->params' => [\ord('('), '', ''],
             'Stmt_Interface->extends' => [null, ' extends ', ''],
-            'Stmt_Function->params' => ['(', '', ''],
+            'Stmt_Function->params' => [\ord('('), '', ''],
 
             /* These cannot be empty to start with:
              * Expr_Isset->vars
diff --git a/lib/PhpParser/Token.php b/lib/PhpParser/Token.php
new file mode 100644
index 0000000..d058cbf
--- /dev/null
+++ b/lib/PhpParser/Token.php
@@ -0,0 +1,21 @@
+<?php declare(strict_types=1);
+
+namespace PhpParser;
+
+class Token {
+    /** @var int Token id: a T_* constant, or the ord() value of the character for single-character tokens */
+    public $id;
+    /** @var string Textual value of the token */
+    public $value;
+    /** @var int Line number on which the token starts (1-based) */
+    public $line;
+    /** @var int Offset of the token in the source code (0-based) */
+    public $filePos;
+
+    public function __construct(int $id, string $value, int $line, int $filePos) {
+        $this->id = $id;
+        $this->value = $value;
+        $this->line = $line;
+        $this->filePos = $filePos;
+    }
+}
\ No newline at end of file
diff --git a/test/PhpParser/LexerTest.php b/test/PhpParser/LexerTest.php
index f24c64d..632384f 100644
--- a/test/PhpParser/LexerTest.php
+++ b/test/PhpParser/LexerTest.php
@@ -247,17 +247,18 @@ class LexerTest extends \PHPUnit\Framework\TestCase
     public function testGetTokens() {
         $code = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';
         $expectedTokens = [
-            [T_OPEN_TAG, '<?php ', 1],
-            [T_CONSTANT_ENCAPSED_STRING, '"a"', 1],
-            ';',
-            [T_WHITESPACE, "\n", 1],
-            [T_COMMENT, '// foo' . "\n", 2],
-            [T_CONSTANT_ENCAPSED_STRING, '"b"', 3],
-            ';',
+            new Token(T_OPEN_TAG, '<?php ', 1, 0), // arguments: id, value, line, filePos
+            new Token(T_CONSTANT_ENCAPSED_STRING, '"a"', 1, 6),
+            new Token(\ord(';'), ';', 1, 9),
+            new Token(T_WHITESPACE, "\n", 1, 10),
+            new Token(T_COMMENT, '// foo' . "\n", 2, 11),
+            new Token(T_CONSTANT_ENCAPSED_STRING, '"b"', 3, 18),
+            new Token(\ord(';'), ';', 3, 21),
+            new Token(0, "\0", 3, 22), // trailing token with id 0; its offset 22 equals strlen($code)
         ];
 
         $lexer = $this->getLexer();
         $lexer->startLexing($code);
-        $this->assertSame($expectedTokens, $lexer->getTokens());
+        $this->assertEquals($expectedTokens, $lexer->getTokens()); // Token objects are compared by value, not identity
     }
 }