1
0
mirror of https://github.com/danog/PHP-Parser.git synced 2024-12-11 17:09:41 +01:00

Switch to a normalized token representation

Each token is now represented by a Token object.
This commit is contained in:
Nikita Popov 2019-06-30 13:10:12 +02:00
parent 3f718ee2c3
commit 6f74784e16
10 changed files with 198 additions and 208 deletions

View File

@ -2,6 +2,8 @@
namespace PhpParser\Internal; namespace PhpParser\Internal;
use PhpParser\Token;
/** /**
* Provides operations on token streams, for use by pretty printer. * Provides operations on token streams, for use by pretty printer.
* *
@ -9,7 +11,7 @@ namespace PhpParser\Internal;
*/ */
class TokenStream class TokenStream
{ {
/** @var array Tokens (in token_get_all format) */ /** @var Token[] */
private $tokens; private $tokens;
/** @var int[] Map from position to indentation */ /** @var int[] Map from position to indentation */
private $indentMap; private $indentMap;
@ -17,7 +19,7 @@ class TokenStream
/** /**
* Create token stream instance. * Create token stream instance.
* *
* @param array $tokens Tokens in token_get_all() format * @param Token[] $tokens Tokens
*/ */
public function __construct(array $tokens) { public function __construct(array $tokens) {
$this->tokens = $tokens; $this->tokens = $tokens;
@ -33,8 +35,8 @@ class TokenStream
* @return bool * @return bool
*/ */
public function haveParens(int $startPos, int $endPos) : bool { public function haveParens(int $startPos, int $endPos) : bool {
return $this->haveTokenImmediativelyBefore($startPos, '(') return $this->haveTokenImmediativelyBefore($startPos, \ord('('))
&& $this->haveTokenImmediatelyAfter($endPos, ')'); && $this->haveTokenImmediatelyAfter($endPos, \ord(')'));
} }
/** /**
@ -46,8 +48,8 @@ class TokenStream
* @return bool * @return bool
*/ */
public function haveBraces(int $startPos, int $endPos) : bool { public function haveBraces(int $startPos, int $endPos) : bool {
return $this->haveTokenImmediativelyBefore($startPos, '{') return $this->haveTokenImmediativelyBefore($startPos, \ord('{'))
&& $this->haveTokenImmediatelyAfter($endPos, '}'); && $this->haveTokenImmediatelyAfter($endPos, \ord('}'));
} }
/** /**
@ -64,7 +66,7 @@ class TokenStream
$tokens = $this->tokens; $tokens = $this->tokens;
$pos--; $pos--;
for (; $pos >= 0; $pos--) { for (; $pos >= 0; $pos--) {
$tokenType = $tokens[$pos][0]; $tokenType = $tokens[$pos]->id;
if ($tokenType === $expectedTokenType) { if ($tokenType === $expectedTokenType) {
return true; return true;
} }
@ -90,7 +92,7 @@ class TokenStream
$tokens = $this->tokens; $tokens = $this->tokens;
$pos++; $pos++;
for (; $pos < \count($tokens); $pos++) { for (; $pos < \count($tokens); $pos++) {
$tokenType = $tokens[$pos][0]; $tokenType = $tokens[$pos]->id;
if ($tokenType === $expectedTokenType) { if ($tokenType === $expectedTokenType) {
return true; return true;
} }
@ -110,7 +112,7 @@ class TokenStream
return $pos; return $pos;
} }
if ($tokens[$pos][0] !== $skipTokenType) { if ($tokens[$pos]->id !== $skipTokenType) {
// Shouldn't happen. The skip token MUST be there // Shouldn't happen. The skip token MUST be there
throw new \Exception('Encountered unexpected token'); throw new \Exception('Encountered unexpected token');
} }
@ -127,7 +129,7 @@ class TokenStream
return $pos; return $pos;
} }
if ($tokens[$pos][0] !== $skipTokenType) { if ($tokens[$pos]->id !== $skipTokenType) {
// Shouldn't happen. The skip token MUST be there // Shouldn't happen. The skip token MUST be there
throw new \Exception('Encountered unexpected token'); throw new \Exception('Encountered unexpected token');
} }
@ -145,7 +147,7 @@ class TokenStream
public function skipLeftWhitespace(int $pos) { public function skipLeftWhitespace(int $pos) {
$tokens = $this->tokens; $tokens = $this->tokens;
for (; $pos >= 0; $pos--) { for (; $pos >= 0; $pos--) {
$type = $tokens[$pos][0]; $type = $tokens[$pos]->id;
if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) { if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) {
break; break;
} }
@ -162,7 +164,7 @@ class TokenStream
public function skipRightWhitespace(int $pos) { public function skipRightWhitespace(int $pos) {
$tokens = $this->tokens; $tokens = $this->tokens;
for ($count = \count($tokens); $pos < $count; $pos++) { for ($count = \count($tokens); $pos < $count; $pos++) {
$type = $tokens[$pos][0]; $type = $tokens[$pos]->id;
if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) { if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) {
break; break;
} }
@ -173,7 +175,7 @@ class TokenStream
public function findRight($pos, $findTokenType) { public function findRight($pos, $findTokenType) {
$tokens = $this->tokens; $tokens = $this->tokens;
for ($count = \count($tokens); $pos < $count; $pos++) { for ($count = \count($tokens); $pos < $count; $pos++) {
$type = $tokens[$pos][0]; $type = $tokens[$pos]->id;
if ($type === $findTokenType) { if ($type === $findTokenType) {
return $pos; return $pos;
} }
@ -206,23 +208,19 @@ class TokenStream
$result = ''; $result = '';
for ($pos = $from; $pos < $to; $pos++) { for ($pos = $from; $pos < $to; $pos++) {
$token = $tokens[$pos]; $token = $tokens[$pos];
if (\is_array($token)) { $type = $token->id;
$type = $token[0]; $content = $token->value;
$content = $token[1]; if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) {
if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) { $result .= $content;
$result .= $content;
} else {
// TODO Handle non-space indentation
if ($indent < 0) {
$result .= str_replace("\n" . str_repeat(" ", -$indent), "\n", $content);
} elseif ($indent > 0) {
$result .= str_replace("\n", "\n" . str_repeat(" ", $indent), $content);
} else {
$result .= $content;
}
}
} else { } else {
$result .= $token; // TODO Handle non-space indentation
if ($indent < 0) {
$result .= str_replace("\n" . str_repeat(" ", -$indent), "\n", $content);
} elseif ($indent > 0) {
$result .= str_replace("\n", "\n" . str_repeat(" ", $indent), $content);
} else {
$result .= $content;
}
} }
} }
return $result; return $result;
@ -239,8 +237,8 @@ class TokenStream
foreach ($this->tokens as $token) { foreach ($this->tokens as $token) {
$indentMap[] = $indent; $indentMap[] = $indent;
if ($token[0] === \T_WHITESPACE) { if ($token->id === \T_WHITESPACE) {
$content = $token[1]; $content = $token->value;
$newlinePos = \strrpos($content, "\n"); $newlinePos = \strrpos($content, "\n");
if (false !== $newlinePos) { if (false !== $newlinePos) {
$indent = \strlen($content) - $newlinePos - 1; $indent = \strlen($content) - $newlinePos - 1;

View File

@ -11,11 +11,11 @@ class Lexer
* file positions from going out of sync. */ * file positions from going out of sync. */
const T_BAD_CHARACTER = -1; const T_BAD_CHARACTER = -1;
/** @var string */
protected $code; protected $code;
/** @var Token[] */
protected $tokens; protected $tokens;
protected $pos; protected $pos;
protected $line;
protected $filePos;
protected $prevCloseTagHasNewline; protected $prevCloseTagHasNewline;
protected $tokenMap; protected $tokenMap;
@ -87,9 +87,7 @@ class Lexer
$scream = ini_set('xdebug.scream', '0'); $scream = ini_set('xdebug.scream', '0');
error_clear_last(); $this->tokens = $this->createNormalizedTokens($code, $errorHandler);
$this->tokens = @token_get_all($code);
$this->handleErrors($errorHandler);
if (false !== $scream) { if (false !== $scream) {
ini_set('xdebug.scream', $scream); ini_set('xdebug.scream', $scream);
@ -109,7 +107,7 @@ class Lexer
); );
} }
$tokens[] = [self::T_BAD_CHARACTER, $chr, $line]; $tokens[] = new Token(self::T_BAD_CHARACTER, $chr, $line, $i);
$errorHandler->handleError(new Error($errorMsg, [ $errorHandler->handleError(new Error($errorMsg, [
'startLine' => $line, 'startLine' => $line,
'endLine' => $line, 'endLine' => $line,
@ -125,10 +123,10 @@ class Lexer
* *
* @return bool * @return bool
*/ */
private function isUnterminatedComment($token) : bool { private function isUnterminatedComment(Token $token) : bool {
return ($token[0] === \T_COMMENT || $token[0] === \T_DOC_COMMENT) return ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT)
&& substr($token[1], 0, 2) === '/*' && substr($token->value, 0, 2) === '/*'
&& substr($token[1], -2) !== '*/'; && substr($token->value, -2) !== '*/';
} }
/** /**
@ -146,75 +144,65 @@ class Lexer
return null !== error_get_last(); return null !== error_get_last();
} }
protected function handleErrors(ErrorHandler $errorHandler) { private function createNormalizedTokens(string $code, ErrorHandler $errorHandler) {
if (!$this->errorMayHaveOccurred()) { error_clear_last();
return; $rawTokens = @token_get_all($code);
} $checkForMissingTokens = $this->errorMayHaveOccurred();
// PHP's error handling for token_get_all() is rather bad, so if we want detailed
// error information we need to compute it ourselves. Invalid character errors are
// detected by finding "gaps" in the token array. Unterminated comments are detected
// by checking if a trailing comment has a "*/" at the end.
$tokens = [];
$filePos = 0; $filePos = 0;
$line = 1; $line = 1;
$numTokens = \count($this->tokens); foreach ($rawTokens as $rawToken) {
for ($i = 0; $i < $numTokens; $i++) { if (\is_array($rawToken)) {
$token = $this->tokens[$i]; $token = new Token($rawToken[0], $rawToken[1], $line, $filePos);
$tokenValue = \is_string($token) ? $token : $token[1]; } elseif (\strlen($rawToken) == 2) {
$tokenLen = \strlen($tokenValue); // Bug in token_get_all() when lexing b".
$token = new Token(\ord('"'), $rawToken, $line, $filePos);
} else {
$token = new Token(\ord($rawToken), $rawToken, $line, $filePos);
}
if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) { $value = $token->value;
$tokenLen = \strlen($value);
if ($checkForMissingTokens && substr($code, $filePos, $tokenLen) !== $value) {
// Something is missing, must be an invalid character // Something is missing, must be an invalid character
$nextFilePos = strpos($this->code, $tokenValue, $filePos); $nextFilePos = strpos($code, $value, $filePos);
$badCharTokens = $this->handleInvalidCharacterRange( $badCharTokens = $this->handleInvalidCharacterRange(
$filePos, $nextFilePos, $line, $errorHandler); $filePos, $nextFilePos, $line, $errorHandler);
$tokens = array_merge($tokens, $badCharTokens);
$filePos = (int) $nextFilePos; $filePos = (int) $nextFilePos;
array_splice($this->tokens, $i, 0, $badCharTokens);
$numTokens += \count($badCharTokens);
$i += \count($badCharTokens);
} }
$tokens[] = $token;
$filePos += $tokenLen; $filePos += $tokenLen;
$line += substr_count($tokenValue, "\n"); $line += substr_count($value, "\n");
} }
if ($filePos !== \strlen($this->code)) { if ($filePos !== \strlen($code)) {
if (substr($this->code, $filePos, 2) === '/*') { // Invalid characters at the end of the input
// Unlike PHP, HHVM will drop unterminated comments entirely $badCharTokens = $this->handleInvalidCharacterRange(
$comment = substr($this->code, $filePos); $filePos, \strlen($code), $line, $errorHandler);
$errorHandler->handleError(new Error('Unterminated comment', [ $tokens = array_merge($tokens, $badCharTokens);
'startLine' => $line,
'endLine' => $line + substr_count($comment, "\n"),
'startFilePos' => $filePos,
'endFilePos' => $filePos + \strlen($comment),
]));
// Emulate the PHP behavior
$isDocComment = isset($comment[3]) && $comment[3] === '*';
$this->tokens[] = [$isDocComment ? \T_DOC_COMMENT : \T_COMMENT, $comment, $line];
} else {
// Invalid characters at the end of the input
$badCharTokens = $this->handleInvalidCharacterRange(
$filePos, \strlen($this->code), $line, $errorHandler);
$this->tokens = array_merge($this->tokens, $badCharTokens);
}
return;
} }
if (count($this->tokens) > 0) { if (\count($tokens) > 0) {
// Check for unterminated comment // Check for unterminated comment
$lastToken = $this->tokens[count($this->tokens) - 1]; $lastToken = $tokens[\count($tokens) - 1];
if ($this->isUnterminatedComment($lastToken)) { if ($this->isUnterminatedComment($lastToken)) {
$errorHandler->handleError(new Error('Unterminated comment', [ $errorHandler->handleError(new Error('Unterminated comment', [
'startLine' => $line - substr_count($lastToken[1], "\n"), 'startLine' => $line - substr_count($lastToken->value, "\n"),
'endLine' => $line, 'endLine' => $line,
'startFilePos' => $filePos - \strlen($lastToken[1]), 'startFilePos' => $filePos - \strlen($lastToken->value),
'endFilePos' => $filePos, 'endFilePos' => $filePos,
])); ]));
} }
} }
// Add an EOF sentinel token
// TODO: Should the value be an empty string instead?
$tokens[] = new Token(0, "\0", $line, \strlen($code));
return $tokens;
} }
/** /**
@ -244,70 +232,49 @@ class Lexer
$endAttributes = []; $endAttributes = [];
while (1) { while (1) {
if (isset($this->tokens[++$this->pos])) { $token = $this->tokens[++$this->pos];
$token = $this->tokens[$this->pos];
} else {
// EOF token with ID 0
$token = "\0";
}
if ($this->attributeStartLineUsed) { if ($this->attributeStartLineUsed) {
$startAttributes['startLine'] = $this->line; $startAttributes['startLine'] = $token->line;
} }
if ($this->attributeStartTokenPosUsed) { if ($this->attributeStartTokenPosUsed) {
$startAttributes['startTokenPos'] = $this->pos; $startAttributes['startTokenPos'] = $this->pos;
} }
if ($this->attributeStartFilePosUsed) { if ($this->attributeStartFilePosUsed) {
$startAttributes['startFilePos'] = $this->filePos; $startAttributes['startFilePos'] = $token->filePos;
} }
if (\is_string($token)) { $phpId = $token->id;
$value = $token; $value = $token->value;
if (isset($token[1])) { if (!isset($this->dropTokens[$phpId])) {
// bug in token_get_all $id = $this->tokenMap[$phpId];
$this->filePos += 2; if (\T_CLOSE_TAG === $phpId) {
$id = ord('"'); $this->prevCloseTagHasNewline = false !== strpos($value, "\n");
} else { } elseif (\T_INLINE_HTML === $phpId) {
$this->filePos += 1;
$id = ord($token);
}
} elseif (!isset($this->dropTokens[$token[0]])) {
$value = $token[1];
$id = $this->tokenMap[$token[0]];
if (\T_CLOSE_TAG === $token[0]) {
$this->prevCloseTagHasNewline = false !== strpos($token[1], "\n");
} elseif (\T_INLINE_HTML === $token[0]) {
$startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline; $startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline;
} }
$this->line += substr_count($value, "\n"); if ($this->attributeEndLineUsed) {
$this->filePos += \strlen($value); $endAttributes['endLine'] = $token->line + substr_count($value, "\n");
} else { }
if (\T_COMMENT === $token[0] || \T_DOC_COMMENT === $token[0]) { if ($this->attributeEndTokenPosUsed) {
if ($this->attributeCommentsUsed) { $endAttributes['endTokenPos'] = $this->pos;
$comment = \T_DOC_COMMENT === $token[0] }
? new Comment\Doc($token[1], $this->line, $this->filePos, $this->pos) if ($this->attributeEndFilePosUsed) {
: new Comment($token[1], $this->line, $this->filePos, $this->pos); $endAttributes['endFilePos'] = $token->filePos + \strlen($value) - 1;
$startAttributes['comments'][] = $comment;
}
} }
$this->line += substr_count($token[1], "\n"); return $id;
$this->filePos += \strlen($token[1]);
continue;
} }
if ($this->attributeEndLineUsed) { if (\T_COMMENT === $phpId || \T_DOC_COMMENT === $phpId) {
$endAttributes['endLine'] = $this->line; if ($this->attributeCommentsUsed) {
$comment = \T_DOC_COMMENT === $phpId
? new Comment\Doc($value, $token->line, $token->filePos, $this->pos)
: new Comment($value, $token->line, $token->filePos, $this->pos);
$startAttributes['comments'][] = $comment;
}
} }
if ($this->attributeEndTokenPosUsed) {
$endAttributes['endTokenPos'] = $this->pos;
}
if ($this->attributeEndFilePosUsed) {
$endAttributes['endFilePos'] = $this->filePos - 1;
}
return $id;
} }
throw new \RuntimeException('Reached end of lexer loop'); throw new \RuntimeException('Reached end of lexer loop');
@ -333,8 +300,9 @@ class Lexer
* @return string Remaining text * @return string Remaining text
*/ */
public function handleHaltCompiler() : string { public function handleHaltCompiler() : string {
// text after T_HALT_COMPILER, still including (); // Text after T_HALT_COMPILER, still including ();
$textAfter = substr($this->code, $this->filePos); $tokenAfter = $this->tokens[$this->pos + 1];
$textAfter = substr($this->code, $tokenAfter->filePos);
// ensure that it is followed by (); // ensure that it is followed by ();
// this simplifies the situation, by not allowing any comments // this simplifies the situation, by not allowing any comments
@ -343,8 +311,8 @@ class Lexer
throw new Error('__HALT_COMPILER must be followed by "();"'); throw new Error('__HALT_COMPILER must be followed by "();"');
} }
// prevent the lexer from returning any further tokens // Point to one before EOF token, so it will be returned on the getNextToken() call
$this->pos = count($this->tokens); $this->pos = count($this->tokens) - 2;
// return with (); removed // return with (); removed
return substr($textAfter, strlen($matches[0])); return substr($textAfter, strlen($matches[0]));
@ -362,9 +330,12 @@ class Lexer
protected function createTokenMap() : array { protected function createTokenMap() : array {
$tokenMap = []; $tokenMap = [];
// 256 is the minimum possible token number, as everything below // ASCII values map to themselves.
// it is an ASCII value for ($i = 0; $i < 256; ++$i) {
for ($i = 256; $i < 1000; ++$i) { $tokenMap[$i] = $i;
}
for (; $i < 1000; ++$i) {
if (\T_DOUBLE_COLON === $i) { if (\T_DOUBLE_COLON === $i) {
// T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
$tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM; $tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM;

View File

@ -159,13 +159,7 @@ REGEX;
$pos = 0; $pos = 0;
for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) { for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
$token = $this->tokens[$i]; $token = $this->tokens[$i];
if (\is_string($token)) { $len = \strlen($token->value);
// We assume that patches don't apply to string tokens
$pos += \strlen($token);
continue;
}
$len = \strlen($token[1]);
$posDelta = 0; $posDelta = 0;
while ($patchPos >= $pos && $patchPos < $pos + $len) { while ($patchPos >= $pos && $patchPos < $pos + $len) {
$patchTextLen = \strlen($patchText); $patchTextLen = \strlen($patchText);
@ -177,15 +171,15 @@ REGEX;
$c--; $c--;
} else { } else {
// Remove from token string // Remove from token string
$this->tokens[$i][1] = substr_replace( $this->tokens[$i]->value = substr_replace(
$token[1], '', $patchPos - $pos + $posDelta, $patchTextLen $token->value, '', $patchPos - $pos + $posDelta, $patchTextLen
); );
$posDelta -= $patchTextLen; $posDelta -= $patchTextLen;
} }
} elseif ($patchType === 'add') { } elseif ($patchType === 'add') {
// Insert into the token string // Insert into the token string
$this->tokens[$i][1] = substr_replace( $this->tokens[$i]->value = substr_replace(
$token[1], $patchText, $patchPos - $pos + $posDelta, 0 $token->value, $patchText, $patchPos - $pos + $posDelta, 0
); );
$posDelta += $patchTextLen; $posDelta += $patchTextLen;
} else { } else {
@ -196,7 +190,7 @@ REGEX;
$patchIdx++; $patchIdx++;
if ($patchIdx >= \count($this->patches)) { if ($patchIdx >= \count($this->patches)) {
// No more patches, we're done // No more patches, we're done
return; break 2;
} }
list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx]; list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];
@ -209,8 +203,12 @@ REGEX;
$pos += $len; $pos += $len;
} }
// A patch did not apply // To retain a minimum amount of sanity, recompute token offsets in a separate loop...
assert(false); $pos = 0;
foreach ($this->tokens as $token) {
$token->filePos = $pos;
$pos += \strlen($token->value);
}
} }
/** /**

View File

@ -3,6 +3,7 @@
namespace PhpParser\Lexer\TokenEmulator; namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative; use PhpParser\Lexer\Emulative;
use PhpParser\Token;
final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
{ {
@ -20,20 +21,17 @@ final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
{ {
// We need to manually iterate and manage a count because we'll change // We need to manually iterate and manage a count because we'll change
// the tokens array on the way // the tokens array on the way
$line = 1;
for ($i = 0, $c = count($tokens); $i < $c; ++$i) { for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
if (isset($tokens[$i + 1])) { if (isset($tokens[$i + 1])) {
if ($tokens[$i][0] === T_COALESCE && $tokens[$i + 1] === '=') { $token = $tokens[$i];
if ($token->id === \T_COALESCE && $tokens[$i + 1]->value === '=') {
array_splice($tokens, $i, 2, [ array_splice($tokens, $i, 2, [
[Emulative::T_COALESCE_EQUAL, '??=', $line] new Token(Emulative::T_COALESCE_EQUAL, '??=', $token->line, $token->filePos),
]); ]);
$c--; $c--;
continue; continue;
} }
} }
if (\is_array($tokens[$i])) {
$line += substr_count($tokens[$i][1], "\n");
}
} }
return $tokens; return $tokens;

View File

@ -3,6 +3,7 @@
namespace PhpParser\Lexer\TokenEmulator; namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative; use PhpParser\Lexer\Emulative;
use PhpParser\Token;
final class FnTokenEmulator implements TokenEmulatorInterface final class FnTokenEmulator implements TokenEmulatorInterface
{ {
@ -21,13 +22,13 @@ final class FnTokenEmulator implements TokenEmulatorInterface
// We need to manually iterate and manage a count because we'll change // We need to manually iterate and manage a count because we'll change
// the tokens array on the way // the tokens array on the way
foreach ($tokens as $i => $token) { foreach ($tokens as $i => $token) {
if ($token[0] === T_STRING && $token[1] === 'fn') { if ($token->id === \T_STRING && $token->value === 'fn') {
$previousNonSpaceToken = $this->getPreviousNonSpaceToken($tokens, $i); $previousNonSpaceToken = $this->getPreviousNonSpaceToken($tokens, $i);
if ($previousNonSpaceToken !== null && $previousNonSpaceToken[0] === T_OBJECT_OPERATOR) { if ($previousNonSpaceToken !== null && $previousNonSpaceToken->id === T_OBJECT_OPERATOR) {
continue; continue;
} }
$tokens[$i][0] = Emulative::T_FN; $token->id = Emulative::T_FN;
} }
} }
@ -35,13 +36,13 @@ final class FnTokenEmulator implements TokenEmulatorInterface
} }
/** /**
* @param mixed[] $tokens * @param Token[] $tokens
* @return mixed[]|null * @return Token|null
*/ */
private function getPreviousNonSpaceToken(array $tokens, int $start) private function getPreviousNonSpaceToken(array $tokens, int $start)
{ {
for ($i = $start - 1; $i >= 0; --$i) { for ($i = $start - 1; $i >= 0; --$i) {
if ($tokens[$i][0] === T_WHITESPACE) { if ($tokens[$i]->id === \T_WHITESPACE) {
continue; continue;
} }

View File

@ -3,6 +3,7 @@
namespace PhpParser\Lexer\TokenEmulator; namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative; use PhpParser\Lexer\Emulative;
use PhpParser\Token;
final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
{ {
@ -24,47 +25,46 @@ final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
return preg_match('~[0-9a-f]_[0-9a-f]~i', $code) !== false; return preg_match('~[0-9a-f]_[0-9a-f]~i', $code) !== false;
} }
/**
* @param Token[] $tokens
* @return Token[]
*/
public function emulate(string $code, array $tokens): array public function emulate(string $code, array $tokens): array
{ {
// We need to manually iterate and manage a count because we'll change // We need to manually iterate and manage a count because we'll change
// the tokens array on the way // the tokens array on the way
$codeOffset = 0;
for ($i = 0, $c = count($tokens); $i < $c; ++$i) { for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
$token = $tokens[$i]; $token = $tokens[$i];
$tokenLen = \strlen(\is_array($token) ? $token[1] : $token); $tokenLen = \strlen($token->value);
if ($token[0] !== T_LNUMBER && $token[0] !== T_DNUMBER) { if ($token->id !== T_LNUMBER && $token->id !== T_DNUMBER) {
$codeOffset += $tokenLen;
continue; continue;
} }
$res = preg_match(self::NUMBER, $code, $matches, 0, $codeOffset); $res = preg_match(self::NUMBER, $code, $matches, 0, $token->filePos);
assert($res, "No number at number token position"); assert($res, "No number at number token position");
$match = $matches[0]; $match = $matches[0];
$matchLen = \strlen($match); $matchLen = \strlen($match);
if ($matchLen === $tokenLen) { if ($matchLen === $tokenLen) {
// Original token already holds the full number. // Original token already holds the full number.
$codeOffset += $tokenLen;
continue; continue;
} }
$tokenKind = $this->resolveIntegerOrFloatToken($match); $tokenKind = $this->resolveIntegerOrFloatToken($match);
$newTokens = [[$tokenKind, $match, $token[2]]]; $newTokens = [new Token($tokenKind, $match, $token->line, $token->filePos)];
$numTokens = 1; $numTokens = 1;
$len = $tokenLen; $len = $tokenLen;
while ($matchLen > $len) { while ($matchLen > $len) {
$nextToken = $tokens[$i + $numTokens]; $nextToken = $tokens[$i + $numTokens];
$nextTokenText = \is_array($nextToken) ? $nextToken[1] : $nextToken; $nextTokenLen = \strlen($nextToken->value);
$nextTokenLen = \strlen($nextTokenText);
$numTokens++; $numTokens++;
if ($matchLen < $len + $nextTokenLen) { if ($matchLen < $len + $nextTokenLen) {
// Split trailing characters into a partial token. // Split trailing characters into a partial token.
assert(is_array($nextToken), "Partial token should be an array token"); $partialText = substr($nextToken->value, $matchLen - $len);
$partialText = substr($nextTokenText, $matchLen - $len); $newTokens[] = new Token($nextToken->id, $partialText, $nextToken->line, $nextToken->filePos);
$newTokens[] = [$nextToken[0], $partialText, $nextToken[2]];
break; break;
} }
@ -73,7 +73,6 @@ final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
array_splice($tokens, $i, $numTokens, $newTokens); array_splice($tokens, $i, $numTokens, $newTokens);
$c -= $numTokens - \count($newTokens); $c -= $numTokens - \count($newTokens);
$codeOffset += $matchLen;
} }
return $tokens; return $tokens;

View File

@ -2,13 +2,16 @@
namespace PhpParser\Lexer\TokenEmulator; namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Token;
/** @internal */ /** @internal */
interface TokenEmulatorInterface interface TokenEmulatorInterface
{ {
public function isEmulationNeeded(string $code): bool; public function isEmulationNeeded(string $code): bool;
/** /**
* @return array Modified Tokens * @param Token[] $tokens
* @return Token[]
*/ */
public function emulate(string $code, array $tokens): array; public function emulate(string $code, array $tokens): array;
} }

View File

@ -491,7 +491,7 @@ abstract class PrettyPrinterAbstract
$pos = 0; $pos = 0;
$result = $this->pArray($stmts, $origStmts, $pos, 0, 'File', 'stmts', null); $result = $this->pArray($stmts, $origStmts, $pos, 0, 'File', 'stmts', null);
if (null !== $result) { if (null !== $result) {
$result .= $this->origTokens->getTokenCode($pos, count($origTokens), 0); $result .= $this->origTokens->getTokenCode($pos, count($origTokens) - 1, 0);
} else { } else {
// Fallback // Fallback
// TODO Add <?php properly // TODO Add <?php properly
@ -1213,8 +1213,8 @@ abstract class PrettyPrinterAbstract
$stripLeft = ['left' => \T_WHITESPACE]; $stripLeft = ['left' => \T_WHITESPACE];
$stripRight = ['right' => \T_WHITESPACE]; $stripRight = ['right' => \T_WHITESPACE];
$stripDoubleArrow = ['right' => \T_DOUBLE_ARROW]; $stripDoubleArrow = ['right' => \T_DOUBLE_ARROW];
$stripColon = ['left' => ':']; $stripColon = ['left' => \ord(':')];
$stripEquals = ['left' => '=']; $stripEquals = ['left' => \ord('=')];
$this->removalMap = [ $this->removalMap = [
'Expr_ArrayDimFetch->dim' => $stripBoth, 'Expr_ArrayDimFetch->dim' => $stripBoth,
'Expr_ArrayItem->key' => $stripDoubleArrow, 'Expr_ArrayItem->key' => $stripDoubleArrow,
@ -1254,22 +1254,22 @@ abstract class PrettyPrinterAbstract
// TODO: "yield" where both key and value are inserted doesn't work // TODO: "yield" where both key and value are inserted doesn't work
// [$find, $beforeToken, $extraLeft, $extraRight] // [$find, $beforeToken, $extraLeft, $extraRight]
$this->insertionMap = [ $this->insertionMap = [
'Expr_ArrayDimFetch->dim' => ['[', false, null, null], 'Expr_ArrayDimFetch->dim' => [\ord('['), false, null, null],
'Expr_ArrayItem->key' => [null, false, null, ' => '], 'Expr_ArrayItem->key' => [null, false, null, ' => '],
'Expr_ArrowFunction->returnType' => [')', false, ' : ', null], 'Expr_ArrowFunction->returnType' => [\ord(')'), false, ' : ', null],
'Expr_Closure->returnType' => [')', false, ' : ', null], 'Expr_Closure->returnType' => [\ord(')'), false, ' : ', null],
'Expr_Ternary->if' => ['?', false, ' ', ' '], 'Expr_Ternary->if' => [\ord('?'), false, ' ', ' '],
'Expr_Yield->key' => [\T_YIELD, false, null, ' => '], 'Expr_Yield->key' => [\T_YIELD, false, null, ' => '],
'Expr_Yield->value' => [\T_YIELD, false, ' ', null], 'Expr_Yield->value' => [\T_YIELD, false, ' ', null],
'Param->type' => [null, false, null, ' '], 'Param->type' => [null, false, null, ' '],
'Param->default' => [null, false, ' = ', null], 'Param->default' => [null, false, ' = ', null],
'Stmt_Break->num' => [\T_BREAK, false, ' ', null], 'Stmt_Break->num' => [\T_BREAK, false, ' ', null],
'Stmt_ClassMethod->returnType' => [')', false, ' : ', null], 'Stmt_ClassMethod->returnType' => [\ord(')'), false, ' : ', null],
'Stmt_Class->extends' => [null, false, ' extends ', null], 'Stmt_Class->extends' => [null, false, ' extends ', null],
'Expr_PrintableNewAnonClass->extends' => [null, ' extends ', null], 'Expr_PrintableNewAnonClass->extends' => [null, ' extends ', null],
'Stmt_Continue->num' => [\T_CONTINUE, false, ' ', null], 'Stmt_Continue->num' => [\T_CONTINUE, false, ' ', null],
'Stmt_Foreach->keyVar' => [\T_AS, false, null, ' => '], 'Stmt_Foreach->keyVar' => [\T_AS, false, null, ' => '],
'Stmt_Function->returnType' => [')', false, ' : ', null], 'Stmt_Function->returnType' => [\ord(')'), false, ' : ', null],
'Stmt_If->else' => [null, false, ' ', null], 'Stmt_If->else' => [null, false, ' ', null],
'Stmt_Namespace->name' => [\T_NAMESPACE, false, ' ', null], 'Stmt_Namespace->name' => [\T_NAMESPACE, false, ' ', null],
'Stmt_Property->type' => [\T_VARIABLE, true, null, ' '], 'Stmt_Property->type' => [\T_VARIABLE, true, null, ' '],
@ -1367,19 +1367,19 @@ abstract class PrettyPrinterAbstract
// [$find, $extraLeft, $extraRight] // [$find, $extraLeft, $extraRight]
$this->emptyListInsertionMap = [ $this->emptyListInsertionMap = [
'Expr_ArrowFunction->params' => ['(', '', ''], 'Expr_ArrowFunction->params' => [\ord('('), '', ''],
'Expr_Closure->uses' => [')', ' use(', ')'], 'Expr_Closure->uses' => [\ord(')'), ' use(', ')'],
'Expr_Closure->params' => ['(', '', ''], 'Expr_Closure->params' => [\ord('('), '', ''],
'Expr_FuncCall->args' => ['(', '', ''], 'Expr_FuncCall->args' => [\ord('('), '', ''],
'Expr_MethodCall->args' => ['(', '', ''], 'Expr_MethodCall->args' => [\ord('('), '', ''],
'Expr_New->args' => ['(', '', ''], 'Expr_New->args' => [\ord('('), '', ''],
'Expr_PrintableNewAnonClass->args' => ['(', '', ''], 'Expr_PrintableNewAnonClass->args' => [\ord('('), '', ''],
'Expr_PrintableNewAnonClass->implements' => [null, ' implements ', ''], 'Expr_PrintableNewAnonClass->implements' => [null, ' implements ', ''],
'Expr_StaticCall->args' => ['(', '', ''], 'Expr_StaticCall->args' => [\ord('('), '', ''],
'Stmt_Class->implements' => [null, ' implements ', ''], 'Stmt_Class->implements' => [null, ' implements ', ''],
'Stmt_ClassMethod->params' => ['(', '', ''], 'Stmt_ClassMethod->params' => [\ord('('), '', ''],
'Stmt_Interface->extends' => [null, ' extends ', ''], 'Stmt_Interface->extends' => [null, ' extends ', ''],
'Stmt_Function->params' => ['(', '', ''], 'Stmt_Function->params' => [\ord('('), '', ''],
/* These cannot be empty to start with: /* These cannot be empty to start with:
* Expr_Isset->vars * Expr_Isset->vars

21
lib/PhpParser/Token.php Normal file
View File

@ -0,0 +1,21 @@
<?php declare(strict_types=1);
namespace PhpParser;
/**
 * A single lexed token: what kind it is, the text it covers and where it starts.
 *
 * The properties are deliberately public and writable. Code that post-processes
 * the token array (token emulation, patching) adjusts id, value and filePos in
 * place after the token has been created.
 */
class Token
{
    /**
     * Kind of the token.
     *
     * For multi-character tokens this is the T_* constant reported by
     * token_get_all(). Single-character tokens such as ';' or '(' use the
     * ordinal value of that character (\ord(';')). The lexer additionally
     * uses 0 for its end-of-file sentinel and Lexer::T_BAD_CHARACTER (-1)
     * for characters that could not be lexed.
     *
     * @var int
     */
    public $id;

    /**
     * Exact source text covered by the token.
     *
     * @var string
     */
    public $value;

    /**
     * Line on which the token starts (the lexer counts from 1).
     *
     * @var int
     */
    public $line;

    /**
     * Offset of the token's first byte in the source code (counted from 0).
     *
     * @var int
     */
    public $filePos;

    /**
     * Create a token.
     *
     * @param int    $id      Token kind (T_* constant, character ordinal or lexer-specific id)
     * @param string $value   Source text of the token
     * @param int    $line    Line on which the token starts
     * @param int    $filePos Byte offset at which the token starts
     */
    public function __construct(int $id, string $value, int $line, int $filePos) {
        $this->id = $id;
        $this->value = $value;
        $this->line = $line;
        $this->filePos = $filePos;
    }
}

View File

@ -247,17 +247,18 @@ class LexerTest extends \PHPUnit\Framework\TestCase
public function testGetTokens() { public function testGetTokens() {
$code = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";'; $code = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';
$expectedTokens = [ $expectedTokens = [
[T_OPEN_TAG, '<?php ', 1], new Token(T_OPEN_TAG, '<?php ', 1, 0),
[T_CONSTANT_ENCAPSED_STRING, '"a"', 1], new Token(T_CONSTANT_ENCAPSED_STRING, '"a"', 1, 6),
';', new Token(\ord(';'), ';', 1, 9),
[T_WHITESPACE, "\n", 1], new Token(T_WHITESPACE, "\n", 1, 10),
[T_COMMENT, '// foo' . "\n", 2], new Token(T_COMMENT, '// foo' . "\n", 2, 11),
[T_CONSTANT_ENCAPSED_STRING, '"b"', 3], new Token(T_CONSTANT_ENCAPSED_STRING, '"b"', 3, 18),
';', new Token(\ord(';'), ';', 3, 21),
new Token(0, "\0", 3, 22),
]; ];
$lexer = $this->getLexer(); $lexer = $this->getLexer();
$lexer->startLexing($code); $lexer->startLexing($code);
$this->assertSame($expectedTokens, $lexer->getTokens()); $this->assertEquals($expectedTokens, $lexer->getTokens());
} }
} }