mirror of
https://github.com/danog/PHP-Parser.git
synced 2024-11-26 20:04:48 +01:00
Canonicalize to PHP 8 comment token format
The trailing newline is no longer part of the comment token.
This commit is contained in:
parent
b58b19ed1d
commit
4abc531213
@ -89,7 +89,7 @@ class Lexer
|
||||
|
||||
error_clear_last();
|
||||
$this->tokens = @token_get_all($code);
|
||||
$this->handleErrors($errorHandler);
|
||||
$this->postprocessTokens($errorHandler);
|
||||
|
||||
if (false !== $scream) {
|
||||
ini_set('xdebug.scream', $scream);
|
||||
@ -131,40 +131,14 @@ class Lexer
|
||||
&& substr($token[1], -2) !== '*/';
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether an error *may* have occurred during tokenization.
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
private function errorMayHaveOccurred() : bool {
|
||||
if (defined('HHVM_VERSION')) {
|
||||
// In HHVM token_get_all() does not throw warnings, so we need to conservatively
|
||||
// assume that an error occurred
|
||||
return true;
|
||||
}
|
||||
|
||||
if (PHP_VERSION_ID >= 80000) {
|
||||
// PHP 8 converts the "bad character" case into a parse error, rather than treating
|
||||
// it as a lexing warning. To preserve previous behavior, we need to assume that an
|
||||
// error occurred.
|
||||
// TODO: We should handle this the same way as PHP 8: Only generate T_BAD_CHARACTER
|
||||
// token here (for older PHP versions) and leave generation of the actual parse error
|
||||
// to the parser. This will also save the full token scan on PHP 8 here.
|
||||
return true;
|
||||
}
|
||||
|
||||
return null !== error_get_last();
|
||||
}
|
||||
|
||||
protected function handleErrors(ErrorHandler $errorHandler) {
|
||||
if (!$this->errorMayHaveOccurred()) {
|
||||
return;
|
||||
}
|
||||
|
||||
protected function postprocessTokens(ErrorHandler $errorHandler) {
|
||||
// PHP's error handling for token_get_all() is rather bad, so if we want detailed
|
||||
// error information we need to compute it ourselves. Invalid character errors are
|
||||
// detected by finding "gaps" in the token array. Unterminated comments are detected
|
||||
// by checking if a trailing comment has a "*/" at the end.
|
||||
//
|
||||
// Additionally, we canonicalize to the PHP 8 comment format here, which does not include
|
||||
// the trailing newline anymore.
|
||||
|
||||
$filePos = 0;
|
||||
$line = 1;
|
||||
@ -178,6 +152,23 @@ class Lexer
|
||||
$this->handleInvalidCharacterRange($filePos, $filePos + 1, $line, $errorHandler);
|
||||
}
|
||||
|
||||
if ($token[0] === \T_COMMENT && preg_match('/(\r\n|\n|\r)$/D', $token[1], $matches)) {
|
||||
$trailingNewline = $matches[0];
|
||||
$token[1] = substr($token[1], 0, -strlen($trailingNewline));
|
||||
$this->tokens[$i] = $token;
|
||||
if (isset($this->tokens[$i + 1]) && $this->tokens[$i + 1][0] === \T_WHITESPACE) {
|
||||
// Move trailing newline into following T_WHITESPACE token, if it already exists.
|
||||
$this->tokens[$i + 1][1] = $trailingNewline . $this->tokens[$i + 1][1];
|
||||
$this->tokens[$i + 1][2]--;
|
||||
} else {
|
||||
// Otherwise, we need to create a new T_WHITESPACE token.
|
||||
array_splice($this->tokens, $i + 1, 0, [
|
||||
[\T_WHITESPACE, $trailingNewline, $line],
|
||||
]);
|
||||
$numTokens++;
|
||||
}
|
||||
}
|
||||
|
||||
$tokenValue = \is_string($token) ? $token : $token[1];
|
||||
$tokenLen = \strlen($tokenValue);
|
||||
|
||||
|
@ -124,12 +124,12 @@ class LexerTest extends \PHPUnit\Framework\TestCase
|
||||
'comments' => [
|
||||
new Comment('/* comment */',
|
||||
1, 6, 1, 1, 18, 1),
|
||||
new Comment('// comment' . "\n",
|
||||
1, 20, 3, 2, 30, 3),
|
||||
new Comment('// comment',
|
||||
1, 20, 3, 1, 29, 3),
|
||||
new Comment\Doc('/** docComment 1 */',
|
||||
2, 31, 4, 2, 49, 4),
|
||||
2, 31, 5, 2, 49, 5),
|
||||
new Comment\Doc('/** docComment 2 */',
|
||||
2, 50, 5, 2, 68, 5),
|
||||
2, 50, 6, 2, 68, 6),
|
||||
],
|
||||
],
|
||||
['endLine' => 2]
|
||||
@ -185,11 +185,11 @@ class LexerTest extends \PHPUnit\Framework\TestCase
|
||||
],
|
||||
[
|
||||
Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
|
||||
['startTokenPos' => 5], ['endTokenPos' => 5]
|
||||
['startTokenPos' => 6], ['endTokenPos' => 6]
|
||||
],
|
||||
[
|
||||
ord(';'), ';',
|
||||
['startTokenPos' => 6], ['endTokenPos' => 6]
|
||||
['startTokenPos' => 7], ['endTokenPos' => 7]
|
||||
],
|
||||
]
|
||||
],
|
||||
@ -251,14 +251,17 @@ class LexerTest extends \PHPUnit\Framework\TestCase
|
||||
}
|
||||
|
||||
public function testGetTokens() {
|
||||
$code = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';
|
||||
$code = '<?php "a";' . "\n" . '// foo' . "\n" . '// bar' . "\n\n" . '"b";';
|
||||
$expectedTokens = [
|
||||
[T_OPEN_TAG, '<?php ', 1],
|
||||
[T_CONSTANT_ENCAPSED_STRING, '"a"', 1],
|
||||
';',
|
||||
[T_WHITESPACE, "\n", 1],
|
||||
[T_COMMENT, '// foo' . "\n", 2],
|
||||
[T_CONSTANT_ENCAPSED_STRING, '"b"', 3],
|
||||
[T_COMMENT, '// foo', 2],
|
||||
[T_WHITESPACE, "\n", 2],
|
||||
[T_COMMENT, '// bar', 3],
|
||||
[T_WHITESPACE, "\n\n", 3],
|
||||
[T_CONSTANT_ENCAPSED_STRING, '"b"', 5],
|
||||
';',
|
||||
];
|
||||
|
||||
|
@ -307,12 +307,12 @@ PHP;
|
||||
"comments": [
|
||||
{
|
||||
"nodeType": "Comment",
|
||||
"text": "\/\/ comment\n",
|
||||
"text": "\/\/ comment",
|
||||
"line": 2,
|
||||
"filePos": 6,
|
||||
"tokenPos": 1,
|
||||
"endLine": 3,
|
||||
"endFilePos": 16,
|
||||
"endLine": 2,
|
||||
"endFilePos": 15,
|
||||
"endTokenPos": 1
|
||||
},
|
||||
{
|
||||
@ -320,10 +320,10 @@ PHP;
|
||||
"text": "\/** doc comment *\/",
|
||||
"line": 3,
|
||||
"filePos": 17,
|
||||
"tokenPos": 2,
|
||||
"tokenPos": 3,
|
||||
"endLine": 3,
|
||||
"endFilePos": 34,
|
||||
"endTokenPos": 2
|
||||
"endTokenPos": 3
|
||||
}
|
||||
],
|
||||
"endLine": 6
|
||||
|
@ -83,10 +83,10 @@ EOC;
|
||||
$this->assertInstanceOf(Stmt\Echo_::class, $echo);
|
||||
$this->assertEquals([
|
||||
'comments' => [
|
||||
new Comment("// Line\n",
|
||||
4, 49, 12, 5, 56, 12),
|
||||
new Comment("// Comments\n",
|
||||
5, 61, 14, 6, 72, 14),
|
||||
new Comment("// Line",
|
||||
4, 49, 12, 4, 55, 12),
|
||||
new Comment("// Comments",
|
||||
5, 61, 14, 5, 71, 14),
|
||||
],
|
||||
'startLine' => 6,
|
||||
'endLine' => 6,
|
||||
|
@ -15,8 +15,8 @@ class Foo {
|
||||
public function __construct()
|
||||
{
|
||||
// I'm just a comment
|
||||
|
||||
$foo; }
|
||||
$foo;
|
||||
}
|
||||
}
|
||||
-----
|
||||
<?php
|
||||
@ -72,5 +72,6 @@ class Foo {
|
||||
public function __construct()
|
||||
{
|
||||
// I'm a new comment
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user