mirror of
https://github.com/danog/PHP-Parser.git
synced 2024-11-30 04:19:30 +01:00
Canonicalize to PHP 8 comment token format
The trailing newline is no longer part of the comment token.
This commit is contained in:
parent
b58b19ed1d
commit
4abc531213
@ -89,7 +89,7 @@ class Lexer
|
|||||||
|
|
||||||
error_clear_last();
|
error_clear_last();
|
||||||
$this->tokens = @token_get_all($code);
|
$this->tokens = @token_get_all($code);
|
||||||
$this->handleErrors($errorHandler);
|
$this->postprocessTokens($errorHandler);
|
||||||
|
|
||||||
if (false !== $scream) {
|
if (false !== $scream) {
|
||||||
ini_set('xdebug.scream', $scream);
|
ini_set('xdebug.scream', $scream);
|
||||||
@ -131,40 +131,14 @@ class Lexer
|
|||||||
&& substr($token[1], -2) !== '*/';
|
&& substr($token[1], -2) !== '*/';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
protected function postprocessTokens(ErrorHandler $errorHandler) {
|
||||||
* Check whether an error *may* have occurred during tokenization.
|
|
||||||
*
|
|
||||||
* @return bool
|
|
||||||
*/
|
|
||||||
private function errorMayHaveOccurred() : bool {
|
|
||||||
if (defined('HHVM_VERSION')) {
|
|
||||||
// In HHVM token_get_all() does not throw warnings, so we need to conservatively
|
|
||||||
// assume that an error occurred
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PHP_VERSION_ID >= 80000) {
|
|
||||||
// PHP 8 converts the "bad character" case into a parse error, rather than treating
|
|
||||||
// it as a lexing warning. To preserve previous behavior, we need to assume that an
|
|
||||||
// error occurred.
|
|
||||||
// TODO: We should handle this the same way as PHP 8: Only generate T_BAD_CHARACTER
|
|
||||||
// token here (for older PHP versions) and leave generation of the actual parse error
|
|
||||||
// to the parser. This will also save the full token scan on PHP 8 here.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return null !== error_get_last();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected function handleErrors(ErrorHandler $errorHandler) {
|
|
||||||
if (!$this->errorMayHaveOccurred()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// PHP's error handling for token_get_all() is rather bad, so if we want detailed
|
// PHP's error handling for token_get_all() is rather bad, so if we want detailed
|
||||||
// error information we need to compute it ourselves. Invalid character errors are
|
// error information we need to compute it ourselves. Invalid character errors are
|
||||||
// detected by finding "gaps" in the token array. Unterminated comments are detected
|
// detected by finding "gaps" in the token array. Unterminated comments are detected
|
||||||
// by checking if a trailing comment has a "*/" at the end.
|
// by checking if a trailing comment has a "*/" at the end.
|
||||||
|
//
|
||||||
|
// Additionally, we canonicalize to the PHP 8 comment format here, which does not include
|
||||||
|
// the trailing newline anymore
|
||||||
|
|
||||||
$filePos = 0;
|
$filePos = 0;
|
||||||
$line = 1;
|
$line = 1;
|
||||||
@ -178,6 +152,23 @@ class Lexer
|
|||||||
$this->handleInvalidCharacterRange($filePos, $filePos + 1, $line, $errorHandler);
|
$this->handleInvalidCharacterRange($filePos, $filePos + 1, $line, $errorHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($token[0] === \T_COMMENT && preg_match('/(\r\n|\n|\r)$/D', $token[1], $matches)) {
|
||||||
|
$trailingNewline = $matches[0];
|
||||||
|
$token[1] = substr($token[1], 0, -strlen($trailingNewline));
|
||||||
|
$this->tokens[$i] = $token;
|
||||||
|
if (isset($this->tokens[$i + 1]) && $this->tokens[$i + 1][0] === \T_WHITESPACE) {
|
||||||
|
// Move trailing newline into following T_WHITESPACE token, if it already exists.
|
||||||
|
$this->tokens[$i + 1][1] = $trailingNewline . $this->tokens[$i + 1][1];
|
||||||
|
$this->tokens[$i + 1][2]--;
|
||||||
|
} else {
|
||||||
|
// Otherwise, we need to create a new T_WHITESPACE token.
|
||||||
|
array_splice($this->tokens, $i + 1, 0, [
|
||||||
|
[\T_WHITESPACE, $trailingNewline, $line],
|
||||||
|
]);
|
||||||
|
$numTokens++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$tokenValue = \is_string($token) ? $token : $token[1];
|
$tokenValue = \is_string($token) ? $token : $token[1];
|
||||||
$tokenLen = \strlen($tokenValue);
|
$tokenLen = \strlen($tokenValue);
|
||||||
|
|
||||||
|
@ -124,12 +124,12 @@ class LexerTest extends \PHPUnit\Framework\TestCase
|
|||||||
'comments' => [
|
'comments' => [
|
||||||
new Comment('/* comment */',
|
new Comment('/* comment */',
|
||||||
1, 6, 1, 1, 18, 1),
|
1, 6, 1, 1, 18, 1),
|
||||||
new Comment('// comment' . "\n",
|
new Comment('// comment',
|
||||||
1, 20, 3, 2, 30, 3),
|
1, 20, 3, 1, 29, 3),
|
||||||
new Comment\Doc('/** docComment 1 */',
|
new Comment\Doc('/** docComment 1 */',
|
||||||
2, 31, 4, 2, 49, 4),
|
2, 31, 5, 2, 49, 5),
|
||||||
new Comment\Doc('/** docComment 2 */',
|
new Comment\Doc('/** docComment 2 */',
|
||||||
2, 50, 5, 2, 68, 5),
|
2, 50, 6, 2, 68, 6),
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
['endLine' => 2]
|
['endLine' => 2]
|
||||||
@ -185,11 +185,11 @@ class LexerTest extends \PHPUnit\Framework\TestCase
|
|||||||
],
|
],
|
||||||
[
|
[
|
||||||
Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
|
Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
|
||||||
['startTokenPos' => 5], ['endTokenPos' => 5]
|
['startTokenPos' => 6], ['endTokenPos' => 6]
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
ord(';'), ';',
|
ord(';'), ';',
|
||||||
['startTokenPos' => 6], ['endTokenPos' => 6]
|
['startTokenPos' => 7], ['endTokenPos' => 7]
|
||||||
],
|
],
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
@ -251,14 +251,17 @@ class LexerTest extends \PHPUnit\Framework\TestCase
|
|||||||
}
|
}
|
||||||
|
|
||||||
public function testGetTokens() {
|
public function testGetTokens() {
|
||||||
$code = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';
|
$code = '<?php "a";' . "\n" . '// foo' . "\n" . '// bar' . "\n\n" . '"b";';
|
||||||
$expectedTokens = [
|
$expectedTokens = [
|
||||||
[T_OPEN_TAG, '<?php ', 1],
|
[T_OPEN_TAG, '<?php ', 1],
|
||||||
[T_CONSTANT_ENCAPSED_STRING, '"a"', 1],
|
[T_CONSTANT_ENCAPSED_STRING, '"a"', 1],
|
||||||
';',
|
';',
|
||||||
[T_WHITESPACE, "\n", 1],
|
[T_WHITESPACE, "\n", 1],
|
||||||
[T_COMMENT, '// foo' . "\n", 2],
|
[T_COMMENT, '// foo', 2],
|
||||||
[T_CONSTANT_ENCAPSED_STRING, '"b"', 3],
|
[T_WHITESPACE, "\n", 2],
|
||||||
|
[T_COMMENT, '// bar', 3],
|
||||||
|
[T_WHITESPACE, "\n\n", 3],
|
||||||
|
[T_CONSTANT_ENCAPSED_STRING, '"b"', 5],
|
||||||
';',
|
';',
|
||||||
];
|
];
|
||||||
|
|
||||||
|
@ -307,12 +307,12 @@ PHP;
|
|||||||
"comments": [
|
"comments": [
|
||||||
{
|
{
|
||||||
"nodeType": "Comment",
|
"nodeType": "Comment",
|
||||||
"text": "\/\/ comment\n",
|
"text": "\/\/ comment",
|
||||||
"line": 2,
|
"line": 2,
|
||||||
"filePos": 6,
|
"filePos": 6,
|
||||||
"tokenPos": 1,
|
"tokenPos": 1,
|
||||||
"endLine": 3,
|
"endLine": 2,
|
||||||
"endFilePos": 16,
|
"endFilePos": 15,
|
||||||
"endTokenPos": 1
|
"endTokenPos": 1
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -320,10 +320,10 @@ PHP;
|
|||||||
"text": "\/** doc comment *\/",
|
"text": "\/** doc comment *\/",
|
||||||
"line": 3,
|
"line": 3,
|
||||||
"filePos": 17,
|
"filePos": 17,
|
||||||
"tokenPos": 2,
|
"tokenPos": 3,
|
||||||
"endLine": 3,
|
"endLine": 3,
|
||||||
"endFilePos": 34,
|
"endFilePos": 34,
|
||||||
"endTokenPos": 2
|
"endTokenPos": 3
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"endLine": 6
|
"endLine": 6
|
||||||
|
@ -83,10 +83,10 @@ EOC;
|
|||||||
$this->assertInstanceOf(Stmt\Echo_::class, $echo);
|
$this->assertInstanceOf(Stmt\Echo_::class, $echo);
|
||||||
$this->assertEquals([
|
$this->assertEquals([
|
||||||
'comments' => [
|
'comments' => [
|
||||||
new Comment("// Line\n",
|
new Comment("// Line",
|
||||||
4, 49, 12, 5, 56, 12),
|
4, 49, 12, 4, 55, 12),
|
||||||
new Comment("// Comments\n",
|
new Comment("// Comments",
|
||||||
5, 61, 14, 6, 72, 14),
|
5, 61, 14, 5, 71, 14),
|
||||||
],
|
],
|
||||||
'startLine' => 6,
|
'startLine' => 6,
|
||||||
'endLine' => 6,
|
'endLine' => 6,
|
||||||
|
@ -15,8 +15,8 @@ class Foo {
|
|||||||
public function __construct()
|
public function __construct()
|
||||||
{
|
{
|
||||||
// I'm just a comment
|
// I'm just a comment
|
||||||
|
$foo;
|
||||||
$foo; }
|
}
|
||||||
}
|
}
|
||||||
-----
|
-----
|
||||||
<?php
|
<?php
|
||||||
@ -72,5 +72,6 @@ class Foo {
|
|||||||
public function __construct()
|
public function __construct()
|
||||||
{
|
{
|
||||||
// I'm a new comment
|
// I'm a new comment
|
||||||
}
|
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user