mirror of
https://github.com/phabelio/PHP-Parser.git
synced 2024-11-26 20:14:46 +01:00
Insert T_BAD_CHARACTER tokens for missing characters
The token stream should cover all characters in the original code, so insert a dummy token for each illegal character that is missing from it. We should really be doing this in token_get_all() as well.
This commit is contained in:
parent
a4b43edb03
commit
b9b45dd2bc
@ -6,6 +6,11 @@ use PhpParser\Parser\Tokens;
|
||||
|
||||
class Lexer
|
||||
{
|
||||
/* Token ID used for illegal characters that are part of the token stream. These are dropped by token_get_all(),
|
||||
* but we restore them here to make sure that the tokens cover the full original text, and to prevent
|
||||
* file positions from going out of sync. */
|
||||
const T_BAD_CHARACTER = -1;
|
||||
|
||||
protected $code;
|
||||
protected $tokens;
|
||||
protected $pos;
|
||||
@ -40,7 +45,7 @@ class Lexer
|
||||
// map of tokens to drop while lexing (the map is only used for isset lookup,
|
||||
// that's why the value is simply set to 1; the value is never actually used.)
|
||||
$this->dropTokens = array_fill_keys(
|
||||
[\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT], 1
|
||||
[\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, self::T_BAD_CHARACTER], 1
|
||||
);
|
||||
|
||||
$defaultAttributes = ['comments', 'startLine', 'endLine'];
|
||||
@ -92,13 +97,9 @@ class Lexer
|
||||
}
|
||||
|
||||
private function handleInvalidCharacterRange($start, $end, $line, ErrorHandler $errorHandler) {
|
||||
$tokens = [];
|
||||
for ($i = $start; $i < $end; $i++) {
|
||||
$chr = $this->code[$i];
|
||||
if ($chr === 'b' || $chr === 'B') {
|
||||
// HHVM does not treat b" tokens correctly, so ignore these
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($chr === "\0") {
|
||||
// PHP cuts error message after null byte, so need special case
|
||||
$errorMsg = 'Unexpected null byte';
|
||||
@ -108,6 +109,7 @@ class Lexer
|
||||
);
|
||||
}
|
||||
|
||||
$tokens[] = [self::T_BAD_CHARACTER, $chr, $line];
|
||||
$errorHandler->handleError(new Error($errorMsg, [
|
||||
'startLine' => $line,
|
||||
'endLine' => $line,
|
||||
@ -115,6 +117,7 @@ class Lexer
|
||||
'endFilePos' => $i,
|
||||
]));
|
||||
}
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -155,16 +158,22 @@ class Lexer
|
||||
|
||||
$filePos = 0;
|
||||
$line = 1;
|
||||
foreach ($this->tokens as $token) {
|
||||
$numTokens = \count($this->tokens);
|
||||
for ($i = 0; $i < $numTokens; $i++) {
|
||||
$token = $this->tokens[$i];
|
||||
$tokenValue = \is_string($token) ? $token : $token[1];
|
||||
$tokenLen = \strlen($tokenValue);
|
||||
|
||||
if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) {
|
||||
// Something is missing, must be an invalid character
|
||||
$nextFilePos = strpos($this->code, $tokenValue, $filePos);
|
||||
$this->handleInvalidCharacterRange(
|
||||
$badCharTokens = $this->handleInvalidCharacterRange(
|
||||
$filePos, $nextFilePos, $line, $errorHandler);
|
||||
$filePos = (int) $nextFilePos;
|
||||
|
||||
array_splice($this->tokens, $i, 0, $badCharTokens);
|
||||
$numTokens += \count($badCharTokens);
|
||||
$i += \count($badCharTokens);
|
||||
}
|
||||
|
||||
$filePos += $tokenLen;
|
||||
@ -187,8 +196,9 @@ class Lexer
|
||||
$this->tokens[] = [$isDocComment ? \T_DOC_COMMENT : \T_COMMENT, $comment, $line];
|
||||
} else {
|
||||
// Invalid characters at the end of the input
|
||||
$this->handleInvalidCharacterRange(
|
||||
$badCharTokens = $this->handleInvalidCharacterRange(
|
||||
$filePos, \strlen($this->code), $line, $errorHandler);
|
||||
$this->tokens = array_merge($this->tokens, $badCharTokens);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -32,24 +32,25 @@ $a = 42;
|
||||
@@{ "\1" }@@
|
||||
$b = 24;
|
||||
-----
|
||||
!!positions
|
||||
Unexpected character "" (ASCII 1) from 4:1 to 4:1
|
||||
array(
|
||||
0: Stmt_Expression(
|
||||
expr: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
0: Stmt_Expression[3:1 - 3:8](
|
||||
expr: Expr_Assign[3:1 - 3:7](
|
||||
var: Expr_Variable[3:1 - 3:2](
|
||||
name: a
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
expr: Scalar_LNumber[3:6 - 3:7](
|
||||
value: 42
|
||||
)
|
||||
)
|
||||
)
|
||||
1: Stmt_Expression(
|
||||
expr: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
1: Stmt_Expression[5:1 - 5:8](
|
||||
expr: Expr_Assign[5:1 - 5:7](
|
||||
var: Expr_Variable[5:1 - 5:2](
|
||||
name: b
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
expr: Scalar_LNumber[5:6 - 5:7](
|
||||
value: 24
|
||||
)
|
||||
)
|
||||
@ -62,24 +63,25 @@ $a = 42;
|
||||
@@{ "\0" }@@
|
||||
$b = 24;
|
||||
-----
|
||||
!!positions
|
||||
Unexpected null byte from 4:1 to 4:1
|
||||
array(
|
||||
0: Stmt_Expression(
|
||||
expr: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
0: Stmt_Expression[3:1 - 3:8](
|
||||
expr: Expr_Assign[3:1 - 3:7](
|
||||
var: Expr_Variable[3:1 - 3:2](
|
||||
name: a
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
expr: Scalar_LNumber[3:6 - 3:7](
|
||||
value: 42
|
||||
)
|
||||
)
|
||||
)
|
||||
1: Stmt_Expression(
|
||||
expr: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
1: Stmt_Expression[5:1 - 5:8](
|
||||
expr: Expr_Assign[5:1 - 5:7](
|
||||
var: Expr_Variable[5:1 - 5:2](
|
||||
name: b
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
expr: Scalar_LNumber[5:6 - 5:7](
|
||||
value: 24
|
||||
)
|
||||
)
|
||||
@ -94,35 +96,36 @@ $b = 2;
|
||||
@@{ "\2" }@@
|
||||
$c = 3;
|
||||
-----
|
||||
Unexpected character "@@{ "\1" }@@" (ASCII 1) from 4:1 to 4:1
|
||||
Unexpected character "@@{ "\2" }@@" (ASCII 2) from 6:1 to 6:1
|
||||
!!positions
|
||||
Unexpected character "" (ASCII 1) from 4:1 to 4:1
|
||||
Unexpected character "" (ASCII 2) from 6:1 to 6:1
|
||||
array(
|
||||
0: Stmt_Expression(
|
||||
expr: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
0: Stmt_Expression[3:1 - 3:7](
|
||||
expr: Expr_Assign[3:1 - 3:6](
|
||||
var: Expr_Variable[3:1 - 3:2](
|
||||
name: a
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
expr: Scalar_LNumber[3:6 - 3:6](
|
||||
value: 1
|
||||
)
|
||||
)
|
||||
)
|
||||
1: Stmt_Expression(
|
||||
expr: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
1: Stmt_Expression[5:1 - 5:7](
|
||||
expr: Expr_Assign[5:1 - 5:6](
|
||||
var: Expr_Variable[5:1 - 5:2](
|
||||
name: b
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
expr: Scalar_LNumber[5:6 - 5:6](
|
||||
value: 2
|
||||
)
|
||||
)
|
||||
)
|
||||
2: Stmt_Expression(
|
||||
expr: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
2: Stmt_Expression[7:1 - 7:7](
|
||||
expr: Expr_Assign[7:1 - 7:6](
|
||||
var: Expr_Variable[7:1 - 7:2](
|
||||
name: c
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
expr: Scalar_LNumber[7:6 - 7:6](
|
||||
value: 3
|
||||
)
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user