mirror of
https://github.com/phabelio/PHP-Parser.git
synced 2024-11-26 20:14:46 +01:00
Insert T_BAD_CHARACTER tokens for missing characters
The token stream should cover all characters in the original code, so insert a dummy token for each illegal character that is missing from it. We should really be doing this in token_get_all() as well.
This commit is contained in:
parent
a4b43edb03
commit
b9b45dd2bc
@ -6,6 +6,11 @@ use PhpParser\Parser\Tokens;
|
|||||||
|
|
||||||
class Lexer
|
class Lexer
|
||||||
{
|
{
|
||||||
|
/* Token ID used for illegal characters part of the token stream. These are dropped by token_get_all(),
|
||||||
|
* but we restore them here to make sure that the tokens cover the full original text, and to prevent
|
||||||
|
* file positions from going out of sync. */
|
||||||
|
const T_BAD_CHARACTER = -1;
|
||||||
|
|
||||||
protected $code;
|
protected $code;
|
||||||
protected $tokens;
|
protected $tokens;
|
||||||
protected $pos;
|
protected $pos;
|
||||||
@ -40,7 +45,7 @@ class Lexer
|
|||||||
// map of tokens to drop while lexing (the map is only used for isset lookup,
|
// map of tokens to drop while lexing (the map is only used for isset lookup,
|
||||||
// that's why the value is simply set to 1; the value is never actually used.)
|
// that's why the value is simply set to 1; the value is never actually used.)
|
||||||
$this->dropTokens = array_fill_keys(
|
$this->dropTokens = array_fill_keys(
|
||||||
[\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT], 1
|
[\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, self::T_BAD_CHARACTER], 1
|
||||||
);
|
);
|
||||||
|
|
||||||
$defaultAttributes = ['comments', 'startLine', 'endLine'];
|
$defaultAttributes = ['comments', 'startLine', 'endLine'];
|
||||||
@ -92,13 +97,9 @@ class Lexer
|
|||||||
}
|
}
|
||||||
|
|
||||||
private function handleInvalidCharacterRange($start, $end, $line, ErrorHandler $errorHandler) {
|
private function handleInvalidCharacterRange($start, $end, $line, ErrorHandler $errorHandler) {
|
||||||
|
$tokens = [];
|
||||||
for ($i = $start; $i < $end; $i++) {
|
for ($i = $start; $i < $end; $i++) {
|
||||||
$chr = $this->code[$i];
|
$chr = $this->code[$i];
|
||||||
if ($chr === 'b' || $chr === 'B') {
|
|
||||||
// HHVM does not treat b" tokens correctly, so ignore these
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($chr === "\0") {
|
if ($chr === "\0") {
|
||||||
// PHP cuts error message after null byte, so need special case
|
// PHP cuts error message after null byte, so need special case
|
||||||
$errorMsg = 'Unexpected null byte';
|
$errorMsg = 'Unexpected null byte';
|
||||||
@ -108,6 +109,7 @@ class Lexer
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$tokens[] = [self::T_BAD_CHARACTER, $chr, $line];
|
||||||
$errorHandler->handleError(new Error($errorMsg, [
|
$errorHandler->handleError(new Error($errorMsg, [
|
||||||
'startLine' => $line,
|
'startLine' => $line,
|
||||||
'endLine' => $line,
|
'endLine' => $line,
|
||||||
@ -115,6 +117,7 @@ class Lexer
|
|||||||
'endFilePos' => $i,
|
'endFilePos' => $i,
|
||||||
]));
|
]));
|
||||||
}
|
}
|
||||||
|
return $tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -155,16 +158,22 @@ class Lexer
|
|||||||
|
|
||||||
$filePos = 0;
|
$filePos = 0;
|
||||||
$line = 1;
|
$line = 1;
|
||||||
foreach ($this->tokens as $token) {
|
$numTokens = \count($this->tokens);
|
||||||
|
for ($i = 0; $i < $numTokens; $i++) {
|
||||||
|
$token = $this->tokens[$i];
|
||||||
$tokenValue = \is_string($token) ? $token : $token[1];
|
$tokenValue = \is_string($token) ? $token : $token[1];
|
||||||
$tokenLen = \strlen($tokenValue);
|
$tokenLen = \strlen($tokenValue);
|
||||||
|
|
||||||
if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) {
|
if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) {
|
||||||
// Something is missing, must be an invalid character
|
// Something is missing, must be an invalid character
|
||||||
$nextFilePos = strpos($this->code, $tokenValue, $filePos);
|
$nextFilePos = strpos($this->code, $tokenValue, $filePos);
|
||||||
$this->handleInvalidCharacterRange(
|
$badCharTokens = $this->handleInvalidCharacterRange(
|
||||||
$filePos, $nextFilePos, $line, $errorHandler);
|
$filePos, $nextFilePos, $line, $errorHandler);
|
||||||
$filePos = (int) $nextFilePos;
|
$filePos = (int) $nextFilePos;
|
||||||
|
|
||||||
|
array_splice($this->tokens, $i, 0, $badCharTokens);
|
||||||
|
$numTokens += \count($badCharTokens);
|
||||||
|
$i += \count($badCharTokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
$filePos += $tokenLen;
|
$filePos += $tokenLen;
|
||||||
@ -187,8 +196,9 @@ class Lexer
|
|||||||
$this->tokens[] = [$isDocComment ? \T_DOC_COMMENT : \T_COMMENT, $comment, $line];
|
$this->tokens[] = [$isDocComment ? \T_DOC_COMMENT : \T_COMMENT, $comment, $line];
|
||||||
} else {
|
} else {
|
||||||
// Invalid characters at the end of the input
|
// Invalid characters at the end of the input
|
||||||
$this->handleInvalidCharacterRange(
|
$badCharTokens = $this->handleInvalidCharacterRange(
|
||||||
$filePos, \strlen($this->code), $line, $errorHandler);
|
$filePos, \strlen($this->code), $line, $errorHandler);
|
||||||
|
$this->tokens = array_merge($this->tokens, $badCharTokens);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -32,24 +32,25 @@ $a = 42;
|
|||||||
@@{ "\1" }@@
|
@@{ "\1" }@@
|
||||||
$b = 24;
|
$b = 24;
|
||||||
-----
|
-----
|
||||||
|
!!positions
|
||||||
Unexpected character "" (ASCII 1) from 4:1 to 4:1
|
Unexpected character "" (ASCII 1) from 4:1 to 4:1
|
||||||
array(
|
array(
|
||||||
0: Stmt_Expression(
|
0: Stmt_Expression[3:1 - 3:8](
|
||||||
expr: Expr_Assign(
|
expr: Expr_Assign[3:1 - 3:7](
|
||||||
var: Expr_Variable(
|
var: Expr_Variable[3:1 - 3:2](
|
||||||
name: a
|
name: a
|
||||||
)
|
)
|
||||||
expr: Scalar_LNumber(
|
expr: Scalar_LNumber[3:6 - 3:7](
|
||||||
value: 42
|
value: 42
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
1: Stmt_Expression(
|
1: Stmt_Expression[5:1 - 5:8](
|
||||||
expr: Expr_Assign(
|
expr: Expr_Assign[5:1 - 5:7](
|
||||||
var: Expr_Variable(
|
var: Expr_Variable[5:1 - 5:2](
|
||||||
name: b
|
name: b
|
||||||
)
|
)
|
||||||
expr: Scalar_LNumber(
|
expr: Scalar_LNumber[5:6 - 5:7](
|
||||||
value: 24
|
value: 24
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -62,24 +63,25 @@ $a = 42;
|
|||||||
@@{ "\0" }@@
|
@@{ "\0" }@@
|
||||||
$b = 24;
|
$b = 24;
|
||||||
-----
|
-----
|
||||||
|
!!positions
|
||||||
Unexpected null byte from 4:1 to 4:1
|
Unexpected null byte from 4:1 to 4:1
|
||||||
array(
|
array(
|
||||||
0: Stmt_Expression(
|
0: Stmt_Expression[3:1 - 3:8](
|
||||||
expr: Expr_Assign(
|
expr: Expr_Assign[3:1 - 3:7](
|
||||||
var: Expr_Variable(
|
var: Expr_Variable[3:1 - 3:2](
|
||||||
name: a
|
name: a
|
||||||
)
|
)
|
||||||
expr: Scalar_LNumber(
|
expr: Scalar_LNumber[3:6 - 3:7](
|
||||||
value: 42
|
value: 42
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
1: Stmt_Expression(
|
1: Stmt_Expression[5:1 - 5:8](
|
||||||
expr: Expr_Assign(
|
expr: Expr_Assign[5:1 - 5:7](
|
||||||
var: Expr_Variable(
|
var: Expr_Variable[5:1 - 5:2](
|
||||||
name: b
|
name: b
|
||||||
)
|
)
|
||||||
expr: Scalar_LNumber(
|
expr: Scalar_LNumber[5:6 - 5:7](
|
||||||
value: 24
|
value: 24
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -94,35 +96,36 @@ $b = 2;
|
|||||||
@@{ "\2" }@@
|
@@{ "\2" }@@
|
||||||
$c = 3;
|
$c = 3;
|
||||||
-----
|
-----
|
||||||
Unexpected character "@@{ "\1" }@@" (ASCII 1) from 4:1 to 4:1
|
!!positions
|
||||||
Unexpected character "@@{ "\2" }@@" (ASCII 2) from 6:1 to 6:1
|
Unexpected character "" (ASCII 1) from 4:1 to 4:1
|
||||||
|
Unexpected character "" (ASCII 2) from 6:1 to 6:1
|
||||||
array(
|
array(
|
||||||
0: Stmt_Expression(
|
0: Stmt_Expression[3:1 - 3:7](
|
||||||
expr: Expr_Assign(
|
expr: Expr_Assign[3:1 - 3:6](
|
||||||
var: Expr_Variable(
|
var: Expr_Variable[3:1 - 3:2](
|
||||||
name: a
|
name: a
|
||||||
)
|
)
|
||||||
expr: Scalar_LNumber(
|
expr: Scalar_LNumber[3:6 - 3:6](
|
||||||
value: 1
|
value: 1
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
1: Stmt_Expression(
|
1: Stmt_Expression[5:1 - 5:7](
|
||||||
expr: Expr_Assign(
|
expr: Expr_Assign[5:1 - 5:6](
|
||||||
var: Expr_Variable(
|
var: Expr_Variable[5:1 - 5:2](
|
||||||
name: b
|
name: b
|
||||||
)
|
)
|
||||||
expr: Scalar_LNumber(
|
expr: Scalar_LNumber[5:6 - 5:6](
|
||||||
value: 2
|
value: 2
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
2: Stmt_Expression(
|
2: Stmt_Expression[7:1 - 7:7](
|
||||||
expr: Expr_Assign(
|
expr: Expr_Assign[7:1 - 7:6](
|
||||||
var: Expr_Variable(
|
var: Expr_Variable[7:1 - 7:2](
|
||||||
name: c
|
name: c
|
||||||
)
|
)
|
||||||
expr: Scalar_LNumber(
|
expr: Scalar_LNumber[7:6 - 7:6](
|
||||||
value: 3
|
value: 3
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user