mirror of
https://github.com/phabelio/PHP-Parser.git
synced 2024-11-30 04:29:15 +01:00
Support recovery from lexer errors
Lexer::startLexing() no longer throws; instead, errors can be fetched using Lexer::getErrors(). Lexer errors now also contain full line and position information.
This commit is contained in:
parent
e926efd62e
commit
c79ea6d1d3
@ -9,6 +9,7 @@ class Lexer
|
||||
{
|
||||
protected $code;
|
||||
protected $tokens;
|
||||
protected $errors;
|
||||
protected $pos;
|
||||
protected $line;
|
||||
protected $filePos;
|
||||
@ -49,11 +50,22 @@ class Lexer
|
||||
/**
|
||||
* Initializes the lexer for lexing the provided source code.
|
||||
*
|
||||
* @param string $code The source code to lex
|
||||
* This function does not throw if lexing errors occur. Instead, errors may be retrieved using
|
||||
* the getErrors() method.
|
||||
*
|
||||
* @throws Error on lexing errors (unterminated comment or unexpected character)
|
||||
* @param string $code The source code to lex
|
||||
*/
|
||||
public function startLexing($code) {
|
||||
$this->code = $code; // keep the code around for __halt_compiler() handling
|
||||
$this->pos = -1;
|
||||
$this->line = 1;
|
||||
$this->filePos = 0;
|
||||
$this->errors = [];
|
||||
|
||||
// If inline HTML occurs without preceding code, treat it as if it had a leading newline.
|
||||
// This ensures proper composability, because having a newline is the "safe" assumption.
|
||||
$this->prevCloseTagHasNewline = true;
|
||||
|
||||
$scream = ini_set('xdebug.scream', '0');
|
||||
|
||||
$this->resetErrors();
|
||||
@ -63,15 +75,6 @@ class Lexer
|
||||
if (false !== $scream) {
|
||||
ini_set('xdebug.scream', $scream);
|
||||
}
|
||||
|
||||
$this->code = $code; // keep the code around for __halt_compiler() handling
|
||||
$this->pos = -1;
|
||||
$this->line = 1;
|
||||
$this->filePos = 0;
|
||||
|
||||
// If inline HTML occurs without preceding code, treat it as if it had a leading newline.
|
||||
// This ensures proper composability, because having a newline is the "safe" assumption.
|
||||
$this->prevCloseTagHasNewline = true;
|
||||
}
|
||||
|
||||
protected function resetErrors() {
|
||||
@ -85,32 +88,85 @@ class Lexer
|
||||
}
|
||||
}
|
||||
|
||||
protected function handleErrors() {
|
||||
/**
 * Records one lexer error for every byte in the half-open range [$start, $end) of the code.
 *
 * Each recorded error covers exactly one byte: its start and end file positions are both
 * the offset of that byte, and its start and end lines are both $line.
 *
 * @param int $start Offset of the first invalid byte in $this->code
 * @param int $end   Offset one past the last invalid byte
 * @param int $line  Line number reported for every error in the range
 */
private function handleInvalidCharacterRange($start, $end, $line) {
    $offset = $start;
    while ($offset < $end) {
        $char = $this->code[$offset];

        // A null byte gets its own fixed message (PHP cuts error messages after a null
        // byte); every other byte is reported together with its ordinal value.
        $message = "\0" === $char
            ? 'Unexpected null byte'
            : sprintf('Unexpected character "%s" (ASCII %d)', $char, ord($char));

        $this->errors[] = new Error($message, [
            'startLine' => $line,
            'endLine' => $line,
            'startFilePos' => $offset,
            'endFilePos' => $offset,
        ]);

        $offset++;
    }
}
|
||||
|
||||
/**
 * Checks whether a token is a block comment that is missing its closing delimiter.
 *
 * @param array|string $token Token in token_get_all() format: either an array whose
 *                            element 0 is the token id and element 1 the token text,
 *                            or a plain string
 *
 * @return bool True if the token is a T_COMMENT / T_DOC_COMMENT that opens with the
 *              block-comment opener but is not closed
 */
private function isUnterminatedComment($token) {
    if ($token[0] !== T_COMMENT && $token[0] !== T_DOC_COMMENT) {
        return false;
    }

    $text = $token[1];
    if (substr($text, 0, 2) !== '/*') {
        // Only block comments need a closing delimiter
        return false;
    }

    // The shortest terminated block comment is 4 bytes long (opener directly followed
    // by closer). Without the length check a 3-byte comment consisting of slash, star,
    // slash would count as terminated, because its last two bytes look like a closer
    // even though the star is already consumed by the opener.
    return \strlen($text) < 4 || substr($text, -2) !== '*/';
}
|
||||
|
||||
private function errorMayHaveOccurred() {
|
||||
if (defined('HHVM_VERSION')) {
|
||||
// In HHVM token_get_all() does not throw warnings, so we need to conservatively
|
||||
// assume that an error occurred
|
||||
return true;
|
||||
}
|
||||
|
||||
$error = error_get_last();
|
||||
if (null === $error) {
|
||||
return null !== $error
|
||||
&& false === strpos($error['message'], 'Undefined variable');
|
||||
}
|
||||
|
||||
protected function handleErrors() {
|
||||
if (!$this->errorMayHaveOccurred()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (preg_match(
|
||||
'~^Unterminated comment starting line ([0-9]+)$~',
|
||||
$error['message'], $matches
|
||||
)) {
|
||||
throw new Error('Unterminated comment', (int) $matches[1]);
|
||||
// PHP's error handling for token_get_all() is rather bad, so if we want detailed
|
||||
// error information we need to compute it ourselves. Invalid character errors are
|
||||
// detected by finding "gaps" in the token array. Unterminated comments are detected
|
||||
// by checking if a trailing comment has a "*/" at the end.
|
||||
|
||||
$filePos = 0;
|
||||
$line = 1;
|
||||
foreach ($this->tokens as $i => $token) {
|
||||
$tokenValue = \is_string($token) ? $token : $token[1];
|
||||
$tokenLen = \strlen($tokenValue);
|
||||
|
||||
if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) {
|
||||
// Something is missing, must be an invalid character
|
||||
$nextFilePos = strpos($this->code, $tokenValue, $filePos);
|
||||
$this->handleInvalidCharacterRange($filePos, $nextFilePos, $line);
|
||||
$filePos = $nextFilePos;
|
||||
}
|
||||
|
||||
$filePos += $tokenLen;
|
||||
$line += substr_count($tokenValue, "\n");
|
||||
}
|
||||
|
||||
if (preg_match(
|
||||
'~^Unexpected character in input: \'(.)\' \(ASCII=([0-9]+)\)~s',
|
||||
$error['message'], $matches
|
||||
)) {
|
||||
throw new Error(sprintf(
|
||||
'Unexpected character "%s" (ASCII %d)',
|
||||
$matches[1], $matches[2]
|
||||
));
|
||||
// Invalid characters at the end of the input
|
||||
if ($filePos !== \strlen($this->code)) {
|
||||
$this->handleInvalidCharacterRange($filePos, \strlen($this->code), $line);
|
||||
}
|
||||
|
||||
// PHP cuts error message after null byte, so need special case
|
||||
if (preg_match('~^Unexpected character in input: \'$~', $error['message'])) {
|
||||
throw new Error('Unexpected null byte');
|
||||
// Check for unterminated comment
|
||||
$lastToken = $this->tokens[count($this->tokens) - 1];
|
||||
if ($this->isUnterminatedComment($lastToken)) {
|
||||
$this->errors[] = new Error('Unterminated comment', [
|
||||
'startLine' => $line - substr_count($lastToken[1], "\n"),
|
||||
'endLine' => $line,
|
||||
'startFilePos' => $filePos - \strlen($lastToken[1]),
|
||||
'endFilePos' => $filePos,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -224,6 +280,15 @@ class Lexer
|
||||
return $this->tokens;
|
||||
}
|
||||
|
||||
/**
 * Gets the errors collected by the lexer.
 *
 * startLexing() does not throw on lexing errors; it records them on the lexer, and this
 * accessor hands them back to the caller.
 *
 * @return Error[] Lexer errors, in the order they were recorded
 */
public function getErrors() {
    return $this->errors;
}
|
||||
|
||||
/**
|
||||
* Handles __halt_compiler() by returning the text after it.
|
||||
*
|
||||
|
@ -132,19 +132,11 @@ abstract class ParserAbstract implements Parser
|
||||
* unable to recover from an error).
|
||||
*/
|
||||
public function parse($code) {
|
||||
$this->errors = array();
|
||||
|
||||
// Initialize the lexer
|
||||
try {
|
||||
$this->lexer->startLexing($code);
|
||||
} catch (Error $e) {
|
||||
$this->errors[] = $e;
|
||||
if ($this->throwOnError) {
|
||||
throw $e;
|
||||
} else {
|
||||
// Currently can't recover from lexer errors
|
||||
return null;
|
||||
}
|
||||
// Initialize the lexer and inherit lexing errors
|
||||
$this->lexer->startLexing($code);
|
||||
$this->errors = $this->lexer->getErrors();
|
||||
if ($this->throwOnError && !empty($this->errors)) {
|
||||
throw $this->errors[0];
|
||||
}
|
||||
|
||||
// We start off with no lookahead-token
|
||||
|
@ -14,28 +14,35 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
/**
|
||||
* @dataProvider provideTestError
|
||||
*/
|
||||
public function testError($code, $message) {
|
||||
public function testError($code, $messages) {
|
||||
if (defined('HHVM_VERSION')) {
|
||||
$this->markTestSkipped('HHVM does not throw warnings from token_get_all()');
|
||||
}
|
||||
|
||||
$lexer = $this->getLexer();
|
||||
try {
|
||||
$lexer->startLexing($code);
|
||||
} catch (Error $e) {
|
||||
$this->assertSame($message, $e->getMessage());
|
||||
$lexer = $this->getLexer(['usedAttributes' => [
|
||||
'comments', 'startLine', 'endLine', 'startFilePos', 'endFilePos'
|
||||
]]);
|
||||
$lexer->startLexing($code);
|
||||
$errors = $lexer->getErrors();
|
||||
|
||||
return;
|
||||
$this->assertSame(count($messages), count($errors));
|
||||
for ($i = 0; $i < count($messages); $i++) {
|
||||
$this->assertSame($messages[$i], $errors[$i]->getMessageWithColumnInfo($code));
|
||||
}
|
||||
|
||||
$this->fail('Expected PhpParser\Error');
|
||||
}
|
||||
|
||||
public function provideTestError() {
|
||||
return array(
|
||||
array('<?php /*', 'Unterminated comment on line 1'),
|
||||
array('<?php ' . "\1", 'Unexpected character "' . "\1" . '" (ASCII 1) on unknown line'),
|
||||
array('<?php ' . "\0", 'Unexpected null byte on unknown line'),
|
||||
array("<?php /*", array("Unterminated comment from 1:7 to 1:9")),
|
||||
array("<?php \1", array("Unexpected character \"\1\" (ASCII 1) from 1:7 to 1:7")),
|
||||
array("<?php \0", array("Unexpected null byte from 1:7 to 1:7")),
|
||||
// Error with potentially emulated token
|
||||
array("<?php ?? \0", array("Unexpected null byte from 1:10 to 1:10")),
|
||||
array("<?php\n\0\1 foo /* bar", array(
|
||||
"Unexpected null byte from 2:1 to 2:1",
|
||||
"Unexpected character \"\1\" (ASCII 1) from 2:2 to 2:2",
|
||||
"Unterminated comment from 2:8 to 2:14"
|
||||
)),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -30,6 +30,15 @@ abstract class ParserTest extends \PHPUnit_Framework_TestCase
|
||||
$parser->parse('<?php use foo as self;');
|
||||
}
|
||||
|
||||
/**
|
||||
* @expectedException \PhpParser\Error
|
||||
* @expectedExceptionMessage Unterminated comment on line 1
|
||||
*/
|
||||
public function testParserThrowsLexerError() {
    // The input consists solely of an unclosed block comment. The exception this is
    // expected to raise is declared in the annotations on this method.
    $lexer = new Lexer();
    $this->getParser($lexer)->parse('<?php /*');
}
|
||||
|
||||
public function testAttributeAssignment() {
|
||||
$lexer = new Lexer(array(
|
||||
'usedAttributes' => array(
|
||||
|
@ -6,7 +6,23 @@ $a = 42;
|
||||
/*
|
||||
$b = 24;
|
||||
-----
|
||||
Unterminated comment on line 4
|
||||
Unterminated comment from 4:1 to 5:9
|
||||
array(
|
||||
0: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
name: a
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
value: 42
|
||||
)
|
||||
)
|
||||
1: Stmt_Nop(
|
||||
comments: array(
|
||||
0: /*
|
||||
$b = 24;
|
||||
)
|
||||
)
|
||||
)
|
||||
-----
|
||||
<?php
|
||||
|
||||
@ -14,7 +30,25 @@ $a = 42;
|
||||
@@{ "\1" }@@
|
||||
$b = 24;
|
||||
-----
|
||||
Unexpected character "@@{ "\1" }@@" (ASCII 1) on unknown line
|
||||
Unexpected character "@@{ "\1" }@@" (ASCII 1) from 4:1 to 4:1
|
||||
array(
|
||||
0: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
name: a
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
value: 42
|
||||
)
|
||||
)
|
||||
1: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
name: b
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
value: 24
|
||||
)
|
||||
)
|
||||
)
|
||||
-----
|
||||
<?php
|
||||
|
||||
@ -22,4 +56,69 @@ $a = 42;
|
||||
@@{ "\0" }@@
|
||||
$b = 24;
|
||||
-----
|
||||
Unexpected null byte on unknown line
|
||||
Unexpected null byte from 4:1 to 4:1
|
||||
array(
|
||||
0: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
name: a
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
value: 42
|
||||
)
|
||||
)
|
||||
1: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
name: b
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
value: 24
|
||||
)
|
||||
)
|
||||
)
|
||||
-----
|
||||
<?php
|
||||
|
||||
$a = 1;
|
||||
@@{ "\1" }@@
|
||||
$b = 2;
|
||||
@@{ "\2" }@@
|
||||
$c = 3;
|
||||
-----
|
||||
Unexpected character "@@{ "\1" }@@" (ASCII 1) from 4:1 to 4:1
|
||||
Unexpected character "@@{ "\2" }@@" (ASCII 2) from 6:1 to 6:1
|
||||
array(
|
||||
0: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
name: a
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
value: 1
|
||||
)
|
||||
)
|
||||
1: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
name: b
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
value: 2
|
||||
)
|
||||
)
|
||||
2: Expr_Assign(
|
||||
var: Expr_Variable(
|
||||
name: c
|
||||
)
|
||||
expr: Scalar_LNumber(
|
||||
value: 3
|
||||
)
|
||||
)
|
||||
)
|
||||
-----
|
||||
<?php
|
||||
|
||||
if ($b) {
|
||||
$a = 1;
|
||||
/* unterminated
|
||||
}
|
||||
-----
|
||||
Unterminated comment from 5:5 to 6:2
|
||||
Syntax error, unexpected EOF from 6:2 to 6:2
|
Loading…
Reference in New Issue
Block a user