diff --git a/lib/PhpParser/Lexer.php b/lib/PhpParser/Lexer.php index 9ae3607..36c9fcf 100644 --- a/lib/PhpParser/Lexer.php +++ b/lib/PhpParser/Lexer.php @@ -9,6 +9,7 @@ class Lexer { protected $code; protected $tokens; + protected $errors; protected $pos; protected $line; protected $filePos; @@ -49,11 +50,22 @@ class Lexer /** * Initializes the lexer for lexing the provided source code. * - * @param string $code The source code to lex + * This function does not throw if lexing errors occur. Instead, errors may be retrieved using + * the getErrors() method. * - * @throws Error on lexing errors (unterminated comment or unexpected character) + * @param string $code The source code to lex */ public function startLexing($code) { + $this->code = $code; // keep the code around for __halt_compiler() handling + $this->pos = -1; + $this->line = 1; + $this->filePos = 0; + $this->errors = []; + + // If inline HTML occurs without preceding code, treat it as if it had a leading newline. + // This ensures proper composability, because having a newline is the "safe" assumption. + $this->prevCloseTagHasNewline = true; + $scream = ini_set('xdebug.scream', '0'); $this->resetErrors(); @@ -63,15 +75,6 @@ class Lexer if (false !== $scream) { ini_set('xdebug.scream', $scream); } - - $this->code = $code; // keep the code around for __halt_compiler() handling - $this->pos = -1; - $this->line = 1; - $this->filePos = 0; - - // If inline HTML occurs without preceding code, treat it as if it had a leading newline. - // This ensures proper composability, because having a newline is the "safe" assumption. - $this->prevCloseTagHasNewline = true; } protected function resetErrors() { @@ -85,32 +88,85 @@ class Lexer } } - protected function handleErrors() { + private function handleInvalidCharacterRange($start, $end, $line) { + for ($i = $start; $i < $end; $i++) { + $chr = $this->code[$i]; + if ($chr === "\0") { + // PHP cuts error message after null byte, so need special case + $errorMsg = 'Unexpected null byte'; + } else { + $errorMsg = sprintf( + 'Unexpected character "%s" (ASCII %d)', $chr, ord($chr) + ); + } + $this->errors[] = new Error($errorMsg, [ + 'startLine' => $line, + 'endLine' => $line, + 'startFilePos' => $i, + 'endFilePos' => $i, + ]); + } + } + + private function isUnterminatedComment($token) { + return ($token[0] === T_COMMENT || $token[0] === T_DOC_COMMENT) + && substr($token[1], 0, 2) === '/*' + && substr($token[1], -2) !== '*/'; + } + + private function errorMayHaveOccurred() { + if (defined('HHVM_VERSION')) { + // In HHVM token_get_all() does not throw warnings, so we need to conservatively + // assume that an error occurred + return true; + } + $error = error_get_last(); - if (null === $error) { + return null !== $error + && false === strpos($error['message'], 'Undefined variable'); + } + + protected function handleErrors() { + if (!$this->errorMayHaveOccurred()) { return; } - if (preg_match( - '~^Unterminated comment starting line ([0-9]+)$~', - $error['message'], $matches - )) { - throw new Error('Unterminated comment', (int) $matches[1]); + // PHP's error handling for token_get_all() is rather bad, so if we want detailed + // error information we need to compute it ourselves. Invalid character errors are + // detected by finding "gaps" in the token array. Unterminated comments are detected + // by checking if a trailing comment has a "*/" at the end. + + $filePos = 0; + $line = 1; + foreach ($this->tokens as $i => $token) { + $tokenValue = \is_string($token) ? $token : $token[1]; + $tokenLen = \strlen($tokenValue); + + if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) { + // Something is missing, must be an invalid character + $nextFilePos = strpos($this->code, $tokenValue, $filePos); + $this->handleInvalidCharacterRange($filePos, $nextFilePos, $line); + $filePos = $nextFilePos; + } + + $filePos += $tokenLen; + $line += substr_count($tokenValue, "\n"); } - if (preg_match( - '~^Unexpected character in input: \'(.)\' \(ASCII=([0-9]+)\)~s', - $error['message'], $matches - )) { - throw new Error(sprintf( - 'Unexpected character "%s" (ASCII %d)', - $matches[1], $matches[2] - )); + // Invalid characters at the end of the input + if ($filePos !== \strlen($this->code)) { + $this->handleInvalidCharacterRange($filePos, \strlen($this->code), $line); } - // PHP cuts error message after null byte, so need special case - if (preg_match('~^Unexpected character in input: \'$~', $error['message'])) { - throw new Error('Unexpected null byte'); + // Check for unterminated comment + $lastToken = $this->tokens[count($this->tokens) - 1]; + if ($this->isUnterminatedComment($lastToken)) { + $this->errors[] = new Error('Unterminated comment', [ + 'startLine' => $line - substr_count($lastToken[1], "\n"), + 'endLine' => $line, + 'startFilePos' => $filePos - \strlen($lastToken[1]), + 'endFilePos' => $filePos, + ]); } } @@ -224,6 +280,15 @@ class Lexer return $this->tokens; } + /** + * Returns errors that occurred during lexing. + * + * @return Error[] Array of lexer errors + */ + public function getErrors() { + return $this->errors; + } + /** * Handles __halt_compiler() by returning the text after it. * diff --git a/lib/PhpParser/ParserAbstract.php b/lib/PhpParser/ParserAbstract.php index b37fef2..4ba2890 100644 --- a/lib/PhpParser/ParserAbstract.php +++ b/lib/PhpParser/ParserAbstract.php @@ -132,19 +132,11 @@ abstract class ParserAbstract implements Parser * unable to recover from an error). */ public function parse($code) { - $this->errors = array(); - - // Initialize the lexer - try { - $this->lexer->startLexing($code); - } catch (Error $e) { - $this->errors[] = $e; - if ($this->throwOnError) { - throw $e; - } else { - // Currently can't recover from lexer errors - return null; - } + // Initialize the lexer and inherit lexing errors + $this->lexer->startLexing($code); + $this->errors = $this->lexer->getErrors(); + if ($this->throwOnError && !empty($this->errors)) { + throw $this->errors[0]; } // We start off with no lookahead-token diff --git a/test/PhpParser/LexerTest.php b/test/PhpParser/LexerTest.php index 0308816..f81757f 100644 --- a/test/PhpParser/LexerTest.php +++ b/test/PhpParser/LexerTest.php @@ -14,28 +14,35 @@ class LexerTest extends \PHPUnit_Framework_TestCase /** * @dataProvider provideTestError */ - public function testError($code, $message) { + public function testError($code, $messages) { if (defined('HHVM_VERSION')) { $this->markTestSkipped('HHVM does not throw warnings from token_get_all()'); } - $lexer = $this->getLexer(); - try { - $lexer->startLexing($code); - } catch (Error $e) { - $this->assertSame($message, $e->getMessage()); + $lexer = $this->getLexer(['usedAttributes' => [ + 'comments', 'startLine', 'endLine', 'startFilePos', 'endFilePos' + ]]); + $lexer->startLexing($code); + $errors = $lexer->getErrors(); - return; + $this->assertSame(count($messages), count($errors)); + for ($i = 0; $i < count($messages); $i++) { + $this->assertSame($messages[$i], $errors[$i]->getMessageWithColumnInfo($code)); } - - $this->fail('Expected PhpParser\Error'); } public function provideTestError() { return array( - array('parse('getParser(new Lexer()); + $parser->parse(' array( diff --git a/test/code/parser/errorHandling/lexerErrors.test b/test/code/parser/errorHandling/lexerErrors.test index 913c7ef..cfb7282 100644 --- a/test/code/parser/errorHandling/lexerErrors.test +++ b/test/code/parser/errorHandling/lexerErrors.test @@ -6,7 +6,23 @@ $a = 42; /* $b = 24; ----- -Unterminated comment on line 4 +Unterminated comment from 4:1 to 5:9 +array( + 0: Expr_Assign( + var: Expr_Variable( + name: a + ) + expr: Scalar_LNumber( + value: 42 + ) + ) + 1: Stmt_Nop( + comments: array( + 0: /* + $b = 24; + ) + ) +) -----