diff --git a/lib/PhpParser/Lexer.php b/lib/PhpParser/Lexer.php index 59655a8..f6767e1 100644 --- a/lib/PhpParser/Lexer.php +++ b/lib/PhpParser/Lexer.php @@ -8,20 +8,35 @@ class Lexer protected $tokens; protected $pos; protected $line; + protected $filePos; protected $tokenMap; protected $dropTokens; + protected $usedAttributes; + /** * Creates a Lexer. + * + * @param array $options Options array. Currently only the 'usedAttributes' option is supported, + * which is an array of attributes to add to the AST nodes. Possible attributes + * are: 'comments', 'startLine', 'endLine', 'startFilePos', 'endFilePos'. The + * option defaults to the first three. For more info see getNextToken() docs. */ - public function __construct() { + public function __construct(array $options = array()) { // map from internal tokens to PhpParser tokens $this->tokenMap = $this->createTokenMap(); // map of tokens to drop while lexing (the map is only used for isset lookup, // that's why the value is simply set to 1; the value is never actually used.) $this->dropTokens = array_fill_keys(array(T_WHITESPACE, T_OPEN_TAG), 1); + + // the usedAttributes member is a map of the used attribute names to a dummy + // value (here "true") + $options += array( + 'usedAttributes' => array('comments', 'startLine', 'endLine'), + ); + $this->usedAttributes = array_fill_keys($options['usedAttributes'], true); } /** @@ -43,6 +58,7 @@ class Lexer $this->code = $code; // keep the code around for __halt_compiler() handling $this->pos = -1; $this->line = 1; + $this->filePos = 0; } protected function resetErrors() { @@ -81,6 +97,17 @@ class Lexer /** * Fetches the next token. * + * The available attributes are determined by the 'usedAttributes' option, which can + * be specified in the constructor. The following attributes are supported: + * + * * 'comments' => Array of PhpParser\Comment or PhpParser\Comment\Doc instances, + * representing all comments that occurred between the previous + * non-discarded token and the current one. + * * 'startLine' => Line in which the token starts. + * * 'endLine' => Line in which the token ends. + * * 'startFilePos' => Offset into the code string at which the token starts. + * * 'endFilePos' => EXPERIMENTAL! Offset into the code string one past where the token ends. + * * @param mixed $value Variable to store token content in * @param mixed $startAttributes Variable to store start attributes in * @param mixed $endAttributes Variable to store end attributes in @@ -94,29 +121,57 @@ class Lexer while (isset($this->tokens[++$this->pos])) { $token = $this->tokens[$this->pos]; - if (is_string($token)) { - $startAttributes['startLine'] = $this->line; - $endAttributes['endLine'] = $this->line; + if (isset($this->usedAttributes['startFilePos'])) { + $startAttributes['startFilePos'] = $this->filePos; + } + if (is_string($token)) { // bug in token_get_all if ('b"' === $token) { $value = 'b"'; - return ord('"'); + $this->filePos += 2; + $id = ord('"'); } else { $value = $token; - return ord($token); + $this->filePos += 1; + $id = ord($token); } + + if (isset($this->usedAttributes['startLine'])) { + $startAttributes['startLine'] = $this->line; + } + if (isset($this->usedAttributes['endLine'])) { + $endAttributes['endLine'] = $this->line; + } + if (isset($this->usedAttributes['endFilePos'])) { + $endAttributes['endFilePos'] = $this->filePos; + } + + return $id; } else { $this->line += substr_count($token[1], "\n"); + $this->filePos += strlen($token[1]); if (T_COMMENT === $token[0]) { - $startAttributes['comments'][] = new Comment($token[1], $token[2]); + if (isset($this->usedAttributes['comments'])) { + $startAttributes['comments'][] = new Comment($token[1], $token[2]); + } } elseif (T_DOC_COMMENT === $token[0]) { - $startAttributes['comments'][] = new Comment\Doc($token[1], $token[2]); + if (isset($this->usedAttributes['comments'])) { + $startAttributes['comments'][] = new Comment\Doc($token[1], $token[2]); + } } elseif (!isset($this->dropTokens[$token[0]])) { $value = $token[1]; - $startAttributes['startLine'] = $token[2]; - $endAttributes['endLine'] = $this->line; + + if (isset($this->usedAttributes['startLine'])) { + $startAttributes['startLine'] = $token[2]; + } + if (isset($this->usedAttributes['endLine'])) { + $endAttributes['endLine'] = $this->line; + } + if (isset($this->usedAttributes['endFilePos'])) { + $endAttributes['endFilePos'] = $this->filePos; + } return $this->tokenMap[$token[0]]; } diff --git a/lib/PhpParser/ParserAbstract.php b/lib/PhpParser/ParserAbstract.php index 9c3f931..31f266f 100644 --- a/lib/PhpParser/ParserAbstract.php +++ b/lib/PhpParser/ParserAbstract.php @@ -205,7 +205,7 @@ abstract class ParserAbstract + $endAttributes ); } catch (Error $e) { - if (-1 === $e->getRawLine()) { + if (-1 === $e->getRawLine() && isset($startAttributes['startLine'])) { $e->setRawLine($startAttributes['startLine']); } diff --git a/test/PhpParser/LexerTest.php b/test/PhpParser/LexerTest.php index 2caa72f..6b3fe0c 100644 --- a/test/PhpParser/LexerTest.php +++ b/test/PhpParser/LexerTest.php @@ -4,19 +4,14 @@ namespace PhpParser; class LexerTest extends \PHPUnit_Framework_TestCase { - /** @var Lexer */ - protected $lexer; - - protected function setUp() { - $this->lexer = new Lexer; - } /** * @dataProvider provideTestError */ public function testError($code, $message) { + $lexer = new Lexer; try { - $this->lexer->startLexing($code); + $lexer->startLexing($code); } catch (Error $e) { $this->assertSame($message, $e->getMessage()); @@ -37,9 +32,10 @@ class LexerTest extends \PHPUnit_Framework_TestCase /** * @dataProvider provideTestLex */ - public function testLex($code, $tokens) { - $this->lexer->startLexing($code); - while ($id = $this->lexer->getNextToken($value, $startAttributes, $endAttributes)) { + public function testLex($code, $options, $tokens) { + $lexer = new Lexer($options); + $lexer->startLexing($code); + while ($id = $lexer->getNextToken($value, $startAttributes, $endAttributes)) { $token = array_shift($tokens); $this->assertSame($token[0], $id); @@ -54,6 +50,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase // tests conversion of closing PHP tag and drop of whitespace and opening tags array( 'plaintext', + array(), array( array( Parser::T_STRING, 'tokens', @@ -72,6 +69,7 @@ class LexerTest extends \PHPUnit_Framework_TestCase // tests line numbers array( ' array('startFilePos', 'endFilePos')), + array( + array( + Parser::T_CONSTANT_ENCAPSED_STRING, '"a"', + array('startFilePos' => 6), array('endFilePos' => 9) + ), + array( + ord(';'), ';', + array('startFilePos' => 9), array('endFilePos' => 10) + ), + array( + Parser::T_CONSTANT_ENCAPSED_STRING, '"b"', + array('startFilePos' => 18), array('endFilePos' => 21) + ), + array( + ord(';'), ';', + array('startFilePos' => 21), array('endFilePos' => 22) + ), + ) + ), + // tests all attributes being disabled + array( + ' array()), + array( + array( + Parser::T_VARIABLE, '$bar', + array(), array() + ), + array( + ord(';'), ';', + array(), array() + ) + ) + ) ); } @@ -127,12 +165,13 @@ class LexerTest extends \PHPUnit_Framework_TestCase * @dataProvider provideTestHaltCompiler */ public function testHandleHaltCompiler($code, $remaining) { - $this->lexer->startLexing($code); + $lexer = new Lexer; + $lexer->startLexing($code); - while (Parser::T_HALT_COMPILER !== $this->lexer->getNextToken()); + while (Parser::T_HALT_COMPILER !== $lexer->getNextToken()); - $this->assertSame($this->lexer->handleHaltCompiler(), $remaining); - $this->assertSame(0, $this->lexer->getNextToken()); + $this->assertSame($lexer->handleHaltCompiler(), $remaining); + $this->assertSame(0, $lexer->getNextToken()); } public function provideTestHaltCompiler() { @@ -144,4 +183,4 @@ class LexerTest extends \PHPUnit_Framework_TestCase //array('