mirror of
https://github.com/phabelio/PHP-Parser.git
synced 2024-11-26 20:14:46 +01:00
Support token position attributes in lexer
Also change endFilePos semantics to refer to the last character that is *included* in the token, rather than one past the last character. This ensures that all end* attributes have the same semantics.
This commit is contained in:
parent
e0f3e8a492
commit
46975107a7
@ -20,8 +20,9 @@ class Lexer
|
||||
*
|
||||
* @param array $options Options array. Currently only the 'usedAttributes' option is supported,
|
||||
* which is an array of attributes to add to the AST nodes. Possible attributes
|
||||
* are: 'comments', 'startLine', 'endLine', 'startFilePos', 'endFilePos'. The
|
||||
* option defaults to the first three. For more info see getNextToken() docs.
|
||||
* are: 'comments', 'startLine', 'endLine', 'startTokenPos', 'endTokenPos',
|
||||
* 'startFilePos', 'endFilePos'. The option defaults to the first three.
|
||||
* For more info see getNextToken() docs.
|
||||
*/
|
||||
public function __construct(array $options = array()) {
|
||||
// map from internal tokens to PhpParser tokens
|
||||
@ -100,13 +101,16 @@ class Lexer
|
||||
* The available attributes are determined by the 'usedAttributes' option, which can
|
||||
* be specified in the constructor. The following attributes are supported:
|
||||
*
|
||||
* * 'comments' => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
|
||||
* representing all comments that occurred between the previous
|
||||
* non-discarded token and the current one.
|
||||
* * 'startLine' => Line in which the token starts.
|
||||
* * 'endLine' => Line in which the token ends.
|
||||
* * 'startFilePos' => Offset into the code string at which the token starts.
|
||||
* * 'endFilePos' => EXPERIMENTAL! Offset into the code string one past where the token ends.
|
||||
* * 'comments' => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
|
||||
* representing all comments that occurred between the previous
|
||||
* non-discarded token and the current one.
|
||||
* * 'startLine' => Line in which the token starts.
|
||||
* * 'endLine' => Line in which the token ends.
|
||||
* * 'startTokenPos' => Position in the token array of the first token in the node.
|
||||
* * 'endTokenPos' => Position in the token array of the last token in the node.
|
||||
* * 'startFilePos' => Offset into the code string at which the token starts.
|
||||
* * 'endFilePos' => Offset into the code string at which the last character that
|
||||
* is part of the token occurs.
|
||||
*
|
||||
* @param mixed $value Variable to store token content in
|
||||
* @param mixed $startAttributes Variable to store start attributes in
|
||||
@ -121,6 +125,9 @@ class Lexer
|
||||
while (isset($this->tokens[++$this->pos])) {
|
||||
$token = $this->tokens[$this->pos];
|
||||
|
||||
if (isset($this->usedAttributes['startTokenPos'])) {
|
||||
$startAttributes['startTokenPos'] = $this->pos;
|
||||
}
|
||||
if (isset($this->usedAttributes['startFilePos'])) {
|
||||
$startAttributes['startFilePos'] = $this->filePos;
|
||||
}
|
||||
@ -143,8 +150,11 @@ class Lexer
|
||||
if (isset($this->usedAttributes['endLine'])) {
|
||||
$endAttributes['endLine'] = $this->line;
|
||||
}
|
||||
if (isset($this->usedAttributes['endTokenPos'])) {
|
||||
$endAttributes['endTokenPos'] = $this->pos;
|
||||
}
|
||||
if (isset($this->usedAttributes['endFilePos'])) {
|
||||
$endAttributes['endFilePos'] = $this->filePos;
|
||||
$endAttributes['endFilePos'] = $this->filePos - 1;
|
||||
}
|
||||
|
||||
return $id;
|
||||
@ -169,8 +179,11 @@ class Lexer
|
||||
if (isset($this->usedAttributes['endLine'])) {
|
||||
$endAttributes['endLine'] = $this->line;
|
||||
}
|
||||
if (isset($this->usedAttributes['endTokenPos'])) {
|
||||
$endAttributes['endTokenPos'] = $this->pos;
|
||||
}
|
||||
if (isset($this->usedAttributes['endFilePos'])) {
|
||||
$endAttributes['endFilePos'] = $this->filePos;
|
||||
$endAttributes['endFilePos'] = $this->filePos - 1;
|
||||
}
|
||||
|
||||
return $this->tokenMap[$token[0]];
|
||||
@ -184,6 +197,20 @@ class Lexer
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the token array for current code.
|
||||
*
|
||||
* The token array is in the same format as provided by the
|
||||
* token_get_all() function and does not discard tokens (i.e.
|
||||
* whitespace and comments are included). The token position
|
||||
* attributes are indices into this token array.
|
||||
*
|
||||
* @return array Array of tokens in token_get_all() format
|
||||
*/
|
||||
public function getTokens() {
|
||||
return $this->tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles __halt_compiler() by returning the text after it.
|
||||
*
|
||||
|
@ -131,19 +131,42 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
array(
|
||||
array(
|
||||
Parser::T_CONSTANT_ENCAPSED_STRING, '"a"',
|
||||
array('startFilePos' => 6), array('endFilePos' => 9)
|
||||
array('startFilePos' => 6), array('endFilePos' => 8)
|
||||
),
|
||||
array(
|
||||
ord(';'), ';',
|
||||
array('startFilePos' => 9), array('endFilePos' => 10)
|
||||
array('startFilePos' => 9), array('endFilePos' => 9)
|
||||
),
|
||||
array(
|
||||
Parser::T_CONSTANT_ENCAPSED_STRING, '"b"',
|
||||
array('startFilePos' => 18), array('endFilePos' => 21)
|
||||
array('startFilePos' => 18), array('endFilePos' => 20)
|
||||
),
|
||||
array(
|
||||
ord(';'), ';',
|
||||
array('startFilePos' => 21), array('endFilePos' => 22)
|
||||
array('startFilePos' => 21), array('endFilePos' => 21)
|
||||
),
|
||||
)
|
||||
),
|
||||
// tests token offsets
|
||||
array(
|
||||
'<?php "a";' . "\n" . '// foo' . "\n" . '"b";',
|
||||
array('usedAttributes' => array('startTokenPos', 'endTokenPos')),
|
||||
array(
|
||||
array(
|
||||
Parser::T_CONSTANT_ENCAPSED_STRING, '"a"',
|
||||
array('startTokenPos' => 1), array('endTokenPos' => 1)
|
||||
),
|
||||
array(
|
||||
ord(';'), ';',
|
||||
array('startTokenPos' => 2), array('endTokenPos' => 2)
|
||||
),
|
||||
array(
|
||||
Parser::T_CONSTANT_ENCAPSED_STRING, '"b"',
|
||||
array('startTokenPos' => 5), array('endTokenPos' => 5)
|
||||
),
|
||||
array(
|
||||
ord(';'), ';',
|
||||
array('startTokenPos' => 6), array('endTokenPos' => 6)
|
||||
),
|
||||
)
|
||||
),
|
||||
@ -187,4 +210,21 @@ class LexerTest extends \PHPUnit_Framework_TestCase
|
||||
//array('<?php ... __halt_compiler /* */ ( ) ;Remaining Text', 'Remaining Text'),
|
||||
);
|
||||
}
|
||||
|
||||
public function testGetTokens() {
|
||||
$code = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';
|
||||
$expectedTokens = array(
|
||||
array(T_OPEN_TAG, '<?php ', 1),
|
||||
array(T_CONSTANT_ENCAPSED_STRING, '"a"', 1),
|
||||
';',
|
||||
array(T_WHITESPACE, "\n", 1),
|
||||
array(T_COMMENT, '// foo' . "\n", 2),
|
||||
array(T_CONSTANT_ENCAPSED_STRING, '"b"', 3),
|
||||
';',
|
||||
);
|
||||
|
||||
$lexer = $this->getLexer();
|
||||
$lexer->startLexing($code);
|
||||
$this->assertSame($expectedTokens, $lexer->getTokens());
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user