Partial support for flexible heredoc/nowdoc

This only implements the token emulation part, it does not yet
handle the indentation stripping.

Emulating this feature on old PHP versions is really tough and is
not going to work perfectly, but hopefully this implementation is
"good enough".
This commit is contained in:
Nikita Popov 2018-04-28 22:14:16 +02:00
parent fa6ee28600
commit 83766c8c0e
3 changed files with 304 additions and 7 deletions

View File

@ -2,7 +2,202 @@
namespace PhpParser\Lexer;
use PhpParser\Error;
use PhpParser\ErrorHandler;
class Emulative extends \PhpParser\Lexer
{
/* No features requiring emulation have been added in PHP > 7.0 */
const PHP_7_3 = '7.3.0dev';
/**
* @var array Patches used to reverse changes introduced in the code
*/
private $patches;
public function startLexing(string $code, ErrorHandler $errorHandler = null) {
$this->patches = [];
$preparedCode = $this->prepareCode($code);
if (null === $preparedCode) {
// Nothing to emulate, yay
parent::startLexing($code, $errorHandler);
return;
}
$collector = new ErrorHandler\Collecting();
parent::startLexing($preparedCode, $collector);
$this->fixupTokens();
$errors = $collector->getErrors();
if (!empty($errors)) {
$this->fixupErrors($errors);
foreach ($errors as $error) {
$errorHandler->handleError($error);
}
}
}
/**
* Prepares code for emulation. If nothing has to be emulated null is returned.
*
* @param string $code
* @return null|string
*/
private function prepareCode(string $code) {
if (version_compare(\PHP_VERSION, self::PHP_7_3, '>=')) {
return null;
}
if (strpos($code, '<<<') === false) {
// Definitely doesn't contain heredoc/nowdoc
return null;
}
$flexibleDocStringRegex = <<<'REGEX'
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
(?:.*\r?\n)*?
(?<indentation>\h*)\2(?<separator>(?:;?[\r\n])?)/x
REGEX;
if (!preg_match_all($flexibleDocStringRegex, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
// No heredoc/nowdoc found
return null;
}
// Keep track of how much we need to adjust string offsets due to the modifications we
// already made
$posDelta = 0;
foreach ($matches as $match) {
$indentation = $match['indentation'][0];
$indentationStart = $match['indentation'][1];
$separator = $match['separator'][0];
$separatorStart = $match['separator'][1];
if ($indentation === '' && $separator !== '') {
// Ordinary heredoc/nowdoc
continue;
}
if ($indentation !== '') {
// Remove indentation
$indentationLen = strlen($indentation);
$code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
$this->patches[] = [$indentationStart + $posDelta, 'add', $indentation];
$posDelta -= $indentationLen;
}
if ($separator === '') {
// Insert newline as separator
$code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
$this->patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
$posDelta += 1;
}
}
if (empty($this->patches)) {
// We did not end up emulating anything
return null;
}
return $code;
}
private function fixupTokens() {
assert(count($this->patches) > 0);
// Load first patch
$patchIdx = 0;
list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];
// We use a manual loop over the tokens, because we modify the array on the fly
$pos = 0;
for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
$token = $this->tokens[$i];
if (\is_string($token)) {
// We assume that patches don't apply to string tokens
$pos += \strlen($token);
continue;
}
$len = \strlen($token[1]);
$posDelta = 0;
while ($patchPos >= $pos && $patchPos < $pos + $len) {
$patchTextLen = \strlen($patchText);
if ($patchType === 'remove') {
if ($patchPos === $pos && $patchTextLen === $len) {
// Remove token entirely
array_splice($this->tokens, $i, 1, []);
$i--;
$c--;
} else {
// Remove from token string
$this->tokens[$i][1] = substr_replace(
$token[1], '', $patchPos - $pos + $posDelta, $patchTextLen
);
$posDelta -= $patchTextLen;
}
} elseif ($patchType === 'add') {
// Insert into the token string
$this->tokens[$i][1] = substr_replace(
$token[1], $patchText, $patchPos - $pos + $posDelta, 0
);
$posDelta += $patchTextLen;
} else {
assert(false);
}
// Fetch the next patch
$patchIdx++;
if ($patchIdx >= \count($this->patches)) {
// No more patches, we're done
return;
}
list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];
// Multiple patches may apply to the same token. Reload the current one to check
// If the new patch applies
$token = $this->tokens[$i];
}
$pos += $len;
}
// A patch did not apply
assert(false);
}
/**
* Fixup line and position information in errors.
*
* @param Error[] $errors
*/
private function fixupErrors(array $errors) {
foreach ($errors as $error) {
$attrs = $error->getAttributes();
$posDelta = 0;
$lineDelta = 0;
foreach ($this->patches as $patch) {
list($patchPos, $patchType, $patchText) = $patch;
if ($patchPos >= $attrs['startFilePos']) {
// No longer relevant
break;
}
if ($patchType === 'add') {
$posDelta += strlen($patchText);
$lineDelta += substr_count($patchText, "\n");
} else {
$posDelta -= strlen($patchText);
$lineDelta -= substr_count($patchText, "\n");
}
}
$attrs['startFilePos'] += $posDelta;
$attrs['endFilePos'] += $posDelta;
$attrs['startLine'] += $lineDelta;
$attrs['endLine'] += $lineDelta;
$error->setAttributes($attrs);
}
}
}

View File

@ -2,6 +2,7 @@
namespace PhpParser\Lexer;
use PhpParser\ErrorHandler;
use PhpParser\LexerTest;
use PhpParser\Parser\Tokens;
@ -63,12 +64,11 @@ class EmulativeTest extends LexerTest
$lexer = $this->getLexer();
$lexer->startLexing('<?php ' . $code);
foreach ($expectedTokens as $expectedToken) {
list($expectedTokenType, $expectedTokenText) = $expectedToken;
$this->assertSame($expectedTokenType, $lexer->getNextToken($text));
$this->assertSame($expectedTokenText, $text);
$tokens = [];
while (0 !== $token = $lexer->getNextToken($text)) {
$tokens[] = [$token, $text];
}
$this->assertSame(0, $lexer->getNextToken());
$this->assertSame($expectedTokens, $tokens);
}
/**
@ -85,6 +85,29 @@ class EmulativeTest extends LexerTest
$this->assertSame(0, $lexer->getNextToken());
}
/**
* @dataProvider provideTestLexNewFeatures
*/
public function testErrorAfterEmulation($code) {
$errorHandler = new ErrorHandler\Collecting;
$lexer = $this->getLexer([]);
$lexer->startLexing('<?php ' . $code . "\0", $errorHandler);
$errors = $errorHandler->getErrors();
$this->assertCount(1, $errors);
$error = $errors[0];
$this->assertSame('Unexpected null byte', $error->getRawMessage());
$attrs = $error->getAttributes();
$expPos = strlen('<?php ' . $code);
$expLine = 1 + substr_count('<?php ' . $code, "\n");
$this->assertSame($expPos, $attrs['startFilePos']);
$this->assertSame($expPos, $attrs['endFilePos']);
$this->assertSame($expLine, $attrs['startLine']);
$this->assertSame($expLine, $attrs['endLine']);
}
public function provideTestLexNewFeatures() {
return [
['yield from', [
@ -128,6 +151,38 @@ class EmulativeTest extends LexerTest
[Tokens::T_END_HEREDOC, 'NOWDOC'],
[ord(';'), ';'],
]],
// Flexible heredoc/nowdoc
["<<<LABEL\nLABEL,", [
[Tokens::T_START_HEREDOC, "<<<LABEL\n"],
[Tokens::T_END_HEREDOC, "LABEL"],
[ord(','), ','],
]],
["<<<LABEL\n LABEL,", [
[Tokens::T_START_HEREDOC, "<<<LABEL\n"],
[Tokens::T_END_HEREDOC, " LABEL"],
[ord(','), ','],
]],
["<<<LABEL\n Foo\n LABEL;", [
[Tokens::T_START_HEREDOC, "<<<LABEL\n"],
[Tokens::T_ENCAPSED_AND_WHITESPACE, " Foo\n"],
[Tokens::T_END_HEREDOC, " LABEL"],
[ord(';'), ';'],
]],
["<<<A\n A,<<<A\n A,", [
[Tokens::T_START_HEREDOC, "<<<A\n"],
[Tokens::T_END_HEREDOC, " A"],
[ord(','), ','],
[Tokens::T_START_HEREDOC, "<<<A\n"],
[Tokens::T_END_HEREDOC, " A"],
[ord(','), ','],
]],
// Interpretation changed
["<<<LABEL\n LABEL\nLABEL\n", [
[Tokens::T_START_HEREDOC, "<<<LABEL\n"],
[Tokens::T_END_HEREDOC, " LABEL"],
[Tokens::T_STRING, "LABEL"],
]],
];
}
}

View File

@ -0,0 +1,47 @@
Flexible heredoc/nowdoc (PHP 7.3)
-----
<?php
$ary = [
<<<FOO
Test
FOO,
// Value here is wrong
<<<'BAR'
Test
BAR,
];
-----
array(
0: Stmt_Expression(
expr: Expr_Assign(
var: Expr_Variable(
name: ary
)
expr: Expr_Array(
items: array(
0: Expr_ArrayItem(
key: null
value: Scalar_String(
value: Test
)
byRef: false
)
1: Expr_ArrayItem(
key: null
value: Scalar_String(
value: Test
comments: array(
0: // Value here is wrong
)
)
byRef: false
comments: array(
0: // Value here is wrong
)
)
)
)
)
)
)