From 83766c8c0ef9eebc4edf0cfa3953fcacb72e289e Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Sat, 28 Apr 2018 22:14:16 +0200
Subject: [PATCH] Partial support for flexible heredoc/nowdoc

This only implements the token emulation part, it does not yet handle
the indentation stripping.

Emulating this feature on old PHP versions is really tough and is not
going to work perfectly, but hopefully this implementation is "good
enough".
---
 lib/PhpParser/Lexer/Emulative.php             | 220 +++++++++++++++++-
 test/PhpParser/Lexer/EmulativeTest.php        |  65 +++++-
 .../code/parser/scalar/flexibleDocString.test |  47 +++++
 3 files changed, 325 insertions(+), 7 deletions(-)
 create mode 100644 test/code/parser/scalar/flexibleDocString.test

diff --git a/lib/PhpParser/Lexer/Emulative.php b/lib/PhpParser/Lexer/Emulative.php
index 647aaa3..a23d4a7 100644
--- a/lib/PhpParser/Lexer/Emulative.php
+++ b/lib/PhpParser/Lexer/Emulative.php
@@ -2,7 +2,223 @@
 
 namespace PhpParser\Lexer;
 
+use PhpParser\Error;
+use PhpParser\ErrorHandler;
+
 class Emulative extends \PhpParser\Lexer
 {
-    /* No features requiring emulation have been added in PHP > 7.0 */
-}
+    const PHP_7_3 = '7.3.0dev';
+
+    /**
+     * @var array Patches used to reverse changes introduced in the code
+     */
+    private $patches;
+
+    /**
+     * Tokenizes the given code, emulating flexible heredoc/nowdoc syntax where needed.
+     *
+     * When prepareCode() had to rewrite the code, the rewritten code is lexed with a collecting
+     * error handler. Tokens and error positions are then mapped back to the original code
+     * before the collected errors are passed on to $errorHandler.
+     *
+     * @param string            $code         Code to tokenize
+     * @param ErrorHandler|null $errorHandler Receives lexing errors. If null, errors found on
+     *                                        the emulation path go to ErrorHandler\Throwing.
+     */
+    public function startLexing(string $code, ErrorHandler $errorHandler = null) {
+        $this->patches = [];
+        $preparedCode = $this->prepareCode($code);
+        if (null === $preparedCode) {
+            // Nothing to emulate, yay
+            parent::startLexing($code, $errorHandler);
+            return;
+        }
+
+        $collector = new ErrorHandler\Collecting();
+        parent::startLexing($preparedCode, $collector);
+        $this->fixupTokens();
+
+        $errors = $collector->getErrors();
+        if (!empty($errors)) {
+            if (null === $errorHandler) {
+                // The parameter is optional: fall back to a throwing handler instead of
+                // calling handleError() on null
+                $errorHandler = new ErrorHandler\Throwing();
+            }
+
+            $this->fixupErrors($errors);
+            foreach ($errors as $error) {
+                $errorHandler->handleError($error);
+            }
+        }
+    }
+
+    /**
+     * Prepares code for emulation. If nothing has to be emulated null is returned.
+     *
+     * @param string $code
+     * @return null|string
+     */
+    private function prepareCode(string $code) {
+        if (version_compare(\PHP_VERSION, self::PHP_7_3, '>=')) {
+            return null;
+        }
+
+        if (strpos($code, '<<<') === false) {
+            // Definitely doesn't contain heredoc/nowdoc
+            return null;
+        }
+
+        $flexibleDocStringRegex = <<<'REGEX'
+/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
+(?:.*\r?\n)*?
+(?<indentation>\h*)\2(?<separator>(?:;?[\r\n])?)/x
+REGEX;
+        if (!preg_match_all($flexibleDocStringRegex, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
+            // No heredoc/nowdoc found
+            return null;
+        }
+
+        // Keep track of how much we need to adjust string offsets due to the modifications we
+        // already made
+        $posDelta = 0;
+        foreach ($matches as $match) {
+            $indentation = $match['indentation'][0];
+            $indentationStart = $match['indentation'][1];
+
+            $separator = $match['separator'][0];
+            $separatorStart = $match['separator'][1];
+
+            if ($indentation === '' && $separator !== '') {
+                // Ordinary heredoc/nowdoc
+                continue;
+            }
+
+            if ($indentation !== '') {
+                // Remove indentation
+                $indentationLen = strlen($indentation);
+                $code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
+                $this->patches[] = [$indentationStart + $posDelta, 'add', $indentation];
+                $posDelta -= $indentationLen;
+            }
+
+            if ($separator === '') {
+                // Insert newline as separator
+                $code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
+                $this->patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
+                $posDelta += 1;
+            }
+        }
+
+        if (empty($this->patches)) {
+            // We did not end up emulating anything
+            return null;
+        }
+
+        return $code;
+    }
+
+    /**
+     * Undoes the recorded patches on $this->tokens, so that the token texts correspond to the
+     * original code instead of the prepared code.
+     */
+    private function fixupTokens() {
+        assert(count($this->patches) > 0);
+
+        // Load first patch
+        $patchIdx = 0;
+        list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];
+
+        // We use a manual loop over the tokens, because we modify the array on the fly
+        $pos = 0;
+        for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
+            $token = $this->tokens[$i];
+            if (\is_string($token)) {
+                // We assume that patches don't apply to string tokens
+                $pos += \strlen($token);
+                continue;
+            }
+
+            $len = \strlen($token[1]);
+            $posDelta = 0;
+            while ($patchPos >= $pos && $patchPos < $pos + $len) {
+                $patchTextLen = \strlen($patchText);
+                if ($patchType === 'remove') {
+                    if ($patchPos === $pos && $patchTextLen === $len) {
+                        // Remove token entirely
+                        array_splice($this->tokens, $i, 1, []);
+                        $i--;
+                        $c--;
+                    } else {
+                        // Remove from token string
+                        $this->tokens[$i][1] = substr_replace(
+                            $token[1], '', $patchPos - $pos + $posDelta, $patchTextLen
+                        );
+                        $posDelta -= $patchTextLen;
+                    }
+                } elseif ($patchType === 'add') {
+                    // Insert into the token string
+                    $this->tokens[$i][1] = substr_replace(
+                        $token[1], $patchText, $patchPos - $pos + $posDelta, 0
+                    );
+                    $posDelta += $patchTextLen;
+                } else {
+                    assert(false);
+                }
+
+                // Fetch the next patch
+                $patchIdx++;
+                if ($patchIdx >= \count($this->patches)) {
+                    // No more patches, we're done
+                    return;
+                }
+
+                list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];
+
+                // Multiple patches may apply to the same token. Reload the current one to check
+                // If the new patch applies
+                $token = $this->tokens[$i];
+            }
+
+            $pos += $len;
+        }
+
+        // A patch did not apply
+        assert(false);
+    }
+
+    /**
+     * Fixup line and position information in errors.
+     *
+     * @param Error[] $errors
+     */
+    private function fixupErrors(array $errors) {
+        foreach ($errors as $error) {
+            $attrs = $error->getAttributes();
+
+            $posDelta = 0;
+            $lineDelta = 0;
+            foreach ($this->patches as $patch) {
+                list($patchPos, $patchType, $patchText) = $patch;
+                if ($patchPos >= $attrs['startFilePos']) {
+                    // No longer relevant
+                    break;
+                }
+
+                if ($patchType === 'add') {
+                    $posDelta += strlen($patchText);
+                    $lineDelta += substr_count($patchText, "\n");
+                } else {
+                    $posDelta -= strlen($patchText);
+                    $lineDelta -= substr_count($patchText, "\n");
+                }
+            }
+
+            $attrs['startFilePos'] += $posDelta;
+            $attrs['endFilePos'] += $posDelta;
+            $attrs['startLine'] += $lineDelta;
+            $attrs['endLine'] += $lineDelta;
+            $error->setAttributes($attrs);
+        }
+    }
+}
\ No newline at end of file
diff --git a/test/PhpParser/Lexer/EmulativeTest.php b/test/PhpParser/Lexer/EmulativeTest.php
index 3a33ba8..934e0a9 100644
--- a/test/PhpParser/Lexer/EmulativeTest.php
+++ b/test/PhpParser/Lexer/EmulativeTest.php
@@ -2,6 +2,7 @@
 
 namespace PhpParser\Lexer;
 
+use PhpParser\ErrorHandler;
 use PhpParser\LexerTest;
 use PhpParser\Parser\Tokens;
 
@@ -63,12 +64,11 @@ class EmulativeTest extends LexerTest
         $lexer = $this->getLexer();
         $lexer->startLexing('assertSame($expectedTokenType, $lexer->getNextToken($text));
-            $this->assertSame($expectedTokenText, $text);
+        $tokens = [];
+        while (0 !== $token = $lexer->getNextToken($text)) {
+            $tokens[] = [$token, $text];
         }
-        $this->assertSame(0, $lexer->getNextToken());
+        $this->assertSame($expectedTokens, $tokens);
     }
 
     /**
@@ -85,6 +85,29 @@ class EmulativeTest extends LexerTest
         $this->assertSame(0, $lexer->getNextToken());
     }
 
+    /**
+     * @dataProvider provideTestLexNewFeatures
+     */
+    public function testErrorAfterEmulation($code) {
+        $errorHandler = new ErrorHandler\Collecting;
+        $lexer = $this->getLexer([]);
+        $lexer->startLexing('getErrors();
+        $this->assertCount(1, $errors);
+
+        $error = $errors[0];
+        $this->assertSame('Unexpected null byte',
$error->getRawMessage()); + + $attrs = $error->getAttributes(); + $expPos = strlen('assertSame($expPos, $attrs['startFilePos']); + $this->assertSame($expPos, $attrs['endFilePos']); + $this->assertSame($expLine, $attrs['startLine']); + $this->assertSame($expLine, $attrs['endLine']); + } + public function provideTestLexNewFeatures() { return [ ['yield from', [ @@ -128,6 +151,38 @@ class EmulativeTest extends LexerTest [Tokens::T_END_HEREDOC, 'NOWDOC'], [ord(';'), ';'], ]], + + // Flexible heredoc/nowdoc + ["<<