Handle flexible heredoc via TokenEmulator

Extend the interface to support preprocessing.
This commit is contained in:
Nikita Popov 2020-09-06 16:31:33 +02:00
parent 39b046007d
commit 75abbbd2d4
9 changed files with 128 additions and 111 deletions

View File

@ -6,12 +6,13 @@ use PhpParser\Error;
use PhpParser\ErrorHandler;
use PhpParser\Lexer;
use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator;
use PhpParser\Lexer\TokenEmulator\FlexibleDocStringEmulator;
use PhpParser\Lexer\TokenEmulator\FnTokenEmulator;
use PhpParser\Lexer\TokenEmulator\MatchTokenEmulator;
use PhpParser\Lexer\TokenEmulator\NullsafeTokenEmulator;
use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator;
use PhpParser\Lexer\TokenEmulator\ReverseEmulator;
use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface;
use PhpParser\Lexer\TokenEmulator\TokenEmulator;
use PhpParser\Parser\Tokens;
class Emulative extends Lexer
@ -20,16 +21,10 @@ class Emulative extends Lexer
const PHP_7_4 = '7.4dev';
const PHP_8_0 = '8.0dev';
const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
(?:.*\r?\n)*?
(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
REGEX;
/** @var mixed[] Patches used to reverse changes introduced in the code */
private $patches = [];
/** @var TokenEmulatorInterface[] */
/** @var TokenEmulator[] */
private $emulators = [];
/** @var string */
@ -48,6 +43,7 @@ REGEX;
parent::__construct($options);
$emulators = [
new FlexibleDocStringEmulator(),
new FnTokenEmulator(),
new MatchTokenEmulator(),
new CoaleseEqualTokenEmulator(),
@ -68,19 +64,23 @@ REGEX;
}
public function startLexing(string $code, ErrorHandler $errorHandler = null) {
$this->patches = [];
$emulators = array_filter($this->emulators, function($emulator) use($code) {
return $emulator->isEmulationNeeded($code);
});
if ($this->isEmulationNeeded($code) === false) {
if (empty($emulators)) {
// Nothing to emulate, yay
parent::startLexing($code, $errorHandler);
return;
}
$collector = new ErrorHandler\Collecting();
$this->patches = [];
foreach ($emulators as $emulator) {
$code = $emulator->preprocessCode($code, $this->patches);
}
// 1. emulation of heredoc and nowdoc new syntax
$preparedCode = $this->processHeredocNowdoc($code);
parent::startLexing($preparedCode, $collector);
$collector = new ErrorHandler\Collecting();
parent::startLexing($code, $collector);
$this->fixupTokens();
$errors = $collector->getErrors();
@ -91,12 +91,10 @@ REGEX;
}
}
foreach ($this->emulators as $emulator) {
if ($emulator->isEmulationNeeded($code)) {
foreach ($emulators as $emulator) {
$this->tokens = $emulator->emulate($code, $this->tokens);
}
}
}
private function isForwardEmulationNeeded(string $emulatorPhpVersion): bool {
return version_compare(\PHP_VERSION, $emulatorPhpVersion, '<')
@ -108,71 +106,6 @@ REGEX;
&& version_compare($this->targetPhpVersion, $emulatorPhpVersion, '<');
}
private function isHeredocNowdocEmulationNeeded(string $code): bool
{
if (!$this->isForwardEmulationNeeded(self::PHP_7_3)) {
return false;
}
return strpos($code, '<<<') !== false;
}
private function processHeredocNowdoc(string $code): string
{
if ($this->isHeredocNowdocEmulationNeeded($code) === false) {
return $code;
}
if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
// No heredoc/nowdoc found
return $code;
}
// Keep track of how much we need to adjust string offsets due to the modifications we
// already made
$posDelta = 0;
foreach ($matches as $match) {
$indentation = $match['indentation'][0];
$indentationStart = $match['indentation'][1];
$separator = $match['separator'][0];
$separatorStart = $match['separator'][1];
if ($indentation === '' && $separator !== '') {
// Ordinary heredoc/nowdoc
continue;
}
if ($indentation !== '') {
// Remove indentation
$indentationLen = strlen($indentation);
$code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
$this->patches[] = [$indentationStart + $posDelta, 'add', $indentation];
$posDelta -= $indentationLen;
}
if ($separator === '') {
// Insert newline as separator
$code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
$this->patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
$posDelta += 1;
}
}
return $code;
}
private function isEmulationNeeded(string $code): bool
{
foreach ($this->emulators as $emulator) {
if ($emulator->isEmulationNeeded($code)) {
return true;
}
}
return $this->isHeredocNowdocEmulationNeeded($code);
}
private function fixupTokens()
{
if (\count($this->patches) === 0) {

View File

@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative;
final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
final class CoaleseEqualTokenEmulator extends TokenEmulator
{
public function getPhpVersion(): string
{

View File

@ -0,0 +1,76 @@
<?php declare(strict_types=1);
namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative;
final class FlexibleDocStringEmulator extends TokenEmulator
{
const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
(?:.*\r?\n)*?
(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
REGEX;
public function getPhpVersion(): string
{
return Emulative::PHP_7_3;
}
public function isEmulationNeeded(string $code) : bool
{
return strpos($code, '<<<') !== false;
}
public function emulate(string $code, array $tokens): array
{
// Handled by preprocessing + fixup.
return $tokens;
}
public function reverseEmulate(string $code, array $tokens): array
{
// Not supported.
return $tokens;
}
public function preprocessCode(string $code, array &$patches): string {
if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
// No heredoc/nowdoc found
return $code;
}
// Keep track of how much we need to adjust string offsets due to the modifications we
// already made
$posDelta = 0;
foreach ($matches as $match) {
$indentation = $match['indentation'][0];
$indentationStart = $match['indentation'][1];
$separator = $match['separator'][0];
$separatorStart = $match['separator'][1];
if ($indentation === '' && $separator !== '') {
// Ordinary heredoc/nowdoc
continue;
}
if ($indentation !== '') {
// Remove indentation
$indentationLen = strlen($indentation);
$code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
$patches[] = [$indentationStart + $posDelta, 'add', $indentation];
$posDelta -= $indentationLen;
}
if ($separator === '') {
// Insert newline as separator
$code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
$patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
$posDelta += 1;
}
}
return $code;
}
}

View File

@ -2,7 +2,7 @@
namespace PhpParser\Lexer\TokenEmulator;
abstract class KeywordEmulator implements TokenEmulatorInterface
abstract class KeywordEmulator extends TokenEmulator
{
abstract function getKeywordString(): string;
abstract function getKeywordToken(): int;

View File

@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative;
final class NullsafeTokenEmulator implements TokenEmulatorInterface
final class NullsafeTokenEmulator extends TokenEmulator
{
public function getPhpVersion(): string
{

View File

@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative;
final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
final class NumericLiteralSeparatorEmulator extends TokenEmulator
{
const BIN = '(?:0b[01]+(?:_[01]+)*)';
const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';

View File

@ -5,12 +5,12 @@ namespace PhpParser\Lexer\TokenEmulator;
/**
* Reverses emulation direction of the inner emulator.
*/
final class ReverseEmulator implements TokenEmulatorInterface
final class ReverseEmulator extends TokenEmulator
{
/** @var TokenEmulatorInterface Inner emulator */
/** @var TokenEmulator Inner emulator */
private $emulator;
public function __construct(TokenEmulatorInterface $emulator) {
public function __construct(TokenEmulator $emulator) {
$this->emulator = $emulator;
}
@ -29,4 +29,8 @@ final class ReverseEmulator implements TokenEmulatorInterface
public function reverseEmulate(string $code, array $tokens): array {
return $this->emulator->emulate($code, $tokens);
}
public function preprocessCode(string $code, array &$patches): string {
return $code;
}
}

View File

@ -0,0 +1,25 @@
<?php declare(strict_types=1);
namespace PhpParser\Lexer\TokenEmulator;
/** @internal */
abstract class TokenEmulator
{
abstract public function getPhpVersion(): string;
abstract public function isEmulationNeeded(string $code): bool;
/**
* @return array Modified Tokens
*/
abstract public function emulate(string $code, array $tokens): array;
/**
* @return array Modified Tokens
*/
abstract public function reverseEmulate(string $code, array $tokens): array;
public function preprocessCode(string $code, array &$patches): string {
return $code;
}
}

View File

@ -1,21 +0,0 @@
<?php declare(strict_types=1);
namespace PhpParser\Lexer\TokenEmulator;
/** @internal */
interface TokenEmulatorInterface
{
public function getPhpVersion(): string;
public function isEmulationNeeded(string $code): bool;
/**
* @return array Modified Tokens
*/
public function emulate(string $code, array $tokens): array;
/**
* @return array Modified Tokens
*/
public function reverseEmulate(string $code, array $tokens): array;
}