diff --git a/lib/PhpParser/Lexer/Emulative.php b/lib/PhpParser/Lexer/Emulative.php index bf4837a..7698abb 100644 --- a/lib/PhpParser/Lexer/Emulative.php +++ b/lib/PhpParser/Lexer/Emulative.php @@ -6,12 +6,13 @@ use PhpParser\Error; use PhpParser\ErrorHandler; use PhpParser\Lexer; use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator; +use PhpParser\Lexer\TokenEmulator\FlexibleDocStringEmulator; use PhpParser\Lexer\TokenEmulator\FnTokenEmulator; use PhpParser\Lexer\TokenEmulator\MatchTokenEmulator; use PhpParser\Lexer\TokenEmulator\NullsafeTokenEmulator; use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator; use PhpParser\Lexer\TokenEmulator\ReverseEmulator; -use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface; +use PhpParser\Lexer\TokenEmulator\TokenEmulator; use PhpParser\Parser\Tokens; class Emulative extends Lexer @@ -20,16 +21,10 @@ class Emulative extends Lexer const PHP_7_4 = '7.4dev'; const PHP_8_0 = '8.0dev'; - const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX' -/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n -(?:.*\r?\n)*? 
-(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x -REGEX; - /** @var mixed[] Patches used to reverse changes introduced in the code */ private $patches = []; - /** @var TokenEmulatorInterface[] */ + /** @var TokenEmulator[] */ private $emulators = []; /** @var string */ @@ -48,6 +43,7 @@ REGEX; parent::__construct($options); $emulators = [ + new FlexibleDocStringEmulator(), new FnTokenEmulator(), new MatchTokenEmulator(), new CoaleseEqualTokenEmulator(), @@ -68,19 +64,23 @@ REGEX; } public function startLexing(string $code, ErrorHandler $errorHandler = null) { - $this->patches = []; + $emulators = array_filter($this->emulators, function($emulator) use($code) { + return $emulator->isEmulationNeeded($code); + }); - if ($this->isEmulationNeeded($code) === false) { + if (empty($emulators)) { // Nothing to emulate, yay parent::startLexing($code, $errorHandler); return; } - $collector = new ErrorHandler\Collecting(); + $this->patches = []; + foreach ($emulators as $emulator) { + $code = $emulator->preprocessCode($code, $this->patches); + } - // 1. 
emulation of heredoc and nowdoc new syntax - $preparedCode = $this->processHeredocNowdoc($code); - parent::startLexing($preparedCode, $collector); + $collector = new ErrorHandler\Collecting(); + parent::startLexing($code, $collector); $this->fixupTokens(); $errors = $collector->getErrors(); @@ -91,10 +91,8 @@ REGEX; } } - foreach ($this->emulators as $emulator) { - if ($emulator->isEmulationNeeded($code)) { - $this->tokens = $emulator->emulate($code, $this->tokens); - } + foreach ($emulators as $emulator) { + $this->tokens = $emulator->emulate($code, $this->tokens); } } @@ -108,71 +106,6 @@ REGEX; && version_compare($this->targetPhpVersion, $emulatorPhpVersion, '<'); } - private function isHeredocNowdocEmulationNeeded(string $code): bool - { - if (!$this->isForwardEmulationNeeded(self::PHP_7_3)) { - return false; - } - - return strpos($code, '<<<') !== false; - } - - private function processHeredocNowdoc(string $code): string - { - if ($this->isHeredocNowdocEmulationNeeded($code) === false) { - return $code; - } - - if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) { - // No heredoc/nowdoc found - return $code; - } - - // Keep track of how much we need to adjust string offsets due to the modifications we - // already made - $posDelta = 0; - foreach ($matches as $match) { - $indentation = $match['indentation'][0]; - $indentationStart = $match['indentation'][1]; - - $separator = $match['separator'][0]; - $separatorStart = $match['separator'][1]; - - if ($indentation === '' && $separator !== '') { - // Ordinary heredoc/nowdoc - continue; - } - - if ($indentation !== '') { - // Remove indentation - $indentationLen = strlen($indentation); - $code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen); - $this->patches[] = [$indentationStart + $posDelta, 'add', $indentation]; - $posDelta -= $indentationLen; - } - - if ($separator === '') { - // Insert newline as separator - $code = 
substr_replace($code, "\n", $separatorStart + $posDelta, 0); - $this->patches[] = [$separatorStart + $posDelta, 'remove', "\n"]; - $posDelta += 1; - } - } - - return $code; - } - - private function isEmulationNeeded(string $code): bool - { - foreach ($this->emulators as $emulator) { - if ($emulator->isEmulationNeeded($code)) { - return true; - } - } - - return $this->isHeredocNowdocEmulationNeeded($code); - } - private function fixupTokens() { if (\count($this->patches) === 0) { diff --git a/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php index a1be8f2..d91da92 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php +++ b/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php @@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator; use PhpParser\Lexer\Emulative; -final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface +final class CoaleseEqualTokenEmulator extends TokenEmulator { public function getPhpVersion(): string { diff --git a/lib/PhpParser/Lexer/TokenEmulator/FlexibleDocStringEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/FlexibleDocStringEmulator.php new file mode 100644 index 0000000..c15d627 --- /dev/null +++ b/lib/PhpParser/Lexer/TokenEmulator/FlexibleDocStringEmulator.php @@ -0,0 +1,76 @@ +<?php declare(strict_types=1); + +namespace PhpParser\Lexer\TokenEmulator; + +use PhpParser\Lexer\Emulative; + +final class FlexibleDocStringEmulator extends TokenEmulator +{ + const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX' +/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n +(?:.*\r?\n)*? +(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x +REGEX; + + public function getPhpVersion(): string + { + return Emulative::PHP_7_3; + } + + public function isEmulationNeeded(string $code) : bool + { + return strpos($code, '<<<') !== false; + } + + public function emulate(string $code, array $tokens): array + { + // Handled by preprocessing + fixup. + return $tokens; + } + + public function reverseEmulate(string $code, array $tokens): array + { + // Not supported. 
+ return $tokens; + } + + public function preprocessCode(string $code, array &$patches): string { + if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) { + // No heredoc/nowdoc found + return $code; + } + + // Keep track of how much we need to adjust string offsets due to the modifications we + // already made + $posDelta = 0; + foreach ($matches as $match) { + $indentation = $match['indentation'][0]; + $indentationStart = $match['indentation'][1]; + + $separator = $match['separator'][0]; + $separatorStart = $match['separator'][1]; + + if ($indentation === '' && $separator !== '') { + // Ordinary heredoc/nowdoc + continue; + } + + if ($indentation !== '') { + // Remove indentation + $indentationLen = strlen($indentation); + $code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen); + $patches[] = [$indentationStart + $posDelta, 'add', $indentation]; + $posDelta -= $indentationLen; + } + + if ($separator === '') { + // Insert newline as separator + $code = substr_replace($code, "\n", $separatorStart + $posDelta, 0); + $patches[] = [$separatorStart + $posDelta, 'remove', "\n"]; + $posDelta += 1; + } + } + + return $code; + } +} diff --git a/lib/PhpParser/Lexer/TokenEmulator/KeywordEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/KeywordEmulator.php index 62a293b..33bf964 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/KeywordEmulator.php +++ b/lib/PhpParser/Lexer/TokenEmulator/KeywordEmulator.php @@ -2,7 +2,7 @@ namespace PhpParser\Lexer\TokenEmulator; -abstract class KeywordEmulator implements TokenEmulatorInterface +abstract class KeywordEmulator extends TokenEmulator { abstract function getKeywordString(): string; abstract function getKeywordToken(): int; diff --git a/lib/PhpParser/Lexer/TokenEmulator/NullsafeTokenEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/NullsafeTokenEmulator.php index 4a7fcb7..686f305 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/NullsafeTokenEmulator.php +++ 
b/lib/PhpParser/Lexer/TokenEmulator/NullsafeTokenEmulator.php @@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator; use PhpParser\Lexer\Emulative; -final class NullsafeTokenEmulator implements TokenEmulatorInterface +final class NullsafeTokenEmulator extends TokenEmulator { public function getPhpVersion(): string { diff --git a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php index 2cee130..cdf793e 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php +++ b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php @@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator; use PhpParser\Lexer\Emulative; -final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface +final class NumericLiteralSeparatorEmulator extends TokenEmulator { const BIN = '(?:0b[01]+(?:_[01]+)*)'; const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)'; diff --git a/lib/PhpParser/Lexer/TokenEmulator/ReverseEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/ReverseEmulator.php index d324f63..90093f6 100644 --- a/lib/PhpParser/Lexer/TokenEmulator/ReverseEmulator.php +++ b/lib/PhpParser/Lexer/TokenEmulator/ReverseEmulator.php @@ -5,12 +5,12 @@ namespace PhpParser\Lexer\TokenEmulator; /** * Reverses emulation direction of the inner emulator. 
*/ -final class ReverseEmulator implements TokenEmulatorInterface +final class ReverseEmulator extends TokenEmulator { - /** @var TokenEmulatorInterface Inner emulator */ + /** @var TokenEmulator Inner emulator */ private $emulator; - public function __construct(TokenEmulatorInterface $emulator) { + public function __construct(TokenEmulator $emulator) { $this->emulator = $emulator; } @@ -29,4 +29,8 @@ final class ReverseEmulator implements TokenEmulatorInterface public function reverseEmulate(string $code, array $tokens): array { return $this->emulator->emulate($code, $tokens); } + + public function preprocessCode(string $code, array &$patches): string { + return $code; + } } \ No newline at end of file diff --git a/lib/PhpParser/Lexer/TokenEmulator/TokenEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/TokenEmulator.php new file mode 100644 index 0000000..a020bc0 --- /dev/null +++ b/lib/PhpParser/Lexer/TokenEmulator/TokenEmulator.php @@ -0,0 +1,25 @@ +