mirror of
https://github.com/phabelio/PHP-Parser.git
synced 2025-01-22 05:11:39 +01:00
Handle flexible heredoc via TokenEmulator
Extend the interface to support preprocessing.
This commit is contained in:
parent
39b046007d
commit
75abbbd2d4
@ -6,12 +6,13 @@ use PhpParser\Error;
|
||||
use PhpParser\ErrorHandler;
|
||||
use PhpParser\Lexer;
|
||||
use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator;
|
||||
use PhpParser\Lexer\TokenEmulator\FlexibleDocStringEmulator;
|
||||
use PhpParser\Lexer\TokenEmulator\FnTokenEmulator;
|
||||
use PhpParser\Lexer\TokenEmulator\MatchTokenEmulator;
|
||||
use PhpParser\Lexer\TokenEmulator\NullsafeTokenEmulator;
|
||||
use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator;
|
||||
use PhpParser\Lexer\TokenEmulator\ReverseEmulator;
|
||||
use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface;
|
||||
use PhpParser\Lexer\TokenEmulator\TokenEmulator;
|
||||
use PhpParser\Parser\Tokens;
|
||||
|
||||
class Emulative extends Lexer
|
||||
@ -20,16 +21,10 @@ class Emulative extends Lexer
|
||||
const PHP_7_4 = '7.4dev';
|
||||
const PHP_8_0 = '8.0dev';
|
||||
|
||||
const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
|
||||
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
|
||||
(?:.*\r?\n)*?
|
||||
(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
|
||||
REGEX;
|
||||
|
||||
/** @var mixed[] Patches used to reverse changes introduced in the code */
|
||||
private $patches = [];
|
||||
|
||||
/** @var TokenEmulatorInterface[] */
|
||||
/** @var TokenEmulator[] */
|
||||
private $emulators = [];
|
||||
|
||||
/** @var string */
|
||||
@ -48,6 +43,7 @@ REGEX;
|
||||
parent::__construct($options);
|
||||
|
||||
$emulators = [
|
||||
new FlexibleDocStringEmulator(),
|
||||
new FnTokenEmulator(),
|
||||
new MatchTokenEmulator(),
|
||||
new CoaleseEqualTokenEmulator(),
|
||||
@ -68,19 +64,23 @@ REGEX;
|
||||
}
|
||||
|
||||
public function startLexing(string $code, ErrorHandler $errorHandler = null) {
|
||||
$this->patches = [];
|
||||
$emulators = array_filter($this->emulators, function($emulator) use($code) {
|
||||
return $emulator->isEmulationNeeded($code);
|
||||
});
|
||||
|
||||
if ($this->isEmulationNeeded($code) === false) {
|
||||
if (empty($emulators)) {
|
||||
// Nothing to emulate, yay
|
||||
parent::startLexing($code, $errorHandler);
|
||||
return;
|
||||
}
|
||||
|
||||
$collector = new ErrorHandler\Collecting();
|
||||
$this->patches = [];
|
||||
foreach ($emulators as $emulator) {
|
||||
$code = $emulator->preprocessCode($code, $this->patches);
|
||||
}
|
||||
|
||||
// 1. emulation of heredoc and nowdoc new syntax
|
||||
$preparedCode = $this->processHeredocNowdoc($code);
|
||||
parent::startLexing($preparedCode, $collector);
|
||||
$collector = new ErrorHandler\Collecting();
|
||||
parent::startLexing($code, $collector);
|
||||
$this->fixupTokens();
|
||||
|
||||
$errors = $collector->getErrors();
|
||||
@ -91,10 +91,8 @@ REGEX;
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($this->emulators as $emulator) {
|
||||
if ($emulator->isEmulationNeeded($code)) {
|
||||
$this->tokens = $emulator->emulate($code, $this->tokens);
|
||||
}
|
||||
foreach ($emulators as $emulator) {
|
||||
$this->tokens = $emulator->emulate($code, $this->tokens);
|
||||
}
|
||||
}
|
||||
|
||||
@ -108,71 +106,6 @@ REGEX;
|
||||
&& version_compare($this->targetPhpVersion, $emulatorPhpVersion, '<');
|
||||
}
|
||||
|
||||
private function isHeredocNowdocEmulationNeeded(string $code): bool
|
||||
{
|
||||
if (!$this->isForwardEmulationNeeded(self::PHP_7_3)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return strpos($code, '<<<') !== false;
|
||||
}
|
||||
|
||||
private function processHeredocNowdoc(string $code): string
|
||||
{
|
||||
if ($this->isHeredocNowdocEmulationNeeded($code) === false) {
|
||||
return $code;
|
||||
}
|
||||
|
||||
if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
|
||||
// No heredoc/nowdoc found
|
||||
return $code;
|
||||
}
|
||||
|
||||
// Keep track of how much we need to adjust string offsets due to the modifications we
|
||||
// already made
|
||||
$posDelta = 0;
|
||||
foreach ($matches as $match) {
|
||||
$indentation = $match['indentation'][0];
|
||||
$indentationStart = $match['indentation'][1];
|
||||
|
||||
$separator = $match['separator'][0];
|
||||
$separatorStart = $match['separator'][1];
|
||||
|
||||
if ($indentation === '' && $separator !== '') {
|
||||
// Ordinary heredoc/nowdoc
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($indentation !== '') {
|
||||
// Remove indentation
|
||||
$indentationLen = strlen($indentation);
|
||||
$code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
|
||||
$this->patches[] = [$indentationStart + $posDelta, 'add', $indentation];
|
||||
$posDelta -= $indentationLen;
|
||||
}
|
||||
|
||||
if ($separator === '') {
|
||||
// Insert newline as separator
|
||||
$code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
|
||||
$this->patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
|
||||
$posDelta += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return $code;
|
||||
}
|
||||
|
||||
private function isEmulationNeeded(string $code): bool
|
||||
{
|
||||
foreach ($this->emulators as $emulator) {
|
||||
if ($emulator->isEmulationNeeded($code)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return $this->isHeredocNowdocEmulationNeeded($code);
|
||||
}
|
||||
|
||||
private function fixupTokens()
|
||||
{
|
||||
if (\count($this->patches) === 0) {
|
||||
|
@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
use PhpParser\Lexer\Emulative;
|
||||
|
||||
final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
|
||||
final class CoaleseEqualTokenEmulator extends TokenEmulator
|
||||
{
|
||||
public function getPhpVersion(): string
|
||||
{
|
||||
|
@ -0,0 +1,76 @@
|
||||
<?php declare(strict_types=1);
|
||||
|
||||
namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
use PhpParser\Lexer\Emulative;
|
||||
|
||||
final class FlexibleDocStringEmulator extends TokenEmulator
|
||||
{
|
||||
const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
|
||||
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
|
||||
(?:.*\r?\n)*?
|
||||
(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
|
||||
REGEX;
|
||||
|
||||
public function getPhpVersion(): string
|
||||
{
|
||||
return Emulative::PHP_7_3;
|
||||
}
|
||||
|
||||
public function isEmulationNeeded(string $code) : bool
|
||||
{
|
||||
return strpos($code, '<<<') !== false;
|
||||
}
|
||||
|
||||
public function emulate(string $code, array $tokens): array
|
||||
{
|
||||
// Handled by preprocessing + fixup.
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
public function reverseEmulate(string $code, array $tokens): array
|
||||
{
|
||||
// Not supported.
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
public function preprocessCode(string $code, array &$patches): string {
|
||||
if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
|
||||
// No heredoc/nowdoc found
|
||||
return $code;
|
||||
}
|
||||
|
||||
// Keep track of how much we need to adjust string offsets due to the modifications we
|
||||
// already made
|
||||
$posDelta = 0;
|
||||
foreach ($matches as $match) {
|
||||
$indentation = $match['indentation'][0];
|
||||
$indentationStart = $match['indentation'][1];
|
||||
|
||||
$separator = $match['separator'][0];
|
||||
$separatorStart = $match['separator'][1];
|
||||
|
||||
if ($indentation === '' && $separator !== '') {
|
||||
// Ordinary heredoc/nowdoc
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($indentation !== '') {
|
||||
// Remove indentation
|
||||
$indentationLen = strlen($indentation);
|
||||
$code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
|
||||
$patches[] = [$indentationStart + $posDelta, 'add', $indentation];
|
||||
$posDelta -= $indentationLen;
|
||||
}
|
||||
|
||||
if ($separator === '') {
|
||||
// Insert newline as separator
|
||||
$code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
|
||||
$patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
|
||||
$posDelta += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return $code;
|
||||
}
|
||||
}
|
@ -2,7 +2,7 @@
|
||||
|
||||
namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
abstract class KeywordEmulator implements TokenEmulatorInterface
|
||||
abstract class KeywordEmulator extends TokenEmulator
|
||||
{
|
||||
abstract function getKeywordString(): string;
|
||||
abstract function getKeywordToken(): int;
|
||||
|
@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
use PhpParser\Lexer\Emulative;
|
||||
|
||||
final class NullsafeTokenEmulator implements TokenEmulatorInterface
|
||||
final class NullsafeTokenEmulator extends TokenEmulator
|
||||
{
|
||||
public function getPhpVersion(): string
|
||||
{
|
||||
|
@ -4,7 +4,7 @@ namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
use PhpParser\Lexer\Emulative;
|
||||
|
||||
final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
|
||||
final class NumericLiteralSeparatorEmulator extends TokenEmulator
|
||||
{
|
||||
const BIN = '(?:0b[01]+(?:_[01]+)*)';
|
||||
const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
|
||||
|
@ -5,12 +5,12 @@ namespace PhpParser\Lexer\TokenEmulator;
|
||||
/**
|
||||
* Reverses emulation direction of the inner emulator.
|
||||
*/
|
||||
final class ReverseEmulator implements TokenEmulatorInterface
|
||||
final class ReverseEmulator extends TokenEmulator
|
||||
{
|
||||
/** @var TokenEmulatorInterface Inner emulator */
|
||||
/** @var TokenEmulator Inner emulator */
|
||||
private $emulator;
|
||||
|
||||
public function __construct(TokenEmulatorInterface $emulator) {
|
||||
public function __construct(TokenEmulator $emulator) {
|
||||
$this->emulator = $emulator;
|
||||
}
|
||||
|
||||
@ -29,4 +29,8 @@ final class ReverseEmulator implements TokenEmulatorInterface
|
||||
public function reverseEmulate(string $code, array $tokens): array {
|
||||
return $this->emulator->emulate($code, $tokens);
|
||||
}
|
||||
|
||||
public function preprocessCode(string $code, array &$patches): string {
|
||||
return $code;
|
||||
}
|
||||
}
|
25
lib/PhpParser/Lexer/TokenEmulator/TokenEmulator.php
Normal file
25
lib/PhpParser/Lexer/TokenEmulator/TokenEmulator.php
Normal file
@ -0,0 +1,25 @@
|
||||
<?php declare(strict_types=1);
|
||||
|
||||
namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
/** @internal */
|
||||
abstract class TokenEmulator
|
||||
{
|
||||
abstract public function getPhpVersion(): string;
|
||||
|
||||
abstract public function isEmulationNeeded(string $code): bool;
|
||||
|
||||
/**
|
||||
* @return array Modified Tokens
|
||||
*/
|
||||
abstract public function emulate(string $code, array $tokens): array;
|
||||
|
||||
/**
|
||||
* @return array Modified Tokens
|
||||
*/
|
||||
abstract public function reverseEmulate(string $code, array $tokens): array;
|
||||
|
||||
public function preprocessCode(string $code, array &$patches): string {
|
||||
return $code;
|
||||
}
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
<?php declare(strict_types=1);
|
||||
|
||||
namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
/** @internal */
|
||||
interface TokenEmulatorInterface
|
||||
{
|
||||
public function getPhpVersion(): string;
|
||||
|
||||
public function isEmulationNeeded(string $code): bool;
|
||||
|
||||
/**
|
||||
* @return array Modified Tokens
|
||||
*/
|
||||
public function emulate(string $code, array $tokens): array;
|
||||
|
||||
/**
|
||||
* @return array Modified Tokens
|
||||
*/
|
||||
public function reverseEmulate(string $code, array $tokens): array;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user