\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?(?:;?[\r\n])?)/x REGEX; /** @var mixed[] Patches used to reverse changes introduced in the code */ private $patches = []; /** @var TokenEmulatorInterface[] */ private $tokenEmulators = []; /** * @param mixed[] $options */ public function __construct(array $options = []) { parent::__construct($options); $this->tokenEmulators[] = new FnTokenEmulator(); $this->tokenEmulators[] = new MatchTokenEmulator(); $this->tokenEmulators[] = new CoaleseEqualTokenEmulator(); $this->tokenEmulators[] = new NumericLiteralSeparatorEmulator(); $this->tokenMap[self::T_COALESCE_EQUAL] = Tokens::T_COALESCE_EQUAL; $this->tokenMap[self::T_FN] = Tokens::T_FN; $this->tokenMap[self::T_MATCH] = Tokens::T_MATCH; } public function startLexing(string $code, ErrorHandler $errorHandler = null) { $this->patches = []; if ($this->isEmulationNeeded($code) === false) { // Nothing to emulate, yay parent::startLexing($code, $errorHandler); return; } $collector = new ErrorHandler\Collecting(); // 1. emulation of heredoc and nowdoc new syntax $preparedCode = $this->processHeredocNowdoc($code); parent::startLexing($preparedCode, $collector); $this->fixupTokens(); $errors = $collector->getErrors(); if (!empty($errors)) { $this->fixupErrors($errors); foreach ($errors as $error) { $errorHandler->handleError($error); } } // add token emulation foreach ($this->tokenEmulators as $emulativeToken) { if ($emulativeToken->isEmulationNeeded($code)) { $this->tokens = $emulativeToken->emulate($code, $this->tokens); } } } private function isHeredocNowdocEmulationNeeded(string $code): bool { // skip version where this works without emulation if (version_compare(\PHP_VERSION, self::PHP_7_3, '>=')) { return false; } return strpos($code, '<<<') !== false; } private function processHeredocNowdoc(string $code): string { if ($this->isHeredocNowdocEmulationNeeded($code) === false) { return $code; } if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) { // No heredoc/nowdoc found return $code; } // Keep track of how much we need to adjust string offsets due to the modifications we // already made $posDelta = 0; foreach ($matches as $match) { $indentation = $match['indentation'][0]; $indentationStart = $match['indentation'][1]; $separator = $match['separator'][0]; $separatorStart = $match['separator'][1]; if ($indentation === '' && $separator !== '') { // Ordinary heredoc/nowdoc continue; } if ($indentation !== '') { // Remove indentation $indentationLen = strlen($indentation); $code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen); $this->patches[] = [$indentationStart + $posDelta, 'add', $indentation]; $posDelta -= $indentationLen; } if ($separator === '') { // Insert newline as separator $code = substr_replace($code, "\n", $separatorStart + $posDelta, 0); $this->patches[] = [$separatorStart + $posDelta, 'remove', "\n"]; $posDelta += 1; } } return $code; } private function isEmulationNeeded(string $code): bool { foreach ($this->tokenEmulators as $emulativeToken) { if ($emulativeToken->isEmulationNeeded($code)) { return true; } } return $this->isHeredocNowdocEmulationNeeded($code); } private function fixupTokens() { if (\count($this->patches) === 0) { return; } // Load first patch $patchIdx = 0; list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx]; // We use a manual loop over the tokens, because we modify the array on the fly $pos = 0; for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) { $token = $this->tokens[$i]; if (\is_string($token)) { // We assume that patches don't apply to string tokens $pos += \strlen($token); continue; } $len = \strlen($token[1]); $posDelta = 0; while ($patchPos >= $pos && $patchPos < $pos + $len) { $patchTextLen = \strlen($patchText); if ($patchType === 'remove') { if ($patchPos === $pos && $patchTextLen === $len) { // Remove token entirely array_splice($this->tokens, $i, 1, []); $i--; $c--; } else { // Remove from token string $this->tokens[$i][1] = substr_replace( $token[1], '', $patchPos - $pos + $posDelta, $patchTextLen ); $posDelta -= $patchTextLen; } } elseif ($patchType === 'add') { // Insert into the token string $this->tokens[$i][1] = substr_replace( $token[1], $patchText, $patchPos - $pos + $posDelta, 0 ); $posDelta += $patchTextLen; } else { assert(false); } // Fetch the next patch $patchIdx++; if ($patchIdx >= \count($this->patches)) { // No more patches, we're done return; } list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx]; // Multiple patches may apply to the same token. Reload the current one to check // If the new patch applies $token = $this->tokens[$i]; } $pos += $len; } // A patch did not apply assert(false); } /** * Fixup line and position information in errors. * * @param Error[] $errors */ private function fixupErrors(array $errors) { foreach ($errors as $error) { $attrs = $error->getAttributes(); $posDelta = 0; $lineDelta = 0; foreach ($this->patches as $patch) { list($patchPos, $patchType, $patchText) = $patch; if ($patchPos >= $attrs['startFilePos']) { // No longer relevant break; } if ($patchType === 'add') { $posDelta += strlen($patchText); $lineDelta += substr_count($patchText, "\n"); } else { $posDelta -= strlen($patchText); $lineDelta -= substr_count($patchText, "\n"); } } $attrs['startFilePos'] += $posDelta; $attrs['endFilePos'] += $posDelta; $attrs['startLine'] += $lineDelta; $attrs['endLine'] += $lineDelta; $error->setAttributes($attrs); } } }