2019-07-05 22:24:00 +02:00
|
|
|
<?php
|
2021-12-15 04:42:37 +01:00
|
|
|
|
2019-07-05 22:24:00 +02:00
|
|
|
declare(strict_types=1);
|
2021-12-15 04:42:37 +01:00
|
|
|
|
2018-11-06 03:57:36 +01:00
|
|
|
namespace Psalm\Internal\Diff;
|
2018-10-26 06:59:14 +02:00
|
|
|
|
2021-12-03 21:40:18 +01:00
|
|
|
use Exception;
|
|
|
|
|
2019-06-26 22:52:29 +02:00
|
|
|
use function array_reverse;
|
2021-12-03 21:07:25 +01:00
|
|
|
use function count;
|
2019-06-26 22:52:29 +02:00
|
|
|
use function explode;
|
|
|
|
use function min;
|
2019-07-05 22:24:00 +02:00
|
|
|
use function strlen;
|
2019-06-26 22:52:29 +02:00
|
|
|
use function substr;
|
|
|
|
|
2018-10-26 06:59:14 +02:00
|
|
|
/**
 * Line-based differ built on the Myers shortest-edit-script algorithm.
 *
 * Adapted from https://github.com/nikic/PHP-Parser/blob/master/lib/PhpParser/Internal/Differ.php
 *
 * Reference: Myers, Eugene W. "An O (ND) difference algorithm and its variations."
 * Algorithmica 1.1 (1986): 251-266.
 *
 * @internal
 */
class FileDiffer
{
    /**
     * Forward pass of the Myers algorithm over two lists of lines.
     *
     * At the start of every round the current frontier (diagonal => furthest x reached
     * on that diagonal) is snapshotted, so the returned history can later be walked
     * backwards to recover the edit script.
     *
     * @param list<string> $a
     * @param list<string> $b
     *
     * @return array{0:non-empty-list<array<int, int>>, 1: int, 2: int}
     *
     * @psalm-pure
     */
    private static function calculateTrace(array $a, array $b): array
    {
        $oldCount = count($a);
        $newCount = count($b);
        $maxEdits = $oldCount + $newCount;

        // Seed value so that round 0 / diagonal 0 starts at x = 0.
        $frontier = [1 => 0];
        $history = [];

        for ($edits = 0; $edits <= $maxEdits; ++$edits) {
            $history[] = $frontier;

            for ($diagonal = -$edits; $diagonal <= $edits; $diagonal += 2) {
                // Reach this diagonal from the one above (x unchanged) when we are on the
                // lowest diagonal, or when the upper neighbour has progressed further.
                $byInsertion = $diagonal === -$edits
                    || ($diagonal !== $edits && $frontier[$diagonal - 1] < $frontier[$diagonal + 1]);

                $x = $byInsertion ? $frontier[$diagonal + 1] : $frontier[$diagonal - 1] + 1;
                $y = $x - $diagonal;

                // Follow the run of identical lines along this diagonal.
                while ($x < $oldCount && $y < $newCount && $a[$x] === $b[$y]) {
                    ++$x;
                    ++$y;
                }

                $frontier[$diagonal] = $x;

                if ($x >= $oldCount && $y >= $newCount) {
                    return [$history, $x, $y];
                }
            }
        }

        throw new Exception('Should not happen');
    }

    /**
     * Walks the frontier history backwards from ($x, $y) and rebuilds the edit script.
     *
     * Elements are collected from the end of both inputs towards the start and the
     * collected list is reversed before being returned.
     *
     * @param list<array<int, int>> $trace
     * @param list<string> $a
     * @param list<string> $b
     *
     * @return list<DiffElem>
     *
     * @psalm-pure
     */
    private static function extractDiff(array $trace, int $x, int $y, array $a, array $b): array
    {
        $reversed = [];

        for ($round = count($trace) - 1; $round >= 0; --$round) {
            $frontier = $trace[$round];
            $diagonal = $x - $y;

            // Same neighbour choice as the forward pass made for this round.
            $byInsertion = $diagonal === -$round
                || ($diagonal !== $round && $frontier[$diagonal - 1] < $frontier[$diagonal + 1]);
            $prevDiagonal = $byInsertion ? $diagonal + 1 : $diagonal - 1;

            $prevX = $frontier[$prevDiagonal];
            $prevY = $prevX - $prevDiagonal;

            // Diagonal moves: lines kept in both inputs.
            while ($x > $prevX && $y > $prevY) {
                --$x;
                --$y;
                $reversed[] = new DiffElem(DiffElem::TYPE_KEEP, $a[$x], $b[$y]);
            }

            if ($round === 0) {
                break;
            }

            for (; $x > $prevX; --$x) {
                $reversed[] = new DiffElem(DiffElem::TYPE_REMOVE, $a[$x - 1], null);
            }

            for (; $y > $prevY; --$y) {
                $reversed[] = new DiffElem(DiffElem::TYPE_ADD, null, $b[$y - 1]);
            }
        }

        return array_reverse($reversed);
    }

    /**
     * Diffs two strings line by line and returns the changed regions as byte offsets.
     *
     * Both inputs are split on "\n" and every line is measured with a trailing "\n"
     * appended. Each returned entry holds, in order: start offset in $a_code, end offset
     * in $a_code, start offset in $b_code, end offset in $b_code, the running count of
     * added minus removed lines as of that change, and the text inserted on the
     * $b_code side ('' for pure removals).
     *
     * Consecutive elements of the same kind are merged into one entry. A replaced line
     * has the prefix it shares with its replacement trimmed off, and a non-empty shared
     * prefix always starts a new entry.
     *
     * @return array<int, array{0: int, 1: int, 2: int, 3: int, 4: int, 5: string}>
     *
     * @psalm-pure
     */
    public static function getDiff(string $a_code, string $b_code): array
    {
        $old_lines = explode("\n", $a_code);
        $new_lines = explode("\n", $b_code);

        [$trace, $end_x, $end_y] = self::calculateTrace($old_lines, $new_lines);

        $elements = self::coalesceReplacements(
            self::extractDiff($trace, $end_x, $end_y, $old_lines, $new_lines)
        );

        $changes = [];

        // Index in $changes of the entry currently being extended, or null when the
        // next changed line has to open a new entry.
        $open = null;
        $previous_type = null;

        $a_pos = 0;
        $b_pos = 0;
        $net_lines = 0;

        foreach ($elements as $element) {
            $type = $element->type;

            // A switch of element kind always closes the current entry.
            if ($type !== $previous_type) {
                $open = null;
            }

            $previous_type = $type;

            if ($type === DiffElem::TYPE_REMOVE) {
                /** @var string $element->old */
                $removed_text = $element->old . "\n";
                $removed_length = strlen($removed_text);

                --$net_lines;

                if ($open === null) {
                    $changes[] = [
                        $a_pos,
                        $a_pos + $removed_length,
                        $b_pos,
                        $b_pos,
                        $net_lines,
                        '',
                    ];
                    $open = count($changes) - 1;
                } else {
                    $changes[$open][1] += $removed_length;
                    $changes[$open][4] = $net_lines;
                }

                $a_pos += $removed_length;

                continue;
            }

            if ($type === DiffElem::TYPE_ADD) {
                /** @var string $element->new */
                $added_text = $element->new . "\n";
                $added_length = strlen($added_text);

                ++$net_lines;

                if ($open === null) {
                    $changes[] = [
                        $a_pos,
                        $a_pos,
                        $b_pos,
                        $b_pos + $added_length,
                        $net_lines,
                        $added_text,
                    ];
                    $open = count($changes) - 1;
                } else {
                    $changes[$open][3] += $added_length;
                    $changes[$open][4] = $net_lines;
                    $changes[$open][5] .= $added_text;
                }

                $b_pos += $added_length;

                continue;
            }

            if ($type === DiffElem::TYPE_REPLACE) {
                /** @var string $element->old */
                $old_text = $element->old . "\n";

                /** @var string $element->new */
                $new_text = $element->new . "\n";

                $old_length = strlen($old_text);
                $new_length = strlen($new_text);

                // Length of the byte prefix shared by the old and the new line.
                $limit = min($old_length, $new_length);
                $shared = 0;

                while ($shared < $limit && $old_text[$shared] === $new_text[$shared]) {
                    ++$shared;
                }

                // The shared prefix is not part of the change: step over it on both sides.
                $a_pos += $shared;
                $b_pos += $shared;
                $old_length -= $shared;
                $new_length -= $shared;

                $new_text = substr($new_text, $shared);

                if ($open === null || $shared > 0) {
                    $changes[] = [
                        $a_pos,
                        $a_pos + $old_length,
                        $b_pos,
                        $b_pos + $new_length,
                        $net_lines,
                        $new_text,
                    ];
                    $open = count($changes) - 1;
                } else {
                    $changes[$open][1] += $old_length;
                    $changes[$open][3] += $new_length;
                    $changes[$open][5] .= $new_text;
                }

                $a_pos += $old_length;
                $b_pos += $new_length;

                continue;
            }

            // Any other element is an unchanged line: advance both offsets past it.
            /** @psalm-suppress MixedArgument */
            $kept_length = strlen($element->new) + 1;

            $a_pos += $kept_length;
            $b_pos += $kept_length;
        }

        return $changes;
    }

    /**
     * Turns each run of removals that is directly followed by an equally long run of
     * additions into pairwise replace elements; everything else is passed through as is.
     *
     * @param DiffElem[] $diff
     *
     * @return list<DiffElem>
     *
     * @psalm-pure
     */
    private static function coalesceReplacements(array $diff): array
    {
        $merged = [];
        $total = count($diff);
        $cursor = 0;

        while ($cursor < $total) {
            if ($diff[$cursor]->type !== DiffElem::TYPE_REMOVE) {
                $merged[] = $diff[$cursor];
                ++$cursor;

                continue;
            }

            // [$cursor, $removeEnd) is the run of removals starting here ...
            $removeEnd = $cursor;
            while ($removeEnd < $total && $diff[$removeEnd]->type === DiffElem::TYPE_REMOVE) {
                ++$removeEnd;
            }

            // ... and [$removeEnd, $addEnd) the run of additions right behind it.
            $addEnd = $removeEnd;
            while ($addEnd < $total && $diff[$addEnd]->type === DiffElem::TYPE_ADD) {
                ++$addEnd;
            }

            $removedCount = $removeEnd - $cursor;
            $addedCount = $addEnd - $removeEnd;

            if ($removedCount === $addedCount) {
                for ($offset = 0; $offset < $removedCount; ++$offset) {
                    $merged[] = new DiffElem(
                        DiffElem::TYPE_REPLACE,
                        $diff[$cursor + $offset]->old,
                        $diff[$removeEnd + $offset]->new
                    );
                }
            } else {
                for ($index = $cursor; $index < $addEnd; ++$index) {
                    $merged[] = $diff[$index];
                }
            }

            $cursor = $addEnd;
        }

        return $merged;
    }
}
|