[Regex] add first_match() and every_match() functions (#151)

This commit is contained in:
Toon Verwerft 2021-04-09 21:24:01 +02:00 committed by GitHub
parent 0753087409
commit 5a7fde29a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 481 additions and 16 deletions

View File

@ -14,6 +14,6 @@
- [decode](./../../src/Psl/Json/decode.php#L24) - [decode](./../../src/Psl/Json/decode.php#L24)
- [encode](./../../src/Psl/Json/encode.php#L27) - [encode](./../../src/Psl/Json/encode.php#L27)
- [typed](./../../src/Psl/Json/typed.php#L22) - [typed](./../../src/Psl/Json/typed.php#L20)

View File

@ -12,6 +12,9 @@
#### `Functions` #### `Functions`
- [capture_groups](./../../src/Psl/Regex/capture_groups.php#L17)
- [every_match](./../../src/Psl/Regex/every_match.php#L25)
- [first_match](./../../src/Psl/Regex/first_match.php#L24)
- [matches](./../../src/Psl/Regex/matches.php#L19) - [matches](./../../src/Psl/Regex/matches.php#L19)
- [replace](./../../src/Psl/Regex/replace.php#L26) - [replace](./../../src/Psl/Regex/replace.php#L26)
- [replace_every](./../../src/Psl/Regex/replace_every.php#L27) - [replace_every](./../../src/Psl/Regex/replace_every.php#L27)

View File

@ -258,6 +258,9 @@ final class Loader
'Psl\Math\tan', 'Psl\Math\tan',
'Psl\Math\to_base', 'Psl\Math\to_base',
'Psl\Result\wrap', 'Psl\Result\wrap',
'Psl\Regex\capture_groups',
'Psl\Regex\every_match',
'Psl\Regex\first_match',
'Psl\Regex\split', 'Psl\Regex\split',
'Psl\Regex\matches', 'Psl\Regex\matches',
'Psl\Regex\replace', 'Psl\Regex\replace',

View File

@ -4,33 +4,24 @@ declare(strict_types=1);
namespace Psl\Json; namespace Psl\Json;
use Psl\Type\Exception\AssertException; use Psl\Type;
use Psl\Type\Exception\CoercionException;
use Psl\Type\TypeInterface;
/** /**
* Decode a json encoded string into a dynamic variable. * Decode a json encoded string into a dynamic variable.
* *
* @template T * @template T
* *
* @param TypeInterface<T> $type * @param Type\TypeInterface<T> $type
* *
* @throws Exception\DecodeException If an error occurred. * @throws Exception\DecodeException If an error occurred.
* *
* @return T * @return T
*/ */
function typed(string $json, TypeInterface $type) function typed(string $json, Type\TypeInterface $type)
{ {
$value = decode($json);
try { try {
return $type->assert($value); return $type->coerce(decode($json));
} catch (AssertException $e) { } catch (Type\Exception\CoercionException $e) {
}
try {
return $type->coerce($value);
} catch (CoercionException $e) {
throw new Exception\DecodeException($e->getMessage(), (int)$e->getCode(), $e); throw new Exception\DecodeException($e->getMessage(), (int)$e->getCode(), $e);
} }
} }

View File

@ -0,0 +1,28 @@
<?php
declare(strict_types=1);
namespace Psl\Regex;
use Psl\Dict;
use Psl\Type;
/**
 * Build the type describing a single regex match restricted to the given groups.
 *
 * The resulting shape always contains key `0` in addition to every entry of
 * $groups, and each of those keys is typed as a string.
 *
 * @param list<array-key> $groups Capture group names and/or indexes to expose.
 *
 * @return Type\TypeInterface<array<array-key, string>>
 *
 * @psalm-suppress MixedReturnTypeCoercion - Psalm loses track of the keys. No worries, another psalm plugin fixes this!
 */
function capture_groups(array $groups): Type\TypeInterface
{
    // Key 0 is prepended unconditionally; Dict\unique() is applied so that a
    // caller passing 0 (or repeating a group) does not produce duplicate entries.
    $keys = Dict\unique([0, ...$groups]);

    return Type\shape(
        Dict\from_keys(
            $keys,
            /**
             * @return Type\TypeInterface<string>
             */
            static fn(): Type\TypeInterface => Type\string()
        )
    );
}

View File

@ -0,0 +1,52 @@
<?php
declare(strict_types=1);
namespace Psl\Regex;
use Psl\Exception\InvariantViolationException;
use Psl\Type;
use function preg_match_all;
/**
 * Determine if $subject matches the given $pattern and return every match.
 *
 * Matches are collected with `preg_match_all()` in `PREG_SET_ORDER`, so each
 * entry of the returned list holds the capture groups of one match.
 *
 * @template T of array|null
 *
 * @param string $subject The string to search in.
 * @param non-empty-string $pattern The pattern to match against.
 * @param ?Type\TypeInterface<T> $capture_groups What shape does a single set of matching items have?
 *                                               When omitted, every set is coerced to a dict of array-key => string.
 * @param int $offset Forwarded to `preg_match_all()` as its offset argument.
 *
 * @throws Exception\RuntimeException If an internal error occurred, or if the matches
 *                                    cannot be coerced to the requested capture groups.
 * @throws Exception\InvalidPatternException If $pattern is invalid.
 *
 * @return (T is null ? list<array<array-key, string>> : list<T>)|null Null when nothing matched.
 */
function every_match(
    string $subject,
    string $pattern,
    ?Type\TypeInterface $capture_groups = null,
    int $offset = 0
): ?array {
    $sets = Internal\call_preg(
        'preg_match_all',
        static function () use ($subject, $pattern, $offset): ?array {
            $found = [];
            $count = preg_match_all($pattern, $subject, $found, PREG_SET_ORDER, $offset);
            if ($count === 0) {
                // Zero matches is reported to the caller as null rather than an empty list.
                return null;
            }

            return $found;
        }
    );

    if (null === $sets) {
        return null;
    }

    $shape = $capture_groups ?? Type\dict(Type\array_key(), Type\string());

    try {
        return Type\vec($shape)->coerce($sets);
    } catch (InvariantViolationException | Type\Exception\CoercionException $e) {
        throw new Exception\RuntimeException('Invalid capture groups', 0, $e);
    }
}

View File

@ -0,0 +1,51 @@
<?php
declare(strict_types=1);
namespace Psl\Regex;
use Psl\Type;
use function preg_match;
/**
 * Determine if $subject matches the given $pattern and return the first match.
 *
 * @template T of array|null
 *
 * @param string $subject The string to search in.
 * @param non-empty-string $pattern The pattern to match against.
 * @param ?Type\TypeInterface<T> $capture_groups What shape do the matching items have?
 *                                               When omitted, the match is coerced to a dict of array-key => string.
 * @param int $offset Forwarded to `preg_match()` as its offset argument.
 *
 * @throws Exception\RuntimeException If an internal error occurred, or if the match
 *                                    cannot be coerced to the requested capture groups.
 * @throws Exception\InvalidPatternException If $pattern is invalid.
 *
 * @return (T is null ? array<array-key, string> : T)|null Null when nothing matched.
 */
function first_match(
    string $subject,
    string $pattern,
    ?Type\TypeInterface $capture_groups = null,
    int $offset = 0
): ?array {
    $groups = Internal\call_preg(
        'preg_match',
        static function () use ($subject, $pattern, $offset): ?array {
            $found = [];
            $count = preg_match($pattern, $subject, $found, 0, $offset);
            if ($count === 0) {
                // No match is reported to the caller as null rather than an empty array.
                return null;
            }

            return $found;
        }
    );

    if (null === $groups) {
        return null;
    }

    $shape = $capture_groups ?? Type\dict(Type\array_key(), Type\string());

    try {
        return $shape->coerce($groups);
    } catch (Type\Exception\CoercionException $e) {
        throw new Exception\RuntimeException('Invalid capture groups', 0, $e);
    }
}

View File

@ -0,0 +1,21 @@
<?php
declare(strict_types=1);
namespace Psl\Tests\Regex;
use PHPUnit\Framework\TestCase;
use function Psl\Regex\capture_groups;
/**
 * Tests for Psl\Regex\capture_groups().
 */
final class CaptureGroupsTest extends TestCase
{
    /**
     * Only group 1 is requested, yet data holding keys 0 and 1 must coerce
     * unchanged: the shape includes the full-match key 0 on its own.
     */
    public function testItAlwaysAddsZeroCaptureResult(): void
    {
        $data = [0 => 'Hello', 1 => 'World'];
        $shape = capture_groups([1]);

        $actual = $shape->coerce($data);

        // assertSame() takes the expected value first; keeping that order
        // makes PHPUnit's failure output label both sides correctly.
        static::assertSame($data, $actual);
    }
}

View File

@ -0,0 +1,191 @@
<?php
declare(strict_types=1);
namespace Psl\Tests\Regex;
use PHPUnit\Framework\TestCase;
use Psl\Regex;
use Psl\Type\TypeInterface;
use function Psl\Regex\capture_groups;
/**
 * Tests for Psl\Regex\every_match().
 */
final class EveryMatchTest extends TestCase
{
    /**
     * @dataProvider provideMatchingData
     */
    public function testMatching(
        array $expected,
        string $subject,
        string $pattern,
        ?TypeInterface $shape = null,
        int $offset = 0
    ): void {
        static::assertSame($expected, Regex\every_match($subject, $pattern, $shape, $offset));
    }

    /**
     * @dataProvider provideNonMatchingData
     */
    public function testNotMatching(string $subject, string $pattern, int $offset = 0): void
    {
        static::assertNull(Regex\every_match($subject, $pattern, null, $offset));
    }

    public function testMatchingWithInvalidPattern(): void
    {
        $this->expectException(Regex\Exception\InvalidPatternException::class);
        $this->expectExceptionMessage("No ending delimiter '/' found");

        Regex\every_match('hello', '/hello');
    }

    public function testInvalidCaptureGroup(): void
    {
        $this->expectException(Regex\Exception\RuntimeException::class);
        $this->expectExceptionMessage('Invalid capture groups');

        Regex\every_match('hello', '/(hello)/', capture_groups(['doesnotexist']));
    }

    /**
     * Each dataset is: expected list of match sets, subject, pattern and,
     * optionally, the capture-group shape to coerce every set to.
     *
     * Key order inside the expected arrays matters because the comparison
     * is done with assertSame().
     */
    public static function provideMatchingData(): iterable
    {
        yield [
            [
                [
                    0 => 'PHP',
                    1 => 'PHP',
                ],
            ],
            'PHP is the web scripting language of choice.',
            '/(php)/i',
            capture_groups([1]),
        ];

        yield [
            [
                [
                    0 => 'Hello world',
                    1 => 'Hello',
                ],
            ],
            'Hello world is the web scripting language of choice.',
            '/(hello) world/i',
            capture_groups([1]),
        ];

        yield [
            [
                [
                    0 => 'web',
                    1 => 'web',
                ],
            ],
            'PHP is the web scripting language of choice.',
            '/(\bweb\b)/i',
            capture_groups([1]),
        ];

        // No explicit shape: the default dict shape is used.
        yield [
            [
                [
                    0 => 'web',
                    1 => 'web',
                ],
            ],
            'PHP is the web scripting language of choice.',
            '/(\bweb\b)/i',
        ];

        yield [
            [
                [
                    0 => 'PHP',
                    'language' => 'PHP',
                ],
            ],
            'PHP is the web scripting language of choice.',
            '/(?P<language>PHP)/',
            capture_groups(['language']),
        ];

        // Without a shape, a named group shows up under both its name and its index.
        yield [
            [
                [
                    0 => 'PHP',
                    'language' => 'PHP',
                    1 => 'PHP',
                ],
            ],
            'PHP is the web scripting language of choice.',
            '/(?P<language>PHP)/',
        ];

        yield [
            [
                [
                    0 => 'http://www.php.net',
                    1 => 'www.php.net',
                ],
            ],
            'http://www.php.net/index.html',
            '@^(?:http://)?([^/]+)@i',
            capture_groups([1]),
        ];

        yield [
            [
                [
                    0 => 'a: 1',
                    1 => 'a',
                    2 => '1',
                ],
                [
                    0 => 'b: 2',
                    1 => 'b',
                    2 => '2',
                ],
                [
                    0 => 'c: 3',
                    1 => 'c',
                    2 => '3',
                ],
            ],
            <<<FOO
            a: 1
            b: 2
            c: 3
            FOO,
            '@(\w+): (\d+)@i',
            capture_groups([1, 2]),
        ];

        yield [
            [
                [
                    0 => 'a: 1',
                    'name' => 'a',
                    'digit' => '1',
                ],
                [
                    0 => 'b: 2',
                    'name' => 'b',
                    'digit' => '2',
                ],
                [
                    0 => 'c: 3',
                    'name' => 'c',
                    'digit' => '3',
                ],
            ],
            <<<FOO
            a: 1
            b: 2
            c: 3
            FOO,
            '@(?P<name>\w+): (?P<digit>\d+)@i',
            capture_groups(['name', 'digit']),
        ];
    }

    /**
     * Each dataset is: subject and a pattern that must not match it.
     */
    public static function provideNonMatchingData(): iterable
    {
        yield ['PHP is the web scripting language of choice.', '/php/'];
        yield ['PHP is the website scripting language of choice.', '/\bweb\b/i'];
        yield ['php is the web scripting language of choice.', '/PHP/'];
        yield ['hello', '/[^.]+\.[^.]+$/'];
    }
}

View File

@ -0,0 +1,125 @@
<?php
declare(strict_types=1);
namespace Psl\Tests\Regex;
use PHPUnit\Framework\TestCase;
use Psl\Regex;
use Psl\Type\TypeInterface;
use function Psl\Regex\capture_groups;
/**
 * Tests for Psl\Regex\first_match().
 */
final class FirstMatchTest extends TestCase
{
    /**
     * @dataProvider provideMatchingData
     */
    public function testMatching(
        array $expected,
        string $subject,
        string $pattern,
        ?TypeInterface $shape = null,
        int $offset = 0
    ): void {
        static::assertSame($expected, Regex\first_match($subject, $pattern, $shape, $offset));
    }

    /**
     * @dataProvider provideNonMatchingData
     */
    public function testNotMatching(string $subject, string $pattern, int $offset = 0): void
    {
        static::assertNull(Regex\first_match($subject, $pattern, null, $offset));
    }

    public function testMatchingWithInvalidPattern(): void
    {
        $this->expectException(Regex\Exception\InvalidPatternException::class);
        $this->expectExceptionMessage("No ending delimiter '/' found");

        Regex\first_match('hello', '/hello');
    }

    public function testInvalidCaptureGroup(): void
    {
        $this->expectException(Regex\Exception\RuntimeException::class);
        $this->expectExceptionMessage('Invalid capture groups');

        Regex\first_match('hello', '/(hello)/', capture_groups(['doesnotexist']));
    }

    /**
     * Each dataset is: expected match, subject, pattern and, optionally,
     * the capture-group shape to coerce the match to.
     *
     * Key order inside the expected arrays matters because the comparison
     * is done with assertSame().
     */
    public static function provideMatchingData(): iterable
    {
        yield [
            [
                0 => 'PHP',
                1 => 'PHP',
            ],
            'PHP is the web scripting language of choice.',
            '/(php)/i',
            capture_groups([1]),
        ];

        yield [
            [
                0 => 'Hello world',
                1 => 'Hello',
            ],
            'Hello world is the web scripting language of choice.',
            '/(hello) world/i',
            capture_groups([1]),
        ];

        yield [
            [
                0 => 'web',
                1 => 'web',
            ],
            'PHP is the web scripting language of choice.',
            '/(\bweb\b)/i',
            capture_groups([1]),
        ];

        // No explicit shape: the default dict shape is used.
        yield [
            [
                0 => 'web',
                1 => 'web',
            ],
            'PHP is the web scripting language of choice.',
            '/(\bweb\b)/i',
        ];

        yield [
            [
                0 => 'PHP',
                'language' => 'PHP',
            ],
            'PHP is the web scripting language of choice.',
            '/(?P<language>PHP)/',
            capture_groups(['language']),
        ];

        yield [
            [
                0 => 'http://www.php.net',
                1 => 'www.php.net',
            ],
            'http://www.php.net/index.html',
            '@^(?:http://)?([^/]+)@i',
            capture_groups([1]),
        ];

        // Without a shape, a named group shows up under both its name and its index.
        yield [
            [
                0 => 'PHP',
                'language' => 'PHP',
                1 => 'PHP',
            ],
            'PHP is the web scripting language of choice.',
            '/(?P<language>PHP)/',
        ];
    }

    /**
     * Each dataset is: subject and a pattern that must not match it.
     */
    public static function provideNonMatchingData(): iterable
    {
        yield ['PHP is the web scripting language of choice.', '/php/'];
        yield ['PHP is the website scripting language of choice.', '/\bweb\b/i'];
        yield ['php is the web scripting language of choice.', '/PHP/'];
        yield ['hello', '/[^.]+\.[^.]+$/'];
    }
}