From 5a7fde29a5db7523091a213cc6b7732ca2338fd1 Mon Sep 17 00:00:00 2001 From: Toon Verwerft Date: Fri, 9 Apr 2021 21:24:01 +0200 Subject: [PATCH] [Regex] add first_match() and every_match() functions (#151) --- docs/component/json.md | 2 +- docs/component/regex.md | 3 + integration/Psalm/Plugin.php | 2 +- src/Psl/Internal/Loader.php | 3 + src/Psl/Json/typed.php | 19 +-- src/Psl/Regex/capture_groups.php | 28 ++++ src/Psl/Regex/every_match.php | 52 +++++++ src/Psl/Regex/first_match.php | 51 +++++++ tests/Psl/Regex/CaptureGroupsTest.php | 21 +++ tests/Psl/Regex/EveryMatchTest.php | 191 ++++++++++++++++++++++++++ tests/Psl/Regex/FirstMatchTest.php | 125 +++++++++++++++++ 11 files changed, 481 insertions(+), 16 deletions(-) create mode 100644 src/Psl/Regex/capture_groups.php create mode 100644 src/Psl/Regex/every_match.php create mode 100644 src/Psl/Regex/first_match.php create mode 100644 tests/Psl/Regex/CaptureGroupsTest.php create mode 100644 tests/Psl/Regex/EveryMatchTest.php create mode 100644 tests/Psl/Regex/FirstMatchTest.php diff --git a/docs/component/json.md b/docs/component/json.md index 3c2a1d1..f12bc9c 100644 --- a/docs/component/json.md +++ b/docs/component/json.md @@ -14,6 +14,6 @@ - [decode](./../../src/Psl/Json/decode.php#L24) - [encode](./../../src/Psl/Json/encode.php#L27) -- [typed](./../../src/Psl/Json/typed.php#L22) +- [typed](./../../src/Psl/Json/typed.php#L20) diff --git a/docs/component/regex.md b/docs/component/regex.md index 8cbe4f9..918f3e9 100644 --- a/docs/component/regex.md +++ b/docs/component/regex.md @@ -12,6 +12,9 @@ #### `Functions` +- [capture_groups](./../../src/Psl/Regex/capture_groups.php#L17) +- [every_match](./../../src/Psl/Regex/every_match.php#L25) +- [first_match](./../../src/Psl/Regex/first_match.php#L24) - [matches](./../../src/Psl/Regex/matches.php#L19) - [replace](./../../src/Psl/Regex/replace.php#L26) - [replace_every](./../../src/Psl/Regex/replace_every.php#L27) diff --git a/integration/Psalm/Plugin.php b/integration/Psalm/Plugin.php index b8030a9..9ea82b9 100644 --- a/integration/Psalm/Plugin.php +++ b/integration/Psalm/Plugin.php @@ -10,7 +10,7 @@ use SimpleXMLElement; /** * @deprecated use `php-standard-library/psalm-plugin` package instead. - * + * * @see https://github.com/php-standard-library/psalm-plugin */ final class Plugin implements PluginEntryPointInterface diff --git a/src/Psl/Internal/Loader.php b/src/Psl/Internal/Loader.php index c5381d4..dad6eed 100644 --- a/src/Psl/Internal/Loader.php +++ b/src/Psl/Internal/Loader.php @@ -258,6 +258,9 @@ final class Loader 'Psl\Math\tan', 'Psl\Math\to_base', 'Psl\Result\wrap', + 'Psl\Regex\capture_groups', + 'Psl\Regex\every_match', + 'Psl\Regex\first_match', 'Psl\Regex\split', 'Psl\Regex\matches', 'Psl\Regex\replace', diff --git a/src/Psl/Json/typed.php b/src/Psl/Json/typed.php index c917f2e..ba34355 100644 --- a/src/Psl/Json/typed.php +++ b/src/Psl/Json/typed.php @@ -4,33 +4,24 @@ declare(strict_types=1); namespace Psl\Json; -use Psl\Type\Exception\AssertException; -use Psl\Type\Exception\CoercionException; -use Psl\Type\TypeInterface; +use Psl\Type; /** * Decode a json encoded string into a dynamic variable. * * @template T * - * @param TypeInterface $type + * @param Type\TypeInterface $type * * @throws Exception\DecodeException If an error occurred. * * @return T */ -function typed(string $json, TypeInterface $type) +function typed(string $json, Type\TypeInterface $type) { - $value = decode($json); - try { - return $type->assert($value); - } catch (AssertException $e) { - } - - try { - return $type->coerce($value); - } catch (CoercionException $e) { + return $type->coerce(decode($json)); + } catch (Type\Exception\CoercionException $e) { throw new Exception\DecodeException($e->getMessage(), (int)$e->getCode(), $e); } } diff --git a/src/Psl/Regex/capture_groups.php b/src/Psl/Regex/capture_groups.php new file mode 100644 index 0000000..d606c73 --- /dev/null +++ b/src/Psl/Regex/capture_groups.php @@ -0,0 +1,28 @@ + $groups + * + * @return Type\TypeInterface> + * + * @psalm-suppress MixedReturnTypeCoercion - Psalm loses track of the keys. No worries, another psalm plugin fixes this! + */ +function capture_groups(array $groups): Type\TypeInterface +{ + return Type\shape( + Dict\from_keys( + Dict\unique([0, ...$groups]), + /** + * @return Type\TypeInterface + */ + static fn(): Type\TypeInterface => Type\string() + ) + ); +} diff --git a/src/Psl/Regex/every_match.php b/src/Psl/Regex/every_match.php new file mode 100644 index 0000000..9c187f8 --- /dev/null +++ b/src/Psl/Regex/every_match.php @@ -0,0 +1,52 @@ + $capture_groups What shape does a single set of matching items have? + * + * @throws Exception\RuntimeException If an internal error accord. + * @throws Exception\InvalidPatternException If $pattern is invalid. + * + * @return (T is null ? list> : list)|null + */ +function every_match( + string $subject, + string $pattern, + ?Type\TypeInterface $capture_groups = null, + int $offset = 0 +): ?array { + $matching = Internal\call_preg( + 'preg_match_all', + static function () use ($subject, $pattern, $offset): ?array { + $matching = []; + $matches = preg_match_all($pattern, $subject, $matching, PREG_SET_ORDER, $offset); + + return $matches === 0 ? null : $matching; + } + ); + + if ($matching === null) { + return null; + } + + $capture_groups ??= Type\dict(Type\array_key(), Type\string()); + + try { + return Type\vec($capture_groups)->coerce($matching); + } catch (InvariantViolationException | Type\Exception\CoercionException $e) { + throw new Exception\RuntimeException('Invalid capture groups', 0, $e); + } +} diff --git a/src/Psl/Regex/first_match.php b/src/Psl/Regex/first_match.php new file mode 100644 index 0000000..a7fd80b --- /dev/null +++ b/src/Psl/Regex/first_match.php @@ -0,0 +1,51 @@ + $capture_groups What shape does the matching items have? + * + * @throws Exception\RuntimeException If an internal error accord. + * @throws Exception\InvalidPatternException If $pattern is invalid. + * + * @return (T is null ? array : T)|null + */ +function first_match( + string $subject, + string $pattern, + ?Type\TypeInterface $capture_groups = null, + int $offset = 0 +): ?array { + $matching = Internal\call_preg( + 'preg_match', + static function () use ($subject, $pattern, $offset): ?array { + $matching = []; + $matches = preg_match($pattern, $subject, $matching, 0, $offset); + + return $matches === 0 ? null : $matching; + } + ); + + if ($matching === null) { + return null; + } + + $capture_groups ??= Type\dict(Type\array_key(), Type\string()); + + try { + return $capture_groups->coerce($matching); + } catch (Type\Exception\CoercionException $e) { + throw new Exception\RuntimeException('Invalid capture groups', 0, $e); + } +} diff --git a/tests/Psl/Regex/CaptureGroupsTest.php b/tests/Psl/Regex/CaptureGroupsTest.php new file mode 100644 index 0000000..c827d13 --- /dev/null +++ b/tests/Psl/Regex/CaptureGroupsTest.php @@ -0,0 +1,21 @@ + 'Hello', 1 => 'World']; + $shape = capture_groups([1]); + $actual = $shape->coerce($data); + + static::assertSame($actual, $data); + } +} diff --git a/tests/Psl/Regex/EveryMatchTest.php b/tests/Psl/Regex/EveryMatchTest.php new file mode 100644 index 0000000..05d8eb6 --- /dev/null +++ b/tests/Psl/Regex/EveryMatchTest.php @@ -0,0 +1,191 @@ +expectException(Regex\Exception\InvalidPatternException::class); + $this->expectExceptionMessage("No ending delimiter '/' found"); + + Regex\every_match('hello', '/hello'); + } + + public function testInvalidCaptureGroup(): void + { + $this->expectException(Regex\Exception\RuntimeException::class); + $this->expectExceptionMessage('Invalid capture groups'); + + Regex\every_match('hello', '/(hello)/', capture_groups(['doesnotexist'])); + } + + public function provideMatchingData(): iterable + { + yield [ + [ + [ + 0 => 'PHP', + 1 => 'PHP', + ] + ], + 'PHP is the web scripting language of choice.', + '/(php)/i', + capture_groups([1]) + ]; + yield [ + [ + [ + 0 => 'Hello world', + 1 => 'Hello', + ] + ], + 'Hello world is the web scripting language of choice.', + '/(hello) world/i', + capture_groups([1]) + ]; + yield [ + [ + [ + 0 => 'web', + 1 => 'web', + ] + ], + 'PHP is the web scripting language of choice.', + '/(\bweb\b)/i', + capture_groups([1]) + ]; + yield [ + [ + [ + 0 => 'web', + 1 => 'web', + ] + ], + 'PHP is the web scripting language of choice.', + '/(\bweb\b)/i' + ]; + yield [ + [ + [ + 0 => 'PHP', + 'language' => 'PHP' + ], + ], + 'PHP is the web scripting language of choice.', + '/(?PPHP)/', + capture_groups(['language']) + ]; + yield [ + [ + [ + 0 => 'PHP', + 'language' => 'PHP', + 1 => 'PHP', + ], + ], + 'PHP is the web scripting language of choice.', + '/(?PPHP)/' + ]; + yield [ + [ + [ + 0 => 'http://www.php.net', + 1 => 'www.php.net', + ] + ], + 'http://www.php.net/index.html', + '@^(?:http://)?([^/]+)@i', + capture_groups([1]) + ]; + yield [ + [ + [ + 0 => 'a: 1', + 1 => 'a', + 2 => '1', + ], + [ + 0 => 'b: 2', + 1 => 'b', + 2 => '2', + ], + [ + 0 => 'c: 3', + 1 => 'c', + 2 => '3', + ], + ], + << 'a: 1', + 'name' => 'a', + 'digit' => '1', + ], + [ + 0 => 'b: 2', + 'name' => 'b', + 'digit' => '2', + ], + [ + 0 => 'c: 3', + 'name' => 'c', + 'digit' => '3', + ], + ], + <<\w+): (?P\d+)@i', + capture_groups(['name', 'digit']) + ]; + } + + public function provideNonMatchingData(): iterable + { + yield ['PHP is the web scripting language of choice.', '/php/']; + yield ['PHP is the website scripting language of choice.', '/\bweb\b/i']; + yield ['php is the web scripting language of choice.', '/PHP/']; + yield ['hello', '/[^.]+\.[^.]+$/']; + } +} diff --git a/tests/Psl/Regex/FirstMatchTest.php b/tests/Psl/Regex/FirstMatchTest.php new file mode 100644 index 0000000..27ec4e1 --- /dev/null +++ b/tests/Psl/Regex/FirstMatchTest.php @@ -0,0 +1,125 @@ +expectException(Regex\Exception\InvalidPatternException::class); + $this->expectExceptionMessage("No ending delimiter '/' found"); + + Regex\first_match('hello', '/hello'); + } + + public function testInvalidCaptureGroup(): void + { + $this->expectException(Regex\Exception\RuntimeException::class); + $this->expectExceptionMessage('Invalid capture groups'); + + Regex\first_match('hello', '/(hello)/', capture_groups(['doesnotexist'])); + } + + public function provideMatchingData(): iterable + { + yield [ + [ + 0 => 'PHP', + 1 => 'PHP', + ], + 'PHP is the web scripting language of choice.', + '/(php)/i', + capture_groups([1]) + ]; + yield [ + [ + 0 => 'Hello world', + 1 => 'Hello', + ], + 'Hello world is the web scripting language of choice.', + '/(hello) world/i', + capture_groups([1]) + ]; + yield [ + [ + 0 => 'web', + 1 => 'web', + ], + 'PHP is the web scripting language of choice.', + '/(\bweb\b)/i', + capture_groups([1]) + ]; + yield [ + [ + 0 => 'web', + 1 => 'web', + ], + 'PHP is the web scripting language of choice.', + '/(\bweb\b)/i' + ]; + yield [ + [ + 0 => 'PHP', + 'language' => 'PHP', + ], + 'PHP is the web scripting language of choice.', + '/(?PPHP)/', + capture_groups(['language']) + ]; + yield [ + [ + 0 => 'http://www.php.net', + 1 => 'www.php.net' + ], + 'http://www.php.net/index.html', + '@^(?:http://)?([^/]+)@i', + capture_groups([1]) + ]; + yield [ + [ + 0 => 'PHP', + 'language' => 'PHP', + 1 => 'PHP', + ], + 'PHP is the web scripting language of choice.', + '/(?PPHP)/', + ]; + } + + public function provideNonMatchingData(): iterable + { + yield ['PHP is the web scripting language of choice.', '/php/']; + yield ['PHP is the website scripting language of choice.', '/\bweb\b/i']; + yield ['php is the web scripting language of choice.', '/PHP/']; + yield ['hello', '/[^.]+\.[^.]+$/']; + } +}