From 6fd7154c55ef0c611879226058fbed9cf2b8de5b Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Mon, 15 Apr 2024 15:52:07 +0200 Subject: [PATCH] First commit --- .github/FUNDING.yml | 1 + .github/workflows/main.yml | 73 ++ .gitignore | 9 + .php-cs-fixer.dist.php | 14 + LICENSE | 203 ++++++ NOTICE | 5 + README.md | 83 +++ composer.json | 46 ++ docs/docs/danog/TelegramEntities/Entities.md | 77 +++ .../danog/TelegramEntities/EntityTools.md | 125 ++++ docs/docs/index.md | 20 + docs/index.md | 1 + examples/1-all.php | 56 ++ phpunit.xml | 18 + psalm.xml | 18 + src/Entities.php | 416 ++++++++++++ src/EntityTools.php | 199 ++++++ tests/EntitiesTest.php | 638 ++++++++++++++++++ 18 files changed, 2002 insertions(+) create mode 100644 .github/FUNDING.yml create mode 100644 .github/workflows/main.yml create mode 100644 .gitignore create mode 100644 .php-cs-fixer.dist.php create mode 100644 LICENSE create mode 100644 NOTICE create mode 100644 README.md create mode 100644 composer.json create mode 100644 docs/docs/danog/TelegramEntities/Entities.md create mode 100644 docs/docs/danog/TelegramEntities/EntityTools.md create mode 100644 docs/docs/index.md create mode 120000 docs/index.md create mode 100644 examples/1-all.php create mode 100644 phpunit.xml create mode 100644 psalm.xml create mode 100644 src/Entities.php create mode 100644 src/EntityTools.php create mode 100644 tests/EntitiesTest.php diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..72fc5ff --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: danog diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..b15e8ad --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,73 @@ +name: build +on: + pull_request: + push: +jobs: + run: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + php-versions: ["8.2", "8.3"] + name: PHP ${{ matrix.php-versions }} Test on ${{ matrix.os }} + steps: + - name: 
Checkout + uses: actions/checkout@v2 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + extensions: mbstring, intl, sockets + coverage: xdebug + + - name: Check environment + run: | + php --version + composer --version + + - name: Get composer cache directory + id: composercache + run: echo "::set-output name=dir::$(composer config cache-files-dir)" + + - name: Cache dependencies + uses: actions/cache@v2 + with: + path: ${{ steps.composercache.outputs.dir }} + key: ${{ matrix.os }}-composer-${{ matrix.php-versions }}-${{ hashFiles('**/composer.lock') }} + restore-keys: ${{ matrix.os }}-composer-${{ matrix.php-versions }}- + + - name: Install dependencies + run: | + composer install --prefer-dist + wget https://github.com/infection/infection/releases/download/0.27.0/infection.phar -O /usr/local/bin/infection + chmod +x /usr/local/bin/infection + + - name: Run codestyle check + env: + PHP_CS_FIXER_IGNORE_ENV: 1 + run: | + vendor/bin/php-cs-fixer --diff --dry-run -v fix + + - name: Run unit tests + env: + TOKEN: ${{ secrets.TOKEN }} + DEST: ${{ secrets.DEST }} + run: | + vendor/bin/phpunit --coverage-text --coverage-clover build/logs/clover.xml + + #- name: Run mutation tests + # env: + # STRYKER_DASHBOARD_API_KEY: ${{ secrets.STRYKER_DASHBOARD_API_KEY }} + # run: | + # infection --show-mutations + + - name: Run Psalm analysis + run: | + vendor/bin/psalm --shepherd + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + slug: danog/telegram-entities diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a8db918 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.vscode +/infection/ +a.php +.phpunit.cache +/vendor/ +*.cache +composer.lock +/coverage/ +/.infection-cache diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php new file mode 100644 index 0000000..237e0d9 --- /dev/null +++ b/.php-cs-fixer.dist.php @@ -0,0 
+1,14 @@ +getFinder() + ->in(__DIR__ . '/src') + ->in(__DIR__ . '/tests') + ->in(__DIR__ . '/examples'); + +$cacheDir = getenv('TRAVIS') ? getenv('HOME') . '/.php-cs-fixer' : __DIR__; + +$config->setCacheFile($cacheDir . '/.php_cs.cache'); + +return $config; diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6b0b127 --- /dev/null +++ b/LICENSE @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..b0f0892 --- /dev/null +++ b/NOTICE @@ -0,0 +1,5 @@ +TelegramEntities - A library to work with Telegram styled text entities. + +Copyright 2024 Daniil Gentili + +Homepage: https://github.com/danog/telegram-entities diff --git a/README.md b/README.md new file mode 100644 index 0000000..7903e45 --- /dev/null +++ b/README.md @@ -0,0 +1,83 @@ +# TelegramEntities + +[![codecov](https://codecov.io/gh/danog/telegram-entities/branch/master/graph/badge.svg)](https://codecov.io/gh/danog/telegram-entities) +[![Psalm coverage](https://shepherd.dev/github/danog/telegram-entities/coverage.svg)](https://shepherd.dev/github/danog/telegram-entities) +[![Psalm level 1](https://shepherd.dev/github/danog/telegram-entities/level.svg)](https://shepherd.dev/github/danog/telegram-entities) +![License](https://img.shields.io/github/license/danog/telegram-entities) + +A library to work with Telegram UTF-16 styled text entities, created by Daniil Gentili (https://daniil.it). + +This library can be used to modify entities returned by the Telegram Bot API, or even locally generate them using a custom MarkdownV2 and HTML parser inside of the library. + +This library was initially created for [MadelineProto](https://docs.madelineproto.xyz), an async PHP client API for the telegram MTProto protocol. 
+ +## Installation + +```bash +composer require danog/telegram-entities +``` + +## Usage + +```php +request(new Request("https://api.telegram.org/bot$token/sendMessage?".http_build_query([ + 'text' => $message, + 'parse_mode' => $parse_mode, + 'entities' => json_encode($entities), + 'chat_id' => $dest + ]))); + + return json_decode($res->getBody()->buffer(), true)['result']; +}; + +$result = $sm("*This is a ❤️ test*", parse_mode: "MarkdownV2"); + +// Convert a message+entities back to HTML +$entities = new Entities($result['text'], $result['entities']); +var_dump($entities->toHTML()); // This is a ❤️ test + +// Modify $entities as needed +$entities->message = "A message with ❤️ emojis"; + +// EntityTools::mb* methods compute the length in UTF-16 code units, as required by the bot API. +$entities->entities[0]['length'] = EntityTools::mbStrlen($entities->message); + +// then resend: +$sm($entities->message, entities: $entities->entities); + +// Convert HTML to an array of entities locally +$entities = Entities::fromHtml("This is a ❤️ nested test"); +$sm($entities->message, entities: $entities->entities); + +// Convert markdown to an array of entities locally +$entities = Entities::fromMarkdown("*This is _a ❤️ nested_ test*"); +$sm($entities->message, entities: $entities->entities); +``` + +Many more methods are available, see the [API documentation](https://github.com/danog/telegram-entities/blob/master/docs/docs/index.md) for the full list! + +## API Documentation + +Click [here »](https://github.com/danog/telegram-entities/blob/master/docs/docs/index.md) to view the API documentation. 
diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..6fa7ff5 --- /dev/null +++ b/composer.json @@ -0,0 +1,46 @@ +{ + "name": "danog/telegram-entities", + "description": "A library to work with Telegram UTF-16 styled text entities.", + "type": "library", + "license": "Apache-2.0", + "autoload": { + "psr-4": { + "danog\\TelegramEntities\\": "src/" + } + }, + "autoload-dev": { + "psr-4": { + "danog\\TestTelegramEntities\\": "tests/" + } + }, + "authors": [ + { + "name": "Daniil Gentili", + "email": "daniil@daniil.it" + } + ], + "require": { + "php-64bit": ">=8.2.4", + "webmozart/assert": "^1.11", + "symfony/polyfill-mbstring": "*" + }, + "require-dev": { + "vimeo/psalm": "dev-master", + "phpunit/phpunit": "^11.0.9", + "amphp/php-cs-fixer-config": "^2.0.1", + "friendsofphp/php-cs-fixer": "^3.52.1", + "infection/infection": "^0.28.1", + "danog/phpdoc": "^0.1.22", + "amphp/http-client": "^5.0" + }, + "scripts": { + "cs-fix": "PHP_CS_FIXER_IGNORE_ENV=1 php -d pcre.jit=0 vendor/bin/php-cs-fixer fix -v" + }, + "config": { + "allow-plugins": { + "dealerdirect/phpcodesniffer-composer-installer": true, + "infection/extension-installer": true + } + } + +} diff --git a/docs/docs/danog/TelegramEntities/Entities.md b/docs/docs/danog/TelegramEntities/Entities.md new file mode 100644 index 0000000..3680db9 --- /dev/null +++ b/docs/docs/danog/TelegramEntities/Entities.md @@ -0,0 +1,77 @@ +--- +title: "danog\\TelegramEntities\\Entities: Class that represents a message + set of Telegram entities." +description: "" + +--- +# `danog\TelegramEntities\Entities` +[Back to index](../../index.md) + +> Author: Daniil Gentili + + +Class that represents a message + set of Telegram entities. + + + +## Properties +* `$message`: `string` Converted message +* `$entities`: `list` Converted entities. 
+ +## Method list: +* [`__construct(string $message, array $entities)`](#__construct) +* [`fromMarkdown(string $markdown): \danog\TelegramEntities\Entities`](#fromMarkdown) +* [`fromHtml(string $html): \danog\TelegramEntities\Entities`](#fromHtml) +* [`toHTML(bool $allowTelegramTags = false): string`](#toHTML) + +## Methods: +### `__construct(string $message, array $entities)` + +Creates an Entities container using a message and a list of entities. + + +Parameters: + +* `$message`: `string` +* `$entities`: `array` + + + +### `fromMarkdown(string $markdown): \danog\TelegramEntities\Entities` + +Manually convert markdown to a message and a set of entities. + + +Parameters: + +* `$markdown`: `string` + + +Return value: Object containing message and entities + + +### `fromHtml(string $html): \danog\TelegramEntities\Entities` + +Manually convert HTML to a message and a set of entities. + + +Parameters: + +* `$html`: `string` + + +Return value: Object containing message and entities + + +### `toHTML(bool $allowTelegramTags = false): string` + +Convert a message and a set of entities to HTML. + + +Parameters: + +* `$allowTelegramTags`: `bool` Whether to allow telegram-specific tags like tg-spoiler, tg-emoji, mention links and so on... + + + +--- +Generated by [danog/phpdoc](https://phpdoc.daniil.it) diff --git a/docs/docs/danog/TelegramEntities/EntityTools.md b/docs/docs/danog/TelegramEntities/EntityTools.md new file mode 100644 index 0000000..4e2f35c --- /dev/null +++ b/docs/docs/danog/TelegramEntities/EntityTools.md @@ -0,0 +1,125 @@ +--- +title: "danog\\TelegramEntities\\EntityTools: Telegram UTF-16 styled text entity tools." +description: "" + +--- +# `danog\TelegramEntities\EntityTools` +[Back to index](../../index.md) + +> Author: Daniil Gentili + + +Telegram UTF-16 styled text entity tools. 
+ + + + +## Method list: +* [`mbStrlen(string $text): int`](#mbStrlen) +* [`mbSubstr(string $text, integer $offset, (null|int) $length = NULL): string`](#mbSubstr) +* [`mbStrSplit(string $text, integer<0, max> $length): list`](#mbStrSplit) +* [`htmlEscape(string $what): string`](#htmlEscape) +* [`markdownEscape(string $what): string`](#markdownEscape) +* [`markdownCodeblockEscape(string $what): string`](#markdownCodeblockEscape) +* [`markdownCodeEscape(string $what): string`](#markdownCodeEscape) +* [`markdownUrlEscape(string $what): string`](#markdownUrlEscape) + +## Methods: +### `mbStrlen(string $text): int` + +Get length of string in UTF-16 code units. + + +Parameters: + +* `$text`: `string` Text + + + +### `mbSubstr(string $text, integer $offset, (null|int) $length = NULL): string` + +Telegram UTF-16 multibyte substring. + + +Parameters: + +* `$text`: `string` Text to substring +* `$offset`: `integer` Offset +* `$length`: `(null|int)` Length + + + +### `mbStrSplit(string $text, integer<0, max> $length): list` + +Telegram UTF-16 multibyte split. + + +Parameters: + +* `$text`: `string` Text +* `$length`: `integer<0, max>` Length + + +#### See also: +* `max` + + + + +### `htmlEscape(string $what): string` + +Escape string for this library's HTML entity converter. + + +Parameters: + +* `$what`: `string` String to escape + + + +### `markdownEscape(string $what): string` + +Escape string for markdown. + + +Parameters: + +* `$what`: `string` String to escape + + + +### `markdownCodeblockEscape(string $what): string` + +Escape string for markdown codeblock. + + +Parameters: + +* `$what`: `string` String to escape + + + +### `markdownCodeEscape(string $what): string` + +Escape string for markdown code section. + + +Parameters: + +* `$what`: `string` String to escape + + + +### `markdownUrlEscape(string $what): string` + +Escape string for URL. 
+ + +Parameters: + +* `$what`: `string` String to escape + + + +--- +Generated by [danog/phpdoc](https://phpdoc.daniil.it) diff --git a/docs/docs/index.md b/docs/docs/index.md new file mode 100644 index 0000000..5784db9 --- /dev/null +++ b/docs/docs/index.md @@ -0,0 +1,20 @@ +--- +description: "A library to work with Telegram UTF-16 styled text entities." +title: "danog/telegram-entities" + +--- +# `danog/telegram-entities` + +A library to work with Telegram UTF-16 styled text entities. + + + + +## Classes +* [\danog\TelegramEntities\Entities: Class that represents a message + set of Telegram entities.](danog/TelegramEntities/Entities.md) +* [\danog\TelegramEntities\EntityTools: Telegram UTF-16 styled text entity tools.](danog/TelegramEntities/EntityTools.md) + + + +--- +Generated by [danog/phpdoc](https://phpdoc.daniil.it). \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 120000 index 0000000..32d46ee --- /dev/null +++ b/docs/index.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/examples/1-all.php b/examples/1-all.php new file mode 100644 index 0000000..5f49dc8 --- /dev/null +++ b/examples/1-all.php @@ -0,0 +1,56 @@ +request(new Request("https://api.telegram.org/bot$token/sendMessage?".http_build_query([ + 'text' => $message, + 'parse_mode' => $parse_mode, + 'entities' => json_encode($entities), + 'chat_id' => $dest + ]))); + + return json_decode($res->getBody()->buffer(), true)['result']; +}; + +$result = $sm("*This is a ❤️ test*", parse_mode: "MarkdownV2"); + +// Convert a message+entities back to HTML +$entities = new Entities($result['text'], $result['entities']); +var_dump($entities->toHTML()); // This is a ❤️ test + +// Modify $entities as needed +$entities->message = "A message with ❤️ emojis"; + +// EntityTools::mb* methods compute the length in UTF-16 code units, as required by the bot API. 
+$entities->entities[0]['length'] = EntityTools::mbStrlen($entities->message); + +// then resend: +$sm($entities->message, entities: $entities->entities); + +// Convert HTML to an array of entities locally +$entities = Entities::fromHtml("This is a ❤️ nested test"); +$sm($entities->message, entities: $entities->entities); + +// Convert markdown to an array of entities locally +$entities = Entities::fromHtml("This is a ❤️ nested test"); +$sm($entities->message, entities: $entities->entities); + +// See https://github.com/danog/telegram-entities for the full list of available methods! diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..03ab104 --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,18 @@ + + + + + tests + + + + + src + + + \ No newline at end of file diff --git a/psalm.xml b/psalm.xml new file mode 100644 index 0000000..e484998 --- /dev/null +++ b/psalm.xml @@ -0,0 +1,18 @@ + + + + + + + + + diff --git a/src/Entities.php b/src/Entities.php new file mode 100644 index 0000000..36adab3 --- /dev/null +++ b/src/Entities.php @@ -0,0 +1,416 @@ + + * @copyright 2016-2024 Daniil Gentili + * @license https://opensource.org/license/apache-2-0 Apache 2.0 + * @link https://github.com/danog/telegram-entities TelegramEntities documentation + */ + +namespace danog\TelegramEntities; + +use AssertionError; +use DOMDocument; +use DOMElement; +use DOMNode; +use DOMText; + +/** + * Class that represents a message + set of Telegram entities. 
+ * + * @api + * + * @psalm-type TEntity=( + * array{ + * type: "bold"|"italic"|"code"|"strikethrough"|"underline"|"block_quote"|"url"|"email"|"phone"|"spoiler"|"mention", + * offset: int<0, max>, + * length: int<0, max> + * } + * |array{type: "text_mention", user: array{id: int, ...}, offset: int, length: int} + * |array{type: "custom_emoji", custom_emoji_id: int, offset: int, length: int} + * |array{type: "pre", language?: string, offset: int, length: int} + * |array{type: "text_link", url: string, offset: int, length: int} + * ) + */ +final class Entities +{ + /** + * Creates an Entities container using a message and a list of entities. + */ + public function __construct( + /** Converted message */ + public string $message, + /** + * Converted entities. + * + * @var list + */ + public array $entities, + ) { + } + + /** + * Manually convert markdown to a message and a set of entities. + * + * @return Entities Object containing message and entities + */ + public static function fromMarkdown(string $markdown): self + { + $markdown = \str_replace("\r\n", "\n", $markdown); + $message = ''; + $messageLen = 0; + $entities = []; + $offset = 0; + $stack = []; + while ($offset < \strlen($markdown)) { + $len = \strcspn($markdown, '*_~`[]|!\\', $offset); + $piece = \substr($markdown, $offset, $len); + $offset += $len; + if ($offset === \strlen($markdown)) { + $message .= $piece; + break; + } + + $char = $markdown[$offset++]; + $next = $markdown[$offset] ?? 
''; + if ($char === '\\') { + $message .= $piece.$next; + $messageLen += EntityTools::mbStrlen($piece)+1; + $offset++; + continue; + } + + if ($char === '_' && $next === '_') { + $offset++; + $char = '__'; + } elseif ($char === '|') { + if ($next === '|') { + $offset++; + $char = '||'; + } else { + $message .= $piece.$char; + $messageLen += EntityTools::mbStrlen($piece)+1; + continue; + } + } elseif ($char === '!') { + if ($next === '[') { + $offset++; + $char = ']('; + } else { + $message .= $piece.$char; + $messageLen += EntityTools::mbStrlen($piece)+1; + continue; + } + } elseif ($char === '[') { + $char = ']('; + } elseif ($char === ']') { + if (!$stack || \end($stack)[0] !== '](') { + $message .= $piece.$char; + $messageLen += EntityTools::mbStrlen($piece)+1; + continue; + } + if ($next !== '(') { + \array_pop($stack); + $message .= '['.$piece.$char; + $messageLen += EntityTools::mbStrlen($piece)+2; + continue; + } + $offset++; + $char = "]("; + } elseif ($char === '`') { + $message .= $piece; + $messageLen += EntityTools::mbStrlen($piece); + + $token = '`'; + $language = null; + if ($next === '`' && ($markdown[$offset+1] ?? '') === '`') { + $token = '```'; + + $offset += 2; + $langLen = \strcspn($markdown, "\n ", $offset); + $language = \substr($markdown, $offset, $langLen); + $offset += $langLen; + if (($markdown[$offset] ?? 
'') === "\n") { + $offset++; + } + } + + $piece = ''; + $posClose = $offset; + while (($posClose = \strpos($markdown, $token, $posClose)) !== false) { + if ($markdown[$posClose-1] === '\\') { + $piece .= \substr($markdown, $offset, ($posClose-$offset)-1).$token; + $posClose += \strlen($token); + $offset = $posClose; + continue; + } + break; + } + /** @var int|false $posClose */ + if ($posClose === false) { + throw new AssertionError("Unclosed ``` opened @ pos $offset!"); + } + $piece .= \substr($markdown, $offset, $posClose-$offset); + + $start = $messageLen; + + $message .= $piece; + $pieceLen = EntityTools::mbStrlen($piece); + $messageLen += $pieceLen; + + for ($x = \strlen($piece)-1; $x >= 0; $x--) { + if (!( + $piece[$x] === ' ' + || $piece[$x] === "\r" + || $piece[$x] === "\n" + )) { + break; + } + $pieceLen--; + } + if ($pieceLen > 0) { + \assert($start >= 0); + $tmp = [ + 'type' => match ($token) { + '```' => 'pre', + '`' => 'code', + }, + 'offset' => $start, + 'length' => $pieceLen, + ]; + if ($language !== null) { + $tmp['language'] = $language; + } + $entities []= $tmp; + unset($tmp); + } + + $offset = $posClose+\strlen($token); + continue; + } + + if ($stack && \end($stack)[0] === $char) { + [, $start] = \array_pop($stack); + if ($char === '](') { + $posClose = $offset; + $link = ''; + while (($posClose = \strpos($markdown, ')', $posClose)) !== false) { + if ($markdown[$posClose-1] === '\\') { + $link .= \substr($markdown, $offset, ($posClose-$offset)-1); + $offset = $posClose++; + continue; + } + $link .= \substr($markdown, $offset, ($posClose-$offset)); + break; + } + /** @var int|false $posClose */ + if ($posClose === false) { + throw new AssertionError("Unclosed ) opened @ pos $offset!"); + } + $entity = self::handleLink($link); + $offset = $posClose+1; + } else { + $entity = match ($char) { + '*' => ['type' => 'bold'], + '_' => ['type' => 'italic'], + '__' => ['type' => 'underline'], + '`' => ['type' => 'code'], + '~' => ['type' => 'strikethrough'], 
+ '||' => ['type' => 'spoiler'], + default => throw new AssertionError("Unknown char $char @ pos $offset!") + }; + } + $message .= $piece; + $messageLen += EntityTools::mbStrlen($piece); + + $lengthReal = $messageLen-$start; + for ($x = \strlen($message)-1; $x >= 0; $x--) { + if (!( + $message[$x] === ' ' + || $message[$x] === "\r" + || $message[$x] === "\n" + )) { + break; + } + $lengthReal--; + } + if ($lengthReal > 0) { + $entities []= $entity + ['offset' => $start, 'length' => $lengthReal]; + } + } else { + $message .= $piece; + $messageLen += EntityTools::mbStrlen($piece); + $stack []= [$char, $messageLen]; + } + } + if ($stack) { + throw new AssertionError("Found unclosed markdown elements ".\implode(', ', \array_column($stack, 0))); + } + /** @psalm-suppress MixedArgumentTypeCoercion Psalm bug to fix */ + return new Entities( + \trim($message), + $entities, + ); + } + + /** + * Manually convert HTML to a message and a set of entities. + * + * @return Entities Object containing message and entities + */ + public static function fromHtml(string $html): Entities + { + $dom = new DOMDocument(); + $html = \preg_replace('/\/i', "\n", $html); + \assert($html !== null); + $dom->loadxml('' . \trim($html) . 
''); + $message = ''; + $entities = []; + /** @psalm-suppress PossiblyNullArgument Ignore, will throw anyway */ + self::parseNode($dom->getElementsByTagName('body')->item(0), 0, $message, $entities); + return new Entities(\trim($message), $entities); + } + /** + * @return integer Length of the node + * + * @psalm-suppress UnusedReturnValue + * + * @param-out list $entities + * @param list $entities + */ + private static function parseNode(DOMNode|DOMText $node, int $offset, string &$message, array &$entities): int + { + if ($node instanceof DOMText) { + $message .= $node->wholeText; + return EntityTools::mbStrlen($node->wholeText); + } + // @codeCoverageIgnoreStart + if ($node->nodeName === 'br') { + $message .= "\n"; + return 1; + } + // @codeCoverageIgnoreEnd + /** @var DOMElement $node */ + $entity = match ($node->nodeName) { + 's', 'strike', 'del' => ['type' => 'strikethrough'], + 'u' => ['type' => 'underline'], + 'blockquote' => ['type' => 'block_quote'], + 'b', 'strong' => ['type' => 'bold'], + 'i', 'em' => ['type' => 'italic'], + 'code' => ['type' => 'code'], + 'spoiler', 'tg-spoiler' => ['type' => 'spoiler'], + 'pre' => $node->hasAttribute('language') + ? 
['type' => 'pre', 'language' => $node->getAttribute('language')] + : ['type' => 'pre'], + 'tg-emoji' => ['type' => 'custom_emoji', 'custom_emoji_id' => (int) $node->getAttribute('emoji-id')], + 'emoji' => ['type' => 'custom_emoji', 'custom_emoji_id' => (int) $node->getAttribute('id')], + 'a' => self::handleLink($node->getAttribute('href')), + default => null, + }; + $length = 0; + /** @var DOMNode|DOMText */ + foreach ($node->childNodes as $sub) { + $length += self::parseNode($sub, $offset+$length, $message, $entities); + } + if ($entity !== null) { + $lengthReal = $length; + for ($x = \strlen($message)-1; $x >= 0; $x--) { + if (!( + $message[$x] === ' ' + || $message[$x] === "\r" + || $message[$x] === "\n" + )) { + break; + } + $lengthReal--; + } + if ($lengthReal > 0) { + \assert($offset >= 0); + $entity['offset'] = $offset; + $entity['length'] = $lengthReal; + /** @psalm-check-type $entity = TEntity */ + $entities []= $entity; + } + } + return $length; + } + /** @return array{type: "text_mention", user: array{id: int}}|array{type: "custom_emoji", custom_emoji_id: int}|array{type: "text_link", url: string} */ + private static function handleLink(string $href): array + { + if (\preg_match('|^mention:(.+)|', $href, $matches) || \preg_match('|^tg://user\\?id=(.+)|', $href, $matches)) { + return ['type' => 'text_mention', 'user' => ['id' => (int) $matches[1]]]; + } + if (\preg_match('|^emoji:(\d+)$|', $href, $matches) || \preg_match('|^tg://emoji\\?id=(.+)|', $href, $matches)) { + return ['type' => 'custom_emoji', 'custom_emoji_id' => (int) $matches[1]]; + } + return ['type' => 'text_link', 'url' => $href]; + } + /** + * Convert a message and a set of entities to HTML. + * + * @param bool $allowTelegramTags Whether to allow telegram-specific tags like tg-spoiler, tg-emoji, mention links and so on... 
+ */ + public function toHTML(bool $allowTelegramTags = false): string + { + $insertions = []; + foreach ($this->entities as $entity) { + ['offset' => $offset, 'length' => $length] = $entity; + $insertions[$offset] ??= ''; + /** @psalm-suppress PossiblyUndefinedArrayOffset */ + $insertions[$offset] .= match ($entity['type']) { + 'bold' => '', + 'italic' => '', + 'code' => '', + 'pre' => isset($entity['language']) && $entity['language'] !== '' ? '
' : '
',
+                'text_link' => '',
+                'strikethrough' => '',
+                "underline" => '',
+                "block_quote" => '
', + "url" => '', + "email" => '', + "phone" => '', + "mention" => '', + "spoiler" => $allowTelegramTags ? '' : '', + "custom_emoji" => $allowTelegramTags ? '' : '', + "text_mention" => $allowTelegramTags ? '' : '', + }; + $offset += $length; + $insertions[$offset] = match ($entity['type']) { + "bold" => '', + "italic" => '', + "code" => '', + "pre" => '
', + "text_link", "url", "email", "mention", "phone" => '', + "strikethrough" => '', + "underline" => '', + "block_quote" => '', + "spoiler" => $allowTelegramTags ? '' : '', + "custom_emoji" => $allowTelegramTags ? "" : '', + "text_mention" => $allowTelegramTags ? '' : '', + } . ($insertions[$offset] ?? ''); + } + \ksort($insertions); + $final = ''; + $pos = 0; + foreach ($insertions as $offset => $insertion) { + $final .= EntityTools::htmlEscape(EntityTools::mbSubstr($this->message, $pos, $offset-$pos)); + $final .= $insertion; + $pos = $offset; + } + return \str_replace("\n", "
", $final.EntityTools::htmlEscape(EntityTools::mbSubstr($this->message, $pos))); + } +} diff --git a/src/EntityTools.php b/src/EntityTools.php new file mode 100644 index 0000000..21a1ac6 --- /dev/null +++ b/src/EntityTools.php @@ -0,0 +1,199 @@ + + * @copyright 2016-2024 Daniil Gentili + * @license https://opensource.org/license/apache-2-0 Apache 2.0 + * @link https://github.com/danog/telegram-entities TelegramEntities documentation + */ + +namespace danog\TelegramEntities; + +use Webmozart\Assert\Assert; + +/** + * Telegram UTF-16 styled text entity tools. + * + * @api + */ +final class EntityTools +{ + // @codeCoverageIgnoreStart + /** + * @psalm-suppress UnusedConstructor + * + * @internal Can only be used statically. + */ + private function __construct() + { + } + // @codeCoverageIgnoreEnd + + /** + * Get length of string in UTF-16 code points. + * + * @param string $text Text + */ + public static function mbStrlen(string $text): int + { + $length = 0; + $textlength = \strlen($text); + for ($x = 0; $x < $textlength; $x++) { + $char = \ord($text[$x]); + if (($char & 0xc0) != 0x80) { + $length += 1 + ($char >= 0xf0 ? 1 : 0); + } + } + return $length; + } + /** + * Telegram UTF-16 multibyte substring. + * + * @param string $text Text to substring + * @param integer $offset Offset + * @param null|int $length Length + */ + public static function mbSubstr(string $text, int $offset, ?int $length = null): string + { + /** @var string */ + $converted = \mb_convert_encoding($text, 'UTF-16'); + /** @var string */ + return \mb_convert_encoding( + \substr( + $converted, + $offset<<1, + $length === null ? null : ($length<<1), + ), + 'UTF-8', + 'UTF-16', + ); + } + /** + * Telegram UTF-16 multibyte split. 
+     *
+     * @param string $text Text
+     * @param int<1, max> $length Maximum chunk length, counted in UTF-16 code units
+     * @return list<string> UTF-8 chunks, each at most $length UTF-16 code units long
+     *
+     * @throws \ValueError If $length is smaller than 1
+     */
+    public static function mbStrSplit(string $text, int $length): array
+    {
+        // str_split() refuses lengths below 1; fail up front with a message
+        // that names this method's own argument (same exception type as before).
+        if ($length < 1) {
+            throw new \ValueError('mbStrSplit(): Argument #2 ($length) must be greater than 0');
+        }
+        $result = [];
+        // Name the source encoding explicitly so the result does not depend on
+        // the process-wide mb_internal_encoding() setting; the reverse
+        // conversion below already names both encodings.
+        /** @var string */
+        $text = \mb_convert_encoding($text, 'UTF-16', 'UTF-8');
+        // One UTF-16 code unit is two bytes, hence the byte-wise split at $length*2.
+        // NOTE(review): a boundary that falls between the two halves of a surrogate
+        // pair hands an unpaired surrogate to mb_convert_encoding - confirm that
+        // callers only split at positions where this is acceptable.
+        /** @psalm-suppress ArgumentTypeCoercion */
+        foreach (\str_split($text, $length<<1) as $chunk) {
+            $chunk = \mb_convert_encoding($chunk, 'UTF-8', 'UTF-16');
+            Assert::string($chunk);
+            $result []= $chunk;
+        }
+        /** @var list<string> */
+        return $result;
+    }
+    /**
+     * Escape string for this library's HTML entity converter.
+     *
+     * Both single and double quotes are escaped (ENT_QUOTES), invalid byte
+     * sequences are substituted instead of emptying the result (ENT_SUBSTITUTE),
+     * and the XML 1 entity set is used (ENT_XML1).
+     *
+     * @param string $what String to escape
+     */
+    public static function htmlEscape(string $what): string
+    {
+        return \htmlspecialchars($what, ENT_QUOTES|ENT_SUBSTITUTE|ENT_XML1);
+    }
+    /**
+     * Escape string for markdown.
+     *
+     * Every character in the list below is prefixed with a backslash.
+     *
+     * @param string $what String to escape
+     */
+    public static function markdownEscape(string $what): string
+    {
+        // The backslash MUST stay first: str_replace() applies the pairs in
+        // order, so escaping it later would double the backslashes inserted
+        // by the preceding replacements.
+        return \str_replace(
+            [
+                '\\',
+                '_',
+                '*',
+                '[',
+                ']',
+                '(',
+                ')',
+                '~',
+                '`',
+                '>',
+                '#',
+                '+',
+                '-',
+                '=',
+                '|',
+                '{',
+                '}',
+                '.',
+                '!',
+            ],
+            [
+                '\\\\',
+                '\\_',
+                '\\*',
+                '\\[',
+                '\\]',
+                '\\(',
+                '\\)',
+                '\\~',
+                '\\`',
+                '\\>',
+                '\\#',
+                '\\+',
+                '\\-',
+                '\\=',
+                '\\|',
+                '\\{',
+                '\\}',
+                '\\.',
+                '\\!',
+            ],
+            $what
+        );
+    }
+    /**
+     * Escape string for markdown codeblock.
+     *
+     * Only the ``` fence sequence is escaped; all other characters pass through unchanged.
+     *
+     * @param string $what String to escape
+     */
+    public static function markdownCodeblockEscape(string $what): string
+    {
+        return \str_replace('```', '\\```', $what);
+    }
+    /**
+     * Escape string for markdown code section.
+     *
+     * Only the backtick is escaped; all other characters pass through unchanged.
+     *
+     * @param string $what String to escape
+     */
+    public static function markdownCodeEscape(string $what): string
+    {
+        return \str_replace('`', '\\`', $what);
+    }
+    /**
+     * Escape string for URL.
+ * + * @param string $what String to escape + */ + public static function markdownUrlEscape(string $what): string + { + return \str_replace(')', '\\)', $what); + } +} diff --git a/tests/EntitiesTest.php b/tests/EntitiesTest.php new file mode 100644 index 0000000..ec769c0 --- /dev/null +++ b/tests/EntitiesTest.php @@ -0,0 +1,638 @@ +assertEquals(1, EntityTools::mbStrlen('t')); + $this->assertEquals(1, EntityTools::mbStrlen('я')); + $this->assertEquals(2, EntityTools::mbStrlen('👍')); + $this->assertEquals(4, EntityTools::mbStrlen('🇺🇦')); + + $this->assertEquals('st', EntityTools::mbSubstr('test', 2)); + $this->assertEquals('aя', EntityTools::mbSubstr('aяaя', 2)); + $this->assertEquals('a👍', EntityTools::mbSubstr('a👍a👍', 3)); + $this->assertEquals('🇺🇦', EntityTools::mbSubstr('🇺🇦🇺🇦', 4)); + + $this->assertEquals(['te', 'st'], EntityTools::mbStrSplit('test', 2)); + $this->assertEquals(['aя', 'aя'], EntityTools::mbStrSplit('aяaя', 2)); + $this->assertEquals(['a👍', 'a👍'], EntityTools::mbStrSplit('a👍a👍', 3)); + $this->assertEquals(['🇺🇦', '🇺🇦'], EntityTools::mbStrSplit('🇺🇦🇺🇦', 4)); + } + private static function render(string $message, string $parse_mode): Entities + { + return match ($parse_mode) { + 'html' => Entities::fromHtml($message), + 'markdown' => Entities::fromMarkdown($message), + }; + } + public function testEntities(): void + { + foreach ($this->provideEntities() as $params) { + $this->testEntitiesInner(...$params); + } + } + public function testUnclosed(): void + { + $this->expectExceptionMessage("Found unclosed markdown elements ]("); + Entities::fromMarkdown('['); + } + public function testUnclosedLink(): void + { + $this->expectExceptionMessage("Unclosed ) opened @ pos 7!"); + Entities::fromMarkdown('[test](https://google.com'); + } + public function testUnclosedCode(): void + { + $this->expectExceptionMessage('Unclosed ``` opened @ pos 3!'); + Entities::fromMarkdown('```'); + } + public function testStandalone(): void + { + $test = 
Entities::fromMarkdown(']'); + $this->assertEmpty($test->entities); + $this->assertSame(']', $test->message); + + $test = Entities::fromMarkdown('!!'); + $this->assertEmpty($test->entities); + $this->assertSame('!!', $test->message); + + $test = Entities::fromMarkdown('|'); + $this->assertEmpty($test->entities); + $this->assertSame('|', $test->message); + } + private function testEntitiesInner(string $mode, string $html, string $bare, array $entities, ?string $htmlReverse = null): void + { + $result = self::render(message: $html, parse_mode: $mode); + $this->assertEquals($bare, $result->message); + $this->assertEquals($entities, $result->entities); + + if ( + !\str_contains($html, 'tg://emoji') + && !\str_contains($html, 'request(new Request( + "https://api.telegram.org/bot{$token}/sendMessage?".\http_build_query([ + 'chat_id'=> $dest, + 'parse_mode'=> match ($mode) { + 'markdown' => 'MarkdownV2', + 'html' => 'html' + }, + 'text' => $html + ]) + ))->getBody()->buffer(), true); + + if (!isset($resultApi['result'])) { + throw new AssertionError(\json_encode($resultApi)); + } + + $entities = $resultApi['result']['entities'] ?? []; + $entities = \array_map(function (array $e): array { + if (isset($e['user'])) { + $e['user'] = ['id' => $e['user']['id']]; + } + return $e; + }, $entities); + $this->assertEquals($bare, $resultApi['result']['text']); + $this->assertEquals($entities, $entities); + } + + if (\strtolower($mode) === 'html') { + $this->assertEquals( + \trim(\str_replace(['
', ' ', 'mention:'], ['
', ' ', 'tg://user?id='], $htmlReverse ?? $html)), + $result->toHTML(true) + ); + $result = self::render(message: EntityTools::htmlEscape($html), parse_mode: $mode); + $this->assertEquals($html, $result->message); + $this->assertNoRelevantEntities($result->entities); + } else { + $result = self::render(message: EntityTools::markdownEscape($html), parse_mode: $mode); + $this->assertEquals($html, $result->message); + $this->assertNoRelevantEntities($result->entities); + + $result = self::render(message: "```\n".EntityTools::markdownCodeblockEscape($html)."\n```", parse_mode: $mode); + $this->assertEquals($html, \rtrim($result->message)); + $this->assertEquals([['offset' => 0, 'length' => EntityTools::mbStrlen($html), 'language' => '', 'type' => 'pre']], $result->entities); + } + } + + private function assertNoRelevantEntities(array $entities): void + { + $entities = \array_filter($entities, static fn (array $e) => !\in_array( + $e['type'], + ['url', 'email', 'phone_number', 'mention', 'bot_command'], + true + )); + $this->assertEmpty($entities); + } + + private function provideEntities(): array + { + return [ + [ + 'html', + 'test', + 'test', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'bold', + ], + ], + ], + [ + 'html', + 'test
test', + "test\ntest", + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'bold', + ], + ], + ], + [ + 'html', + 'test
test', + "test\ntest", + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'bold', + ], + ], + ], + [ + 'html', + '🇺🇦🇺🇦', + '🇺🇦🇺🇦', + [ + [ + 'offset' => 4, + 'length' => 4, + 'type' => 'bold', + ], + ], + ], + [ + 'html', + 'testtest ', + 'testtest', + [ + [ + 'offset' => 4, + 'length' => 4, + 'type' => 'bold', + ], + ], + ], + [ + 'html', + 'è»testtest test', + 'è»testtest test', + [ + [ + 'offset' => 6, + 'length' => 4, + 'type' => 'bold', + ], + ], + ], + [ + 'html', + 'test test', + 'test test', + [ + [ + 'offset' => 4, + 'length' => 5, + 'type' => 'bold', + ], + ], + ], + [ + 'markdown', + 'test* test*', + 'test test', + [ + [ + 'offset' => 4, + 'length' => 5, + 'type' => 'bold', + ], + ], + ], + [ + 'html', + 'test
test test
test
test strikethrough underline
blockquote
https://google.com daniil@daniil.it +39398172758722 @daniilgentili spoiler <b>not_bold</b>', + "test\ntest test test test strikethrough underline blockquote https://google.com daniil@daniil.it +39398172758722 @daniilgentili spoiler not_bold", + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'bold', + ], + [ + 'offset' => 5, + 'length' => 4, + 'type' => 'italic', + ], + [ + 'offset' => 10, + 'length' => 4, + 'type' => 'code', + ], + [ + 'offset' => 15, + 'length' => 4, + 'language' => 'html', + 'type' => 'pre', + ], + [ + 'offset' => 20, + 'length' => 4, + 'url' => 'https://example.com/', + 'type' => 'text_link', + ], + [ + 'offset' => 25, + 'length' => 13, + 'type' => 'strikethrough', + ], + [ + 'offset' => 39, + 'length' => 9, + 'type' => 'underline', + ], + [ + 'offset' => 49, + 'length' => 10, + 'type' => 'block_quote', + ], + [ + 'offset' => 127, + 'length' => 7, + 'type' => 'spoiler', + ], + ], + 'test
test test
test
test strikethrough underline
blockquote
https://google.com daniil@daniil.it +39398172758722 @daniilgentili spoiler <b>not_bold</b>', + ], + [ + 'markdown', + 'test *bold _bold and italic_ bold*', + 'test bold bold and italic bold', + [ + [ + 'offset' => 10, + 'length' => 15, + 'type' => 'italic', + ], + [ + 'offset' => 5, + 'length' => 25, + 'type' => 'bold', + ], + ], + ], + [ + 'markdown', + "a\nb\nc", + "a\nb\nc", + [], + ], + [ + 'markdown', + "a\n\nb\n\nc", + "a\n\nb\n\nc", + [], + ], + [ + 'markdown', + "a\n\n\nb\n\n\nc", + "a\n\n\nb\n\n\nc", + [], + ], + [ + 'markdown', + "a\n```php\n 2, + 'length' => 17, + 'type' => 'pre', + 'language' => 'php', + ], + ], + ], + [ + 'html', + '\'"', + '\'"', + [ + [ + 'offset' => 0, + 'length' => 2, + 'type' => 'bold', + ], + ], + ''"', + ], + [ + 'html', + 'mention1 mention2', + 'mention1 mention2', + [ + [ + 'offset' => 0, + 'length' => 8, + 'type' => 'text_mention', + 'user' => ['id' => 101374607], + ], + [ + 'offset' => 9, + 'length' => 8, + 'type' => 'text_mention', + 'user' => ['id' => 101374607], + ], + ], + ], + [ + 'html', + 'mention1 mention2', + 'mention1 mention2', + [ + [ + 'offset' => 0, + 'length' => 8, + 'type' => 'text_mention', + 'user' => ['id' => 101374607], + ], + [ + 'offset' => 9, + 'length' => 8, + 'type' => 'text_mention', + 'user' => ['id' => 101374607], + ], + ], + ], + [ + 'markdown', + '_a b c & " \' \_ \* \~ \\__', + 'a b c & " \' _ * ~ _', + [ + [ + 'offset' => 0, + 'length' => 23, + 'type' => 'italic', + ], + ], + ], + [ + 'markdown', + EntityTools::markdownEscape('\\ test testovich _*~'), + '\\ test testovich _*~', + [], + ], + [ + 'markdown', + "```\na_b\n".EntityTools::markdownCodeblockEscape('\\ ```').'```', + "a_b\n\\ ```", + [ + [ + 'offset' => 0, + 'length' => 9, + 'type' => 'pre', + 'language' => '', + ], + ], + ], + [ + 'markdown', + '`c_d '.EntityTools::markdownCodeEscape('`').'`', + 'c_d `', + [ + [ + 'offset' => 0, + 'length' => 5, + 'type' => 'code', + ], + ], + ], + [ + 'markdown', + '[link 
](https://google.com/)test', + 'link test', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'text_link', + 'url' => 'https://google.com/', + ], + ], + ], + [ + 'markdown', + '[link]('.EntityTools::markdownUrlEscape('https://transfer.sh/(/test/test.PNG,/test/test.MP4).zip').')', + 'link', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'text_link', + 'url' => 'https://transfer.sh/(/test/test.PNG,/test/test.MP4).zip', + ], + ], + ], + [ + 'markdown', + '[link]('.EntityTools::markdownUrlEscape('https://google.com/').')', + 'link', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'text_link', + 'url' => 'https://google.com/', + ], + ], + ], + [ + 'markdown', + '[link]('.EntityTools::markdownUrlEscape('https://google.com/?v=\\test').')', + 'link', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'text_link', + 'url' => 'https://google.com/?v=\\test', + ], + ], + ], + [ + 'markdown', + '[link ](https://google.com/)', + 'link', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'text_link', + 'url' => 'https://google.com/', + ], + ], + ], + [ + 'markdown', + '![link ](tg://emoji?id=5368324170671202286)', + 'link', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'custom_emoji', + 'custom_emoji_id' => 5368324170671202286, + ], + ], + ], + [ + 'markdown', + '[not a link]', + '[not a link]', + [], + ], + [ + 'html', + 'link test', + 'link test', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'text_link', + 'url' => 'https://google.com/', + ], + ], + 'link test', + ], + [ + 'html', + 'link ', + 'link', + [ + [ + 'offset' => 0, + 'length' => 4, + 'type' => 'text_link', + 'url' => 'https://google.com/', + ], + ], + 'link ', + ], + [ + 'markdown', + 'test _italic_ *bold* __underlined__ ~strikethrough~ ```test pre``` `code` ||spoiler||', + 'test italic bold underlined strikethrough pre code spoiler', + [ + [ + 'offset' => 5, + 'length' => 6, + 'type' => 'italic', + ], + [ + 'offset' => 12, + 'length' => 4, + 'type' => 'bold', + ], + [ + 
'offset' => 17, + 'length' => 10, + 'type' => 'underline', + ], + [ + 'offset' => 28, + 'length' => 13, + 'type' => 'strikethrough', + ], + [ + 'offset' => 42, + 'length' => 4, + 'type' => 'pre', + 'language' => 'test', + ], + [ + 'offset' => 47, + 'length' => 4, + 'type' => 'code', + ], + [ + 'offset' => 52, + 'length' => 7, + 'type' => 'spoiler', + ], + ], + ], + [ + 'markdown', + '[special link]('.EntityTools::markdownUrlEscape('https://google.com/)').')', + 'special link', + [ + [ + 'offset' => 0, + 'length' => 12, + 'type' => 'text_link', + 'url' => 'https://google.com/)', + ], + ], + 'link ', + ], + [ + 'markdown', + '`'.EntityTools::markdownCodeEscape('``').'`', + '``', + [ + [ + 'offset' => 0, + 'length' => 2, + 'type' => 'code', + ], + ], + '`\`\``', + ], + ]; + } +}