mirror of
https://github.com/danog/MadelineProto.git
synced 2024-11-30 06:18:58 +01:00
Switch to custom MarkdownV2 parser
This commit is contained in:
parent
f4688e4954
commit
e81f4be805
@ -298,11 +298,6 @@ trait ResponseHandler
|
||||
case 500:
|
||||
case -500:
|
||||
case -503:
|
||||
if ($response['error_message'] === 'MSG_WAIT_FAILED') {
|
||||
$this->call_queue[$request->getQueueId()] = [];
|
||||
$this->methodRecall(message_id: $request->getMsgId(), postpone: true);
|
||||
return null;
|
||||
}
|
||||
if ((($response['error_code'] === -503 || $response['error_message'] === '-503') && !\in_array($request->getConstructor(), ['messages.getBotCallbackAnswer', 'messages.getInlineBotResults'], true))
|
||||
|| (\in_array($response['error_message'], ['MSGID_DECREASE_RETRY', 'HISTORY_GET_FAILED', 'RPC_CONNECT_FAILED', 'RPC_CALL_FAIL', 'RPC_MCGET_FAIL', 'PERSISTENT_TIMESTAMP_OUTDATED', 'RPC_MCGET_FAIL', 'no workers running', 'No workers running'], true))) {
|
||||
EventLoop::delay(1.0, fn () => $this->methodRecall(message_id: $request->getMsgId()));
|
||||
|
@ -22,6 +22,7 @@ namespace danog\MadelineProto;
|
||||
|
||||
use danog\MadelineProto\TL\Conversion\DOMEntities;
|
||||
use danog\MadelineProto\TL\Conversion\Extension;
|
||||
use danog\MadelineProto\TL\Conversion\MarkdownEntities;
|
||||
use Parsedown;
|
||||
use Webmozart\Assert\Assert;
|
||||
|
||||
@ -107,11 +108,11 @@ abstract class StrTools extends Extension
|
||||
*
|
||||
* @see https://docs.madelineproto.xyz/API_docs/methods/messages.sendMessage.html#usage-of-parse_mode
|
||||
*
|
||||
* @return \danog\MadelineProto\TL\Conversion\DOMEntities Object containing message and entities
|
||||
* @return \danog\MadelineProto\TL\Conversion\MarkdownEntities Object containing message and entities
|
||||
*/
|
||||
public static function markdownToMessageEntities(string $markdown): \danog\MadelineProto\TL\Conversion\DOMEntities
|
||||
public static function markdownToMessageEntities(string $markdown): \danog\MadelineProto\TL\Conversion\MarkdownEntities
|
||||
{
|
||||
return new DOMEntities(Parsedown::instance()->line($markdown));
|
||||
return new MarkdownEntities($markdown);
|
||||
}
|
||||
/**
|
||||
* Convert a message and a set of entities to HTML.
|
||||
|
@ -415,7 +415,7 @@ trait BotAPI
|
||||
* @param array $arguments Arguments
|
||||
* @internal
|
||||
*/
|
||||
public function parseMode(array $arguments): array
|
||||
public static function parseMode(array $arguments): array
|
||||
{
|
||||
if (($arguments['message'] ?? '') === '' || !isset($arguments['parse_mode'])) {
|
||||
return $arguments;
|
||||
@ -430,10 +430,11 @@ trait BotAPI
|
||||
$arguments['parse_mode'] = \str_replace('textParseMode', '', $arguments['parse_mode']['_']);
|
||||
}
|
||||
if (\stripos($arguments['parse_mode'], 'markdown') !== false) {
|
||||
$arguments['message'] = Parsedown::instance()->line($arguments['message']);
|
||||
$arguments['parse_mode'] = 'HTML';
|
||||
}
|
||||
if (\stripos($arguments['parse_mode'], 'html') !== false) {
|
||||
$entities = new MarkdownEntities($arguments['message']);
|
||||
$arguments['message'] = $entities->message;
|
||||
$arguments['entities'] = \array_merge($arguments['entities'] ?? [], $entities->entities);
|
||||
unset($arguments['parse_mode']);
|
||||
} elseif (\stripos($arguments['parse_mode'], 'html') !== false) {
|
||||
$entities = new DOMEntities($arguments['message']);
|
||||
$arguments['message'] = $entities->message;
|
||||
$arguments['entities'] = \array_merge($arguments['entities'] ?? [], $entities->entities);
|
||||
@ -470,7 +471,7 @@ trait BotAPI
|
||||
if (\trim($cur) !== '') {
|
||||
$multiple_args[] = [
|
||||
...$multiple_args_base,
|
||||
'message' => $cur
|
||||
'message' => $cur,
|
||||
];
|
||||
}
|
||||
$cur = $vv;
|
||||
@ -481,7 +482,7 @@ trait BotAPI
|
||||
if (\trim($cur) !== '') {
|
||||
$multiple_args[] = [
|
||||
...$multiple_args_base,
|
||||
'message' => $cur
|
||||
'message' => $cur,
|
||||
];
|
||||
}
|
||||
|
||||
@ -500,35 +501,17 @@ trait BotAPI
|
||||
$newentity['length'] = $entity['length'] - (StrTools::mbStrlen($multiple_args[$i]['message']) - $entity['offset']);
|
||||
$entity['length'] = StrTools::mbStrlen($multiple_args[$i]['message']) - $entity['offset'];
|
||||
$offset += $entity['length'];
|
||||
//StrTools::mbStrlen($multiple_args[$i]['message']);
|
||||
$newentity['offset'] = $offset;
|
||||
$prev_length = StrTools::mbStrlen($multiple_args[$i]['message']);
|
||||
$multiple_args[$i]['message'] = \rtrim($multiple_args[$i]['message']);
|
||||
$diff = $prev_length - StrTools::mbStrlen($multiple_args[$i]['message']);
|
||||
if ($diff) {
|
||||
$entity['length'] -= $diff;
|
||||
foreach ($args['entities'] as $key => &$eentity) {
|
||||
if ($key > $k) {
|
||||
$eentity['offset'] -= $diff;
|
||||
}
|
||||
}
|
||||
}
|
||||
$orig = $multiple_args[$i]['message'];
|
||||
$trimmed = rtrim($orig);
|
||||
$diff = StrTools::mbStrlen($orig) - StrTools::mbStrlen($trimmed);
|
||||
$entity['length'] -= $diff;
|
||||
$multiple_args[$i]['message'] = $trimmed;
|
||||
$multiple_args[$i]['entities'][] = $entity;
|
||||
$i++;
|
||||
$entity = $newentity;
|
||||
continue;
|
||||
}
|
||||
$prev_length = StrTools::mbStrlen($multiple_args[$i]['message']);
|
||||
$multiple_args[$i]['message'] = \rtrim($multiple_args[$i]['message']);
|
||||
$diff = $prev_length - StrTools::mbStrlen($multiple_args[$i]['message']);
|
||||
if ($diff) {
|
||||
$entity['length'] -= $diff;
|
||||
foreach ($args['entities'] as $key => &$eentity) {
|
||||
if ($key > $k) {
|
||||
$eentity['offset'] -= $diff;
|
||||
}
|
||||
}
|
||||
}
|
||||
$multiple_args[$i]['entities'][] = $entity;
|
||||
break;
|
||||
} while (true);
|
||||
|
@ -15,7 +15,7 @@ use Throwable;
|
||||
/**
|
||||
* Class that converts HTML to a message + set of entities.
|
||||
*/
|
||||
final class DOMEntities
|
||||
final class DOMEntities extends Entities
|
||||
{
|
||||
/** Converted entities */
|
||||
public readonly array $entities;
|
||||
@ -52,6 +52,11 @@ final class DOMEntities
|
||||
$message .= "\n";
|
||||
return 1;
|
||||
}
|
||||
$length = 0;
|
||||
if ($node->nodeName === 'li') {
|
||||
$message .= "- ";
|
||||
$length += 2;
|
||||
}
|
||||
/** @var DOMElement $node */
|
||||
$entity = match ($node->nodeName) {
|
||||
's', 'strike', 'del' =>['_' => 'messageEntityStrike'],
|
||||
@ -64,10 +69,9 @@ final class DOMEntities
|
||||
'pre' => ['_' => 'messageEntityPre', 'language' => $node->getAttribute('language') ?? ''],
|
||||
'tg-emoji' => ['_' => 'messageEntityCustomEmoji', 'document_id' => (int) $node->getAttribute('emoji-id')],
|
||||
'emoji' => ['_' => 'messageEntityCustomEmoji', 'document_id' => (int) $node->getAttribute('id')],
|
||||
'a' => self::handleA($node),
|
||||
'a' => self::handleLink($node->getAttribute('href')),
|
||||
default => null,
|
||||
};
|
||||
$length = 0;
|
||||
foreach ($node->childNodes as $sub) {
|
||||
$length += self::parseNode($sub, $offset+$length, $message, $entities);
|
||||
}
|
||||
@ -91,16 +95,4 @@ final class DOMEntities
|
||||
}
|
||||
return $length;
|
||||
}
|
||||
|
||||
private static function handleA(DOMElement $node): array
|
||||
{
|
||||
$href = $node->getAttribute('href');
|
||||
if (\preg_match('|^mention:(.+)|', $href, $matches) || \preg_match('|^tg://user\\?id=(.+)|', $href, $matches)) {
|
||||
return ['_' => 'inputMessageEntityMentionName', 'user_id' => $matches[1]];
|
||||
}
|
||||
if (\preg_match('|^emoji:(\d+)$|', $href, $matches) || \preg_match('|^tg://emoji\\?id=(.+)|', $href, $matches)) {
|
||||
return ['_' => 'messageEntityCustomEmoji', 'document_id' => (int) $matches[1]];
|
||||
}
|
||||
return ['_' => 'messageEntityTextUrl', 'url' => $href];
|
||||
}
|
||||
}
|
||||
|
32
src/TL/Conversion/Entities.php
Normal file
32
src/TL/Conversion/Entities.php
Normal file
@ -0,0 +1,32 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace danog\MadelineProto\TL\Conversion;
|
||||
|
||||
use danog\MadelineProto\Exception;
|
||||
use danog\MadelineProto\StrTools;
|
||||
use DOMDocument;
|
||||
use DOMElement;
|
||||
use DOMNode;
|
||||
use DOMText;
|
||||
use Throwable;
|
||||
|
||||
/**
|
||||
* Class that converts HTML or markdown to a message + set of entities.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
abstract class Entities
{
    /**
     * Build the entity descriptor that corresponds to a link target.
     *
     * The target is checked against the special schemes first, in this order:
     *  - `mention:<id>` or `tg://user?id=<id>` yields a mention-by-name entity
     *    carrying the captured id as-is (a string);
     *  - `emoji:<digits>` or `tg://emoji?id=<id>` yields a custom emoji entity
     *    carrying the captured id cast to int;
     *  - anything else yields a plain text URL entity carrying the target unchanged.
     *
     * @param string $href Link target to classify
     *
     * @return array Entity descriptor, without offset and length
     */
    protected static function handleLink(string $href): array
    {
        $captured = [];

        $isMention = \preg_match('|^mention:(.+)|', $href, $captured)
            || \preg_match('|^tg://user\\?id=(.+)|', $href, $captured);
        if ($isMention) {
            return [
                '_' => 'inputMessageEntityMentionName',
                'user_id' => $captured[1],
            ];
        }

        $isCustomEmoji = \preg_match('|^emoji:(\d+)$|', $href, $captured)
            || \preg_match('|^tg://emoji\\?id=(.+)|', $href, $captured);
        if ($isCustomEmoji) {
            return [
                '_' => 'messageEntityCustomEmoji',
                'document_id' => (int) $captured[1],
            ];
        }

        return [
            '_' => 'messageEntityTextUrl',
            'url' => $href,
        ];
    }
}
|
191
src/TL/Conversion/MarkdownEntities.php
Normal file
191
src/TL/Conversion/MarkdownEntities.php
Normal file
@ -0,0 +1,191 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace danog\MadelineProto\TL\Conversion;
|
||||
|
||||
use AssertionError;
|
||||
use danog\MadelineProto\Exception;
|
||||
use danog\MadelineProto\StrTools;
|
||||
use DOMDocument;
|
||||
use DOMElement;
|
||||
use DOMNode;
|
||||
use DOMText;
|
||||
use Throwable;
|
||||
|
||||
/**
|
||||
* Class that converts Markdown to a message + set of entities.
|
||||
*/
|
||||
final class MarkdownEntities extends Entities
{
    /** Converted entities */
    public readonly array $entities;
    /** Converted message */
    public readonly string $message;

    /**
     * Parse a markdown string into plain text plus a list of entity descriptors.
     *
     * Recognised markers: `*` bold, `_` italic, `__` underline, `~` strike,
     * `||` spoiler, a single backtick for inline code, a triple backtick for a
     * pre block (the text up to the first space or newline after the opening
     * fence is taken as the language), and `[text](target)` for links, whose
     * target is classified by handleLink(). A backslash copies the following
     * byte into the message verbatim.
     *
     * Markers are tracked on a stack, so a marker only closes the most recently
     * opened one. Markers still open when the input ends are dropped from the
     * text and produce no entity.
     *
     * Offsets and lengths are measured with StrTools::mbStrlen(). Trailing
     * spaces and line breaks are excluded from an entity's length, and entities
     * whose remaining length is not positive are discarded.
     *
     * @param string $markdown Markdown to parse
     *
     * @throws Exception If the markdown is malformed (for example an unclosed
     *                   pre block or link target); any error raised while
     *                   parsing is rewrapped into this exception type.
     */
    public function __construct(string $markdown)
    {
        $markdown = \str_replace("\r\n", "\n", $markdown);
        // The input is not modified past this point, so its byte length is loop-invariant.
        $total = \strlen($markdown);
        try {
            $message = '';
            $messageLen = 0;
            $entities = [];
            $offset = 0;
            // Each entry is [marker token, message offset at which the entity starts].
            $stack = [];
            while ($offset < $total) {
                // Copy everything up to the next character that may be a marker.
                $len = \strcspn($markdown, '*_~`[]|\\', $offset);
                $piece = \substr($markdown, $offset, $len);
                $offset += $len;
                if ($offset === $total) {
                    $message .= $piece;
                    break;
                }

                $char = $markdown[$offset++];
                $next = $markdown[$offset] ?? '';
                if ($char === '\\') {
                    // Escape: emit the following byte literally and skip it.
                    $message .= $piece.$next;
                    $messageLen += StrTools::mbStrlen($piece)+1;
                    $offset++;
                    continue;
                }

                // Set when the token comes from "[": it shares the "](" stack token
                // with the closer, but must never be treated as one.
                $opensLink = false;
                if ($char === '_' && $next === '_') {
                    $offset++;
                    $char = '__';
                } elseif ($char === '|') {
                    if ($next === '|') {
                        $offset++;
                        $char = '||';
                    } else {
                        // A lone pipe is ordinary text.
                        $message .= $piece.$char;
                        $messageLen += StrTools::mbStrlen($piece)+1;
                        continue;
                    }
                } elseif ($char === '[') {
                    $char = '](';
                    $opensLink = true;
                } elseif ($char === ']') {
                    if (!$stack || \end($stack)[0] !== '](') {
                        // No link is currently open on top of the stack: literal bracket.
                        $message .= $piece.$char;
                        $messageLen += StrTools::mbStrlen($piece)+1;
                        continue;
                    }
                    if ($next !== '(') {
                        throw new AssertionError("( expected @ pos $offset!");
                    }
                    $offset++;
                    $char = "](";
                } elseif ($char === '`' && $next === '`' && ($markdown[$offset+1] ?? '') === '`') {
                    $message .= $piece;
                    $messageLen += StrTools::mbStrlen($piece);

                    // Skip the remaining two backticks, then read the language tag.
                    $offset += 2;
                    $langLen = \strcspn($markdown, "\n ", $offset);
                    $language = \substr($markdown, $offset, $langLen);
                    $offset += $langLen;
                    // The fence may be the very last thing in the input, so guard the read.
                    if (($markdown[$offset] ?? '') === "\n") {
                        $offset++;
                    }

                    $posClose = self::findUnescaped($markdown, '```', $offset);
                    if ($posClose === false) {
                        throw new AssertionError("Unclosed ``` opened @ pos $offset!");
                    }

                    $start = $messageLen;

                    $message .= $piece = \substr($markdown, $offset, $posClose-$offset);
                    $pieceLen = StrTools::mbStrlen($piece);
                    $messageLen += $pieceLen;

                    // The text stays in the message, but the entity must not cover trailing blanks.
                    $pieceLen -= self::countTrailingBlanks($piece);
                    if ($pieceLen > 0) {
                        $entities []= [
                            '_' => 'messageEntityPre',
                            'language' => $language,
                            'offset' => $start,
                            'length' => $pieceLen,
                        ];
                    }

                    $offset = $posClose+3;
                    continue;
                }

                if (!$opensLink && $stack && \end($stack)[0] === $char) {
                    // The token matches the innermost open marker: close it.
                    [, $start] = \array_pop($stack);
                    if ($char === '](') {
                        $posClose = self::findUnescaped($markdown, ')', $offset);
                        if ($posClose === false) {
                            throw new AssertionError("Unclosed ) opened @ pos $offset!");
                        }
                        $entity = self::handleLink(\substr($markdown, $offset, $posClose-$offset));
                        $offset = $posClose+1;
                    } else {
                        $entity = match ($char) {
                            '*' => ['_' => 'messageEntityBold'],
                            '_' => ['_' => 'messageEntityItalic'],
                            '__' => ['_' => 'messageEntityUnderline'],
                            '`' => ['_' => 'messageEntityCode'],
                            '~' => ['_' => 'messageEntityStrike'],
                            '||' => ['_' => 'messageEntitySpoiler'],
                            default => throw new AssertionError("Unknown char $char @ pos $offset!")
                        };
                    }
                    $message .= $piece;
                    $messageLen += StrTools::mbStrlen($piece);

                    // The entity ends at the current end of the message; exclude trailing blanks.
                    $lengthReal = $messageLen-$start-self::countTrailingBlanks($message);
                    if ($lengthReal > 0) {
                        $entities []= $entity + ['offset' => $start, 'length' => $lengthReal];
                    }
                } else {
                    // Opening marker: remember where the entity starts.
                    $message .= $piece;
                    $messageLen += StrTools::mbStrlen($piece);
                    $stack []= [$char, $messageLen];
                }
            }

            $this->message = $message;
            $this->entities = $entities;
        } catch (Throwable $e) {
            throw new Exception("An error occurred while parsing $markdown: {$e->getMessage()}", $e->getCode());
        }
    }

    /**
     * Find the first occurrence of $needle at or after $from that is not
     * immediately preceded by a backslash.
     *
     * Only the single preceding byte is inspected, so a delimiter that follows
     * an escaped backslash is still considered escaped.
     *
     * @return int|false Byte position of the match, or false if there is none
     */
    private static function findUnescaped(string $haystack, string $needle, int $from): int|false
    {
        $pos = $from;
        while (($pos = \strpos($haystack, $needle, $pos)) !== false) {
            if ($pos > 0 && $haystack[$pos-1] === '\\') {
                $pos++;
                continue;
            }
            return $pos;
        }
        return false;
    }

    /**
     * Count the spaces, carriage returns and line feeds at the end of $text.
     */
    private static function countTrailingBlanks(string $text): int
    {
        return \strlen($text) - \strlen(\rtrim($text, " \r\n"));
    }
}
|
@ -39,7 +39,7 @@ class EntitiesTest extends MadelineTestCase
|
||||
if (\strtolower($mode) === 'html') {
|
||||
$this->assertEquals(
|
||||
\str_replace(['<br/>', ' </b>', 'mention:'], ['<br>', '</b> ', 'tg://user?id='], $htmlReverse ?? $html),
|
||||
StrTools::messageEntitiesToHtml(
|
||||
StrTools::entitiesToHtml(
|
||||
$resultMTProto['message'],
|
||||
$resultMTProto['entities'],
|
||||
true
|
||||
@ -138,7 +138,7 @@ class EntitiesTest extends MadelineTestCase
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
'test** test**',
|
||||
'test* test*',
|
||||
'test test',
|
||||
[
|
||||
[
|
||||
@ -220,7 +220,7 @@ class EntitiesTest extends MadelineTestCase
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
'test **bold *bold and italic* bold**',
|
||||
'test *bold _bold and italic_ bold*',
|
||||
'test bold bold and italic bold',
|
||||
[
|
||||
[
|
||||
@ -235,6 +235,37 @@ class EntitiesTest extends MadelineTestCase
|
||||
],
|
||||
],
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
"a\nb\nc",
|
||||
"a\nb\nc",
|
||||
[],
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
"a\n\nb\n\nc",
|
||||
"a\n\nb\n\nc",
|
||||
[],
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
"a\n\n\nb\n\n\nc",
|
||||
"a\n\n\nb\n\n\nc",
|
||||
[],
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
"a\n```php\n<?php\necho 'yay';\n```",
|
||||
"a\n<?php\necho 'yay';\n",
|
||||
[
|
||||
[
|
||||
'offset' => 2,
|
||||
'length' => 17,
|
||||
'type' => 'pre',
|
||||
'language' => 'php'
|
||||
]
|
||||
],
|
||||
],
|
||||
[
|
||||
'html',
|
||||
'<b>\'"</b>',
|
||||
@ -269,20 +300,74 @@ class EntitiesTest extends MadelineTestCase
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
'_a b c <b> & " '_',
|
||||
'a b c <b> & " \'',
|
||||
'_a b c <b> & " \' \_ \* \~ \\__',
|
||||
'a b c <b> & " \' _ * ~ _',
|
||||
[
|
||||
[
|
||||
'offset' => 0,
|
||||
'length' => 15,
|
||||
'length' => 23,
|
||||
'type' => 'italic',
|
||||
],
|
||||
],
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
'test *italic* **bold** <u>underlined</u> ~~strikethrough~~ <pre language="test">pre</pre> <code>code</code> <spoiler>spoiler</spoiler>',
|
||||
'test italic bold underlined strikethrough pre code spoiler',
|
||||
'[link ](https://google.com/)test',
|
||||
'link test',
|
||||
[
|
||||
[
|
||||
'offset' => 0,
|
||||
'length' => 4,
|
||||
'type' => 'text_url',
|
||||
'url' => 'https://google.com/'
|
||||
],
|
||||
],
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
'[link ](https://google.com/)',
|
||||
'link ',
|
||||
[
|
||||
[
|
||||
'offset' => 0,
|
||||
'length' => 4,
|
||||
'type' => 'text_url',
|
||||
'url' => 'https://google.com/'
|
||||
],
|
||||
],
|
||||
],
|
||||
[
|
||||
'html',
|
||||
'<a href="https://google.com/">link </a>test',
|
||||
'link test',
|
||||
[
|
||||
[
|
||||
'offset' => 0,
|
||||
'length' => 4,
|
||||
'type' => 'text_url',
|
||||
'url' => 'https://google.com/'
|
||||
],
|
||||
],
|
||||
'<a href="https://google.com/">link</a> test',
|
||||
],
|
||||
[
|
||||
'html',
|
||||
'<a href="https://google.com/">link </a>',
|
||||
'link ',
|
||||
[
|
||||
[
|
||||
'offset' => 0,
|
||||
'length' => 4,
|
||||
'type' => 'text_url',
|
||||
'url' => 'https://google.com/'
|
||||
],
|
||||
],
|
||||
'<a href="https://google.com/">link</a> ',
|
||||
],
|
||||
[
|
||||
'markdown',
|
||||
'test _italic_ *bold* __underlined__ ~strikethrough~ ```test pre``` `code` ||spoiler||',
|
||||
'test italic bold underlined strikethrough pre code spoiler',
|
||||
[
|
||||
[
|
||||
'offset' => 5,
|
||||
@ -306,17 +391,17 @@ class EntitiesTest extends MadelineTestCase
|
||||
],
|
||||
[
|
||||
'offset' => 42,
|
||||
'length' => 3,
|
||||
'length' => 4,
|
||||
'type' => 'pre',
|
||||
'language' => 'test',
|
||||
],
|
||||
[
|
||||
'offset' => 46,
|
||||
'offset' => 47,
|
||||
'length' => 4,
|
||||
'type' => 'code',
|
||||
],
|
||||
[
|
||||
'offset' => 51,
|
||||
'offset' => 52,
|
||||
'length' => 7,
|
||||
'type' => 'spoiler',
|
||||
],
|
||||
|
Loading…
Reference in New Issue
Block a user