diff --git a/src/danog/MadelineProto/MTProto.php b/src/danog/MadelineProto/MTProto.php
index 6b5d2c33e..570b369d7 100644
--- a/src/danog/MadelineProto/MTProto.php
+++ b/src/danog/MadelineProto/MTProto.php
@@ -67,6 +67,7 @@ class MTProto extends AsyncConstruct implements TLCallback
use \danog\MadelineProto\SecretChats\ResponseHandler;
use \danog\MadelineProto\SecretChats\SeqNoHandler;
use \danog\MadelineProto\TL\Conversion\BotAPI;
+ use \danog\MadelineProto\TL\Conversion\Entities;
use \danog\MadelineProto\TL\Conversion\BotAPIFiles;
use \danog\MadelineProto\TL\Conversion\TD;
use \danog\MadelineProto\VoIP\AuthKeyHandler;
@@ -112,7 +113,7 @@ class MTProto extends AsyncConstruct implements TLCallback
*
* @var int
*/
- const V = 157;
+ const V = 158;
/**
* Release version.
*
diff --git a/src/danog/MadelineProto/TL/Conversion/BotAPI.php b/src/danog/MadelineProto/TL/Conversion/BotAPI.php
index a1f83cc6a..d5dc1f7a6 100644
--- a/src/danog/MadelineProto/TL/Conversion/BotAPI.php
+++ b/src/danog/MadelineProto/TL/Conversion/BotAPI.php
@@ -29,10 +29,7 @@ use const danog\Decoder\TYPES_IDS;
trait BotAPI
{
- private function htmlEntityDecode(string $stuff): string
- {
- return \html_entity_decode(\preg_replace('#< *br */? *>#', "\n", $stuff));
- }
+
/**
* @return ((bool|mixed|string)[][]|string)[][]
*
@@ -355,109 +352,6 @@ trait BotAPI
}
return $arguments;
}
- private function parseNode($node, &$entities, &$new_message, &$offset): \Generator
- {
- switch ($node->nodeName) {
- case 'br':
- $new_message .= "\n";
- $offset++;
- break;
- case 's':
- case 'strike':
- case 'del':
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $entities[] = ['_' => 'messageEntityStrike', 'offset' => $offset, 'length' => $length];
- $new_message .= $text;
- $offset += $length;
- break;
- case 'u':
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $entities[] = ['_' => 'messageEntityUnderline', 'offset' => $offset, 'length' => $length];
- $new_message .= $text;
- $offset += $length;
- break;
- case 'blockquote':
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $entities[] = ['_' => 'messageEntityBlockquote', 'offset' => $offset, 'length' => $length];
- $new_message .= $text;
- $offset += $length;
- break;
- case 'b':
- case 'strong':
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $entities[] = ['_' => 'messageEntityBold', 'offset' => $offset, 'length' => $length];
- $new_message .= $text;
- $offset += $length;
- break;
- case 'i':
- case 'em':
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $entities[] = ['_' => 'messageEntityItalic', 'offset' => $offset, 'length' => $length];
- $new_message .= $text;
- $offset += $length;
- break;
- case 'code':
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $entities[] = ['_' => 'messageEntityCode', 'offset' => $offset, 'length' => $length];
- $new_message .= $text;
- $offset += $length;
- break;
- case 'pre':
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $language = $node->getAttribute('language');
- if ($language === null) {
- $language = '';
- }
- $entities[] = ['_' => 'messageEntityPre', 'offset' => $offset, 'length' => $length, 'language' => $language];
- $new_message .= $text;
- $offset += $length;
- break;
- case 'p':
- foreach ($node->childNodes as $node) {
- yield from $this->parseNode($node, $entities, $new_message, $offset);
- }
- break;
- case 'a':
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $href = $node->getAttribute('href');
- if (\preg_match('|mention:(.*)|', $href, $matches) || \preg_match('|tg://user\\?id=(.*)|', $href, $matches)) {
- $mention = yield from $this->getInfo($matches[1]);
- if (!isset($mention['InputUser'])) {
- throw new \danog\MadelineProto\Exception(\danog\MadelineProto\Lang::$current_lang['peer_not_in_db']);
- }
- $entities[] = ['_' => 'inputMessageEntityMentionName', 'offset' => $offset, 'length' => $length, 'user_id' => $mention['InputUser']];
- } elseif (\preg_match('|buttonurl:(.*)|', $href)) {
- if (!isset($entities['buttons'])) {
- $entities['buttons'] = [];
- }
- if (\strpos(\substr($href, -4), '|:new|') !== false) {
- $entities['buttons'][] = ['_' => 'keyboardButtonUrl', 'text' => $text, 'url' => \str_replace(['buttonurl:', ':new'], '', $href), 'new' => true];
- } else {
- $entities['buttons'][] = ['_' => 'keyboardButtonUrl', 'text' => $text, 'url' => \str_replace('buttonurl:', '', $href)];
- }
- break;
- } else {
- $entities[] = ['_' => 'messageEntityTextUrl', 'offset' => $offset, 'length' => $length, 'url' => $href];
- }
- $new_message .= $text;
- $offset += $length;
- break;
- default:
- $text = $this->htmlEntityDecode($node->textContent);
- $length = StrTools::mbStrlen($text);
- $new_message .= $text;
- $offset += $length;
- break;
- }
- }
/**
* Convert markdown and HTML messages.
*
@@ -479,28 +373,29 @@ trait BotAPI
$arguments['parse_mode'] = \str_replace('textParseMode', '', $arguments['parse_mode']['_']);
}
if (\stripos($arguments['parse_mode'], 'markdown') !== false) {
- $arguments['message'] = \Parsedown::instance()->line($arguments['message']);
- $arguments['parse_mode'] = 'HTML';
+ [$arguments['message'],$arguments['entities']] = $this->parseText(($arguments['message']),'markdown');
}
if (\stripos($arguments['parse_mode'], 'html') !== false) {
- $new_message = '';
- $arguments['message'] = \trim($this->htmlFixtags($arguments['message']));
- $dom = new \DOMDocument();
- $dom->loadHTML(\mb_convert_encoding($arguments['message'], 'HTML-ENTITIES', 'UTF-8'));
- if (!isset($arguments['entities'])) {
- $arguments['entities'] = [];
- }
- $offset = 0;
- foreach ($dom->getElementsByTagName('body')->item(0)->childNodes as $node) {
- yield from $this->parseNode($node, $arguments['entities'], $new_message, $offset);
- }
+ [$arguments['message'],$arguments['entities']] = $this->parseText(($arguments['message']),'html');
+
+ /**
+ * Deprecated feature: building reply_markup from parsed "buttons" entities is disabled below; it may be restored in the future.
+ * @deprecated
+ */
+ /*
if (isset($arguments['entities']['buttons'])) {
$arguments['reply_markup'] = $this->buildRows($arguments['entities']['buttons']);
unset($arguments['entities']['buttons']);
- }
- unset($arguments['parse_mode']);
- $arguments['message'] = $new_message;
+ } */
+
}
+ /**
+ * New feature: "combined" parse mode (Markdown mixed with HTML). To use it, some special characters must be escaped.
+ */
+ if(\stripos($arguments['parse_mode'], 'combined') !== false){
+ [$arguments['message'],$arguments['entities']] = yield $this->parseText(\trim($arguments['message']),'markdownhtml');
+ }
+ unset($arguments['parse_mode']);
return $arguments;
}
/**
@@ -630,41 +525,6 @@ trait BotAPI
}
return $finalArray;
}
- private function htmlFixtags($text): string
- {
- $diff = 0;
- \preg_match_all('#(.*?)(<(\\bu\\b|\\bs\\b|\\ba\\b|\\bb\\b|\\bstrong\\b|\\bblockquote\\b|\\bstrike\\b|\\bdel\\b|\\bem\\b|i|\\bcode\\b|\\bpre\\b)[^>]*>)(.*?)([<]\\s*/\\s*\\3[>])#is', $text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
- if ($matches) {
- foreach ($matches as $match) {
- if (\trim($match[1][0]) != '') {
- $mod = \htmlentities($match[1][0]);
- $temp = \substr($text, 0, $match[1][1] + $diff);
- $temp .= $mod;
- $temp .= \substr($text, $match[1][1] + $diff + \strlen($match[1][0]));
- $diff += \strlen($mod) - \strlen($match[1][0]);
- $text = $temp;
- }
- $mod = \htmlentities($match[4][0]);
- $temp = \substr($text, 0, $match[4][1] + $diff);
- $temp .= $mod;
- $temp .= \substr($text, $match[4][1] + $diff + \strlen($match[4][0]));
- $diff += \strlen($mod) - \strlen($match[4][0]);
- $text = $temp;
- }
- $diff = 0;
- \preg_match_all('##is', $text, $matches, PREG_OFFSET_CAPTURE);
- foreach ($matches[2] as $match) {
- $mod = \htmlentities($match[0]);
- $temp = \substr($text, 0, $match[1] + $diff);
- $temp .= $mod;
- $temp .= \substr($text, $match[1] + $diff + \strlen($match[0]));
- $diff += \strlen($mod) - \strlen($match[0]);
- $text = $temp;
- }
- return $text;
- }
- return \htmlentities($text);
- }
/**
* @return ((array|string)[][]|string)[]
*
diff --git a/src/danog/MadelineProto/TL/Conversion/Entities.php b/src/danog/MadelineProto/TL/Conversion/Entities.php
new file mode 100644
index 000000000..4139b30f0
--- /dev/null
+++ b/src/danog/MadelineProto/TL/Conversion/Entities.php
@@ -0,0 +1,1474 @@
+.
+ *
+ * @author Daniil Gentili
/i", $separator, $string);
+ }
+
+ /**
+  * getEntityName
+  * Look up the messageEntity* type name that belongs to an entity code.
+  *
+  * @param int $code Entity code constant (BOLD, ITALIC, UNDERLINE, ...).
+  *
+  * @return string|bool The type name, or false when the code has no name.
+  */
+ private function getEntityName(int $code): string|bool
+ {
+     // Ordered (code, name) pairs: the first loosely-equal code wins.
+     $table = [
+         [BOLD, "messageEntityBold"],
+         [ITALIC, "messageEntityItalic"],
+         [UNDERLINE, "messageEntityUnderline"],
+         [STRIKE, "messageEntityStrike"],
+         [SPOILER, "messageEntitySpoiler"],
+         [TEXTURL, "messageEntityTextUrl"],
+         [TEXTMENTION, "messageEntityMentionName"],
+         [CODE, "messageEntityCode"],
+         [PRE, "messageEntityPre"],
+     ];
+     foreach ($table as [$candidate, $entityName]) {
+         if ($code == $candidate) {
+             return $entityName;
+         }
+     }
+     return false;
+ }
+
+ /**
+  * getEntityCode
+  * Look up the entity code that belongs to a messageEntity* type name.
+  *
+  * @param string $name Type name as produced by getEntityName().
+  *
+  * @return int|bool The matching entity code, or false when the name is unknown.
+  */
+ private function getEntityCode(string $name): int|bool
+ {
+     // Probe the codes in a fixed order and return the first whose name matches.
+     $codes = [BOLD, ITALIC, UNDERLINE, STRIKE, SPOILER, TEXTURL, TEXTMENTION, CODE, PRE];
+     foreach ($codes as $code) {
+         if ($name == $this->getEntityName($code)) {
+             return $code;
+         }
+     }
+     return false;
+ }
+
+ /**
+  * getEntityNameFromTag
+  * Map a markup tag name to the code constant used for it.
+  *
+  * Despite the method name, the result is a code constant, not a type name.
+  *
+  * @param string $tag Tag name, compared case-sensitively (e.g. "b", "em", "tg-spoiler").
+  *
+  * @return int|bool The code constant for the tag, or false for an unrecognised tag.
+  */
+ private function getEntityNameFromTag(string $tag): int|bool
+ {
+     return match ($tag) {
+         "b", "strong", "bold" => BOLD,
+         "i", "em", "italic" => ITALIC,
+         "ins", "u", "underline" => UNDERLINE,
+         "s", "del", "strike", "strikethrough" => STRIKE,
+         "spoiler", "tg-spoiler" => SPOILER,
+         "span" => SPANTAG,
+         "a" => ATAG,
+         "code" => CODE,
+         "pre" => PRE,
+         default => false,
+     };
+ }
+
+ /**
+  * setText
+  * Append a text fragment to the output buffer and advance the running offset.
+  *
+  * The fragment is passed through htmlspecialchars_decode() first; the offset
+  * grows by the decoded fragment's length as reported by $this->strlen().
+  *
+  * @param string $text Fragment to append.
+  *
+  * @return void
+  */
+ private function setText(string $text)
+ {
+     $decoded = htmlspecialchars_decode($text);
+     $units = $this->strlen($decoded);
+     $this->text .= $decoded;
+     $this->offset += $units;
+ }
+
+ /**
+  * decode
+  * Convert a UTF-16LE byte string to UTF-8.
+  *
+  * @param string $str UTF-16LE encoded bytes.
+  *
+  * @return array|string|false Result of mb_convert_encoding().
+  */
+ private function decode(string $str): array|string|false
+ {
+     $utf8 = mb_convert_encoding($str, "UTF-8", "UTF-16LE");
+     return $utf8;
+ }
+
+ /**
+  * encode
+  * Convert a UTF-8 string to UTF-16LE.
+  *
+  * @param string $str UTF-8 encoded text.
+  *
+  * @return array|string|false Result of mb_convert_encoding().
+  */
+ private function encode(string $str): array|string|false
+ {
+     $utf16 = mb_convert_encoding($str, "UTF-16LE", "UTF-8");
+     return $utf16;
+ }
+
+ /**
+  * strlen
+  * Measure a string in 2-byte UTF-16 units.
+  *
+  * The string is converted with $this->encode() and the resulting byte count
+  * is halved.
+  *
+  * @param string $str Text to measure.
+  *
+  * @return int|float Byte length of the encoded string divided by two.
+  */
+ private function strlen(string $str): int|float
+ {
+     $utf16 = $this->encode($str);
+     return strlen($utf16) / 2;
+ }
+
+ /**
+  * substr
+  * Cut a slice out of a UTF-16LE byte string, addressed in 2-byte units, and
+  * return it converted by $this->decode().
+  *
+  * @param string $string UTF-16LE encoded bytes.
+  *
+  * @param int $offset Start position in 2-byte units.
+  *
+  * @param null|int $length Number of 2-byte units to take; null reads to the end.
+  *
+  * @return array|string|false Result of $this->decode() on the slice.
+  */
+ private function substr(
+     string $string,
+     int $offset,
+     ?int $length = null
+ ): array|string|false {
+     // A null length must stay null so the native substr() reads through to
+     // the end of the string; multiplying it would coerce it to 0 and every
+     // default-length call would return an empty string.
+     $byteLength = $length === null ? null : $length * 2;
+     return $this->decode(substr($string, $offset * 2, $byteLength));
+ }
+
+ /**
+ * setOffset
+ * setOffset for text
+ *
+ * @param string $start
+ *
+ * @return void
+ */
+ private function setOffset(string $start, mixed $end = "", "
");
+ } elseif ($type == TEXTURL && isset($entity["url"])) {
+ $this->setOffset('');
+ } elseif ($type == TEXTMENTION) {
+ $this->setOffset(
+ ''
+ );
+ } elseif ($type == PRE) {
+ if (isset($entity["language"])) {
+ $this->setOffset(
+ '
"
+ );
+ } else {
+ $this->setOffset("',
+ "
", "
");
+ }
+ } elseif ($type == BOLD) {
+ $this->setOffset("", "");
+ } elseif ($type == SPOILER) {
+ $this->setOffset("", "");
+ } elseif ($type == ITALIC) {
+ $this->setOffset("", "");
+ } elseif ($type == STRIKE) {
+ $this->setOffset("", "");
+ } elseif ($type == UNDERLINE) {
+ $this->setOffset("", "");
+ }
+ }
+ foreach ($this->setOffset2 as $key => $value) {
+ if (!isset($this->setOffset[$key])) {
+ $this->setOffset[$key] = [];
+ }
+ $this->setOffset[$key] = array_merge(
+ array_reverse($this->setOffset2[$key]),
+ $this->setOffset[$key]
+ );
+ }
+ $htmlext = "";
+ $deltag = [
+ "&" . "\0" . "a" . "\0" . "m" . "\0" . "p" . "\0" . ";" . "\0" . "",
+ "&" . "\0" . "l" . "\0" . "t" . "\0" . ";" . "\0" . "",
+ "&" . "\0" . "g" . "\0" . "t" . "\0" . ";" . "\0" . "",
+ ];
+ $deltag2 = [
+ '/&\\000/',
+ '/\\<\\000/',
+ '/\\>\\000/',
+ ];
+ for ($offset = 0; $offset < strlen($utf16) / 2; $offset++) {
+ $t = substr($utf16, $offset * 2, 2);
+ if (isset($this->setOffset[$offset])) {
+ foreach ($this->setOffset[$offset] as $tt) {
+ $htmlext .= $this->encode($tt);
+ }
+ unset($this->setOffset[$offset]);
+ }
+ $htmlext .= $specialchars ? preg_replace($deltag2, $deltag, $t) : $t;
+ }
+ foreach ($this->setOffset as $off) {
+ foreach ($off as $tt) {
+ $htmlext .= $this->encode($tt);
+ }
+ }
+ return $this->decode($htmlext);
+ }
+
+ /**
+ * entitiesToMarkdownV1
+ * Convert message entities to legacy (v1) Markdown markup.
+ *
+ * Markup is produced for CODE, TEXTURL (when a url is set), TEXTMENTION, PRE,
+ * BOLD and ITALIC entities; any other entity adds no markup here.
+ *
+ * @param string $text Plain message text.
+ *
+ * @param object|array $entities Entities to render; iterated with foreach.
+ *
+ * @param bool $slashmarkdown When true, the characters _, *, ` and [ found in the text are prefixed with a backslash.
+ *
+ * @return string
+ */
+
+ public function entitiesToMarkdownV1(
+ string $text,
+ object|array $entities = [],
+ bool $slashmarkdown = true
+ ): string {
+ $this->setOffset = [];
+ $this->setOffset2 = [];
+ $utf16 = $this->encode($text);
+ // NOTE(review): checkEntity() and setOffset() are defined outside this block;
+ // presumably checkEntity() reports the entity kind through $type and remembers
+ // the entity position that setOffset() then uses — confirm.
+ foreach ($entities as $entity) {
+ $entity = $this->checkEntity($entity, $type);
+ if ($type == CODE) {
+ $this->setOffset("`", "`");
+ } elseif ($type == TEXTURL && isset($entity["url"])) {
+ $this->setOffset("[", "](" . $entity["url"] . ")");
+ } elseif ($type == TEXTMENTION) {
+ // The user id is read from user.id, then user_id, falling back to 0.
+ $this->setOffset(
+ "[",
+ "](tg://user?id=" .
+ ($entity["user"]["id"] ?? ($entity["user_id"] ?? 0)) .
+ ")"
+ );
+ } elseif ($type == PRE) {
+ if (isset($entity["language"])) {
+ $this->setOffset("```" . $entity["language"] . "\n", "```");
+ } else {
+ $this->setOffset("```", "```");
+ }
+ } elseif ($type == BOLD) {
+ $this->setOffset("*", "*");
+ } elseif ($type == ITALIC) {
+ $this->setOffset("_", "_");
+ }
+ }
+ // For every key of setOffset2, put its entries (in reverse order) in front of
+ // the setOffset entries stored under the same key.
+ foreach ($this->setOffset2 as $key => $value) {
+ if (!isset($this->setOffset[$key])) {
+ $this->setOffset[$key] = [];
+ }
+ $this->setOffset[$key] = array_merge(
+ array_reverse($this->setOffset2[$key]),
+ $this->setOffset[$key]
+ );
+ }
+ $htmlext = "";
+ // Replacements: backslash + character, spelled out as UTF-16LE byte pairs.
+ $deltag = [
+ "\\" . "\0" . "_" . "\0" . "",
+ "\\" . "\0" . "*" . "\0" . "",
+ "\\" . "\0" . "`" . "\0" . "",
+ "\\" . "\0" . "[" . "\0" . "",
+ ];
+ // Patterns: one ASCII character followed by a NUL byte, i.e. that character
+ // as a single UTF-16LE unit. Order matches $deltag.
+ $deltag2 = [
+ '/_\\000/',
+ '/\\*\\000/',
+ '/`\\000/',
+ '/\\[\\000/',
+ ];
+ // Walk the encoded text two bytes at a time; markup stored for the current
+ // offset is emitted before the unit itself.
+ for ($offset = 0; $offset < strlen($utf16) / 2; $offset++) {
+ $t = substr($utf16, $offset * 2, 2);
+ if (isset($this->setOffset[$offset])) {
+ foreach ($this->setOffset[$offset] as $tt) {
+ $htmlext .= $this->encode($tt);
+ }
+ unset($this->setOffset[$offset]);
+ }
+ $htmlext .= $slashmarkdown ? preg_replace($deltag2, $deltag, $t) : $t;
+ }
+ // Emit whatever markup is still stored (offsets the loop never visited).
+ foreach ($this->setOffset as $off) {
+ foreach ($off as $tt) {
+ $htmlext .= $this->encode($tt);
+ }
+ }
+ return $this->decode($htmlext);
+ }
+
+ /**
+  * entitiesToMarkdown
+  * Convert message entities to Markdown markup.
+  *
+  * Markup is produced for CODE, TEXTURL (when a url is set), TEXTMENTION, PRE,
+  * SPOILER, BOLD, ITALIC, STRIKE and UNDERLINE entities.
+  *
+  * @param string $text Plain message text.
+  *
+  * @param object|array $entities Entities to render; iterated with foreach.
+  *
+  * @param bool $slashmarkdown When true, reserved Markdown characters found in
+  *                            the text are prefixed with a backslash.
+  *
+  * @return string
+  */
+ public function entitiesToMarkdown(
+     string $text,
+     object|array $entities = [],
+     bool $slashmarkdown = true
+ ): string {
+     $this->setOffset = [];
+     $this->setOffset2 = [];
+     $utf16 = $this->encode($text);
+     // NOTE(review): checkEntity() and setOffset() are defined outside this
+     // block; presumably checkEntity() reports the entity kind through $type
+     // and remembers the position that setOffset() then uses — confirm.
+     foreach ($entities as $entity) {
+         $entity = $this->checkEntity($entity, $type);
+         if ($type == CODE) {
+             $this->setOffset("`", "`");
+         } elseif ($type == TEXTURL && isset($entity["url"])) {
+             $this->setOffset("[", "](" . $entity["url"] . ")");
+         } elseif ($type == TEXTMENTION) {
+             // The user id is read from user.id, then user_id, falling back to 0.
+             $this->setOffset(
+                 "[",
+                 "](tg://user?id=" .
+                     ($entity["user"]["id"] ?? ($entity["user_id"] ?? 0)) .
+                     ")"
+             );
+         } elseif ($type == PRE) {
+             if (isset($entity["language"])) {
+                 $this->setOffset("```" . $entity["language"] . "\n", "```");
+             } else {
+                 $this->setOffset("```", "```");
+             }
+         } elseif ($type == SPOILER) {
+             $this->setOffset("||", "||");
+         } elseif ($type == BOLD) {
+             $this->setOffset("**", "**");
+         } elseif ($type == ITALIC) {
+             $this->setOffset("__", "__");
+         } elseif ($type == STRIKE) {
+             $this->setOffset("~~", "~~");
+         } elseif ($type == UNDERLINE) {
+             $this->setOffset("_", "_");
+         }
+     }
+     // For every key of setOffset2, put its entries (in reverse order) in
+     // front of the setOffset entries stored under the same key.
+     foreach ($this->setOffset2 as $key => $value) {
+         if (!isset($this->setOffset[$key])) {
+             $this->setOffset[$key] = [];
+         }
+         $this->setOffset[$key] = array_merge(
+             array_reverse($this->setOffset2[$key]),
+             $this->setOffset[$key]
+         );
+     }
+     $htmlext = "";
+     // Replacements: backslash + character, spelled out as UTF-16LE byte
+     // pairs. Entries line up one-to-one with the patterns in $deltag2.
+     $deltag = [
+         "\\" . "\0" . "_" . "\0" . "",
+         "\\" . "\0" . "*" . "\0" . "",
+         "\\" . "\0" . "[" . "\0" . "",
+         "\\" . "\0" . "]" . "\0" . "",
+         "\\" . "\0" . "(" . "\0" . "",
+         "\\" . "\0" . ")" . "\0" . "",
+         "\\" . "\0" . "~" . "\0" . "",
+         // The backtick must be re-emitted after the backslash: an empty
+         // string here would drop the character and output three bytes,
+         // misaligning every following 2-byte unit.
+         "\\" . "\0" . "`" . "\0" . "",
+         "\\" . "\0" . ">" . "\0" . "",
+         "\\" . "\0" . "#" . "\0" . "",
+         "\\" . "\0" . "+" . "\0" . "",
+         "\\" . "\0" . "-" . "\0" . "",
+         "\\" . "\0" . "=" . "\0" . "",
+         "\\" . "\0" . "|" . "\0" . "",
+         "\\" . "\0" . "{" . "\0" . "",
+         "\\" . "\0" . "}" . "\0" . "",
+         "\\" . "\0" . "." . "\0" . "",
+         "\\" . "\0" . "!" . "\0" . "",
+     ];
+
+     // Patterns: one ASCII character followed by a NUL byte, i.e. that
+     // character as a single UTF-16LE unit.
+     $deltag2 = [
+         '/_\\000/',
+         '/\\*\\000/',
+         '/\\[\\000/',
+         '/\\]\\000/',
+         '/\\(\\000/',
+         '/\\)\\000/',
+         '/~\\000/',
+         '/`\\000/',
+         '/\\>\\000/',
+         '/\\#\\000/',
+         '/\\+\\000/',
+         '/\\-\\000/',
+         '/\\=\\000/',
+         '/\\|\\000/',
+         '/\\{\\000/',
+         '/\\}\\000/',
+         '/\\.\\000/',
+         '/\\!\\000/',
+     ];
+     // Walk the encoded text two bytes at a time; markup stored for the
+     // current offset is emitted before the unit itself.
+     $unitCount = strlen($utf16) / 2;
+     for ($offset = 0; $offset < $unitCount; $offset++) {
+         $t = substr($utf16, $offset * 2, 2);
+         if (isset($this->setOffset[$offset])) {
+             foreach ($this->setOffset[$offset] as $tt) {
+                 $htmlext .= $this->encode($tt);
+             }
+             unset($this->setOffset[$offset]);
+         }
+         $htmlext .= $slashmarkdown ? preg_replace($deltag2, $deltag, $t) : $t;
+     }
+     // Emit whatever markup is still stored (offsets the loop never visited).
+     foreach ($this->setOffset as $off) {
+         foreach ($off as $tt) {
+             $htmlext .= $this->encode($tt);
+         }
+     }
+     return $this->decode($htmlext);
+ }
+
+ /**
+ * markdownV1ToHtml
+ * convert markdownv1 to html
+ *
+ * @param string $str
+ *
+ * @param bool $specialchars
+ *
+ * @return string
+ */
+ public function markdownV1ToHtml(string $str, bool $specialchars = true): string
+ {
+ if ($specialchars) {
+ $str = $this->htmlSpecialChars($str);
+ }
+ $len = mb_strlen($str);
+ $backslash = ["_", "*", "`", "["];
+ $marks = [];
+ $marksi = -1;
+
+ $i = 0;
+ $is = function ($string) use (&$i, &$str) {
+ return mb_substr($str, $i, mb_strlen($string)) == $string;
+ };
+ $find = function ($str, $find, &$i) use ($backslash) {
+ $findlen = mb_strlen($find);
+ $newstr = "";
+ for ($i = 0; $i < mb_strlen($str); $i++) {
+ $curchar = mb_substr($str, $i, 1);
+
+ if (
+ $curchar == "\\" &&
+ in_array(mb_substr($str, $i + 1, 1), $backslash)
+ ) {
+ $newstr .= mb_substr($str, $i + 1, 1);
+ $i++;
+ } elseif (mb_substr($str, $i, $findlen) == $find) {
+ return $newstr;
+ } else {
+ $newstr .= $curchar;
+ }
+ }
+ return false;
+ };
+ $html = "";
+ $htmli = 0;
+ $setstr = function ($starttag) use (&$html, &$htmli) {
+ $html .= $starttag;
+ $htmli += mb_strlen($starttag);
+ };
+ $i = 0;
+ $setmark = function ($mark, &$currentmarki = 0, $fakemark = false) use (
+ &$marks,
+ &$marksi,
+ &$htmli,
+ &$i
+ ) {
+ if ($marksi === -1 || $marks[$marksi][0] !== $mark) {
+ $marksi++;
+ $marks[$marksi] = [
+ $fakemark == false ? $mark : $fakemark,
+ $htmli,
+ $i,
+ ];
+ return true;
+ } else {
+ $currentmarki = $marks[$marksi][1];
+ unset($marks[$marksi]);
+ $marksi--;
+ return false;
+ }
+ };
+ $currentmarki = 0;
+ $setstr2 = function ($endtag, $starttaglen) use (
+ &$setstr,
+ &$html,
+ &$htmli,
+ &$currentmarki
+ ) {
+ if ($htmli - $currentmarki > $starttaglen) {
+ $setstr($endtag);
+ } else {
+ $htmli -= $starttaglen;
+ $html = mb_substr($html, 0, $htmli);
+ }
+ };
+ for ($i = 0; $i < $len; $i++) {
+ $curchar = mb_substr($str, $i, 1);
+ if (
+ $curchar == "\\" &&
+ in_array(mb_substr($str, $i + 1, 1), $backslash)
+ ) {
+ $setstr(mb_substr($str, $i + 1, 1));
+ $i++;
+ } elseif ($curchar == "*") {
+ if ($setmark("*", $currentmarki)) {
+ $setstr("");
+ } else {
+ $setstr2("", 3);
+ }
+ } elseif ($curchar == "_") {
+ if ($setmark($curchar, $currentmarki)) {
+ $setstr("");
+ } else {
+ $setstr2("", 3);
+ }
+ } elseif ($curchar == "[") {
+ $setmark("[", $currentmarki, "]");
+ } elseif ($curchar == "]") {
+ if (!$setmark("]", $currentmarki, false) && $is("](")) {
+ $txt = mb_substr(
+ $html,
+ $currentmarki,
+ $htmli - $currentmarki
+ );
+ if ($txt !== "") {
+ $i++;
+ $strfind = $find(mb_substr($str, $i + 1), ")", $pos);
+ if ($strfind !== false) {
+ $i += $pos + 1;
+ $html =
+ mb_substr($html, 0, $currentmarki) .
+ '' .
+ $txt .
+ "";
+ $htmli = mb_strlen($html);
+ }
+ }
+ }
+ } elseif ($curchar == "`") {
+ if ($is("```")) {
+ $i += 2;
+ $strfind = $find(mb_substr($str, $i + 1), "```", $pos);
+ if ($strfind !== false) {
+ $i += $pos + 3;
+ if ($strfind !== "") {
+ $lang = "";
+ $ex = explode("\n", $f, 2);
+ if (isset($ex[1])) {
+ $exx = explode(" ", $ex[0], 2);
+ if (isset($exx[1])) {
+ $ex[1] = " " . $exx[1];
+ }
+ $lang = trim($exx[0]);
+ $strfind = $ex[1];
+ }
+
+ if ($lang) {
+ $strfind = trim($f);
+ if ($strfind !== "") {
+ $setstr(
+ '
' .
+ $strfind .
+ "
"
+ );
+ }
+ } else {
+ $setstr("" . $strfind . ""); + } + } + } else { + throw new Exception( + 'Can\'t find end of Pre entity at byte offset ' . $i + ); + } + } else { + $strfind = $find(mb_substr($str, $i + 1), "`", $pos); + if ($strfind !== false) { + if ($strfind !== "") { + $setstr("
" . $strfind . "
");
+ }
+ $i += $pos + 1;
+ } else {
+ throw new Exception(
+ 'Can\'t find end of Code entity at byte offset ' .
+ $i
+ );
+ }
+ }
+ } elseif (in_array($curchar, $backslash)) {
+ throw new Exception(
+ "Character '$curchar' is reserved and must be escaped with the preceding '\'"
+ );
+ } else {
+ $setstr($curchar);
+ }
+ }
+ foreach ($marks as $mark) {
+ $ar = [
+ "*" => "Bold",
+ "_" => "Italic",
+ "]" => "TextUrl",
+ ];
+ throw new Exception(
+ 'Can\'t find end of ' .
+ ($ar[$mark[0]] ?? $mark[0]) .
+ " entity at byte offset " .
+ $mark[2]
+ );
+ }
+ return $html;
+ }
+
+ /**
+ * markdownToHtml
+ * convert html tags to markdown format
+ *
+ * @param string $str
+ *
+ * @param bool $specialchars
+ *
+ * @return string
+ */
+ public function markdownToHtml(string $str, bool $specialchars = true): string
+ {
+ if ($specialchars) {
+ $str = $this->htmlSpecialChars($str);
+ }
+ $len = mb_strlen($str);
+ $backslash = [
+ "_",
+ "*",
+ "[",
+ "]",
+ "(",
+ ")",
+ "~",
+ "`",
+ ">",
+ "#",
+ "+",
+ "-",
+ "=",
+ "|",
+ "{",
+ "}",
+ ".",
+ "!",
+ ];
+ $marks = [];
+ $marksi = -1;
+
+ $i = 0;
+ $is = function ($string) use (&$i, &$str) {
+ return mb_substr($str, $i, mb_strlen($string)) == $string;
+ };
+ $find = function ($str, $find, &$i) use ($backslash) {
+ $findlen = mb_strlen($find);
+ $newstr = "";
+ for ($i = 0; $i < mb_strlen($str); $i++) {
+ $curchar = mb_substr($str, $i, 1);
+
+ if (
+ $curchar == "\\" &&
+ in_array(mb_substr($str, $i + 1, 1), $backslash)
+ ) {
+ $newstr .= mb_substr($str, $i + 1, 1);
+ $i++;
+ } elseif (mb_substr($str, $i, $findlen) == $find) {
+ return $newstr;
+ } else {
+ $newstr .= $curchar;
+ }
+ }
+ return false;
+ };
+ $html = "";
+ $htmli = 0;
+ $setstr = function ($starttag) use (&$html, &$htmli) {
+ $html .= $starttag;
+ $htmli += mb_strlen($starttag);
+ };
+ $i = 0;
+ $setmark = function ($mark, &$currentmarki = 0, $fakemark = false) use (
+ &$marks,
+ &$marksi,
+ &$htmli,
+ &$i
+ ) {
+ if ($marksi === -1 || $marks[$marksi][0] !== $mark) {
+ $marksi++;
+ $marks[$marksi] = [
+ $fakemark == false ? $mark : $fakemark,
+ $htmli,
+ $i,
+ ];
+ return true;
+ } else {
+ $currentmarki = $marks[$marksi][1];
+ unset($marks[$marksi]);
+ $marksi--;
+ return false;
+ }
+ };
+ $currentmarki = 0;
+ $setstr2 = function ($endtag, $starttaglen) use (
+ &$setstr,
+ &$html,
+ &$htmli,
+ &$currentmarki
+ ) {
+ if ($htmli - $currentmarki > $starttaglen) {
+ $setstr($endtag);
+ } else {
+ $htmli -= $starttaglen;
+ $html = mb_substr($html, 0, $htmli);
+ }
+ };
+ for ($i = 0; $i < $len; $i++) {
+ $curchar = mb_substr($str, $i, 1);
+ if (
+ $curchar == "\\" &&
+ in_array(mb_substr($str, $i + 1, 1), $backslash)
+ ) {
+ $setstr(mb_substr($str, $i + 1, 1));
+ $i++;
+ } elseif ($curchar == "*") {
+ $tag = "i";
+ if (
+ ($marksi === -1 || $marks[$marksi][0] !== $curchar) &&
+ $is("**")
+ ) {
+ $curchar = "**";
+ $tag = "b";
+ $i++;
+ }
+ if ($setmark($curchar, $currentmarki)) {
+ $setstr("<$tag>");
+ } else {
+ $setstr2("$tag>", 3);
+ }
+ } elseif ($curchar == "_") {
+ $tag = "u";
+ if (
+ ($marksi === -1 || $marks[$marksi][0] !== $curchar) &&
+ $is("__")
+ ) {
+ $curchar = "__";
+ $tag = "i";
+ $i++;
+ }
+ if ($setmark($curchar, $currentmarki)) {
+ $setstr("<$tag>");
+ } else {
+ $setstr2("$tag>", 3);
+ }
+ } elseif ($curchar == "~" && $is("~~")) {
+ if ($setmark("~~", $currentmarki)) {
+ $setstr("' .
+ $strfind .
+ "
"
+ );
+ }
+ } else {
+ $setstr("" . $strfind . ""); + } + } + } else { + throw new Exception( + 'Can\'t find end of Pre entity at byte offset ' . $i + ); + } + } else { + $strfind = $find(mb_substr($str, $i + 1), "`", $pos); + if ($strfind !== false) { + if ($strfind !== "") { + $setstr("
" . $strfind . "
");
+ }
+ $i += $pos + 1;
+ } else {
+ throw new Exception(
+ 'Can\'t find end of Code entity at byte offset ' .
+ $i
+ );
+ }
+ }
+ } elseif (in_array($curchar, $backslash)) {
+ throw new Exception(
+ "Character '$curchar' is reserved and must be escaped with the preceding '\'"
+ );
+ } else {
+ $setstr($curchar);
+ }
+ }
+ foreach ($marks as $mark) {
+ $ar = [
+ "**" => "Bold",
+ "__" => "Italic",
+ "*" => "Italic",
+ "_" => "Underline",
+ "~~" => "Strikethrough",
+ "]" => "TextUrl",
+ "||" => "Spoiler",
+ ];
+ throw new Exception(
+ 'Can\'t find end of ' .
+ ($ar[$mark[0]] ?? $mark[0]) .
+ " entity at byte offset " .
+ $mark[2]
+ );
+ }
+ return $html;
+ }
+
+ /**
+ * elementReader
+ * Recursively walk a DOM element: text nodes are appended through setText(),
+ * and a recognised tag produces an entry in $this->entities whose length is
+ * the amount the offset advanced while its children were read.
+ *
+ * @param mixed $element Node to read; its tagName, attributes, childNodes and getLineNo() are used.
+ *
+ * @param bool $tag False for the outermost call; recursive calls pass the parent's tag name.
+ *
+ * @return void
+ *
+ * @throws Exception When the tag is not recognised and $tag is not false.
+ */
+ private function elementReader($element, $tag = false): void
+ {
+ // Collect the tag name and all attributes into one array.
+ $obj = ["tag" => $element->tagName];
+ foreach ($element->attributes as $attribute) {
+ $obj[$attribute->name] = $attribute->value;
+ }
+ $entitie = false;
+ $entitie_name = $this->getEntityNameFromTag($obj["tag"]);
+ if ($entitie_name !== false) {
+ // NOTE(review): setEntitie() is defined outside this block; presumably it
+ // returns the entity array (including "offset") or a falsy value — confirm.
+ $entitie = $this->setEntitie($entitie_name, $obj, $tag);
+ if ($entitie) {
+ // Reserve the slot before the children are read, so this entity's index
+ // comes before the indexes of entities nested inside it.
+ $this->entities[$this->entitiesid] = [];
+ $ident = $this->entitiesid;
+ $this->entitiesid++;
+ }
+ } else {
+ // An unrecognised tag is tolerated only on the outermost call.
+ if ($tag !== false) {
+ throw new Exception(
+ "Tag " .
+ $element->tagName .
+ ' invalid
+ in line ' .
+ $element->getLineNo()
+ );
+ }
+ }
+ // NOTE(review): every child that is not a text node is recursed into as if it
+ // were an element — confirm the input never contains comment or CDATA nodes.
+ foreach ($element->childNodes as $subElement) {
+ if ($subElement->nodeType == XML_TEXT_NODE) {
+ $this->setText($subElement->wholeText);
+ } else {
+ $this->elementReader($subElement, $obj["tag"]);
+ }
+ }
+ if ($entitie) {
+ // The entity spans everything appended since it was opened; entities that
+ // cover no text are removed again.
+ $entitie["length"] = $this->offset - $entitie["offset"];
+ if ($entitie["length"] > 0) {
+ $this->entities[$ident] = array_merge(
+ $entitie,
+ $this->entities[$ident]
+ );
+ } else {
+ unset($this->entities[$ident]);
+ }
+ }
+ }
+ /**
+ * htmlToEntities
+ * convert html tags to entities
+ *
+ * @param string $text
+ *
+ * @param mixed &$entities
+ *
+ * @return string
+ */
+ public function htmlToEntities(string $text, &$entities): string
+ {
+ $text = $this->br2nl($text);
+ $this->entities = [];
+ $this->entitiesid = 0;
+ $this->offset = 0;
+ $this->text = "";
+ $dom = new \DOMDocument();
+ $internalErrors = libxml_use_internal_errors(true);
+ $dom->loadxml("" . str_replace(['&', ''', '"', '&'], ['&', '\'', "\"", '&'], $text) . "");
+ $ar = libxml_get_errors();
+ if (!empty($ar)) {
+ libxml_clear_errors();
+ foreach ($ar as $er) {
+ $er->message = preg_replace(
+ [
+ "/: and body/",
+ "/and body(.+)/isu",
+ "/body line (.*?) and /",
+ ],
+ [": ", ""],
+ $er->message
+ );
+ if (in_array($er->code, [76, 40, 801, 73, 800])) {
+ if (
+ $er->code == 801 &&
+ $this->getEntityNameFromTag(
+ explode(" ", $er->message, 3)[1]
+ ) !== false
+ ) {
+ continue;
+ }
+ libxml_use_internal_errors($internalErrors);
+ throw new Exception(
+ $er->message . " in line " . $er->line . PHP_EOL
+ );
+ }
+ }
+ }
+ libxml_use_internal_errors($internalErrors);
+ $this->elementReader($dom->getElementsByTagName("body")[0]);
+ $entities = $this->entities;
+ return $this->text;
+ }
+
+ /**
+  * markdownToEntities
+  * Parse Markdown text into plain text plus entities, going through
+  * markdownToHtml() and then htmlToEntities().
+  *
+  * @param string $text Markdown source.
+  *
+  * @param mixed &$entities Receives the entities, by reference.
+  *
+  * @return string Text returned by htmlToEntities().
+  */
+ public function markdownToEntities(string $text, &$entities): string
+ {
+     $html = $this->markdownToHtml($text);
+     return $this->htmlToEntities($html, $entities);
+ }
+
+ /**
+  * markdownV1ToEntities
+  * Parse Markdown v1 text into plain text plus entities, going through
+  * markdownV1ToHtml() and then htmlToEntities().
+  *
+  * @param string $text Markdown v1 source.
+  *
+  * @param mixed &$entities Receives the entities, by reference.
+  *
+  * @return string Text returned by htmlToEntities().
+  */
+ public function markdownV1ToEntities(string $text, &$entities): string
+ {
+     $html = $this->markdownV1ToHtml($text);
+     return $this->htmlToEntities($html, $entities);
+ }
+
+ /**
+  * markdownhtmlToEntities
+  * Parse text that mixes Markdown and HTML into plain text plus entities.
+  *
+  * markdownToHtml() is called with its second argument set to false, then the
+  * result is handed to htmlToEntities().
+  *
+  * @param string $text Mixed Markdown/HTML source.
+  *
+  * @param mixed &$entities Receives the entities, by reference.
+  *
+  * @return string Text returned by htmlToEntities().
+  */
+ public function markdownhtmlToEntities(string $text, &$entities): string
+ {
+     $html = $this->markdownToHtml($text, false);
+     return $this->htmlToEntities($html, $entities);
+ }
+
+ /**
+  * markdownV1htmlToEntities
+  * Parse text that mixes Markdown v1 and HTML into plain text plus entities.
+  *
+  * markdownV1ToHtml() is called with its second argument set to false, then
+  * the result is handed to htmlToEntities().
+  *
+  * @param string $text Mixed Markdown v1/HTML source.
+  *
+  * @param mixed &$entities Receives the entities, by reference.
+  *
+  * @return string Text returned by htmlToEntities().
+  */
+ public function markdownV1htmlToEntities(string $text, &$entities): string
+ {
+     $html = $this->markdownV1ToHtml($text, false);
+     return $this->htmlToEntities($html, $entities);
+ }
+
+ /**
+  * htmlToMarkdown
+  * Convert HTML markup to Markdown by parsing it with htmlToEntities() and
+  * rendering the result with entitiesToMarkdown().
+  *
+  * @param string $str HTML source.
+  *
+  * @param bool $slashmarkdown Forwarded to entitiesToMarkdown().
+  *
+  * @return string
+  */
+ public function htmlToMarkdown(string $str, bool $slashmarkdown = true): string
+ {
+     $plain = $this->htmlToEntities($str, $entities);
+     return $this->entitiesToMarkdown($plain, $entities, $slashmarkdown);
+ }
+
+ /**
+  * htmlToMarkdownv1
+  * Convert HTML markup to Markdown v1 by parsing it with htmlToEntities() and
+  * rendering the result with entitiesToMarkdownV1().
+  *
+  * @param string $str HTML source.
+  *
+  * @param bool $slashmarkdown Forwarded to entitiesToMarkdownV1().
+  *
+  * @return string
+  */
+ public function htmlToMarkdownv1(string $str, bool $slashmarkdown = true): string
+ {
+     $plain = $this->htmlToEntities($str, $entities);
+     return $this->entitiesToMarkdownV1($plain, $entities, $slashmarkdown);
+ }
+
+ /**
+  * htmlSpecialChars
+  * Escape the three markup-significant characters &, < and > as HTML entities.
+  *
+  * Quotes are left untouched.
+  *
+  * @param string $str Raw text.
+  *
+  * @return string Text with &, < and > replaced by their entity forms.
+  */
+ private function htmlSpecialChars(string $str): string
+ {
+     // "&" comes first: str_replace() applies the pairs in order, so the
+     // ampersands introduced by the "<" and ">" replacements are not escaped
+     // a second time.
+     return str_replace(["&", "<", ">"], ["&amp;", "&lt;", "&gt;"], $str);
+ }
+
+ /**
+  * parseText (main function)
+  * Parse formatted text in the given mode and return the plain text together
+  * with its entities.
+  *
+  * Accepted modes (case-insensitive): html, markdown, markdownv2, markdownv1,
+  * markdownhtml, markdownv2html, markdownv1html.
+  *
+  * @param string $text Formatted source text.
+  *
+  * @param string $mode Parse mode name.
+  *
+  * @return string|array Two-element array: [plain text, entities].
+  *
+  * @throws Exception When the mode is not one of the accepted names.
+  */
+ public function parseText(string $text, string $mode = "html"): string|array
+ {
+     $entities = [];
+     switch (strtolower($mode)) {
+         case 'html':
+             $plain = $this->htmlToEntities($text, $entities);
+             break;
+         case 'markdown':
+         case 'markdownv2':
+             $plain = $this->markdownToEntities($text, $entities);
+             break;
+         case 'markdownv1':
+             $plain = $this->markdownV1ToEntities($text, $entities);
+             break;
+         case 'markdownhtml':
+         case 'markdownv2html':
+             $plain = $this->markdownhtmlToEntities($text, $entities);
+             break;
+         case 'markdownv1html':
+             $plain = $this->markdownV1htmlToEntities($text, $entities);
+             break;
+         default:
+             throw new Exception("unsupported mode");
+     }
+
+     return [$plain, $entities];
+ }
+}