From 61001dc4c935a43e8532266927a39ffa60de3cb1 Mon Sep 17 00:00:00 2001
From: Hunter Ashton
Date: Tue, 9 May 2017 04:34:19 -0500
Subject: [PATCH] HTML parser (#109)

* pwr requests (#108)

* on/off switcher for pwrtelegram peer info requests

* Fix PWRTelegram requests if we have no info about peer

* Apply fixes from StyleCI

* Add a new function, html_fixtags

This parses any message with parse_mode of Markdown or HTML, and converts it into a format parse-able by Dom (if it wasn't already readable)

* Apply fixes from Style CI
---
Review note: html_fixtags() was revised after the original commit, so the
index hashes below no longer describe the resulting blob.

 .../MadelineProto/TL/Conversion/BotAPI.php    | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/danog/MadelineProto/TL/Conversion/BotAPI.php b/src/danog/MadelineProto/TL/Conversion/BotAPI.php
index 553b4e185..8191cf98b 100644
--- a/src/danog/MadelineProto/TL/Conversion/BotAPI.php
+++ b/src/danog/MadelineProto/TL/Conversion/BotAPI.php
@@ -431,6 +431,7 @@ trait BotAPI
         if (preg_match('/html/i', $arguments['parse_mode'])) {
             $nmessage = '';
             try {
+                $arguments['message'] = $this->html_fixtags($arguments['message']);
                 $dom = new \DOMDocument();
                 $dom->loadHTML(mb_convert_encoding($arguments['message'], 'HTML-ENTITIES', 'UTF-8'));
                 if (!isset($arguments['entities'])) {
@@ -487,4 +488,38 @@ trait BotAPI
 
         return $finalArray;
     }
+
+    /**
+     * Escape everything in an HTML-formatted message except the supported tags.
+     *
+     * Every <a>, <b>, <strong>, <em>, <i>, <code> or <pre> element keeps its
+     * opening and closing tags. The text between elements, the trimmed text
+     * inside each element and the value of an <a href="..."> attribute are
+     * passed through htmlentities() so DOMDocument::loadHTML() can parse them.
+     *
+     * @param string $text Message text sent with an HTML parse_mode
+     *
+     * @return string The message with everything but the supported tags escaped
+     */
+    public function html_fixtags($text)
+    {
+        // Rebuild the message piece by piece from match offsets. Replacing
+        // fragments with str_replace() on the whole message would also rewrite
+        // identical fragments inside tags or in text that is already escaped.
+        preg_match_all('#(<(a|b|strong|em|i|code|pre)[^>]*>)(.*?)(</\\2>)#is', $text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+        $fixed = '';
+        $cursor = 0;
+        foreach ($matches as $match) {
+            list($element, $start) = $match[0];
+            // Only the href value of a link is escaped; other tags are kept verbatim.
+            $open = preg_replace_callback('#^(<a\s+href=")([^"]*)#is', function ($href) {
+                return $href[1].htmlentities($href[2]);
+            }, $match[1][0]);
+            $fixed .= htmlentities((string) substr($text, $cursor, $start - $cursor));
+            $fixed .= $open.htmlentities(trim($match[3][0])).$match[4][0];
+            $cursor = $start + strlen($element);
+        }
+
+        return $fixed.htmlentities((string) substr($text, $cursor));
+    }
 }