From a3a254c2eac3d72861f602e627016f862d68f223 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Tue, 18 Nov 2025 19:09:36 +0800 Subject: [PATCH] fix telegram markdown --- app/modules/telegram/telegram.py | 149 ++++++++++--------------------- requirements.in | 1 + 2 files changed, 48 insertions(+), 102 deletions(-) diff --git a/app/modules/telegram/telegram.py b/app/modules/telegram/telegram.py index 0baf64ed..c2def845 100644 --- a/app/modules/telegram/telegram.py +++ b/app/modules/telegram/telegram.py @@ -10,6 +10,7 @@ import telebot from telebot import apihelper from telebot.types import InputFile, InlineKeyboardMarkup, InlineKeyboardButton from telebot.types import InputMediaPhoto +import telegramify_markdown from app.core.config import settings from app.core.context import MediaInfo, Context @@ -31,7 +32,6 @@ class Telegram: _callback_handlers: Dict[str, Callable] = {} # 存储回调处理器 _user_chat_mapping: Dict[str, str] = {} # userid -> chat_id mapping for reply targeting _bot_username: Optional[str] = None # Bot username for mention detection - _escape_chars = r'_*[]()~`>#+-=|{}.!' # Telegram MarkdownV2 def __init__(self, TELEGRAM_TOKEN: Optional[str] = None, TELEGRAM_CHAT_ID: Optional[str] = None, **kwargs): """ @@ -237,21 +237,23 @@ class Telegram: return False try: - if title: - title = self.escape_markdown_smart(title) - - if text: - text = self.escape_markdown_smart(text) - if title: - caption = f"*{title}*\n{text}" - else: - caption = text + # 构建完整的 Markdown 文本,然后统一转换 + if title and text: + caption = f"**{title}**\n{text}" + elif title: + caption = f"**{title}**" + elif text: + caption = text else: - caption = f"*{title}*" + caption = "" if link: caption = f"{caption}\n[查看详情]({link})" + # 使用 telegramify-markdown 转换整个文本 + if caption: + caption = self.escape_markdown_smart(caption) + # Determine target chat_id with improved logic using user mapping chat_id = self._determine_target_chat_id(userid, original_chat_id) @@ -314,10 +316,8 @@ class Telegram: return None try: - if title: - title = self.escape_markdown_smart(title) - - index, image, caption = 1, "", "*%s*" % title + # 构建完整的 Markdown 文本,然后统一转换 + index, image, caption = 1, "", f"**{title}**" if title else "" for media in medias: if not image: image = media.get_message_image() @@ -339,6 +339,10 @@ class Telegram: if link: caption = f"{caption}\n[查看详情]({link})" + # 使用 telegramify-markdown 转换整个文本 + if caption: + caption = self.escape_markdown_smart(caption) + # Determine target chat_id with improved logic using user mapping chat_id = self._determine_target_chat_id(userid, original_chat_id) @@ -378,10 +382,8 @@ class Telegram: return None try: - if title: - title = self.escape_markdown_smart(title) - - index, caption = 1, "*%s*" % title + # 构建完整的 Markdown 文本,然后统一转换 + index, caption = 1, f"**{title}**" if title else "" image = torrents[0].media_info.get_message_image() for context in torrents: torrent = context.torrent_info @@ -402,6 +404,10 @@ class Telegram: if link: caption = f"{caption}\n[查看详情]({link})" + # 使用 telegramify-markdown 转换整个文本 + if caption: + caption = self.escape_markdown_smart(caption) + # Determine target chat_id with improved logic using user mapping chat_id = self._determine_target_chat_id(userid, original_chat_id) @@ -610,93 +616,32 @@ class Telegram: self._polling_thread.join() logger.info("Telegram消息接收服务已停止") - def escape_markdown_smart(self, text: str) -> str: + @staticmethod + def escape_markdown_smart(text: str) -> str: """ - 智能转义Markdown文本:只转义不在Markdown标记内的特殊字符 - 这样可以保留已有的Markdown格式(如*粗体*、_斜体_、[链接](url)等), - 同时转义普通文本中的特殊字符以避免API错误 + 使用 telegramify-markdown 库将文本转换为 Telegram MarkdownV2 格式 + 支持原始 Markdown 格式转换,自动处理特殊字符转义 - 注意:Telegram MarkdownV2不支持以下语法,这些字符会被转义: - - 标题语法(#、##、###)会被转义为 \#、\##、\### - - 列表语法(-、*、+)会被转义为 \-、\*、\+ - - 引用语法(>)会被转义为 \> - - 建议使用加粗文本模拟标题:*标题文本* - - :param text: 要转义的文本 - :return: 转义后的文本 + :param text: 要转换的文本(可以是纯文本或包含 Markdown 格式) + :return: 转换后的 Telegram MarkdownV2 格式文本 """ if not isinstance(text, str): return str(text) if text is not None else "" - # 如果没有特殊字符,直接返回 - if not any(char in self._escape_chars for char in text): - return text + if not text: + return "" - # 标记受保护的位置(只保护Markdown分隔符本身,不保护内容区域) - protected = [False] * len(text) - - # 按优先级匹配Markdown标记(从最复杂到最简单) - # 1. 链接:[text](url) - 必须最先匹配,只保护分隔符 [ ] ( ) - link_pattern = r'\[([^\]]*)\]\(([^)]*)\)' - for match in re.finditer(link_pattern, text): - # 只保护分隔符:[, ], (, ) - protected[match.start()] = True # [ - # match.end(1) 是第一个捕获组结束位置,即 ] 的位置 - protected[match.end(1)] = True # ] - # ( 在 ] 之后一个字符 - if match.end(1) + 1 < len(text): - protected[match.end(1) + 1] = True # ( - # ) 在匹配结束前一个字符 - if match.end() > 0: - protected[match.end() - 1] = True # ) - - # 2. 粗体:*text*(单个*,不是**),只保护分隔符 * - bold_pattern = r'(?#+\-=|{}.!])', r'\\\1', text) \ No newline at end of file diff --git a/requirements.in b/requirements.in index dace6e0f..de28a79e 100644 --- a/requirements.in +++ b/requirements.in @@ -37,6 +37,7 @@ beautifulsoup4~=4.13.4 pillow~=11.2.1 pillow-avif-plugin~=1.5.2 pyTelegramBotAPI~=4.27.0 +telegramify-markdown~=0.5.2 playwright~=1.53.0 cf_clearance~=0.31.0 torrentool~=1.2.0