From 6e329b17a90066ae6bd3392744cfcea28594e4e3 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Mon, 17 Nov 2025 13:49:56 +0800 Subject: [PATCH] Enhance Telegram message formatting: add detailed guidelines for MarkdownV2 usage, including support for strikethrough, headings, and lists. Implement smart escaping for Markdown to preserve formatting while avoiding API errors. --- app/agent/prompt/__init__.py | 20 +++++++- app/modules/telegram/telegram.py | 82 +++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/app/agent/prompt/__init__.py b/app/agent/prompt/__init__.py index e383e165..2d3f2066 100644 --- a/app/agent/prompt/__init__.py +++ b/app/agent/prompt/__init__.py @@ -68,12 +68,28 @@ class PromptManager: if "telegram" in channel_lower: return """Messages are being sent through the **Telegram** channel. You must follow these format requirements: +**Supported Formatting:** - **Bold text**: Use `*text*` (single asterisk, not double asterisks) - **Italic text**: Use `_text_` (underscore) - **Code**: Use `` `text` `` (backtick) - **Links**: Use `[text](url)` format -- **Important**: Avoid using special characters that need escaping in MarkdownV2: `_*[]()~`>#+-=|{}.!` unless they are part of the formatting syntax -- **Best practice**: Keep formatting simple, avoid nested formatting to ensure proper rendering in Telegram""" +- **Strikethrough**: Use `~text~` (tilde) + +**IMPORTANT - Headings and Lists:** +- **DO NOT use heading syntax** (`#`, `##`, `###`) - Telegram MarkdownV2 does NOT support it +- **Instead, use bold text for headings**: `*Heading Text*` followed by a blank line +- **DO NOT use list syntax** (`-`, `*`, `+` at line start) - these will be escaped and won't display as lists +- **For lists**, use plain text with line breaks, or use bold for list item labels: `*Item 1:* description` + +**Examples:** +- ❌ Wrong heading: `# Main Title` or `## Subtitle` +- ✅ Correct heading: `*Main Title*` (followed by blank line) or `*Subtitle*` (followed by blank line) +- ❌ Wrong list: `- Item 1` or `* Item 2` +- ✅ Correct list format: `*Item 1:* description` or use plain text with line breaks + +**Special Characters:** +- Avoid using special characters that need escaping in MarkdownV2: `_*[]()~`>#+-=|{}.!` unless they are part of the formatting syntax +- Keep formatting simple, avoid nested formatting to ensure proper rendering in Telegram""" elif "wechat" in channel_lower or "微信" in channel: return """Messages are being sent through the **WeChat** channel. Please follow these format requirements: diff --git a/app/modules/telegram/telegram.py b/app/modules/telegram/telegram.py index fec0e9c7..90825bba 100644 --- a/app/modules/telegram/telegram.py +++ b/app/modules/telegram/telegram.py @@ -240,10 +240,15 @@ class Telegram: try: if title: + # 标题总是转义(因为通常标题不包含Markdown格式) title = self.escape_markdown(title) if text: if escape_markdown: + # 完全转义模式:转义所有特殊字符 text = self.escape_markdown(text) + else: + # 智能转义模式:保留Markdown格式,只转义普通文本中的特殊字符 + text = self.escape_markdown_smart(text) if title: caption = f"*{title}*\n{text}" else: @@ -610,4 +615,79 @@ class Telegram: # 按 Telegram MarkdownV2 规则转义特殊字符 if not isinstance(text, str): return str(text) if text is not None else "" - return self._markdown_escape_pattern.sub(r'\\\1', text) \ No newline at end of file + return self._markdown_escape_pattern.sub(r'\\\1', text) + + def escape_markdown_smart(self, text: str) -> str: + """ + 智能转义Markdown文本:只转义不在Markdown标记内的特殊字符 + 这样可以保留已有的Markdown格式(如*粗体*、_斜体_、[链接](url)等), + 同时转义普通文本中的特殊字符以避免API错误 + + 注意:Telegram MarkdownV2不支持以下语法,这些字符会被转义: + - 标题语法(#、##、###)会被转义为 \#、\##、\### + - 列表语法(-、*、+)会被转义为 \-、\*、\+ + - 引用语法(>)会被转义为 \> + + 建议使用加粗文本模拟标题:*标题文本* + + :param text: 要转义的文本 + :return: 转义后的文本 + """ + if not isinstance(text, str): + return str(text) if text is not None else "" + + # 如果没有特殊字符,直接返回 + if not any(char in self._escape_chars for char in text): + return text + + # 标记受保护的区域(Markdown标记内的内容不转义) + protected = [False] * len(text) + + # 按优先级匹配Markdown标记(从最复杂到最简单) + # 1. 链接:[text](url) - 必须最先匹配 + link_pattern = r'\[([^\]]*)\]\(([^)]*)\)' + for match in re.finditer(link_pattern, text): + for i in range(match.start(), match.end()): + protected[i] = True + + # 2. 粗体:*text*(单个*,不是**) + bold_pattern = r'(?