From 6e329b17a90066ae6bd3392744cfcea28594e4e3 Mon Sep 17 00:00:00 2001
From: jxxghp <jxxghp@gmail.com>
Date: Mon, 17 Nov 2025 13:49:56 +0800
Subject: [PATCH] Enhance Telegram message formatting: add detailed guidelines
 for MarkdownV2 usage, including strikethrough support and bold/plain-text
 substitutes for the unsupported heading and list syntax. Implement smart
 escaping for Markdown to preserve formatting while avoiding API errors.

---
 app/agent/prompt/__init__.py     | 20 +++++++-
 app/modules/telegram/telegram.py | 82 +++++++++++++++++++++++++++++++-
 2 files changed, 99 insertions(+), 3 deletions(-)

diff --git a/app/agent/prompt/__init__.py b/app/agent/prompt/__init__.py
index e383e165..2d3f2066 100644
--- a/app/agent/prompt/__init__.py
+++ b/app/agent/prompt/__init__.py
@@ -68,12 +68,28 @@ class PromptManager:
         if "telegram" in channel_lower:
             return """Messages are being sent through the **Telegram** channel. You must follow these format requirements:
 
+**Supported Formatting:**
 - **Bold text**: Use `*text*` (single asterisk, not double asterisks)
 - **Italic text**: Use `_text_` (underscore)
 - **Code**: Use `` `text` `` (backtick)
 - **Links**: Use `[text](url)` format
-- **Important**: Avoid using special characters that need escaping in MarkdownV2: `_*[]()~`>#+-=|{}.!` unless they are part of the formatting syntax
-- **Best practice**: Keep formatting simple, avoid nested formatting to ensure proper rendering in Telegram"""
+- **Strikethrough**: Use `~text~` (tilde)
+
+**IMPORTANT - Headings and Lists:**
+- **DO NOT use heading syntax** (`#`, `##`, `###`) - Telegram MarkdownV2 does NOT support it
+- **Instead, use bold text for headings**: `*Heading Text*` followed by a blank line
+- **DO NOT use list syntax** (`-`, `*`, `+` at line start) - these will be escaped and won't display as lists
+- **For lists**, use plain text with line breaks, or use bold for list item labels: `*Item 1:* description`
+
+**Examples:**
+- ❌ Wrong heading: `# Main Title` or `## Subtitle`
+- ✅ Correct heading: `*Main Title*` (followed by blank line) or `*Subtitle*` (followed by blank line)
+- ❌ Wrong list: `- Item 1` or `* Item 2`
+- ✅ Correct list format: `*Item 1:* description` or use plain text with line breaks
+
+**Special Characters:**
+- Avoid using special characters that need escaping in MarkdownV2: `_*[]()~`>#+-=|{}.!` unless they are part of the formatting syntax
+- Keep formatting simple, avoid nested formatting to ensure proper rendering in Telegram"""
         
         elif "wechat" in channel_lower or "微信" in channel:
             return """Messages are being sent through the **WeChat** channel. Please follow these format requirements:
diff --git a/app/modules/telegram/telegram.py b/app/modules/telegram/telegram.py
index fec0e9c7..90825bba 100644
--- a/app/modules/telegram/telegram.py
+++ b/app/modules/telegram/telegram.py
@@ -240,10 +240,15 @@ class Telegram:
 
         try:
             if title:
+                # 标题总是转义（因为通常标题不包含Markdown格式）
                 title = self.escape_markdown(title)
             if text:
                 if escape_markdown:
+                    # 完全转义模式：转义所有特殊字符
                     text = self.escape_markdown(text)
+                else:
+                    # 智能转义模式：保留Markdown格式，只转义普通文本中的特殊字符
+                    text = self.escape_markdown_smart(text)
                 if title:
                     caption = f"*{title}*\n{text}"
                 else:
@@ -610,4 +615,79 @@ class Telegram:
         # 按 Telegram MarkdownV2 规则转义特殊字符
         if not isinstance(text, str):
             return str(text) if text is not None else ""
-        return self._markdown_escape_pattern.sub(r'\\\1', text)
\ No newline at end of file
+        return self._markdown_escape_pattern.sub(r'\\\1', text)
+
+    def escape_markdown_smart(self, text: str) -> str:
+        """
+        Escape MarkdownV2 text while leaving recognised inline markup untouched.
+        Spans matching [text](url), *bold*, _italic_, `code` and ~strike~ are
+        copied verbatim; elsewhere each character found in self._escape_chars
+        gets a backslash prefix.
+        
+        Heading (#), list (-, +) and quote (>) markers are not recognised as
+        markup here, so they are escaped like ordinary text (assuming they are
+        in self._escape_chars, which is defined outside this method).
+        NOTE(review): nothing inside a recognised span is escaped, so a reserved
+        character there (e.g. the dot in *v1.0*) is sent as-is - confirm OK.
+        
+        :param text: text to escape; non-str values are converted via str(), None becomes ""
+        :return: the escaped text
+        """
+        if not isinstance(text, str):
+            return str(text) if text is not None else ""
+        
+        # Nothing to escape: return the text unchanged
+        if not any(char in self._escape_chars for char in text):
+            return text
+        
+        # Per-character flags: True marks a character inside recognised markup (never escaped)
+        protected = [False] * len(text)
+        
+        # Match markup in priority order; a later match is skipped if it overlaps an earlier one
+        # 1. Links: [text](url) - matched first, so they always win
+        link_pattern = r'\[([^\]]*)\]\(([^)]*)\)'
+        for match in re.finditer(link_pattern, text):
+            for i in range(match.start(), match.end()):
+                protected[i] = True
+        
+        # 2. Bold: *text* (single asterisks only, not **)
+        bold_pattern = r'(?<!\*)\*(?!\*)([^*]+?)(?<!\*)\*(?!\*)'
+        for match in re.finditer(bold_pattern, text):
+            if not any(protected[match.start():match.end()]):
+                for i in range(match.start(), match.end()):
+                    protected[i] = True
+        
+        # 3. Italic: _text_ (single underscores only, not __)
+        italic_pattern = r'(?<!_)_(?!_)([^_]+?)(?<!_)_(?!_)'
+        for match in re.finditer(italic_pattern, text):
+            if not any(protected[match.start():match.end()]):
+                for i in range(match.start(), match.end()):
+                    protected[i] = True
+        
+        # 4. Inline code: `text` - NOTE(review): tried after bold/italic, so markup inside backticks claims the span first
+        code_pattern = r'`([^`]+)`'
+        for match in re.finditer(code_pattern, text):
+            if not any(protected[match.start():match.end()]):
+                for i in range(match.start(), match.end()):
+                    protected[i] = True
+        
+        # 5. Strikethrough: ~text~
+        strikethrough_pattern = r'~([^~]+)~'
+        for match in re.finditer(strikethrough_pattern, text):
+            if not any(protected[match.start():match.end()]):
+                for i in range(match.start(), match.end()):
+                    protected[i] = True
+        
+        # Build the result, escaping special characters only outside protected spans
+        result = []
+        for i, char in enumerate(text):
+            if protected[i]:
+                # Inside recognised markup: copy verbatim
+                result.append(char)
+            elif char in self._escape_chars:
+                # Outside markup: prefix the special character with a backslash
+                result.append('\\' + char)
+            else:
+                result.append(char)
+        
+        return ''.join(result)
\ No newline at end of file