Enhance Telegram message formatting: add detailed guidelines for MarkdownV2 usage, including support for strikethrough, headings, and lists. Implement smart escaping for Markdown to preserve formatting while avoiding API errors.

This commit is contained in:
jxxghp
2025-11-17 13:49:56 +08:00
parent 6a492198a8
commit 6e329b17a9
2 changed files with 99 additions and 3 deletions

View File

@@ -240,10 +240,15 @@ class Telegram:
try:
if title:
# 标题总是转义因为通常标题不包含Markdown格式
title = self.escape_markdown(title)
if text:
if escape_markdown:
# 完全转义模式:转义所有特殊字符
text = self.escape_markdown(text)
else:
# 智能转义模式保留Markdown格式只转义普通文本中的特殊字符
text = self.escape_markdown_smart(text)
if title:
caption = f"*{title}*\n{text}"
else:
@@ -610,4 +615,79 @@ class Telegram:
# 按 Telegram MarkdownV2 规则转义特殊字符
if not isinstance(text, str):
return str(text) if text is not None else ""
return self._markdown_escape_pattern.sub(r'\\\1', text)
return self._markdown_escape_pattern.sub(r'\\\1', text)
def escape_markdown_smart(self, text: str) -> str:
"""
智能转义Markdown文本只转义不在Markdown标记内的特殊字符
这样可以保留已有的Markdown格式如*粗体*、_斜体_、[链接](url)等),
同时转义普通文本中的特殊字符以避免API错误
注意Telegram MarkdownV2不支持以下语法这些字符会被转义
- 标题语法(#、##、###)会被转义为 \#、\##、\###
- 列表语法(-、*、+)会被转义为 \-、\*、\+
- 引用语法(>)会被转义为 \>
建议使用加粗文本模拟标题:*标题文本*
:param text: 要转义的文本
:return: 转义后的文本
"""
if not isinstance(text, str):
return str(text) if text is not None else ""
# 如果没有特殊字符,直接返回
if not any(char in self._escape_chars for char in text):
return text
# 标记受保护的区域Markdown标记内的内容不转义
protected = [False] * len(text)
# 按优先级匹配Markdown标记从最复杂到最简单
# 1. 链接:[text](url) - 必须最先匹配
link_pattern = r'\[([^\]]*)\]\(([^)]*)\)'
for match in re.finditer(link_pattern, text):
for i in range(match.start(), match.end()):
protected[i] = True
# 2. 粗体:*text*(单个*,不是**
bold_pattern = r'(?<!\*)\*(?!\*)([^*]+?)(?<!\*)\*(?!\*)'
for match in re.finditer(bold_pattern, text):
if not any(protected[match.start():match.end()]):
for i in range(match.start(), match.end()):
protected[i] = True
# 3. 斜体_text_单个_不是__
italic_pattern = r'(?<!_)_(?!_)([^_]+?)(?<!_)_(?!_)'
for match in re.finditer(italic_pattern, text):
if not any(protected[match.start():match.end()]):
for i in range(match.start(), match.end()):
protected[i] = True
# 4. 代码:`text`
code_pattern = r'`([^`]+)`'
for match in re.finditer(code_pattern, text):
if not any(protected[match.start():match.end()]):
for i in range(match.start(), match.end()):
protected[i] = True
# 5. 删除线:~text~
strikethrough_pattern = r'~([^~]+)~'
for match in re.finditer(strikethrough_pattern, text):
if not any(protected[match.start():match.end()]):
for i in range(match.start(), match.end()):
protected[i] = True
# 构建结果:只转义未保护区域的特殊字符
result = []
for i, char in enumerate(text):
if protected[i]:
# 受保护区域Markdown标记内不转义
result.append(char)
elif char in self._escape_chars:
# 未保护区域,转义特殊字符
result.append('\\' + char)
else:
result.append(char)
return ''.join(result)