Merge pull request #5537 from jxxghp/copilot/optimize-message-logic

This commit is contained in:
jxxghp
2026-03-03 20:45:00 +08:00
committed by GitHub
3 changed files with 66 additions and 8 deletions

View File

@@ -490,18 +490,14 @@ class MessageChain(ChainBase):
# 重新搜索/下载
content = re.sub(r"(搜索|下载)[:\s]*", "", text)
action = "ReSearch"
elif text.startswith("#") \
or re.search(r"^请[问帮你]", text) \
or re.search(r"[?]$", text) \
or StringUtils.count_words(text) > 10 \
or text.find("继续") != -1:
# 聊天
content = text
action = "Chat"
elif StringUtils.is_link(text):
# 链接
content = text
action = "Link"
elif not StringUtils.is_media_title_like(text):
# 聊天
content = text
action = "Chat"
else:
# 搜索
content = text

View File

@@ -23,6 +23,17 @@ _special_domains = [
_version_map = {"stable": -1, "rc": -2, "beta": -3, "alpha": -4}
# 不符合的版本号
_other_version = -5
_max_media_title_words = 10
_min_media_title_length = 2
_non_media_title_pattern = re.compile(r"^#|^请[问帮你]|[?]$|^继续$")
_chat_intent_pattern = re.compile(r"帮我|请问|怎么|如何|为什么|可以|能否|推荐|介绍|谢谢|想看|找一下|搜一下")
_media_feature_pattern = re.compile(
r"\s*[0-9一二三四五六七八九十百零]+\s*[季集]|S\d{1,2}(?:E\d{1,4})?|E\d{1,4}|(?:19|20)\d{2}",
re.IGNORECASE
)
_media_separator_pattern = re.compile(r"[\s\-_.::·'\"()\[\]【】]+")
_media_sentence_punctuation_pattern = re.compile(r"[,。!?!?,;]")
_media_title_char_pattern = re.compile(r"[\u4e00-\u9fffA-Za-z]")
class StringUtils:
@@ -531,6 +542,31 @@ class StringUtils:
return chinese_count + english_count
@staticmethod
def is_media_title_like(text: str) -> bool:
    """
    Heuristically decide whether ``text`` looks like a movie / TV show title.

    The text is whitespace-normalized and then rejected when it matches the
    non-title markers, exceeds the word limit, looks like a link, contains a
    chat-intent phrase or contains sentence punctuation. Whatever survives is
    stripped of season / episode / year markers and separators, and must
    still contain a long enough core name with at least one letter.

    :param text: raw user input
    :return: True if the text resembles a media title, otherwise False
    """
    # Collapse whitespace runs to single spaces and trim both ends
    normalized = re.sub(r"\s+", " ", text).strip() if text else ""
    if not normalized:
        return False

    # Explicit non-title markers, then the word-count limit
    if _non_media_title_pattern.search(normalized):
        return False
    if StringUtils.count_words(normalized) > _max_media_title_words:
        return False

    # URLs and magnet links are not titles
    if normalized.startswith("magnet:?") or "://" in normalized:
        return False

    # Conversational phrases or sentence punctuation mean it is a message
    for rejecting_pattern in (_chat_intent_pattern,
                              _media_sentence_punctuation_pattern):
        if rejecting_pattern.search(normalized):
            return False

    # Drop season / episode / year markers first, then separators, so that
    # only the core name is left for the final decision
    core_name = _media_separator_pattern.sub(
        "", _media_feature_pattern.sub("", normalized)
    )
    if len(core_name) < _min_media_title_length:
        return False
    return bool(_media_title_char_pattern.search(core_name))
@staticmethod
def split_text(text: str, max_length: int) -> Generator:
"""

26
tests/test_string.py Normal file
View File

@@ -0,0 +1,26 @@
from unittest import TestCase
from app.utils.string import StringUtils
class StringUtilsTest(TestCase):
    """Unit tests for ``StringUtils.is_media_title_like``."""

    # Inputs expected to be recognized as media titles
    TITLE_LIKE = (
        "盗梦空间",
        "The Lord of the Rings",
        "庆余年 第2季",
        "The Office S01E01",
        "权力的游戏 Game of Thrones",
        "Spider-Man: No Way Home 2021",
    )

    # Inputs expected to be rejected
    NOT_TITLE_LIKE = (
        "",
        " ",
        "a",
        "第2季",
        "S01E01",
        "#推荐电影",
        "请帮我推荐一部电影",
        "盗梦空间怎么样?",
        "我想看盗梦空间",
        "继续",
        # Links have their own rejection branch in the implementation
        "https://example.com/movie",
        "magnet:?xt=urn:btih:0123456789abcdef",
    )

    def test_is_media_title_like_true(self):
        """Every title-like sample is accepted."""
        for text in self.TITLE_LIKE:
            # subTest reports every failing sample instead of stopping at the first
            with self.subTest(text=text):
                self.assertTrue(StringUtils.is_media_title_like(text))

    def test_is_media_title_like_false(self):
        """Every non-title sample is rejected."""
        for text in self.NOT_TITLE_LIKE:
            with self.subTest(text=text):
                self.assertFalse(StringUtils.is_media_title_like(text))