From 86000ea19a86cd43ca6d64643a7a946d36d5cb31 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Mar 2026 12:14:25 +0000
Subject: [PATCH] feat: improve user message media-title detection

Co-authored-by: jxxghp <51039935+jxxghp@users.noreply.github.com>
---
Reviewer notes (below the "---" separator, so not part of the commit message):

  * app/chain/message.py: the link check now runs before the chat check, and
    a message is routed to "Chat" whenever
    StringUtils.is_media_title_like() rejects it. Anything else still falls
    through to the search branch.
  * app/utils/string.py: is_media_title_like() absorbs the former inline chat
    heuristics (leading "#", a "请问/请帮/请你" prefix, a trailing "?", more
    than 10 words per count_words(), or containing "继续"), adds a
    conversational-keyword filter and a sentence-punctuation filter, then
    strips season/episode/year markers and separators and requires at least
    two remaining characters including one CJK or Latin letter.
  * tests/test_string.py: new unit tests for the helper.

 app/chain/message.py | 12 ++++--------
 app/utils/string.py  | 32 ++++++++++++++++++++++++++++++++
 tests/test_string.py | 18 ++++++++++++++++++
 3 files changed, 54 insertions(+), 8 deletions(-)
 create mode 100644 tests/test_string.py

diff --git a/app/chain/message.py b/app/chain/message.py
index d327a709..74b7817d 100644
--- a/app/chain/message.py
+++ b/app/chain/message.py
@@ -490,18 +490,14 @@ class MessageChain(ChainBase):
                 # 重新搜索/下载
                 content = re.sub(r"(搜索|下载)[::\s]*", "", text)
                 action = "ReSearch"
-            elif text.startswith("#") \
-                    or re.search(r"^请[问帮你]", text) \
-                    or re.search(r"[??]$", text) \
-                    or StringUtils.count_words(text) > 10 \
-                    or text.find("继续") != -1:
-                # 聊天
-                content = text
-                action = "Chat"
             elif StringUtils.is_link(text):
                 # 链接
                 content = text
                 action = "Link"
+            elif not StringUtils.is_media_title_like(text):
+                # Chat
+                content = text
+                action = "Chat"
             else:
                 # 搜索
                 content = text
diff --git a/app/utils/string.py b/app/utils/string.py
index 1bd37b07..6f73736c 100644
--- a/app/utils/string.py
+++ b/app/utils/string.py
@@ -531,6 +531,38 @@ class StringUtils:
 
         return chinese_count + english_count
 
+    @staticmethod
+    def is_media_title_like(text: str) -> bool:
+        """
+        Return True if the text looks like a movie/TV show title rather than a chat message.
+        """
+        if not text:
+            return False
+        text = re.sub(r'\s+', ' ', text).strip()
+        if not text:
+            return False
+        if text.startswith("#") \
+                or re.search(r"^请[问帮你]", text) \
+                or re.search(r"[??]$", text) \
+                or StringUtils.count_words(text) > 10 \
+                or "继续" in text:
+            return False
+        if StringUtils.is_link(text):
+            return False
+        if re.search(r"(帮我|请问|怎么|如何|为什么|可以|能否|推荐|介绍|谢谢|想看|找一下|搜一下)", text):
+            return False
+        if re.search(r"[,。!?!?,;;]", text):
+            return False
+
+        candidate = re.sub(
+            r"第\s*[0-9一二三四五六七八九十百零]+\s*[季集]|S\d{1,2}(?:E\d{1,4})?|E\d{1,4}|(?:19|20)\d{2}",
+            "",
+            text,
+            flags=re.IGNORECASE
+        )  # drop season/episode/year markers so they do not count as title text
+        candidate = re.sub(r"[\s\-_.::·'\"()\[\]【】]+", "", candidate)  # drop separators and brackets
+        return len(candidate) >= 2 and bool(re.search(r"[\u4e00-\u9fffA-Za-z]", candidate))
+
     @staticmethod
     def split_text(text: str, max_length: int) -> Generator:
         """
diff --git a/tests/test_string.py b/tests/test_string.py
new file mode 100644
index 00000000..beec6450
--- /dev/null
+++ b/tests/test_string.py
@@ -0,0 +1,18 @@
+from unittest import TestCase
+
+from app.utils.string import StringUtils
+
+
+class StringUtilsTest(TestCase):
+
+    def test_is_media_title_like_true(self):
+        self.assertTrue(StringUtils.is_media_title_like("盗梦空间"))
+        self.assertTrue(StringUtils.is_media_title_like("The Lord of the Rings"))
+        self.assertTrue(StringUtils.is_media_title_like("庆余年 第2季"))
+
+    def test_is_media_title_like_false(self):
+        self.assertFalse(StringUtils.is_media_title_like("#推荐电影"))
+        self.assertFalse(StringUtils.is_media_title_like("请帮我推荐一部电影"))
+        self.assertFalse(StringUtils.is_media_title_like("盗梦空间怎么样?"))
+        self.assertFalse(StringUtils.is_media_title_like("我想看盗梦空间"))
+        self.assertFalse(StringUtils.is_media_title_like("继续"))