fix 优化逻辑

2026-03-20 03:57:30 +08:00 · 2024-10-09 16:50:54 +08:00
parent 7ea01c1109
commit 16f6303609
2 changed files with 103 additions and 94 deletions
--- a/app/chain/search.py
+++ b/app/chain/search.py
@@ -23,7 +23,7 @@ class SearchChain(ChainBase):
    """
    站点资源搜索处理链
    """
-    
+
    __result_temp_file = "__search_result__"

    def __init__(self):
@@ -103,7 +103,8 @@ class SearchChain(ChainBase):
                no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None,
                sites: List[int] = None,
                rule_groups: List[str] = None,
-                area: str = "title") -> List[Context]:
+                area: str = "title",
+                custom_words: List[str] = None) -> List[Context]:
        """
        根据媒体信息搜索种子资源，精确匹配，应用过滤规则，同时根据no_exists过滤本地已存在的资源
        :param mediainfo: 媒体信息
@@ -112,6 +113,7 @@ class SearchChain(ChainBase):
        :param sites: 站点ID列表，为空时搜索所有站点
        :param rule_groups: 过滤规则组名称列表
        :param area: 搜索范围，title or imdbid
+        :param custom_words: 自定义识别词列表
        """

        def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
@@ -177,51 +179,8 @@ class SearchChain(ChainBase):
        # 开始新进度
        self.progress.start(ProgressKey.Search)

-        # 开始匹配
-        _match_torrents = []
-        # 总数
-        _total = len(torrents)
-        # 已处理数
-        _count = 0
-        if mediainfo:
-            # 英文标题应该在别名/原标题中，不需要再匹配
-            logger.info(f"开始匹配结果 标题：{mediainfo.title}，原标题：{mediainfo.original_title}，别名：{mediainfo.names}")
-            self.progress.update(value=0, text=f'开始匹配，总 {_total} 个资源 ...', key=ProgressKey.Search)
-            for torrent in torrents:
-                _count += 1
-                self.progress.update(value=(_count / _total) * 96,
-                                     text=f'正在匹配 {torrent.site_name}，已完成 {_count} / {_total} ...',
-                                     key=ProgressKey.Search)
-                if not torrent.title:
-                    continue
-                # 比对IMDBID
-                if torrent.imdbid \
-                        and mediainfo.imdb_id \
-                        and torrent.imdbid == mediainfo.imdb_id:
-                    logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源：{torrent.site_name} - {torrent.title}')
-                    _match_torrents.append(torrent)
-                    continue
-                # 识别
-                torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
-                if torrent.title != torrent_meta.org_string:
-                    logger.info(f"种子名称应用识别词后发生改变：{torrent.title} => {torrent_meta.org_string}")
-                # 比对种子
-                if self.torrenthelper.match_torrent(mediainfo=mediainfo,
-                                                    torrent_meta=torrent_meta,
-                                                    torrent=torrent):
-                    # 匹配成功
-                    _match_torrents.append(torrent)
-                    continue
-            # 匹配完成
-            logger.info(f"匹配完成，共匹配到 {len(_match_torrents)} 个资源")
-            self.progress.update(value=97,
-                                 text=f'匹配完成，共匹配到 {len(_match_torrents)} 个资源',
-                                 key=ProgressKey.Search)
-        else:
-            _match_torrents = torrents
-
        # 开始过滤
-        self.progress.update(value=98, text=f'开始过滤，总 {len(_match_torrents)} 个资源，请稍候...',
+        self.progress.update(value=0, text=f'开始过滤，总 {len(torrents)} 个资源，请稍候...',
                             key=ProgressKey.Search)

        # 开始过滤规则过滤
@@ -230,21 +189,67 @@ class SearchChain(ChainBase):
            rule_groups: List[str] = self.systemconfig.get(SystemConfigKey.SearchFilterRuleGroups)
        if rule_groups:
            logger.info(f'开始过滤规则/剧集过滤，使用规则组：{rule_groups} ...')
-            _match_torrents = __do_filter(_match_torrents)
-            if not _match_torrents:
+            torrents = __do_filter(torrents)
+            if not torrents:
                logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
                return []
-            logger.info(f"过滤规则/剧集过滤完成，剩余 {len(_match_torrents)} 个资源")
+            logger.info(f"过滤规则/剧集过滤完成，剩余 {len(torrents)} 个资源")
+
+        # 过滤完成
+        self.progress.update(value=50, text=f'过滤完成，剩余 {len(torrents)} 个资源', key=ProgressKey.Search)
+
+        # 开始匹配
+        _match_torrents = []
+        # 总数
+        _total = len(torrents)
+        # 已处理数
+        _count = 0
+
+        if mediainfo:
+            # 英文标题应该在别名/原标题中，不需要再匹配
+            logger.info(f"开始匹配结果 标题：{mediainfo.title}，原标题：{mediainfo.original_title}，别名：{mediainfo.names}")
+            self.progress.update(value=51, text=f'开始匹配，总 {_total} 个资源 ...', key=ProgressKey.Search)
+            for torrent in torrents:
+                _count += 1
+                self.progress.update(value=51 + (_count / _total) * 46,
+                                     text=f'正在匹配 {torrent.site_name}，已完成 {_count} / {_total} ...',
+                                     key=ProgressKey.Search)
+                if not torrent.title:
+                    continue
+                # 识别元数据
+                torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description,
+                                        custom_words=custom_words)
+                if torrent.title != torrent_meta.org_string:
+                    logger.info(f"种子名称应用识别词后发生改变：{torrent.title} => {torrent_meta.org_string}")
+                # 比对IMDBID
+                if torrent.imdbid \
+                        and mediainfo.imdb_id \
+                        and torrent.imdbid == mediainfo.imdb_id:
+                    logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源：{torrent.site_name} - {torrent.title}')
+                    _match_torrents.append((torrent, torrent_meta))
+                    continue
+                # 比对种子
+                if self.torrenthelper.match_torrent(mediainfo=mediainfo,
+                                                    torrent_meta=torrent_meta,
+                                                    torrent=torrent):
+                    # 匹配成功
+                    _match_torrents.append((torrent, torrent_meta))
+                    continue
+            # 匹配完成
+            logger.info(f"匹配完成，共匹配到 {len(_match_torrents)} 个资源")
+            self.progress.update(value=97,
+                                 text=f'匹配完成，共匹配到 {len(_match_torrents)} 个资源',
+                                 key=ProgressKey.Search)
+        else:
+            _match_torrents = [(t, MetaInfo(title=t.title, subtitle=t.description)) for t in torrents]

        # 去掉mediainfo中多余的数据
        mediainfo.clear()

        # 组装上下文
-        contexts = [Context(meta_info=MetaInfo(title=torrent.title, subtitle=torrent.description),
+        contexts = [Context(torrent_info=t[0],
                            media_info=mediainfo,
-                            torrent_info=torrent) for torrent in _match_torrents]
-
-        self.progress.update(value=99, text=f'过滤完成，剩余 {len(contexts)} 个资源', key=ProgressKey.Search)
+                            meta_info=t[1]) for t in _match_torrents]

        # 排序
        self.progress.update(value=99,
@@ -253,10 +258,10 @@ class SearchChain(ChainBase):
        contexts = self.torrenthelper.sort_torrents(contexts)

        # 结束进度
+        logger.info(f'搜索完成，共 {len(contexts)} 个资源')
        self.progress.update(value=100,
                             text=f'搜索完成，共 {len(contexts)} 个资源',
                             key=ProgressKey.Search)
-        logger.info(f'搜索完成，共 {len(contexts)} 个资源')
        self.progress.end(ProgressKey.Search)

        # 返回
--- a/app/chain/subscribe.py
+++ b/app/chain/subscribe.py
@@ -14,6 +14,7 @@ from app.core.config import settings
 from app.core.context import TorrentInfo, Context, MediaInfo
 from app.core.event import eventmanager, Event, EventManager
 from app.core.meta import MetaBase
+from app.core.meta.words import WordsMatcher
 from app.core.metainfo import MetaInfo
 from app.db.downloadhistory_oper import DownloadHistoryOper
 from app.db.models.subscribe import Subscribe
@@ -242,6 +243,7 @@ class SubscribeChain(ChainBase):
        # 遍历订阅
        for subscribe in subscribes:
            mediakey = subscribe.tmdbid or subscribe.doubanid
+            custom_word_list = subscribe.custom_words.split("\n") if subscribe.custom_words else None
            # 校验当前时间减订阅创建时间是否大于1分钟，否则跳过先，留出编辑订阅的时间
            if subscribe.date:
                now = datetime.now()
@@ -343,7 +345,8 @@ class SubscribeChain(ChainBase):
                                                no_exists=no_exists,
                                                sites=sites,
                                                rule_groups=rule_groups,
-                                                area="imdbid" if subscribe.search_imdbid else "title")
+                                                area="imdbid" if subscribe.search_imdbid else "title",
+                                                custom_words=custom_word_list)
            if not contexts:
                logger.warn(f'订阅 {subscribe.keyword or subscribe.name} 未搜索到资源')
                self.finish_subscribe_or_not(subscribe=subscribe, meta=meta,
@@ -519,6 +522,10 @@ class SubscribeChain(ChainBase):
        if not torrents:
            logger.warn('没有缓存资源，无法匹配订阅')
            return
+
+        # 记录重新识别过的种子
+        _recognize_cached = []
+
        # 所有订阅
        subscribes = self.subscribeoper.list('R')
        # 遍历订阅
@@ -538,8 +545,6 @@ class SubscribeChain(ChainBase):
            domains = []
            if subscribe.sites:
                domains = self.siteoper.get_domains_by_ids(subscribe.sites)
-            # 自定义识别词
-            custom_words = subscribe.custom_words.split("\n") if subscribe.custom_words else []
            # 识别媒体信息
            mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type,
                                                        tmdbid=subscribe.tmdbid,
@@ -616,51 +621,45 @@ class SubscribeChain(ChainBase):
                        logger.debug(f"{torrent_info.site_name} - {torrent_info.title} 不符合订阅站点要求")
                        continue

-                    # 匹配订阅参数
-                    if not self.torrenthelper.filter_torrent(torrent_info=torrent_info,
-                                                             filter_params=self.get_params(subscribe)):
-                        continue
-
-                    # 先判断是否有没识别的种子，有则重新识别；如果订阅有自定义识别词，则不使用预识别的信息
-                    if not torrent_mediainfo \
-                            or (not torrent_mediainfo.tmdb_id and not torrent_mediainfo.douban_id) \
-                            or subscribe.custom_words:
-                        if not subscribe.custom_words:
-                            logger.info(
-                                f'{torrent_info.site_name} - {torrent_info.title} 订阅缓存为未识别状态，'
-                                f'尝试重新识别媒体信息...')
-                        else:
-                            logger.info(
-                                f'{torrent_info.site_name} - {torrent_info.title} 因订阅存在自定义识别词，'
-                                f'正在重新识别元数据和媒体信息...')
+                    # 有自定义识别词时，需要判断是否需要重新识别
+                    if subscribe.custom_words:
+                        _, apply_words = WordsMatcher().prepare(torrent_info.title,
+                                                                custom_words=subscribe.custom_words.split("\n"))
+                        if apply_words:
+                            logger.info(f'{torrent_info.site_name} - {torrent_info.title} 因订阅存在自定义识别词，重新识别元数据...')
                            # 重新识别元数据
                            torrent_meta = MetaInfo(title=torrent_info.title, subtitle=torrent_info.description,
-                                                    custom_words=custom_words)
-                        # 重新识别媒体信息
-                        if subscribe.custom_words:
+                                                    custom_words=subscribe.custom_words.split("\n"))
+                            # 媒体信息需要重新识别
+                            torrent_mediainfo = None
+
+                    # 先判断是否有没识别的种子，否则重新识别
+                    if not torrent_mediainfo \
+                            or (not torrent_mediainfo.tmdb_id and not torrent_mediainfo.douban_id):
+                        # 避免重复处理
+                        _cache_key = f"{torrent_meta.org_string}_{torrent_info.description}"
+                        if _cache_key not in _recognize_cached:
+                            _recognize_cached.append(_cache_key)
+                            # 重新识别媒体信息
                            torrent_mediainfo = self.recognize_media(meta=torrent_meta)
-                        else:
-                            # 不使用识别缓存
-                            torrent_mediainfo = self.recognize_media(meta=torrent_meta, cache=False)
                            if torrent_mediainfo:
                                # 更新种子缓存
                                context.media_info = torrent_mediainfo
-                        if not torrent_mediainfo:
-                            # 通过标题匹配兜底
-                            logger.warn(
-                                f'{torrent_info.site_name} - {torrent_info.title} 重新识别失败，尝试通过标题匹配...')
-                            if self.torrenthelper.match_torrent(mediainfo=mediainfo,
-                                                                torrent_meta=torrent_meta,
-                                                                torrent=torrent_info):
-                                # 匹配成功
-                                logger.info(
-                                    f'{mediainfo.title_year} 通过标题匹配到可用资源：{torrent_info.site_name} - {torrent_info.title}')
-                                if not subscribe.custom_words:
+                            if not torrent_mediainfo:
+                                # 通过标题匹配兜底
+                                logger.warn(
+                                    f'{torrent_info.site_name} - {torrent_info.title} 重新识别失败，尝试通过标题匹配...')
+                                if self.torrenthelper.match_torrent(mediainfo=mediainfo,
+                                                                    torrent_meta=torrent_meta,
+                                                                    torrent=torrent_info):
+                                    # 匹配成功
+                                    logger.info(
+                                        f'{mediainfo.title_year} 通过标题匹配到可选资源：{torrent_info.site_name} - {torrent_info.title}')
                                    # 更新种子缓存
                                    torrent_mediainfo = mediainfo
                                    context.media_info = mediainfo
-                            else:
-                                continue
+                                else:
+                                    continue

                    # 直接比对媒体信息
                    if torrent_mediainfo and (torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id):
@@ -673,7 +672,7 @@ class SubscribeChain(ChainBase):
                                and torrent_mediainfo.douban_id != mediainfo.douban_id:
                            continue
                        logger.info(
-                            f'{mediainfo.title_year} 通过媒体信ID匹配到可用资源：{torrent_info.site_name} - {torrent_info.title}')
+                            f'{mediainfo.title_year} 通过媒体信ID匹配到可选资源：{torrent_info.site_name} - {torrent_info.title}')
                    else:
                        continue

@@ -715,6 +714,11 @@ class SubscribeChain(ChainBase):
                                    logger.debug(f'{subscribe.name} 正在洗版，{torrent_info.title} 不是整季')
                                    continue

+                    # 匹配订阅附加参数
+                    if not self.torrenthelper.filter_torrent(torrent_info=torrent_info,
+                                                             filter_params=self.get_params(subscribe)):
+                        continue
+
                    # 优先级过滤规则
                    if subscribe.best_version:
                        rule_groups = subscribe.filter_groups \