diff --git a/app/chain/search.py b/app/chain/search.py index 9df34887..ac0aefd7 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -592,6 +592,66 @@ class SearchChain(ChainBase): torrent_list=torrent_list, mediainfo=mediainfo) or [] + def __do_site_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: + """ + 执行单个站点的过滤流程 + """ + if not torrent_list: + return [] + + filtered_torrents = torrent_list + if filter_params: + torrenthelper = TorrentHelper() + filtered_torrents = [ + torrent for torrent in filtered_torrents + if torrenthelper.filter_torrent(torrent, filter_params) + ] + + if rule_groups and filtered_torrents: + filtered_torrents = __do_filter(filtered_torrents) + + return filtered_torrents + + def __do_parallel_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: + """ + 按站点并发执行过滤,保持站点内顺序不变 + """ + if not torrent_list or (not filter_params and not rule_groups): + return torrent_list + + site_torrents: Dict[Tuple[Optional[int], Optional[str]], List[TorrentInfo]] = {} + for torrent in torrent_list: + site_key = (torrent.site, torrent.site_name) + if site_key not in site_torrents: + site_torrents[site_key] = [] + site_torrents[site_key].append(torrent) + + if len(site_torrents) <= 1: + return __do_site_filter(torrent_list) + + finished_count = 0 + filtered_by_site: Dict[Tuple[Optional[int], Optional[str]], List[TorrentInfo]] = {} + max_workers = min(len(site_torrents), settings.CONF.threadpool or len(site_torrents)) + with ThreadPoolExecutor(max_workers=max_workers) as executor: + all_tasks = { + executor.submit(__do_site_filter, site_torrent_list): site_key + for site_key, site_torrent_list in site_torrents.items() + } + for future in as_completed(all_tasks): + finished_count += 1 + filtered_by_site[all_tasks[future]] = future.result() or [] + progress.update( + value=finished_count / len(site_torrents) * 50, + text=f'正在过滤,已完成 {finished_count} / {len(site_torrents)} 个站点 ...' + ) + + filtered_ids = { + id(torrent) + for filtered_torrents in filtered_by_site.values() + for torrent in filtered_torrents + } + return [torrent for torrent in torrent_list if id(torrent) in filtered_ids] + if not torrents: logger.warn(f'{keyword or mediainfo.title} 未搜索到资源') return [] @@ -605,14 +665,14 @@ class SearchChain(ChainBase): # 匹配订阅附加参数 if filter_params: logger.info(f'开始附加参数过滤,附加参数:{filter_params} ...') - torrents = [torrent for torrent in torrents if TorrentHelper().filter_torrent(torrent, filter_params)] # 开始过滤规则过滤 if rule_groups is None: # 取搜索过滤规则 rule_groups: List[str] = SystemConfigOper().get(SystemConfigKey.SearchFilterRuleGroups) if rule_groups: logger.info(f'开始过滤规则/剧集过滤,使用规则组:{rule_groups} ...') - torrents = __do_filter(torrents) + torrents = __do_parallel_filter(torrents) + if rule_groups: if not torrents: logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源') return [] diff --git a/app/modules/filter/__init__.py b/app/modules/filter/__init__.py index f6026b60..786863e7 100644 --- a/app/modules/filter/__init__.py +++ b/app/modules/filter/__init__.py @@ -15,10 +15,6 @@ from app.utils.string import StringUtils class FilterModule(_ModuleBase): CONFIG_WATCH = {SystemConfigKey.CustomFilterRules.value} - # 规则解析器 - parser: RuleParser = None - # 媒体信息 - media: MediaInfo = None # 保留一份只读内置规则定义,方便查询工具准确区分“内置规则”和“自定义规则”。 builtin_rule_set: Dict[str, dict] = deepcopy(BUILTIN_RULE_SET) @@ -30,7 +26,6 @@ class FilterModule(_ModuleBase): self.rulehelper = RuleHelper() def init_module(self) -> None: - self.parser = RuleParser() # 每次重载都先恢复为纯内置规则,避免旧的自定义规则残留在内存里。 self.rule_set = deepcopy(self.builtin_rule_set) self.__init_custom_rules() @@ -90,7 +85,7 @@ class FilterModule(_ModuleBase): """ if not rule_groups: return torrent_list - self.media = mediainfo + parser = RuleParser() # 查询规则表详情 groups = self.rulehelper.get_rule_group_by_media(media=mediainfo, group_names=rule_groups) if groups: @@ -99,12 +94,16 @@ class FilterModule(_ModuleBase): torrent_list = self.__filter_torrents( rule_string=group.rule_string, rule_name=group.name, - torrent_list=torrent_list + torrent_list=torrent_list, + mediainfo=mediainfo, + parser=parser, ) return torrent_list def __filter_torrents(self, rule_string: str, rule_name: str, - torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: + torrent_list: List[TorrentInfo], + mediainfo: MediaInfo, + parser: RuleParser) -> List[TorrentInfo]: """ 过滤种子 """ @@ -112,7 +111,7 @@ class FilterModule(_ModuleBase): ret_torrents = [] for torrent in torrent_list: # 能命中优先级的才返回 - if not self.__get_order(torrent, rule_string): + if not self.__get_order(torrent, rule_string, mediainfo, parser): logger.debug(f"种子 {torrent.site_name} - {torrent.title} {torrent.description or ''} " f"不匹配 {rule_name} 过滤规则") continue @@ -120,7 +119,8 @@ class FilterModule(_ModuleBase): return ret_torrents - def __get_order(self, torrent: TorrentInfo, rule_str: str) -> Optional[TorrentInfo]: + def __get_order(self, torrent: TorrentInfo, rule_str: str, + mediainfo: MediaInfo, parser: RuleParser) -> Optional[TorrentInfo]: """ 获取种子匹配的规则优先级,值越大越优先,未匹配时返回None """ @@ -133,8 +133,8 @@ class FilterModule(_ModuleBase): for rule_group in rule_groups: # 解析规则组 - parsed_group = self.parser.parse(rule_group.strip()) - if self.__match_group(torrent, parsed_group.as_list()[0]): + parsed_group = parser.parse(rule_group.strip()) + if self.__match_group(torrent, parsed_group.as_list()[0], mediainfo): # 出现匹配时中断 matched = True logger.debug(f"种子 {torrent.site_name} - {torrent.title} 优先级为 {100 - res_order + 1}") @@ -145,27 +145,31 @@ class FilterModule(_ModuleBase): return None if not matched else torrent - def __match_group(self, torrent: TorrentInfo, rule_group: Union[list, str]) -> Optional[bool]: + def __match_group(self, torrent: TorrentInfo, rule_group: Union[list, str], + mediainfo: MediaInfo) -> Optional[bool]: """ 判断种子是否匹配规则组 """ if not isinstance(rule_group, list): # 不是列表,说明是规则名称 - return self.__match_rule(torrent, rule_group) + return self.__match_rule(torrent, rule_group, mediainfo) elif isinstance(rule_group, list) and len(rule_group) == 1: # 只有一个规则项 - return self.__match_group(torrent, rule_group[0]) + return self.__match_group(torrent, rule_group[0], mediainfo) elif rule_group[0] == "not": # 非操作 - return not self.__match_group(torrent, rule_group[1:]) + return not self.__match_group(torrent, rule_group[1:], mediainfo) elif rule_group[1] == "and": # 与操作 - return self.__match_group(torrent, rule_group[0]) and self.__match_group(torrent, rule_group[2:]) + return self.__match_group(torrent, rule_group[0], mediainfo) \ + and self.__match_group(torrent, rule_group[2:], mediainfo) elif rule_group[1] == "or": # 或操作 - return self.__match_group(torrent, rule_group[0]) or self.__match_group(torrent, rule_group[2:]) + return self.__match_group(torrent, rule_group[0], mediainfo) \ + or self.__match_group(torrent, rule_group[2:], mediainfo) - def __match_rule(self, torrent: TorrentInfo, rule_name: str) -> bool: + def __match_rule(self, torrent: TorrentInfo, rule_name: str, + mediainfo: MediaInfo) -> bool: """ 判断种子是否匹配规则项 """ @@ -176,7 +180,7 @@ class FilterModule(_ModuleBase): # TMDB规则 tmdb = self.rule_set[rule_name].get("tmdb") # 符合TMDB规则的直接返回True,即不过滤 - if tmdb and self.__match_tmdb(tmdb): + if tmdb and self.__match_tmdb(tmdb, mediainfo): logger.debug(f"种子 {torrent.site_name} - {torrent.title} 符合 {rule_name} 的TMDB规则,匹配成功") return True # 匹配项:标题、副标题、标签 @@ -259,18 +263,19 @@ class FilterModule(_ModuleBase): return True - def __match_tmdb(self, tmdb: dict) -> bool: + @staticmethod + def __match_tmdb(tmdb: dict, mediainfo: MediaInfo) -> bool: """ 判断种子是否匹配TMDB规则 """ def __get_media_value(key: str): try: - return getattr(self.media, key) + return getattr(mediainfo, key) except ValueError: return "" - if not self.media: + if not mediainfo: return False for attr, value in tmdb.items():