From f9a287b52b25c4d510a0ee1348f5b3a6b126d296 Mon Sep 17 00:00:00 2001
From: shaw
Date: Mon, 2 Jun 2025 00:38:10 +0800
Subject: [PATCH] =?UTF-8?q?feat(core):=20=E5=A2=9E=E5=8A=A0=E5=89=A7?=
 =?UTF-8?q?=E9=9B=86=E4=BA=A4=E9=9B=86=E6=9C=80=E5=B0=8F=E7=BD=AE=E4=BF=A1?=
 =?UTF-8?q?=E5=BA=A6=E8=AE=BE=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

新增了剧集交集最小置信度的配置项,用于过滤掉包含过多不需要剧集的种子。实现了以下功能:

- 在 config.py 中添加了 EPISODE_INTERSECTION_MIN_CONFIDENCE 配置项,默认值为 0.0
- 修改了 download.py 中的下载逻辑,增加了计算种子与目标缺失集之间交集比例的函数
- 使用交集比例来筛选和排序种子,优先下载与缺失集交集较大的种子
- 可以通过配置项设置交集比例的阈值,低于阈值的种子将被跳过

这个改动可以提高下载效率,避免下载过多不必要的剧集。
---
 app/chain/download.py | 81 +++++++++++++++++++++++++++++++++----------
 app/core/config.py    |  2 ++
 2 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/app/chain/download.py b/app/chain/download.py
index e917a424..71dcc3f7 100644
--- a/app/chain/download.py
+++ b/app/chain/download.py
@@ -450,7 +450,18 @@ class DownloadChain(ChainBase):
             if not no_exist.get(season):
                 return 9999
             return no_exist[season].total_episode
-
+        def _calculate_intersection_ratio(episodes_set: set, target_set: set) -> Tuple[float, set]:
+            """
+            Compute how much of a torrent's episode set is still missing.
+            :param episodes_set: episode numbers contained in the torrent (Set[int]).
+            :param target_set: episode numbers still missing for the season (Set[int]).
+            :return: (overlap ratio in 0~1 relative to the torrent size, overlapping episodes)
+            """
+            cal_intersection = episodes_set & target_set
+            if not cal_intersection:
+                return 0.0, set()
+            cal_ratio = len(cal_intersection) / len(episodes_set)
+            return cal_ratio, cal_intersection
         # 发送资源选择事件,允许外部修改上下文数据
         logger.debug(f"Initial contexts: {len(contexts)} items, Downloader: {downloader}")
         event_data = ResourceSelectionEventData(
@@ -609,6 +620,8 @@ class DownloadChain(ChainBase):
                     # 缺失整季的转化为缺失集进行比较
                     if not need_episodes:
                         need_episodes = list(range(start_episode, total_episode + 1))
+                    # Candidate torrents as (context, overlap ratio, overlap size) for this season
+                    torrent_ratios = []
                     # 循环种子
                     for context in contexts:
                         if global_vars.is_system_stopped:
@@ -635,24 +648,54 @@ class DownloadChain(ChainBase):
                             # 整季的不处理
                             if not torrent_episodes:
                                 continue
-                            # 为需要集的子集则下载
-                            if torrent_episodes.issubset(set(need_episodes)):
-                                # 下载
-                                logger.info(f"开始下载 {meta.title} ...")
-                                download_id = self.download_single(context, save_path=save_path,
-                                                                   channel=channel, source=source,
-                                                                   userid=userid, username=username,
-                                                                   downloader=downloader)
-                                if download_id:
-                                    # 下载成功
-                                    logger.info(f"{meta.title} 添加下载成功")
-                                    downloaded_list.append(context)
-                                    # 更新仍需集数
-                                    need_episodes = __update_episodes(_mid=need_mid,
-                                                                      _need=need_episodes,
-                                                                      _sea=need_season,
-                                                                      _current=torrent_episodes)
-                                    logger.info(f"季 {need_season} 剩余需要集:{need_episodes}")
+                            # Compute the overlap between this torrent and the missing episodes.
+                            # e.g. torrents [5-10], [7-10], [9-10] with need_episodes=[9,10,11,12,13,14]
+                            # give len(torrent_episodes ∩ need_episodes) / len(torrent_episodes) = 0.33, 0.50, 1.00
+                            ratio, intersection = _calculate_intersection_ratio(torrent_episodes,set(need_episodes))
+                            if not intersection or ratio < (settings.EPISODE_INTERSECTION_MIN_CONFIDENCE or 0.0):
+                                # Skip when nothing is needed or the ratio is below the configured minimum (0.0 = no minimum)
+                                logger.info(
+                                    f"{context.meta_info.title} 与当前缺失集数交集比例过低:{ratio:.2%},跳过")
+                                continue
+
+                            # Keep as a download candidate
+                            torrent_ratios.append((context, ratio, len(intersection)))
+                    if not torrent_ratios:
+                        continue
+                    # Highest overlap ratio first; ties broken by the larger overlap
+                    torrent_ratios.sort(key=lambda x: (x[1], x[2]), reverse=True)
+                    # Download in ranked order
+                    for context, _, _ in torrent_ratios:
+                        if global_vars.is_system_stopped:
+                            break
+                        # Re-evaluate against the current need_episodes, which shrinks after each download
+                        current_episodes = set(context.meta_info.episode_list)
+                        current_ratio, current_intersection = _calculate_intersection_ratio(current_episodes,
+                                                                                            set(need_episodes))
+                        if not current_intersection or current_ratio < (settings.EPISODE_INTERSECTION_MIN_CONFIDENCE or 0.0):
+                            # Skip when nothing is needed any more or the ratio fell below the configured minimum
+                            logger.info(
+                                f"{context.meta_info.title} 与当前缺失集数交集比例过低:{current_ratio:.2%},跳过")
+                            continue
+                        # Download
+                        logger.info(f"开始下载 {context.meta_info.title} ...")
+                        download_id = self.download_single(context, save_path=save_path,
+                                                           channel=channel, source=source,
+                                                           userid=userid, username=username,
+                                                           downloader=downloader)
+                        if download_id:
+                            # Download was queued successfully
+                            logger.info(f"{context.meta_info.title} 添加下载成功")
+                            downloaded_list.append(context)
+                            # Remove the episodes this torrent covers from the still-needed list
+                            need_episodes = __update_episodes(_mid=need_mid,
+                                                              _need=need_episodes,
+                                                              _sea=need_season,
+                                                              _current=current_intersection)
+                            logger.info(f"季 {need_season} 剩余需要集:{need_episodes}")
+                            # Nothing left to fetch for this season: stop iterating candidates
+                            if not need_episodes:
+                                break
 
         # 仍然缺失的剧集,从整季中选择需要的集数文件下载,仅支持QB和TR
         if no_exists:
diff --git a/app/core/config.py b/app/core/config.py
index 94891e08..6537de3c 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -285,6 +285,8 @@ class ConfigModel(BaseModel):
     DEFAULT_SUB: Optional[str] = "zh-cn"
     # Docker Client API地址
     DOCKER_CLIENT_API: Optional[str] = "tcp://127.0.0.1:38379"
+    # Minimum episode-overlap ratio, len(torrent_episodes ∩ need_episodes) / len(torrent_episodes); torrents below it carry too many unneeded episodes
+    EPISODE_INTERSECTION_MIN_CONFIDENCE: float = 0.0
 
 
 class Settings(BaseSettings, ConfigModel, LogConfigModel):