From 104138b9a738b6839ebf047fc2c98c3413c61c19 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Fri, 1 Aug 2025 15:18:05 +0800 Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E5=87=8F=E5=B0=91=E6=97=A0?= =?UTF-8?q?=E6=95=88=E6=90=9C=E7=B4=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/chain/__init__.py | 12 +- app/chain/search.py | 105 ++++++++--- app/modules/indexer/__init__.py | 296 +++++++++++++------------------- 3 files changed, 199 insertions(+), 214 deletions(-) diff --git a/app/chain/__init__.py b/app/chain/__init__.py index 66e5a93a..6aa172e6 100644 --- a/app/chain/__init__.py +++ b/app/chain/__init__.py @@ -616,33 +616,33 @@ class ChainBase(metaclass=ABCMeta): return await self.async_run_module("async_search_collections", name=name) def search_torrents(self, site: dict, - keywords: List[str], + keyword: str, mtype: Optional[MediaType] = None, page: Optional[int] = 0) -> List[TorrentInfo]: """ 搜索一个站点的种子资源 :param site: 站点 - :param keywords: 搜索关键词列表 + :param keyword: 搜索关键词 :param mtype: 媒体类型 :param page: 页码 :reutrn: 资源列表 """ - return self.run_module("search_torrents", site=site, keywords=keywords, + return self.run_module("search_torrents", site=site, keyword=keyword, mtype=mtype, page=page) async def async_search_torrents(self, site: dict, - keywords: List[str], + keyword: str, mtype: Optional[MediaType] = None, page: Optional[int] = 0) -> List[TorrentInfo]: """ 异步搜索一个站点的种子资源 :param site: 站点 - :param keywords: 搜索关键词列表 + :param keyword: 搜索关键词 :param mtype: 媒体类型 :param page: 页码 :reutrn: 资源列表 """ - return await self.async_run_module("async_search_torrents", site=site, keywords=keywords, + return await self.async_run_module("async_search_torrents", site=site, keyword=keyword, mtype=mtype, page=page) def refresh_torrents(self, site: dict, keyword: Optional[str] = None, diff --git a/app/chain/search.py b/app/chain/search.py index 6ebc8496..1500734d 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -1,12 +1,13 @@ import asyncio import pickle +import random +import time import traceback from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from typing import Dict, Tuple from typing import List, Optional -from app.helper.sites import SitesHelper # noqa from fastapi.concurrency import run_in_threadpool from app.chain import ChainBase @@ -17,6 +18,7 @@ from app.core.event import eventmanager, Event from app.core.metainfo import MetaInfo from app.db.systemconfig_oper import SystemConfigOper from app.helper.progress import ProgressHelper +from app.helper.sites import SitesHelper # noqa from app.helper.torrent import TorrentHelper from app.log import logger from app.schemas import NotExistMediaInfo @@ -74,7 +76,7 @@ class SearchChain(ChainBase): else: logger.info(f'开始浏览资源,站点:{sites} ...') # 搜索 - torrents = self.__search_all_sites(keywords=[title], sites=sites, page=page) or [] + torrents = self.__search_all_sites(keywords=title, sites=sites, page=page) or [] if not torrents: logger.warn(f'{title} 未搜索到资源') return [] @@ -335,8 +337,21 @@ class SearchChain(ChainBase): key=ProgressKey.Search) progress.end(ProgressKey.Search) - # 返回 - return contexts + # 去重后返回 + return self.__remove_duplicate(contexts) + + @staticmethod + def __remove_duplicate(_torrents: List[Context]) -> List[Context]: + """ + 去除重复的种子 + :param _torrents: 种子列表 + :return: 去重后的种子列表 + """ + if not settings.SEARCH_MULTIPLE_NAME: + return _torrents + # 通过encosure去重 + return list({f"{t.torrent_info.site_name}_{t.torrent_info.title}_{t.torrent_info.description}": t + for t in _torrents}.values()) def process(self, mediainfo: MediaInfo, keyword: Optional[str] = None, @@ -381,13 +396,28 @@ class SearchChain(ChainBase): no_exists=no_exists ) - # 执行搜索 - torrents: List[TorrentInfo] = self.__search_all_sites( - mediainfo=mediainfo, - keywords=keywords, - sites=sites, - area=area - ) + # 站点搜索结果 + torrents: List[TorrentInfo] = [] + # 站点搜索次数 + search_count = 0 + + # 多关键字执行搜索 + for search_word in keywords: + # 强制休眠 1-10 秒 + if search_count > 0: + logger.info(f"已搜索 {search_count} 次,强制休眠 1-10 秒 ...") + time.sleep(random.randint(1, 10)) + # 搜索站点 + torrents.extend( + self.__search_all_sites( + mediainfo=mediainfo, + keyword=search_word, + sites=sites, + area=area + ) or [] + ) + search_count += 1 + # 处理结果 return self.__parse_result( torrents=torrents, @@ -442,13 +472,32 @@ class SearchChain(ChainBase): no_exists=no_exists ) - # 执行搜索 - torrents: List[TorrentInfo] = await self.__async_search_all_sites( - mediainfo=mediainfo, - keywords=keywords, - sites=sites, - area=area - ) + # 站点搜索结果 + torrents: List[TorrentInfo] = [] + # 站点搜索次数 + search_count = 0 + + # 多关键字执行搜索 + for search_word in keywords: + # 强制休眠 1-10 秒 + if search_count > 0: + logger.info(f"已搜索 {search_count} 次,强制休眠 1-10 秒 ...") + await asyncio.sleep(random.randint(1, 10)) + # 搜索站点 + torrents.extend( + await self.__async_search_all_sites( + mediainfo=mediainfo, + keyword=search_word, + sites=sites, + area=area + ) or [] + ) + search_count += 1 + # 有结果则停止 + if torrents: + logger.info(f"共搜索到 {len(torrents)} 个资源,停止搜索") + break + # 处理结果 return await run_in_threadpool(self.__parse_result, torrents=torrents, @@ -460,7 +509,7 @@ class SearchChain(ChainBase): filter_params=filter_params ) - def __search_all_sites(self, keywords: List[str], + def __search_all_sites(self, keyword: str, mediainfo: Optional[MediaInfo] = None, sites: List[int] = None, page: Optional[int] = 0, @@ -468,7 +517,7 @@ class SearchChain(ChainBase): """ 多线程搜索多个站点 :param mediainfo: 识别的媒体信息 - :param keywords: 搜索关键词列表 + :param keyword: 搜索关键词 :param sites: 指定站点ID列表,如有则只搜索指定站点,否则搜索所有站点 :param page: 搜索页码 :param area: 搜索区域 title or imdbid @@ -511,13 +560,13 @@ class SearchChain(ChainBase): if area == "imdbid": # 搜索IMDBID task = executor.submit(self.search_torrents, site=site, - keywords=[mediainfo.imdb_id] if mediainfo else None, + keyword=mediainfo.imdb_id if mediainfo else None, mtype=mediainfo.type if mediainfo else None, page=page) else: # 搜索标题 task = executor.submit(self.search_torrents, site=site, - keywords=keywords, + keyword=keyword, mtype=mediainfo.type if mediainfo else None, page=page) all_task.append(task) @@ -530,7 +579,7 @@ class SearchChain(ChainBase): results.extend(result) logger.info(f"站点搜索进度:{finish_count} / {total_num}") progress.update(value=finish_count / total_num * 100, - text=f"正在搜索{keywords or ''},已完成 {finish_count} / {total_num} 个站点 ...", + text=f"正在搜索{keyword or ''},已完成 {finish_count} / {total_num} 个站点 ...", key=ProgressKey.Search) # 计算耗时 end_time = datetime.now() @@ -545,7 +594,7 @@ class SearchChain(ChainBase): # 返回 return results - async def __async_search_all_sites(self, keywords: List[str], + async def __async_search_all_sites(self, keyword: str, mediainfo: Optional[MediaInfo] = None, sites: List[int] = None, page: Optional[int] = 0, @@ -553,7 +602,7 @@ class SearchChain(ChainBase): """ 异步搜索多个站点 :param mediainfo: 识别的媒体信息 - :param keywords: 搜索关键词列表 + :param keyword: 搜索关键词 :param sites: 指定站点ID列表,如有则只搜索指定站点,否则搜索所有站点 :param page: 搜索页码 :param area: 搜索区域 title or imdbid @@ -596,13 +645,13 @@ class SearchChain(ChainBase): if area == "imdbid": # 搜索IMDBID task = self.async_search_torrents(site=site, - keywords=[mediainfo.imdb_id] if mediainfo else None, + keyword=mediainfo.imdb_id if mediainfo else None, mtype=mediainfo.type if mediainfo else None, page=page) else: # 搜索标题 task = self.async_search_torrents(site=site, - keywords=keywords, + keyword=keyword, mtype=mediainfo.type if mediainfo else None, page=page) tasks.append(task) @@ -617,7 +666,7 @@ class SearchChain(ChainBase): results.extend(result) logger.info(f"站点搜索进度:{finish_count} / {total_num}") progress.update(value=finish_count / total_num * 100, - text=f"正在搜索{keywords or ''},已完成 {finish_count} / {total_num} 个站点 ...", + text=f"正在搜索{keyword or ''},已完成 {finish_count} / {total_num} 个站点 ...", key=ProgressKey.Search) # 计算耗时 diff --git a/app/modules/indexer/__init__.py b/app/modules/indexer/__init__.py index 5a528b25..d166b7ea 100644 --- a/app/modules/indexer/__init__.py +++ b/app/modules/indexer/__init__.py @@ -1,10 +1,6 @@ -import asyncio -import random -import time from datetime import datetime from typing import List, Optional, Tuple, Union -from app.core.config import settings from app.core.context import TorrentInfo from app.db.site_oper import SiteOper from app.helper.module import ModuleHelper @@ -134,48 +130,33 @@ class IndexerModule(_ModuleBase): await SiteOper().async_success(domain=domain, seconds=seconds) @staticmethod - def __parse_result(site: dict, result_array: list, search_count: int, seconds: int) -> TorrentInfo: + def __parse_result(site: dict, result_array: list, seconds: int) -> TorrentInfo: """ 解析搜索结果为 TorrentInfo 对象 """ - - def __remove_duplicate(_torrents: List[TorrentInfo]) -> List[TorrentInfo]: - """ - 去除重复的种子 - :param _torrents: 种子列表 - :return: 去重后的种子列表 - """ - if not settings.SEARCH_MULTIPLE_NAME: - return _torrents - # 通过encosure去重 - return list({f"{t.title}_{t.description}": t for t in _torrents}.values()) - if not result_array or len(result_array) == 0: - logger.warn(f"{site.get('name')} 未搜索到数据,共搜索 {search_count} 次,耗时 {seconds} 秒") + logger.warn(f"{site.get('name')} 未搜索到数据,耗时 {seconds} 秒") return [] - else: - logger.info( - f"{site.get('name')} 搜索完成,共搜索 {search_count} 次,耗时 {seconds} 秒,返回数据:{len(result_array)}") - torrents = [TorrentInfo(site=site.get("id"), - site_name=site.get("name"), - site_cookie=site.get("cookie"), - site_ua=site.get("ua"), - site_proxy=site.get("proxy"), - site_order=site.get("pri"), - site_downloader=site.get("downloader"), - **result) for result in result_array] - # 去重 - return __remove_duplicate(torrents) + logger.info( + f"{site.get('name')} 搜索完成,耗时 {seconds} 秒,返回数据:{len(result_array)}") + return [TorrentInfo(site=site.get("id"), + site_name=site.get("name"), + site_cookie=site.get("cookie"), + site_ua=site.get("ua"), + site_proxy=site.get("proxy"), + site_order=site.get("pri"), + site_downloader=site.get("downloader"), + **result) for result in result_array] def search_torrents(self, site: dict, - keywords: List[str] = None, + keyword: str = None, mtype: MediaType = None, cat: Optional[str] = None, page: Optional[int] = 0) -> List[TorrentInfo]: """ 搜索一个站点 :param site: 站点 - :param keywords: 搜索关键词列表 + :param keyword: 搜索关键词 :param mtype: 媒体类型 :param cat: 分类 :param page: 页码 @@ -188,79 +169,59 @@ class IndexerModule(_ModuleBase): start_time = datetime.now() # 错误标志 error_flag = False - # 搜索次数 - search_count = 0 - for search_word in keywords or ['']: - # 检查是否可以执行搜索 - if not self.__search_check(site, search_word): - continue - # 强制休眠 1-10 秒 - if search_count > 0: - logger.info(f"站点 {site.get('name')} 已搜索 {search_count} 次,强制休眠 1-10 秒 ...") - time.sleep(random.randint(1, 10)) + # 检查是否可以执行搜索 + if not self.__search_check(site, keyword): + return [] - # 去除搜索关键字中的特殊字符 - search_word = self.__clear_search_text(search_word) + # 去除搜索关键字中的特殊字符 + search_word = self.__clear_search_text(keyword) - # 开始搜索 - try: - if site.get('parser') == "TNodeSpider": - error_flag, result = TNodeSpider(site).search( - keyword=search_word, - page=page - ) - elif site.get('parser') == "TorrentLeech": - error_flag, result = TorrentLeech(site).search( - keyword=search_word, - page=page - ) - elif site.get('parser') == "mTorrent": - error_flag, result = MTorrentSpider(site).search( - keyword=search_word, - mtype=mtype, - page=page - ) - elif site.get('parser') == "Yema": - error_flag, result = YemaSpider(site).search( - keyword=search_word, - mtype=mtype, - page=page - ) - elif site.get('parser') == "Haidan": - error_flag, result = HaiDanSpider(site).search( - keyword=search_word, - mtype=mtype - ) - elif site.get('parser') == "HDDolby": - error_flag, result = HddolbySpider(site).search( - keyword=search_word, - mtype=mtype, - page=page - ) - else: - error_flag, result = self.__spider_search( - search_word=search_word, - indexer=site, - mtype=mtype, - cat=cat, - page=page - ) - if error_flag: - break - if not result: - continue - if settings.SEARCH_MULTIPLE_NAME: - # 合并多个结果 - result_array.extend(result) - else: - # 有结果就停止 - result_array = result - break - except Exception as err: - logger.error(f"{site.get('name')} 搜索出错:{str(err)}") - finally: - search_count += 1 + # 开始搜索 + try: + if site.get('parser') == "TNodeSpider": + error_flag, result = TNodeSpider(site).search( + keyword=search_word, + page=page + ) + elif site.get('parser') == "TorrentLeech": + error_flag, result = TorrentLeech(site).search( + keyword=search_word, + page=page + ) + elif site.get('parser') == "mTorrent": + error_flag, result = MTorrentSpider(site).search( + keyword=search_word, + mtype=mtype, + page=page + ) + elif site.get('parser') == "Yema": + error_flag, result = YemaSpider(site).search( + keyword=search_word, + mtype=mtype, + page=page + ) + elif site.get('parser') == "Haidan": + error_flag, result = HaiDanSpider(site).search( + keyword=search_word, + mtype=mtype + ) + elif site.get('parser') == "HDDolby": + error_flag, result = HddolbySpider(site).search( + keyword=search_word, + mtype=mtype, + page=page + ) + else: + error_flag, result = self.__spider_search( + search_word=search_word, + indexer=site, + mtype=mtype, + cat=cat, + page=page + ) + except Exception as err: + logger.error(f"{site.get('name')} 搜索出错:{str(err)}") # 索引花费的时间 seconds = (datetime.now() - start_time).seconds @@ -272,19 +233,18 @@ class IndexerModule(_ModuleBase): return self.__parse_result( site=site, result_array=result_array, - search_count=search_count, seconds=seconds ) async def async_search_torrents(self, site: dict, - keywords: List[str] = None, + keyword: str = None, mtype: MediaType = None, cat: Optional[str] = None, page: Optional[int] = 0) -> List[TorrentInfo]: """ 异步搜索一个站点 :param site: 站点 - :param keywords: 搜索关键词列表 + :param keyword: 搜索关键词 :param mtype: 媒体类型 :param cat: 分类 :param page: 页码 @@ -297,82 +257,59 @@ class IndexerModule(_ModuleBase): start_time = datetime.now() # 错误标志 error_flag = False - # 搜索次数 - search_count = 0 - # 遍历搜索关键字 - for search_word in keywords or ['']: - # 检查是否可以执行搜索 - if not self.__search_check(site, search_word): - continue - # 强制休眠 1-10 秒 - if search_count > 0: - logger.info(f"站点 {site.get('name')} 已搜索 {search_count} 次,强制休眠 1-10 秒 ...") - await asyncio.sleep(random.randint(1, 10)) + # 检查是否可以执行搜索 + if not self.__search_check(site, keyword): + return [] - # 去除搜索关键字中的特殊字符 - search_word = self.__clear_search_text(search_word) + # 去除搜索关键字中的特殊字符 + search_word = self.__clear_search_text(keyword) - # 开始搜索 - try: - if site.get('parser') == "TNodeSpider": - error_flag, result = await TNodeSpider(site).async_search( - keyword=search_word, - page=page - ) - elif site.get('parser') == "TorrentLeech": - error_flag, result = await TorrentLeech(site).async_search( - keyword=search_word, - page=page - ) - elif site.get('parser') == "mTorrent": - error_flag, result = await MTorrentSpider(site).async_search( - keyword=search_word, - mtype=mtype, - page=page - ) - elif site.get('parser') == "Yema": - error_flag, result = await YemaSpider(site).async_search( - keyword=search_word, - mtype=mtype, - page=page - ) - elif site.get('parser') == "Haidan": - error_flag, result = await HaiDanSpider(site).async_search( - keyword=search_word, - mtype=mtype - ) - elif site.get('parser') == "HDDolby": - error_flag, result = await HddolbySpider(site).async_search( - keyword=search_word, - mtype=mtype, - page=page - ) - else: - error_flag, result = await self.__async_spider_search( - search_word=search_word, - indexer=site, - mtype=mtype, - cat=cat, - page=page - ) - if error_flag: - break - if not result: - continue - - if settings.SEARCH_MULTIPLE_NAME: - # 合并多个结果 - result_array.extend(result) - else: - # 有结果就停止 - result_array = result - break - - except Exception as err: - logger.error(f"{site.get('name')} 搜索出错:{str(err)}") - finally: - search_count += 1 + # 开始搜索 + try: + if site.get('parser') == "TNodeSpider": + error_flag, result = await TNodeSpider(site).async_search( + keyword=search_word, + page=page + ) + elif site.get('parser') == "TorrentLeech": + error_flag, result = await TorrentLeech(site).async_search( + keyword=search_word, + page=page + ) + elif site.get('parser') == "mTorrent": + error_flag, result = await MTorrentSpider(site).async_search( + keyword=search_word, + mtype=mtype, + page=page + ) + elif site.get('parser') == "Yema": + error_flag, result = await YemaSpider(site).async_search( + keyword=search_word, + mtype=mtype, + page=page + ) + elif site.get('parser') == "Haidan": + error_flag, result = await HaiDanSpider(site).async_search( + keyword=search_word, + mtype=mtype + ) + elif site.get('parser') == "HDDolby": + error_flag, result = await HddolbySpider(site).async_search( + keyword=search_word, + mtype=mtype, + page=page + ) + else: + error_flag, result = await self.__async_spider_search( + search_word=search_word, + indexer=site, + mtype=mtype, + cat=cat, + page=page + ) + except Exception as err: + logger.error(f"{site.get('name')} 搜索出错:{str(err)}") # 索引花费的时间 seconds = (datetime.now() - start_time).seconds @@ -384,7 +321,6 @@ class IndexerModule(_ModuleBase): return self.__parse_result( site=site, result_array=result_array, - search_count=search_count, seconds=seconds )