From d657bf8ed88edf67e90a8185288f4fd6bf0d09e1 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Fri, 1 Aug 2025 08:40:25 +0800 Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E5=8D=8F=E7=A8=8B=E6=90=9C?= =?UTF-8?q?=E7=B4=A2=20part3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/chain/__init__.py | 2 +- app/modules/indexer/__init__.py | 155 ++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 1 deletion(-) diff --git a/app/chain/__init__.py b/app/chain/__init__.py index 74a8289a..53e959b2 100644 --- a/app/chain/__init__.py +++ b/app/chain/__init__.py @@ -642,7 +642,7 @@ class ChainBase(metaclass=ABCMeta): :param page: 页码 :reutrn: 资源列表 """ - return await self.async_run_module("search_torrents", site=site, keywords=keywords, + return await self.async_run_module("async_search_torrents", site=site, keywords=keywords, mtype=mtype, page=page) def refresh_torrents(self, site: dict, keyword: Optional[str] = None, diff --git a/app/modules/indexer/__init__.py b/app/modules/indexer/__init__.py index 45f6def0..39dbc4c4 100644 --- a/app/modules/indexer/__init__.py +++ b/app/modules/indexer/__init__.py @@ -1,3 +1,4 @@ +import asyncio import random import time from datetime import datetime @@ -228,6 +229,160 @@ class IndexerModule(_ModuleBase): # 去重 return __remove_duplicate(torrents) + async def async_search_torrents(self, site: dict, + keywords: List[str] = None, + mtype: MediaType = None, + cat: Optional[str] = None, + page: Optional[int] = 0) -> List[TorrentInfo]: + """ + 异步搜索一个站点 + :param site: 站点 + :param keywords: 搜索关键词列表 + :param mtype: 媒体类型 + :param cat: 分类 + :param page: 页码 + :return: 资源列表 + """ + + def __remove_duplicate(_torrents: List[TorrentInfo]) -> List[TorrentInfo]: + """ + 去除重复的种子 + :param _torrents: 种子列表 + :return: 去重后的种子列表 + """ + if not settings.SEARCH_MULTIPLE_NAME: + return _torrents + # 通过encosure去重 + return list({f"{t.title}_{t.description}": t for t in _torrents}.values()) + + # 确认搜索的名字 + if not keywords: + # 浏览种子页 + keywords = [''] + + # 开始索引 + result_array = [] + + # 开始计时 + start_time = datetime.now() + + # 搜索多个关键字 + error_flag = False + search_count = 0 + for search_word in keywords: + # 可能为关键字或ttxxxx + if search_word \ + and site.get('language') == "en" \ + and StringUtils.is_chinese(search_word): + # 不支持中文 + logger.warn(f"{site.get('name')} 不支持中文搜索") + continue + + # 站点流控 + state, msg = SitesHelper().check(StringUtils.get_url_domain(site.get("domain"))) + if state: + logger.warn(msg) + continue + + if search_count > 0: + # 强制休眠 1-10 秒 + logger.info(f"站点 {site.get('name')} 已搜索 {search_count} 次,强制休眠 1-10 秒 ...") + await asyncio.sleep(random.randint(1, 10)) + + # 去除搜索关键字中的特殊字符 + if search_word: + search_word = StringUtils.clear(search_word, replace_word=" ", allow_space=True) + + try: + if site.get('parser') == "TNodeSpider": + # 目前这些特殊爬虫还没有异步版本,暂时使用同步版本 + error_flag, result = await TNodeSpider(site).async_search( + keyword=search_word, + page=page + ) + elif site.get('parser') == "TorrentLeech": + error_flag, result = await TorrentLeech(site).async_search( + keyword=search_word, + page=page + ) + elif site.get('parser') == "mTorrent": + error_flag, result = await MTorrentSpider(site).async_search( + keyword=search_word, + mtype=mtype, + page=page + ) + elif site.get('parser') == "Yema": + error_flag, result = await YemaSpider(site).async_search( + keyword=search_word, + mtype=mtype, + page=page + ) + elif site.get('parser') == "Haidan": + error_flag, result = await HaiDanSpider(site).async_search( + keyword=search_word, + mtype=mtype + ) + elif site.get('parser') == "HDDolby": + error_flag, result = await HddolbySpider(site).async_search( + keyword=search_word, + mtype=mtype, + page=page + ) + else: + error_flag, result = await self.__async_spider_search( + search_word=search_word, + indexer=site, + mtype=mtype, + cat=cat, + page=page + ) + if error_flag: + break + if not result: + continue + + if settings.SEARCH_MULTIPLE_NAME: + # 合并多个结果 + result_array.extend(result) + else: + # 有结果就停止 + result_array = result + break + + except Exception as err: + logger.error(f"{site.get('name')} 搜索出错:{str(err)}") + finally: + search_count += 1 + + # 索引花费的时间 + seconds = (datetime.now() - start_time).seconds + + # 统计索引情况 + domain = StringUtils.get_url_domain(site.get("domain")) + if error_flag: + SiteOper().fail(domain) + else: + SiteOper().success(domain=domain, seconds=seconds) + + # 返回结果 + if not result_array or len(result_array) == 0: + logger.warn(f"{site.get('name')} 未搜索到数据,共搜索 {search_count} 次,耗时 {seconds} 秒") + return [] + else: + logger.info( + f"{site.get('name')} 搜索完成,共搜索 {search_count} 次,耗时 {seconds} 秒,返回数据:{len(result_array)}") + # TorrentInfo + torrents = [TorrentInfo(site=site.get("id"), + site_name=site.get("name"), + site_cookie=site.get("cookie"), + site_ua=site.get("ua"), + site_proxy=site.get("proxy"), + site_order=site.get("pri"), + site_downloader=site.get("downloader"), + **result) for result in result_array] + # 去重 + return __remove_duplicate(torrents) + @staticmethod def __spider_search(indexer: dict, search_word: Optional[str] = None,