fix:减少无效搜索

This commit is contained in:
jxxghp
2025-08-01 15:18:05 +08:00
parent 0c8fd5121a
commit 104138b9a7
3 changed files with 199 additions and 214 deletions

View File

@@ -616,33 +616,33 @@ class ChainBase(metaclass=ABCMeta):
return await self.async_run_module("async_search_collections", name=name)
def search_torrents(self, site: dict,
keywords: List[str],
keyword: str,
mtype: Optional[MediaType] = None,
page: Optional[int] = 0) -> List[TorrentInfo]:
"""
搜索一个站点的种子资源
:param site: 站点
:param keywords: 搜索关键词列表
:param keyword: 搜索关键词
:param mtype: 媒体类型
:param page: 页码
:return: 资源列表
"""
return self.run_module("search_torrents", site=site, keywords=keywords,
return self.run_module("search_torrents", site=site, keyword=keyword,
mtype=mtype, page=page)
async def async_search_torrents(self, site: dict,
keywords: List[str],
keyword: str,
mtype: Optional[MediaType] = None,
page: Optional[int] = 0) -> List[TorrentInfo]:
"""
异步搜索一个站点的种子资源
:param site: 站点
:param keywords: 搜索关键词列表
:param keyword: 搜索关键词
:param mtype: 媒体类型
:param page: 页码
:return: 资源列表
"""
return await self.async_run_module("async_search_torrents", site=site, keywords=keywords,
return await self.async_run_module("async_search_torrents", site=site, keyword=keyword,
mtype=mtype, page=page)
def refresh_torrents(self, site: dict, keyword: Optional[str] = None,

View File

@@ -1,12 +1,13 @@
import asyncio
import pickle
import random
import time
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from typing import Dict, Tuple
from typing import List, Optional
from app.helper.sites import SitesHelper # noqa
from fastapi.concurrency import run_in_threadpool
from app.chain import ChainBase
@@ -17,6 +18,7 @@ from app.core.event import eventmanager, Event
from app.core.metainfo import MetaInfo
from app.db.systemconfig_oper import SystemConfigOper
from app.helper.progress import ProgressHelper
from app.helper.sites import SitesHelper # noqa
from app.helper.torrent import TorrentHelper
from app.log import logger
from app.schemas import NotExistMediaInfo
@@ -74,7 +76,7 @@ class SearchChain(ChainBase):
else:
logger.info(f'开始浏览资源,站点:{sites} ...')
# 搜索
torrents = self.__search_all_sites(keywords=[title], sites=sites, page=page) or []
torrents = self.__search_all_sites(keywords=title, sites=sites, page=page) or []
if not torrents:
logger.warn(f'{title} 未搜索到资源')
return []
@@ -335,8 +337,21 @@ class SearchChain(ChainBase):
key=ProgressKey.Search)
progress.end(ProgressKey.Search)
# 返回
return contexts
# 去重后返回
return self.__remove_duplicate(contexts)
@staticmethod
def __remove_duplicate(_torrents: List[Context]) -> List[Context]:
    """
    Drop duplicated entries from a list of search result contexts.

    Two entries are treated as duplicates when their ``torrent_info`` shares
    the same site name, title and description.

    :param _torrents: list of contexts to de-duplicate
    :return: the input list itself when ``settings.SEARCH_MULTIPLE_NAME`` is
             falsy, otherwise a new list holding one entry per distinct key
    """
    if settings.SEARCH_MULTIPLE_NAME:
        unique = {}
        for ctx in _torrents:
            info = ctx.torrent_info
            # A later duplicate replaces the stored entry, but the key keeps
            # the position at which it was first inserted.
            unique[f"{info.site_name}_{info.title}_{info.description}"] = ctx
        return list(unique.values())
    # De-duplication is skipped entirely when the setting is off
    return _torrents
def process(self, mediainfo: MediaInfo,
keyword: Optional[str] = None,
@@ -381,13 +396,28 @@ class SearchChain(ChainBase):
no_exists=no_exists
)
# 执行搜索
torrents: List[TorrentInfo] = self.__search_all_sites(
mediainfo=mediainfo,
keywords=keywords,
sites=sites,
area=area
)
# 站点搜索结果
torrents: List[TorrentInfo] = []
# 站点搜索次数
search_count = 0
# 多关键字执行搜索
for search_word in keywords:
# 强制休眠 1-10 秒
if search_count > 0:
logger.info(f"已搜索 {search_count} 次,强制休眠 1-10 秒 ...")
time.sleep(random.randint(1, 10))
# 搜索站点
torrents.extend(
self.__search_all_sites(
mediainfo=mediainfo,
keyword=search_word,
sites=sites,
area=area
) or []
)
search_count += 1
# 处理结果
return self.__parse_result(
torrents=torrents,
@@ -442,13 +472,32 @@ class SearchChain(ChainBase):
no_exists=no_exists
)
# 执行搜索
torrents: List[TorrentInfo] = await self.__async_search_all_sites(
mediainfo=mediainfo,
keywords=keywords,
sites=sites,
area=area
)
# 站点搜索结果
torrents: List[TorrentInfo] = []
# 站点搜索次数
search_count = 0
# 多关键字执行搜索
for search_word in keywords:
# 强制休眠 1-10 秒
if search_count > 0:
logger.info(f"已搜索 {search_count} 次,强制休眠 1-10 秒 ...")
await asyncio.sleep(random.randint(1, 10))
# 搜索站点
torrents.extend(
await self.__async_search_all_sites(
mediainfo=mediainfo,
keyword=search_word,
sites=sites,
area=area
) or []
)
search_count += 1
# 有结果则停止
if torrents:
logger.info(f"共搜索到 {len(torrents)} 个资源,停止搜索")
break
# 处理结果
return await run_in_threadpool(self.__parse_result,
torrents=torrents,
@@ -460,7 +509,7 @@ class SearchChain(ChainBase):
filter_params=filter_params
)
def __search_all_sites(self, keywords: List[str],
def __search_all_sites(self, keyword: str,
mediainfo: Optional[MediaInfo] = None,
sites: List[int] = None,
page: Optional[int] = 0,
@@ -468,7 +517,7 @@ class SearchChain(ChainBase):
"""
多线程搜索多个站点
:param mediainfo: 识别的媒体信息
:param keywords: 搜索关键词列表
:param keyword: 搜索关键词
:param sites: 指定站点ID列表如有则只搜索指定站点否则搜索所有站点
:param page: 搜索页码
:param area: 搜索区域 title or imdbid
@@ -511,13 +560,13 @@ class SearchChain(ChainBase):
if area == "imdbid":
# 搜索IMDBID
task = executor.submit(self.search_torrents, site=site,
keywords=[mediainfo.imdb_id] if mediainfo else None,
keyword=mediainfo.imdb_id if mediainfo else None,
mtype=mediainfo.type if mediainfo else None,
page=page)
else:
# 搜索标题
task = executor.submit(self.search_torrents, site=site,
keywords=keywords,
keyword=keyword,
mtype=mediainfo.type if mediainfo else None,
page=page)
all_task.append(task)
@@ -530,7 +579,7 @@ class SearchChain(ChainBase):
results.extend(result)
logger.info(f"站点搜索进度:{finish_count} / {total_num}")
progress.update(value=finish_count / total_num * 100,
text=f"正在搜索{keywords or ''},已完成 {finish_count} / {total_num} 个站点 ...",
text=f"正在搜索{keyword or ''},已完成 {finish_count} / {total_num} 个站点 ...",
key=ProgressKey.Search)
# 计算耗时
end_time = datetime.now()
@@ -545,7 +594,7 @@ class SearchChain(ChainBase):
# 返回
return results
async def __async_search_all_sites(self, keywords: List[str],
async def __async_search_all_sites(self, keyword: str,
mediainfo: Optional[MediaInfo] = None,
sites: List[int] = None,
page: Optional[int] = 0,
@@ -553,7 +602,7 @@ class SearchChain(ChainBase):
"""
异步搜索多个站点
:param mediainfo: 识别的媒体信息
:param keywords: 搜索关键词列表
:param keyword: 搜索关键词
:param sites: 指定站点ID列表如有则只搜索指定站点否则搜索所有站点
:param page: 搜索页码
:param area: 搜索区域 title or imdbid
@@ -596,13 +645,13 @@ class SearchChain(ChainBase):
if area == "imdbid":
# 搜索IMDBID
task = self.async_search_torrents(site=site,
keywords=[mediainfo.imdb_id] if mediainfo else None,
keyword=mediainfo.imdb_id if mediainfo else None,
mtype=mediainfo.type if mediainfo else None,
page=page)
else:
# 搜索标题
task = self.async_search_torrents(site=site,
keywords=keywords,
keyword=keyword,
mtype=mediainfo.type if mediainfo else None,
page=page)
tasks.append(task)
@@ -617,7 +666,7 @@ class SearchChain(ChainBase):
results.extend(result)
logger.info(f"站点搜索进度:{finish_count} / {total_num}")
progress.update(value=finish_count / total_num * 100,
text=f"正在搜索{keywords or ''},已完成 {finish_count} / {total_num} 个站点 ...",
text=f"正在搜索{keyword or ''},已完成 {finish_count} / {total_num} 个站点 ...",
key=ProgressKey.Search)
# 计算耗时

View File

@@ -1,10 +1,6 @@
import asyncio
import random
import time
from datetime import datetime
from typing import List, Optional, Tuple, Union
from app.core.config import settings
from app.core.context import TorrentInfo
from app.db.site_oper import SiteOper
from app.helper.module import ModuleHelper
@@ -134,48 +130,33 @@ class IndexerModule(_ModuleBase):
await SiteOper().async_success(domain=domain, seconds=seconds)
@staticmethod
def __parse_result(site: dict, result_array: list, search_count: int, seconds: int) -> TorrentInfo:
def __parse_result(site: dict, result_array: list, seconds: int) -> TorrentInfo:
"""
解析搜索结果为 TorrentInfo 对象
"""
def __remove_duplicate(_torrents: List[TorrentInfo]) -> List[TorrentInfo]:
"""
去除重复的种子
:param _torrents: 种子列表
:return: 去重后的种子列表
"""
if not settings.SEARCH_MULTIPLE_NAME:
return _torrents
# 通过标题和描述去重
return list({f"{t.title}_{t.description}": t for t in _torrents}.values())
if not result_array or len(result_array) == 0:
logger.warn(f"{site.get('name')} 未搜索到数据,共搜索 {search_count} 次,耗时 {seconds}")
logger.warn(f"{site.get('name')} 未搜索到数据,耗时 {seconds}")
return []
else:
logger.info(
f"{site.get('name')} 搜索完成,共搜索 {search_count} 次,耗时 {seconds} 秒,返回数据:{len(result_array)}")
torrents = [TorrentInfo(site=site.get("id"),
site_name=site.get("name"),
site_cookie=site.get("cookie"),
site_ua=site.get("ua"),
site_proxy=site.get("proxy"),
site_order=site.get("pri"),
site_downloader=site.get("downloader"),
**result) for result in result_array]
# 去重
return __remove_duplicate(torrents)
logger.info(
f"{site.get('name')} 搜索完成,耗时 {seconds} 秒,返回数据:{len(result_array)}")
return [TorrentInfo(site=site.get("id"),
site_name=site.get("name"),
site_cookie=site.get("cookie"),
site_ua=site.get("ua"),
site_proxy=site.get("proxy"),
site_order=site.get("pri"),
site_downloader=site.get("downloader"),
**result) for result in result_array]
def search_torrents(self, site: dict,
keywords: List[str] = None,
keyword: str = None,
mtype: MediaType = None,
cat: Optional[str] = None,
page: Optional[int] = 0) -> List[TorrentInfo]:
"""
搜索一个站点
:param site: 站点
:param keywords: 搜索关键词列表
:param keyword: 搜索关键词
:param mtype: 媒体类型
:param cat: 分类
:param page: 页码
@@ -188,79 +169,59 @@ class IndexerModule(_ModuleBase):
start_time = datetime.now()
# 错误标志
error_flag = False
# 搜索次数
search_count = 0
for search_word in keywords or ['']:
# 检查是否可以执行搜索
if not self.__search_check(site, search_word):
continue
# 强制休眠 1-10 秒
if search_count > 0:
logger.info(f"站点 {site.get('name')} 已搜索 {search_count} 次,强制休眠 1-10 秒 ...")
time.sleep(random.randint(1, 10))
# 检查是否可以执行搜索
if not self.__search_check(site, keyword):
return []
# 去除搜索关键字中的特殊字符
search_word = self.__clear_search_text(search_word)
# 去除搜索关键字中的特殊字符
search_word = self.__clear_search_text(keyword)
# 开始搜索
try:
if site.get('parser') == "TNodeSpider":
error_flag, result = TNodeSpider(site).search(
keyword=search_word,
page=page
)
elif site.get('parser') == "TorrentLeech":
error_flag, result = TorrentLeech(site).search(
keyword=search_word,
page=page
)
elif site.get('parser') == "mTorrent":
error_flag, result = MTorrentSpider(site).search(
keyword=search_word,
mtype=mtype,
page=page
)
elif site.get('parser') == "Yema":
error_flag, result = YemaSpider(site).search(
keyword=search_word,
mtype=mtype,
page=page
)
elif site.get('parser') == "Haidan":
error_flag, result = HaiDanSpider(site).search(
keyword=search_word,
mtype=mtype
)
elif site.get('parser') == "HDDolby":
error_flag, result = HddolbySpider(site).search(
keyword=search_word,
mtype=mtype,
page=page
)
else:
error_flag, result = self.__spider_search(
search_word=search_word,
indexer=site,
mtype=mtype,
cat=cat,
page=page
)
if error_flag:
break
if not result:
continue
if settings.SEARCH_MULTIPLE_NAME:
# 合并多个结果
result_array.extend(result)
else:
# 有结果就停止
result_array = result
break
except Exception as err:
logger.error(f"{site.get('name')} 搜索出错:{str(err)}")
finally:
search_count += 1
# 开始搜索
try:
if site.get('parser') == "TNodeSpider":
error_flag, result = TNodeSpider(site).search(
keyword=search_word,
page=page
)
elif site.get('parser') == "TorrentLeech":
error_flag, result = TorrentLeech(site).search(
keyword=search_word,
page=page
)
elif site.get('parser') == "mTorrent":
error_flag, result = MTorrentSpider(site).search(
keyword=search_word,
mtype=mtype,
page=page
)
elif site.get('parser') == "Yema":
error_flag, result = YemaSpider(site).search(
keyword=search_word,
mtype=mtype,
page=page
)
elif site.get('parser') == "Haidan":
error_flag, result = HaiDanSpider(site).search(
keyword=search_word,
mtype=mtype
)
elif site.get('parser') == "HDDolby":
error_flag, result = HddolbySpider(site).search(
keyword=search_word,
mtype=mtype,
page=page
)
else:
error_flag, result = self.__spider_search(
search_word=search_word,
indexer=site,
mtype=mtype,
cat=cat,
page=page
)
except Exception as err:
logger.error(f"{site.get('name')} 搜索出错:{str(err)}")
# 索引花费的时间
seconds = (datetime.now() - start_time).seconds
@@ -272,19 +233,18 @@ class IndexerModule(_ModuleBase):
return self.__parse_result(
site=site,
result_array=result_array,
search_count=search_count,
seconds=seconds
)
async def async_search_torrents(self, site: dict,
keywords: List[str] = None,
keyword: str = None,
mtype: MediaType = None,
cat: Optional[str] = None,
page: Optional[int] = 0) -> List[TorrentInfo]:
"""
异步搜索一个站点
:param site: 站点
:param keywords: 搜索关键词列表
:param keyword: 搜索关键词
:param mtype: 媒体类型
:param cat: 分类
:param page: 页码
@@ -297,82 +257,59 @@ class IndexerModule(_ModuleBase):
start_time = datetime.now()
# 错误标志
error_flag = False
# 搜索次数
search_count = 0
# 遍历搜索关键字
for search_word in keywords or ['']:
# 检查是否可以执行搜索
if not self.__search_check(site, search_word):
continue
# 强制休眠 1-10 秒
if search_count > 0:
logger.info(f"站点 {site.get('name')} 已搜索 {search_count} 次,强制休眠 1-10 秒 ...")
await asyncio.sleep(random.randint(1, 10))
# 检查是否可以执行搜索
if not self.__search_check(site, keyword):
return []
# 去除搜索关键字中的特殊字符
search_word = self.__clear_search_text(search_word)
# 去除搜索关键字中的特殊字符
search_word = self.__clear_search_text(keyword)
# 开始搜索
try:
if site.get('parser') == "TNodeSpider":
error_flag, result = await TNodeSpider(site).async_search(
keyword=search_word,
page=page
)
elif site.get('parser') == "TorrentLeech":
error_flag, result = await TorrentLeech(site).async_search(
keyword=search_word,
page=page
)
elif site.get('parser') == "mTorrent":
error_flag, result = await MTorrentSpider(site).async_search(
keyword=search_word,
mtype=mtype,
page=page
)
elif site.get('parser') == "Yema":
error_flag, result = await YemaSpider(site).async_search(
keyword=search_word,
mtype=mtype,
page=page
)
elif site.get('parser') == "Haidan":
error_flag, result = await HaiDanSpider(site).async_search(
keyword=search_word,
mtype=mtype
)
elif site.get('parser') == "HDDolby":
error_flag, result = await HddolbySpider(site).async_search(
keyword=search_word,
mtype=mtype,
page=page
)
else:
error_flag, result = await self.__async_spider_search(
search_word=search_word,
indexer=site,
mtype=mtype,
cat=cat,
page=page
)
if error_flag:
break
if not result:
continue
if settings.SEARCH_MULTIPLE_NAME:
# 合并多个结果
result_array.extend(result)
else:
# 有结果就停止
result_array = result
break
except Exception as err:
logger.error(f"{site.get('name')} 搜索出错:{str(err)}")
finally:
search_count += 1
# 开始搜索
try:
if site.get('parser') == "TNodeSpider":
error_flag, result = await TNodeSpider(site).async_search(
keyword=search_word,
page=page
)
elif site.get('parser') == "TorrentLeech":
error_flag, result = await TorrentLeech(site).async_search(
keyword=search_word,
page=page
)
elif site.get('parser') == "mTorrent":
error_flag, result = await MTorrentSpider(site).async_search(
keyword=search_word,
mtype=mtype,
page=page
)
elif site.get('parser') == "Yema":
error_flag, result = await YemaSpider(site).async_search(
keyword=search_word,
mtype=mtype,
page=page
)
elif site.get('parser') == "Haidan":
error_flag, result = await HaiDanSpider(site).async_search(
keyword=search_word,
mtype=mtype
)
elif site.get('parser') == "HDDolby":
error_flag, result = await HddolbySpider(site).async_search(
keyword=search_word,
mtype=mtype,
page=page
)
else:
error_flag, result = await self.__async_spider_search(
search_word=search_word,
indexer=site,
mtype=mtype,
cat=cat,
page=page
)
except Exception as err:
logger.error(f"{site.get('name')} 搜索出错:{str(err)}")
# 索引花费的时间
seconds = (datetime.now() - start_time).seconds
@@ -384,7 +321,6 @@ class IndexerModule(_ModuleBase):
return self.__parse_result(
site=site,
result_array=result_array,
search_count=search_count,
seconds=seconds
)