Files
MoviePilot/app/chain/torrents.py
Attente b89825525a refactor(helper): 将DirectoryHelper、RuleHelper和TorrentHelper方法改为静态方法
- 移除不必要的实例化操作,直接使用类方法
2026-03-13 19:50:10 +08:00

373 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
import traceback
from typing import Dict, List, Union, Optional
from app.chain import ChainBase
from app.chain.media import MediaChain
from app.core.config import settings, global_vars
from app.core.context import TorrentInfo, Context, MediaInfo
from app.core.metainfo import MetaInfo
from app.db.site_oper import SiteOper
from app.db.systemconfig_oper import SystemConfigOper
from app.helper.rss import RssHelper
from app.helper.sites import SitesHelper # noqa
from app.helper.torrent import TorrentHelper
from app.log import logger
from app.schemas import Notification
from app.schemas.types import SystemConfigKey, MessageChannel, NotificationType, MediaType
from app.utils.string import StringUtils
class TorrentsChain(ChainBase):
"""
站点首页或RSS种子处理链服务于订阅、刷流等
"""
_spider_file = "__torrents_cache__"
_rss_file = "__rss_cache__"
@property
def cache_file(self) -> str:
"""
返回缓存文件列表
"""
if settings.SUBSCRIBE_MODE == 'spider':
return self._spider_file
return self._rss_file
def remote_refresh(self, channel: MessageChannel, userid: Union[str, int] = None):
"""
远程刷新订阅,发送消息
"""
self.post_message(Notification(channel=channel,
title=f"开始刷新种子 ...", userid=userid))
self.refresh()
self.post_message(Notification(channel=channel,
title=f"种子刷新完成!", userid=userid))
def get_torrents(self, stype: Optional[str] = None) -> Dict[str, List[Context]]:
"""
获取当前缓存的种子
:param stype: 强制指定缓存类型spider:爬虫缓存rss:rss缓存
"""
if not stype:
stype = settings.SUBSCRIBE_MODE
# 读取缓存
if stype == 'spider':
torrents_cache = self.load_cache(self._spider_file) or {}
else:
torrents_cache = self.load_cache(self._rss_file) or {}
# 兼容性处理为旧版本的Context对象添加失败次数字段
self._ensure_context_compatibility(torrents_cache)
return torrents_cache
async def async_get_torrents(self, stype: Optional[str] = None) -> Dict[str, List[Context]]:
"""
异步获取当前缓存的种子
:param stype: 强制指定缓存类型spider:爬虫缓存rss:rss缓存
"""
if not stype:
stype = settings.SUBSCRIBE_MODE
# 异步读取缓存
if stype == 'spider':
torrents_cache = await self.async_load_cache(self._spider_file) or {}
else:
torrents_cache = await self.async_load_cache(self._rss_file) or {}
# 兼容性处理为旧版本的Context对象添加失败次数字段
self._ensure_context_compatibility(torrents_cache)
return torrents_cache
def clear_torrents(self):
"""
清理种子缓存数据
"""
logger.info(f'开始清理种子缓存数据 ...')
self.remove_cache(self._spider_file)
self.remove_cache(self._rss_file)
logger.info(f'种子缓存数据清理完成')
async def async_clear_torrents(self):
"""
异步清理种子缓存数据
"""
logger.info(f'开始异步清理种子缓存数据 ...')
await self.async_remove_cache(self._spider_file)
await self.async_remove_cache(self._rss_file)
logger.info(f'异步种子缓存数据清理完成')
def browse(self, domain: str, keyword: Optional[str] = None, cat: Optional[str] = None,
page: Optional[int] = 0) -> List[TorrentInfo]:
"""
浏览站点首页内容返回种子清单TTL缓存5分钟
:param domain: 站点域名
:param keyword: 搜索标题
:param cat: 搜索分类
:param page: 页码
"""
logger.info(f'开始获取站点 {domain} 最新种子 ...')
site = SitesHelper().get_indexer(domain)
if not site:
logger.error(f'站点 {domain} 不存在!')
return []
return self.refresh_torrents(site=site, keyword=keyword, cat=cat, page=page)
async def async_browse(self, domain: str, keyword: Optional[str] = None, cat: Optional[str] = None,
page: Optional[int] = 0) -> List[TorrentInfo]:
"""
异步浏览站点首页内容返回种子清单TTL缓存5分钟
:param domain: 站点域名
:param keyword: 搜索标题
:param cat: 搜索分类
:param page: 页码
"""
logger.info(f'开始获取站点 {domain} 最新种子 ...')
site = await SitesHelper().async_get_indexer(domain)
if not site:
logger.error(f'站点 {domain} 不存在!')
return []
return await self.async_refresh_torrents(site=site, keyword=keyword, cat=cat, page=page)
def rss(self, domain: str) -> List[TorrentInfo]:
"""
获取站点RSS内容返回种子清单TTL缓存3分钟
:param domain: 站点域名
"""
logger.info(f'开始获取站点 {domain} RSS ...')
site = SitesHelper().get_indexer(domain)
if not site:
logger.error(f'站点 {domain} 不存在!')
return []
if not site.get("rss"):
logger.error(f'站点 {domain} 未配置RSS地址')
return []
# 解析RSS
rss_items = RssHelper().parse(site.get("rss"), True if site.get("proxy") else False,
timeout=int(site.get("timeout") or 30), ua=site.get("ua") if site.get("ua") else None)
if rss_items is None:
# rss过期尝试保留原配置生成新的rss
self.__renew_rss_url(domain=domain, site=site)
return []
if not rss_items:
logger.error(f'站点 {domain} 未获取到RSS数据')
return []
# 组装种子
ret_torrents: List[TorrentInfo] = []
try:
for item in rss_items:
if not item.get("title"):
continue
torrentinfo = TorrentInfo(
site=site.get("id"),
site_name=site.get("name"),
site_cookie=site.get("cookie"),
site_ua=site.get("ua") or settings.USER_AGENT,
site_proxy=site.get("proxy"),
site_order=site.get("pri"),
site_downloader=site.get("downloader"),
title=item.get("title"),
enclosure=item.get("enclosure"),
page_url=item.get("link"),
size=item.get("size"),
pubdate=item["pubdate"].strftime("%Y-%m-%d %H:%M:%S") if item.get("pubdate") else None,
)
ret_torrents.append(torrentinfo)
finally:
rss_items.clear()
del rss_items
return ret_torrents
def refresh(self, stype: Optional[str] = None, sites: List[int] = None) -> Dict[str, List[Context]]:
"""
刷新站点最新资源,识别并缓存起来
:param stype: 强制指定缓存类型spider:爬虫缓存rss:rss缓存
:param sites: 强制指定站点ID列表为空则读取设置的订阅站点
"""
def __is_no_cache_site(_domain: str) -> bool:
"""
判断站点是否不需要缓存
"""
for url_key in settings.NO_CACHE_SITE_KEY.split(','):
if url_key in _domain:
return True
return False
# 刷新类型
if not stype:
stype = settings.SUBSCRIBE_MODE
# 刷新站点
if not sites:
sites = SystemConfigOper().get(SystemConfigKey.RssSites) or []
# 读取缓存
torrents_cache = self.get_torrents()
torrenthelper = TorrentHelper()
# 缓存过滤掉无效种子
for _domain, _torrents in torrents_cache.items():
torrents_cache[_domain] = [_torrent for _torrent in _torrents
if not torrenthelper.is_invalid(_torrent.torrent_info.enclosure)]
# 需要刷新的站点domain
domains = []
# 遍历站点缓存资源
for indexer in SitesHelper().get_indexers():
if global_vars.is_system_stopped:
break
# 未开启的站点不刷新
if sites and indexer.get("id") not in sites:
continue
domain = StringUtils.get_url_domain(indexer.get("domain"))
domains.append(domain)
if stype == "spider":
# 刷新首页种子
torrents: List[TorrentInfo] = []
# 读取第0页和第1页
for page in range(2):
page_torrents = self.browse(domain=domain, page=page)
if page_torrents:
torrents.extend(page_torrents)
else:
# 如果某一页没有数据,说明已经到最后一页,停止获取
break
else:
# 刷新RSS种子
torrents: List[TorrentInfo] = self.rss(domain=domain)
# 按pubdate降序排列
torrents.sort(key=lambda x: x.pubdate or '', reverse=True)
# 取前N条
torrents = torrents[:settings.CONF.refresh]
if torrents:
if __is_no_cache_site(domain):
# 不需要缓存的站点,直接处理
logger.info(f'{indexer.get("name")}{len(torrents)} 个种子 (不缓存)')
torrents_cache[domain] = []
else:
# 过滤出没有处理过的种子 - 优化:使用集合查找,避免重复创建字符串列表
cached_signatures = {f'{t.torrent_info.title}{t.torrent_info.description}'
for t in torrents_cache.get(domain) or []}
torrents = [torrent for torrent in torrents
if f'{torrent.title}{torrent.description}' not in cached_signatures]
if torrents:
logger.info(f'{indexer.get("name")}{len(torrents)} 个新种子')
else:
logger.info(f'{indexer.get("name")} 没有新种子')
continue
try:
for torrent in torrents:
if global_vars.is_system_stopped:
break
if not torrent.enclosure:
logger.warn(f"缺少种子链接,忽略处理: {torrent.title}")
continue
logger.info(f'处理资源:{torrent.title} ...')
# 识别
meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
if torrent.title != meta.org_string:
logger.info(f'种子名称应用识别词后发生改变:{torrent.title} => {meta.org_string}')
# 使用站点种子分类,校正类型识别
if meta.type != MediaType.TV \
and torrent.category == MediaType.TV.value:
meta.type = MediaType.TV
# 识别媒体信息
mediainfo: MediaInfo = MediaChain().recognize_by_meta(meta)
if not mediainfo:
logger.warn(f'{torrent.title} 未识别到媒体信息')
# 存储空的媒体信息
mediainfo = MediaInfo()
# 清理多余数据,减少内存占用
mediainfo.clear()
# 上下文
context = Context(meta_info=meta, media_info=mediainfo, torrent_info=torrent)
# 如果未识别到媒体信息设置初始失败次数为1
if not mediainfo or (not mediainfo.tmdb_id and not mediainfo.douban_id):
context.media_recognize_fail_count = 1
# 添加到缓存
if not torrents_cache.get(domain):
torrents_cache[domain] = [context]
else:
torrents_cache[domain].append(context)
# 如果超过了限制条数则移除掉前面的
if len(torrents_cache[domain]) > settings.CONF.torrents:
torrents_cache[domain] = torrents_cache[domain][-settings.CONF.torrents:]
finally:
torrents.clear()
del torrents
else:
logger.info(f'{indexer.get("name")} 没有获取到种子')
# 保存缓存到本地
if stype == "spider":
self.save_cache(torrents_cache, self._spider_file)
else:
self.save_cache(torrents_cache, self._rss_file)
# 去除不在站点范围内的缓存种子
if sites and torrents_cache:
torrents_cache = {k: v for k, v in torrents_cache.items() if k in domains}
return torrents_cache
@staticmethod
def _ensure_context_compatibility(torrents_cache: Dict[str, List[Context]]):
"""
确保Context对象的兼容性为旧版本添加缺失的字段
"""
for domain, contexts in torrents_cache.items():
for context in contexts:
# 如果Context对象没有media_recognize_fail_count字段添加默认值
if not hasattr(context, 'media_recognize_fail_count'):
context.media_recognize_fail_count = 0
# 如果媒体信息未识别,设置初始失败次数
if (not context.media_info or
(not context.media_info.tmdb_id and not context.media_info.douban_id)):
context.media_recognize_fail_count = 1
def __renew_rss_url(self, domain: str, site: dict):
"""
保留原配置生成新的rss地址
"""
try:
# RSS链接过期
logger.error(f"站点 {domain} RSS链接已过期正在尝试自动获取")
# 自动生成rss地址
rss_url, errmsg = RssHelper().get_rss_link(
url=site.get("url"),
cookie=site.get("cookie"),
ua=site.get("ua") or settings.USER_AGENT,
proxy=True if site.get("proxy") else False,
timeout=site.get("timeout"),
)
if rss_url:
# 获取新的日期的passkey
match = re.search(r'passkey=([a-zA-Z0-9]+)', rss_url)
if match:
new_passkey = match.group(1)
# 获取过期rss除去passkey部分
new_rss = re.sub(r'&passkey=([a-zA-Z0-9]+)', f'&passkey={new_passkey}', site.get("rss"))
logger.info(f"更新站点 {domain} RSS地址 ...")
SiteOper().update_rss(domain=domain, rss=new_rss)
else:
# 发送消息
self.post_message(
Notification(mtype=NotificationType.SiteMessage, title=f"站点 {domain} RSS链接已过期",
link=settings.MP_DOMAIN('#/site'))
)
else:
self.post_message(
Notification(mtype=NotificationType.SiteMessage, title=f"站点 {domain} RSS链接已过期",
link=settings.MP_DOMAIN('#/site')))
except Exception as e:
logger.error(f"站点 {domain} RSS链接自动获取失败{str(e)} - {traceback.format_exc()}")
self.post_message(Notification(mtype=NotificationType.SiteMessage, title=f"站点 {domain} RSS链接已过期",
link=settings.MP_DOMAIN('#/site')))