mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-03-20 03:57:30 +08:00
373 lines
16 KiB
Python
373 lines
16 KiB
Python
import re
|
||
import traceback
|
||
from typing import Dict, List, Union, Optional
|
||
|
||
from app.chain import ChainBase
|
||
from app.chain.media import MediaChain
|
||
from app.core.config import settings, global_vars
|
||
from app.core.context import TorrentInfo, Context, MediaInfo
|
||
from app.core.metainfo import MetaInfo
|
||
from app.db.site_oper import SiteOper
|
||
from app.db.systemconfig_oper import SystemConfigOper
|
||
from app.helper.rss import RssHelper
|
||
from app.helper.sites import SitesHelper # noqa
|
||
from app.helper.torrent import TorrentHelper
|
||
from app.log import logger
|
||
from app.schemas import Notification
|
||
from app.schemas.types import SystemConfigKey, MessageChannel, NotificationType, MediaType
|
||
from app.utils.string import StringUtils
|
||
|
||
|
||
class TorrentsChain(ChainBase):
|
||
"""
|
||
站点首页或RSS种子处理链,服务于订阅、刷流等
|
||
"""
|
||
|
||
_spider_file = "__torrents_cache__"
|
||
_rss_file = "__rss_cache__"
|
||
|
||
@property
|
||
def cache_file(self) -> str:
|
||
"""
|
||
返回缓存文件列表
|
||
"""
|
||
if settings.SUBSCRIBE_MODE == 'spider':
|
||
return self._spider_file
|
||
return self._rss_file
|
||
|
||
def remote_refresh(self, channel: MessageChannel, userid: Union[str, int] = None):
|
||
"""
|
||
远程刷新订阅,发送消息
|
||
"""
|
||
self.post_message(Notification(channel=channel,
|
||
title=f"开始刷新种子 ...", userid=userid))
|
||
self.refresh()
|
||
self.post_message(Notification(channel=channel,
|
||
title=f"种子刷新完成!", userid=userid))
|
||
|
||
def get_torrents(self, stype: Optional[str] = None) -> Dict[str, List[Context]]:
|
||
"""
|
||
获取当前缓存的种子
|
||
:param stype: 强制指定缓存类型,spider:爬虫缓存,rss:rss缓存
|
||
"""
|
||
|
||
if not stype:
|
||
stype = settings.SUBSCRIBE_MODE
|
||
|
||
# 读取缓存
|
||
if stype == 'spider':
|
||
torrents_cache = self.load_cache(self._spider_file) or {}
|
||
else:
|
||
torrents_cache = self.load_cache(self._rss_file) or {}
|
||
|
||
# 兼容性处理:为旧版本的Context对象添加失败次数字段
|
||
self._ensure_context_compatibility(torrents_cache)
|
||
|
||
return torrents_cache
|
||
|
||
async def async_get_torrents(self, stype: Optional[str] = None) -> Dict[str, List[Context]]:
|
||
"""
|
||
异步获取当前缓存的种子
|
||
:param stype: 强制指定缓存类型,spider:爬虫缓存,rss:rss缓存
|
||
"""
|
||
|
||
if not stype:
|
||
stype = settings.SUBSCRIBE_MODE
|
||
|
||
# 异步读取缓存
|
||
if stype == 'spider':
|
||
torrents_cache = await self.async_load_cache(self._spider_file) or {}
|
||
else:
|
||
torrents_cache = await self.async_load_cache(self._rss_file) or {}
|
||
|
||
# 兼容性处理:为旧版本的Context对象添加失败次数字段
|
||
self._ensure_context_compatibility(torrents_cache)
|
||
|
||
return torrents_cache
|
||
|
||
def clear_torrents(self):
|
||
"""
|
||
清理种子缓存数据
|
||
"""
|
||
logger.info(f'开始清理种子缓存数据 ...')
|
||
self.remove_cache(self._spider_file)
|
||
self.remove_cache(self._rss_file)
|
||
logger.info(f'种子缓存数据清理完成')
|
||
|
||
async def async_clear_torrents(self):
|
||
"""
|
||
异步清理种子缓存数据
|
||
"""
|
||
logger.info(f'开始异步清理种子缓存数据 ...')
|
||
await self.async_remove_cache(self._spider_file)
|
||
await self.async_remove_cache(self._rss_file)
|
||
logger.info(f'异步种子缓存数据清理完成')
|
||
|
||
def browse(self, domain: str, keyword: Optional[str] = None, cat: Optional[str] = None,
|
||
page: Optional[int] = 0) -> List[TorrentInfo]:
|
||
"""
|
||
浏览站点首页内容,返回种子清单,TTL缓存5分钟
|
||
:param domain: 站点域名
|
||
:param keyword: 搜索标题
|
||
:param cat: 搜索分类
|
||
:param page: 页码
|
||
"""
|
||
logger.info(f'开始获取站点 {domain} 最新种子 ...')
|
||
site = SitesHelper().get_indexer(domain)
|
||
if not site:
|
||
logger.error(f'站点 {domain} 不存在!')
|
||
return []
|
||
return self.refresh_torrents(site=site, keyword=keyword, cat=cat, page=page)
|
||
|
||
async def async_browse(self, domain: str, keyword: Optional[str] = None, cat: Optional[str] = None,
|
||
page: Optional[int] = 0) -> List[TorrentInfo]:
|
||
"""
|
||
异步浏览站点首页内容,返回种子清单,TTL缓存5分钟
|
||
:param domain: 站点域名
|
||
:param keyword: 搜索标题
|
||
:param cat: 搜索分类
|
||
:param page: 页码
|
||
"""
|
||
logger.info(f'开始获取站点 {domain} 最新种子 ...')
|
||
site = await SitesHelper().async_get_indexer(domain)
|
||
if not site:
|
||
logger.error(f'站点 {domain} 不存在!')
|
||
return []
|
||
return await self.async_refresh_torrents(site=site, keyword=keyword, cat=cat, page=page)
|
||
|
||
def rss(self, domain: str) -> List[TorrentInfo]:
|
||
"""
|
||
获取站点RSS内容,返回种子清单,TTL缓存3分钟
|
||
:param domain: 站点域名
|
||
"""
|
||
logger.info(f'开始获取站点 {domain} RSS ...')
|
||
site = SitesHelper().get_indexer(domain)
|
||
if not site:
|
||
logger.error(f'站点 {domain} 不存在!')
|
||
return []
|
||
if not site.get("rss"):
|
||
logger.error(f'站点 {domain} 未配置RSS地址!')
|
||
return []
|
||
# 解析RSS
|
||
rss_items = RssHelper().parse(site.get("rss"), True if site.get("proxy") else False,
|
||
timeout=int(site.get("timeout") or 30), ua=site.get("ua") if site.get("ua") else None)
|
||
if rss_items is None:
|
||
# rss过期,尝试保留原配置生成新的rss
|
||
self.__renew_rss_url(domain=domain, site=site)
|
||
return []
|
||
if not rss_items:
|
||
logger.error(f'站点 {domain} 未获取到RSS数据!')
|
||
return []
|
||
# 组装种子
|
||
ret_torrents: List[TorrentInfo] = []
|
||
try:
|
||
for item in rss_items:
|
||
if not item.get("title"):
|
||
continue
|
||
torrentinfo = TorrentInfo(
|
||
site=site.get("id"),
|
||
site_name=site.get("name"),
|
||
site_cookie=site.get("cookie"),
|
||
site_ua=site.get("ua") or settings.USER_AGENT,
|
||
site_proxy=site.get("proxy"),
|
||
site_order=site.get("pri"),
|
||
site_downloader=site.get("downloader"),
|
||
title=item.get("title"),
|
||
enclosure=item.get("enclosure"),
|
||
page_url=item.get("link"),
|
||
size=item.get("size"),
|
||
pubdate=item["pubdate"].strftime("%Y-%m-%d %H:%M:%S") if item.get("pubdate") else None,
|
||
)
|
||
ret_torrents.append(torrentinfo)
|
||
finally:
|
||
rss_items.clear()
|
||
del rss_items
|
||
return ret_torrents
|
||
|
||
def refresh(self, stype: Optional[str] = None, sites: List[int] = None) -> Dict[str, List[Context]]:
|
||
"""
|
||
刷新站点最新资源,识别并缓存起来
|
||
:param stype: 强制指定缓存类型,spider:爬虫缓存,rss:rss缓存
|
||
:param sites: 强制指定站点ID列表,为空则读取设置的订阅站点
|
||
"""
|
||
|
||
def __is_no_cache_site(_domain: str) -> bool:
|
||
"""
|
||
判断站点是否不需要缓存
|
||
"""
|
||
for url_key in settings.NO_CACHE_SITE_KEY.split(','):
|
||
if url_key in _domain:
|
||
return True
|
||
return False
|
||
|
||
# 刷新类型
|
||
if not stype:
|
||
stype = settings.SUBSCRIBE_MODE
|
||
|
||
# 刷新站点
|
||
if not sites:
|
||
sites = SystemConfigOper().get(SystemConfigKey.RssSites) or []
|
||
|
||
# 读取缓存
|
||
torrents_cache = self.get_torrents()
|
||
torrenthelper = TorrentHelper()
|
||
|
||
# 缓存过滤掉无效种子
|
||
for _domain, _torrents in torrents_cache.items():
|
||
torrents_cache[_domain] = [_torrent for _torrent in _torrents
|
||
if not torrenthelper.is_invalid(_torrent.torrent_info.enclosure)]
|
||
|
||
# 需要刷新的站点domain
|
||
domains = []
|
||
# 遍历站点缓存资源
|
||
for indexer in SitesHelper().get_indexers():
|
||
if global_vars.is_system_stopped:
|
||
break
|
||
# 未开启的站点不刷新
|
||
if sites and indexer.get("id") not in sites:
|
||
continue
|
||
domain = StringUtils.get_url_domain(indexer.get("domain"))
|
||
domains.append(domain)
|
||
if stype == "spider":
|
||
# 刷新首页种子
|
||
torrents: List[TorrentInfo] = []
|
||
# 读取第0页和第1页
|
||
for page in range(2):
|
||
page_torrents = self.browse(domain=domain, page=page)
|
||
if page_torrents:
|
||
torrents.extend(page_torrents)
|
||
else:
|
||
# 如果某一页没有数据,说明已经到最后一页,停止获取
|
||
break
|
||
else:
|
||
# 刷新RSS种子
|
||
torrents: List[TorrentInfo] = self.rss(domain=domain)
|
||
# 按pubdate降序排列
|
||
torrents.sort(key=lambda x: x.pubdate or '', reverse=True)
|
||
# 取前N条
|
||
torrents = torrents[:settings.CONF.refresh]
|
||
if torrents:
|
||
if __is_no_cache_site(domain):
|
||
# 不需要缓存的站点,直接处理
|
||
logger.info(f'{indexer.get("name")} 有 {len(torrents)} 个种子 (不缓存)')
|
||
torrents_cache[domain] = []
|
||
else:
|
||
# 过滤出没有处理过的种子 - 优化:使用集合查找,避免重复创建字符串列表
|
||
cached_signatures = {f'{t.torrent_info.title}{t.torrent_info.description}'
|
||
for t in torrents_cache.get(domain) or []}
|
||
torrents = [torrent for torrent in torrents
|
||
if f'{torrent.title}{torrent.description}' not in cached_signatures]
|
||
if torrents:
|
||
logger.info(f'{indexer.get("name")} 有 {len(torrents)} 个新种子')
|
||
else:
|
||
logger.info(f'{indexer.get("name")} 没有新种子')
|
||
continue
|
||
try:
|
||
for torrent in torrents:
|
||
if global_vars.is_system_stopped:
|
||
break
|
||
if not torrent.enclosure:
|
||
logger.warn(f"缺少种子链接,忽略处理: {torrent.title}")
|
||
continue
|
||
logger.info(f'处理资源:{torrent.title} ...')
|
||
# 识别
|
||
meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
|
||
if torrent.title != meta.org_string:
|
||
logger.info(f'种子名称应用识别词后发生改变:{torrent.title} => {meta.org_string}')
|
||
# 使用站点种子分类,校正类型识别
|
||
if meta.type != MediaType.TV \
|
||
and torrent.category == MediaType.TV.value:
|
||
meta.type = MediaType.TV
|
||
# 识别媒体信息
|
||
mediainfo: MediaInfo = MediaChain().recognize_by_meta(meta)
|
||
if not mediainfo:
|
||
logger.warn(f'{torrent.title} 未识别到媒体信息')
|
||
# 存储空的媒体信息
|
||
mediainfo = MediaInfo()
|
||
# 清理多余数据,减少内存占用
|
||
mediainfo.clear()
|
||
# 上下文
|
||
context = Context(meta_info=meta, media_info=mediainfo, torrent_info=torrent)
|
||
# 如果未识别到媒体信息,设置初始失败次数为1
|
||
if not mediainfo or (not mediainfo.tmdb_id and not mediainfo.douban_id):
|
||
context.media_recognize_fail_count = 1
|
||
# 添加到缓存
|
||
if not torrents_cache.get(domain):
|
||
torrents_cache[domain] = [context]
|
||
else:
|
||
torrents_cache[domain].append(context)
|
||
# 如果超过了限制条数则移除掉前面的
|
||
if len(torrents_cache[domain]) > settings.CONF.torrents:
|
||
torrents_cache[domain] = torrents_cache[domain][-settings.CONF.torrents:]
|
||
finally:
|
||
torrents.clear()
|
||
del torrents
|
||
else:
|
||
logger.info(f'{indexer.get("name")} 没有获取到种子')
|
||
|
||
# 保存缓存到本地
|
||
if stype == "spider":
|
||
self.save_cache(torrents_cache, self._spider_file)
|
||
else:
|
||
self.save_cache(torrents_cache, self._rss_file)
|
||
|
||
# 去除不在站点范围内的缓存种子
|
||
if sites and torrents_cache:
|
||
torrents_cache = {k: v for k, v in torrents_cache.items() if k in domains}
|
||
|
||
return torrents_cache
|
||
|
||
@staticmethod
|
||
def _ensure_context_compatibility(torrents_cache: Dict[str, List[Context]]):
|
||
"""
|
||
确保Context对象的兼容性,为旧版本添加缺失的字段
|
||
"""
|
||
for domain, contexts in torrents_cache.items():
|
||
for context in contexts:
|
||
# 如果Context对象没有media_recognize_fail_count字段,添加默认值
|
||
if not hasattr(context, 'media_recognize_fail_count'):
|
||
context.media_recognize_fail_count = 0
|
||
# 如果媒体信息未识别,设置初始失败次数
|
||
if (not context.media_info or
|
||
(not context.media_info.tmdb_id and not context.media_info.douban_id)):
|
||
context.media_recognize_fail_count = 1
|
||
|
||
def __renew_rss_url(self, domain: str, site: dict):
|
||
"""
|
||
保留原配置生成新的rss地址
|
||
"""
|
||
try:
|
||
# RSS链接过期
|
||
logger.error(f"站点 {domain} RSS链接已过期,正在尝试自动获取!")
|
||
# 自动生成rss地址
|
||
rss_url, errmsg = RssHelper().get_rss_link(
|
||
url=site.get("url"),
|
||
cookie=site.get("cookie"),
|
||
ua=site.get("ua") or settings.USER_AGENT,
|
||
proxy=True if site.get("proxy") else False,
|
||
timeout=site.get("timeout"),
|
||
)
|
||
if rss_url:
|
||
# 获取新的日期的passkey
|
||
match = re.search(r'passkey=([a-zA-Z0-9]+)', rss_url)
|
||
if match:
|
||
new_passkey = match.group(1)
|
||
# 获取过期rss除去passkey部分
|
||
new_rss = re.sub(r'&passkey=([a-zA-Z0-9]+)', f'&passkey={new_passkey}', site.get("rss"))
|
||
logger.info(f"更新站点 {domain} RSS地址 ...")
|
||
SiteOper().update_rss(domain=domain, rss=new_rss)
|
||
else:
|
||
# 发送消息
|
||
self.post_message(
|
||
Notification(mtype=NotificationType.SiteMessage, title=f"站点 {domain} RSS链接已过期",
|
||
link=settings.MP_DOMAIN('#/site'))
|
||
)
|
||
else:
|
||
self.post_message(
|
||
Notification(mtype=NotificationType.SiteMessage, title=f"站点 {domain} RSS链接已过期",
|
||
link=settings.MP_DOMAIN('#/site')))
|
||
except Exception as e:
|
||
logger.error(f"站点 {domain} RSS链接自动获取失败:{str(e)} - {traceback.format_exc()}")
|
||
self.post_message(Notification(mtype=NotificationType.SiteMessage, title=f"站点 {domain} RSS链接已过期",
|
||
link=settings.MP_DOMAIN('#/site')))
|