feat:支持订阅绑定类别和自定义识别词

This commit is contained in:
jxxghp
2024-10-09 15:21:32 +08:00
parent e31df15b5e
commit 7ea01c1109
13 changed files with 191 additions and 124 deletions

View File

@@ -207,7 +207,8 @@ class DownloadChain(ChainBase):
save_path: str = None,
userid: Union[str, int] = None,
username: str = None,
downloader: str = None) -> Optional[str]:
downloader: str = None,
media_category: str = None) -> Optional[str]:
"""
下载及发送通知
:param context: 资源上下文
@@ -219,6 +220,7 @@ class DownloadChain(ChainBase):
:param userid: 用户ID
:param username: 调用下载的用户名/插件名
:param downloader: 下载器
:param media_category: 自定义媒体类别
"""
_torrent = context.torrent_info
_media = context.media_info
@@ -318,7 +320,8 @@ class DownloadChain(ChainBase):
userid=userid,
username=username,
channel=channel.value if channel else None,
date=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
date=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
media_category=media_category
)
# 登记下载文件
@@ -382,7 +385,8 @@ class DownloadChain(ChainBase):
channel: MessageChannel = None,
source: str = None,
userid: str = None,
username: str = None
username: str = None,
media_category: str = None
) -> Tuple[List[Context], Dict[Union[int, str], Dict[int, NotExistMediaInfo]]]:
"""
根据缺失数据,自动种子列表中组合择优下载
@@ -393,6 +397,7 @@ class DownloadChain(ChainBase):
:param source: 通知来源
:param userid: 用户ID
:param username: 调用下载的用户名/插件名
:param media_category: 自定义媒体类别
:return: 已经下载的资源列表、剩余未下载到的剧集 no_exists[tmdb_id/douban_id] = {season: NotExistMediaInfo}
"""
# 已下载的项目
@@ -461,7 +466,8 @@ class DownloadChain(ChainBase):
if context.media_info.type == MediaType.MOVIE:
logger.info(f"开始下载电影 {context.torrent_info.title} ...")
if self.download_single(context, save_path=save_path, channel=channel,
source=source, userid=userid, username=username):
source=source, userid=userid, username=username,
media_category=media_category):
# 下载成功
logger.info(f"{context.torrent_info.title} 添加下载成功")
downloaded_list.append(context)
@@ -543,14 +549,16 @@ class DownloadChain(ChainBase):
channel=channel,
source=source,
userid=userid,
username=username
username=username,
media_category=media_category
)
else:
# 下载
logger.info(f"开始下载 {torrent.title} ...")
download_id = self.download_single(context, save_path=save_path,
channel=channel, source=source,
userid=userid, username=username)
userid=userid, username=username,
media_category=media_category)
if download_id:
# 下载成功
@@ -618,7 +626,8 @@ class DownloadChain(ChainBase):
logger.info(f"开始下载 {meta.title} ...")
download_id = self.download_single(context, save_path=save_path,
channel=channel, source=source,
userid=userid, username=username)
userid=userid, username=username,
media_category=media_category)
if download_id:
# 下载成功
logger.info(f"{meta.title} 添加下载成功")
@@ -704,7 +713,8 @@ class DownloadChain(ChainBase):
channel=channel,
source=source,
userid=userid,
username=username
username=username,
media_category=media_category
)
if not download_id:
continue

View File

@@ -1,8 +1,7 @@
import json
import copy
import random
import time
from datetime import datetime
from json import JSONDecodeError
from typing import Dict, List, Optional, Union, Tuple
from app.chain import ChainBase
@@ -332,9 +331,11 @@ class SubscribeChain(ChainBase):
# 优先级过滤规则
if subscribe.best_version:
rule_groups = self.systemconfig.get(SystemConfigKey.BestVersionFilterRuleGroups)
rule_groups = subscribe.filter_groups \
or self.systemconfig.get(SystemConfigKey.BestVersionFilterRuleGroups)
else:
rule_groups = self.systemconfig.get(SystemConfigKey.SubscribeFilterRuleGroups)
rule_groups = subscribe.filter_groups \
or self.systemconfig.get(SystemConfigKey.SubscribeFilterRuleGroups)
# 搜索,同时电视剧会过滤掉不需要的剧集
contexts = self.searchchain.process(mediainfo=mediainfo,
@@ -381,7 +382,8 @@ class SubscribeChain(ChainBase):
no_exists=no_exists,
userid=subscribe.username,
username=subscribe.username,
save_path=subscribe.save_path
save_path=subscribe.save_path,
media_category=subscribe.media_category
)
# 判断是否应完成订阅
@@ -478,21 +480,17 @@ class SubscribeChain(ChainBase):
# 如果订阅未指定站点信息,直接返回默认站点
if not subscribe.sites:
return default_sites
try:
# 尝试解析订阅中的站点数据
user_sites = subscribe.sites
# 计算 user_sites 和 default_sites 的交集
intersection_sites = [site for site in user_sites if site in default_sites]
# 如果交集与原始订阅不一致,更新数据库
if set(intersection_sites) != set(user_sites):
self.subscribeoper.update(subscribe.id, {
"sites": intersection_sites
})
# 如果交集为空,返回默认站点
return intersection_sites if intersection_sites else default_sites
except JSONDecodeError:
# 如果 JSON 解析失败,返回默认站点
return default_sites
# 尝试解析订阅中的站点数据
user_sites = subscribe.sites
# 计算 user_sites 和 default_sites 的交集
intersection_sites = [site for site in user_sites if site in default_sites]
# 如果交集与原始订阅不一致,更新数据库
if set(intersection_sites) != set(user_sites):
self.subscribeoper.update(subscribe.id, {
"sites": intersection_sites
})
# 如果交集为空,返回默认站点
return intersection_sites if intersection_sites else default_sites
def get_subscribed_sites(self) -> Optional[List[int]]:
"""
@@ -521,8 +519,6 @@ class SubscribeChain(ChainBase):
if not torrents:
logger.warn('没有缓存资源,无法匹配订阅')
return
# 记录重新识别过的种子
_recognize_cached = []
# 所有订阅
subscribes = self.subscribeoper.list('R')
# 遍历订阅
@@ -541,12 +537,9 @@ class SubscribeChain(ChainBase):
# 订阅的站点域名列表
domains = []
if subscribe.sites:
try:
siteids = subscribe.sites
if siteids:
domains = self.siteoper.get_domains_by_ids(siteids)
except JSONDecodeError:
pass
domains = self.siteoper.get_domains_by_ids(subscribe.sites)
# 自定义识别词
custom_words = subscribe.custom_words.split("\n") if subscribe.custom_words else []
# 识别媒体信息
mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type,
tmdbid=subscribe.tmdbid,
@@ -612,34 +605,62 @@ class SubscribeChain(ChainBase):
continue
logger.debug(f'开始匹配站点:{domain},共缓存了 {len(contexts)} 个种子...')
for context in contexts:
# 检查是否匹配
torrent_meta = context.meta_info
torrent_mediainfo = context.media_info
# 提取信息
torrent_meta = copy.deepcopy(context.meta_info)
torrent_mediainfo = copy.deepcopy(context.media_info)
torrent_info = context.torrent_info
# 先判断是否有没识别的种子
if not torrent_mediainfo or (not torrent_mediainfo.tmdb_id and not torrent_mediainfo.douban_id):
_cache_key = f"{torrent_info.title}_{torrent_info.description}"
if _cache_key not in _recognize_cached:
_recognize_cached.append(_cache_key)
# 不在订阅站点范围的不处理
sub_sites = self.get_sub_sites(subscribe)
if sub_sites and torrent_info.site not in sub_sites:
logger.debug(f"{torrent_info.site_name} - {torrent_info.title} 不符合订阅站点要求")
continue
# 匹配订阅参数
if not self.torrenthelper.filter_torrent(torrent_info=torrent_info,
filter_params=self.get_params(subscribe)):
continue
# 先判断是否有没识别的种子,有则重新识别;如果订阅有自定义识别词,则不使用预识别的信息
if not torrent_mediainfo \
or (not torrent_mediainfo.tmdb_id and not torrent_mediainfo.douban_id) \
or subscribe.custom_words:
if not subscribe.custom_words:
logger.info(
f'{torrent_info.site_name} - {torrent_info.title} 订阅缓存为未识别状态,尝试重新识别...')
# 重新识别(不使用缓存)
f'{torrent_info.site_name} - {torrent_info.title} 订阅缓存为未识别状态,'
f'尝试重新识别媒体信息...')
else:
logger.info(
f'{torrent_info.site_name} - {torrent_info.title} 因订阅存在自定义识别词,'
f'正在重新识别元数据和媒体信息...')
# 重新识别元数据
torrent_meta = MetaInfo(title=torrent_info.title, subtitle=torrent_info.description,
custom_words=custom_words)
# 重新识别媒体信息
if subscribe.custom_words:
torrent_mediainfo = self.recognize_media(meta=torrent_meta)
else:
# 不使用识别缓存
torrent_mediainfo = self.recognize_media(meta=torrent_meta, cache=False)
if not torrent_mediainfo:
logger.warn(
f'{torrent_info.site_name} - {torrent_info.title} 重新识别失败,尝试通过标题匹配...')
if self.torrenthelper.match_torrent(mediainfo=mediainfo,
torrent_meta=torrent_meta,
torrent=torrent_info):
# 匹配成功
logger.info(
f'{mediainfo.title_year} 通过标题匹配到资源:{torrent_info.site_name} - {torrent_info.title}')
# 更新缓存
if torrent_mediainfo:
# 更新种子缓存
context.media_info = torrent_mediainfo
if not torrent_mediainfo:
# 通过标题匹配兜底
logger.warn(
f'{torrent_info.site_name} - {torrent_info.title} 重新识别失败,尝试通过标题匹配...')
if self.torrenthelper.match_torrent(mediainfo=mediainfo,
torrent_meta=torrent_meta,
torrent=torrent_info):
# 匹配成功
logger.info(
f'{mediainfo.title_year} 通过标题匹配到可用资源:{torrent_info.site_name} - {torrent_info.title}')
if not subscribe.custom_words:
# 更新种子缓存
torrent_mediainfo = mediainfo
context.media_info = mediainfo
else:
continue
else:
continue
# 直接比对媒体信息
if torrent_mediainfo and (torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id):
@@ -652,30 +673,10 @@ class SubscribeChain(ChainBase):
and torrent_mediainfo.douban_id != mediainfo.douban_id:
continue
logger.info(
f'{mediainfo.title_year} 通过媒体信ID匹配到资源{torrent_info.site_name} - {torrent_info.title}')
f'{mediainfo.title_year} 通过媒体信ID匹配到可用资源:{torrent_info.site_name} - {torrent_info.title}')
else:
continue
# 过滤规则
if subscribe.best_version:
rule_groups = self.systemconfig.get(SystemConfigKey.BestVersionFilterRuleGroups)
else:
rule_groups = self.systemconfig.get(SystemConfigKey.SubscribeFilterRuleGroups)
result: List[TorrentInfo] = self.filter_torrents(
rule_groups=rule_groups,
torrent_list=[torrent_info],
mediainfo=torrent_mediainfo)
if result is not None and not result:
# 不符合过滤规则
logger.debug(f"{torrent_info.title} 不匹配过滤规则")
continue
# 不在订阅站点范围的不处理
sub_sites = self.get_sub_sites(subscribe)
if sub_sites and torrent_info.site not in sub_sites:
logger.debug(f"{torrent_info.site_name} - {torrent_info.title} 不符合订阅站点要求")
continue
# 如果是电视剧
if torrent_mediainfo.type == MediaType.TV:
# 有多季的不要
@@ -714,6 +715,22 @@ class SubscribeChain(ChainBase):
logger.debug(f'{subscribe.name} 正在洗版,{torrent_info.title} 不是整季')
continue
# 优先级过滤规则
if subscribe.best_version:
rule_groups = subscribe.filter_groups \
or self.systemconfig.get(SystemConfigKey.BestVersionFilterRuleGroups)
else:
rule_groups = subscribe.filter_groups \
or self.systemconfig.get(SystemConfigKey.SubscribeFilterRuleGroups)
result: List[TorrentInfo] = self.filter_torrents(
rule_groups=rule_groups,
torrent_list=[torrent_info],
mediainfo=torrent_mediainfo)
if result is not None and not result:
# 不符合过滤规则
logger.debug(f"{torrent_info.title} 不匹配过滤规则")
continue
# 洗版时,优先级小于已下载优先级的不要
if subscribe.best_version:
if subscribe.current_priority \
@@ -721,11 +738,6 @@ class SubscribeChain(ChainBase):
logger.info(f'{subscribe.name} 正在洗版,{torrent_info.title} 优先级低于或等于已下载优先级')
continue
# 匹配订阅参数
if not self.torrenthelper.filter_torrent(torrent_info=torrent_info,
filter_params=self.get_params(subscribe)):
continue
# 匹配成功
logger.info(f'{mediainfo.title_year} 匹配成功:{torrent_info.title}')
_match_context.append(context)
@@ -743,7 +755,8 @@ class SubscribeChain(ChainBase):
no_exists=no_exists,
userid=subscribe.username,
username=subscribe.username,
save_path=subscribe.save_path)
save_path=subscribe.save_path,
media_category=subscribe.media_category)
# 判断是否要完成订阅
self.finish_subscribe_or_not(subscribe=subscribe, meta=meta, mediainfo=mediainfo,
downloads=downloads, lefts=lefts)
@@ -1191,8 +1204,6 @@ class SubscribeChain(ChainBase):
download_his = self.downloadhis.get_by_mediaid(tmdbid=subscribe.tmdbid, doubanid=subscribe.doubanid)
if download_his:
for his in download_his:
# 种子链接
torrent_url = f"{his.torrent_site}{his.torrent_name}"
# 查询下载文件
files = self.downloadhis.get_files_by_hash(his.hash)
if files:

View File

@@ -1,4 +1,3 @@
import json
import re
import threading
from pathlib import Path
@@ -118,6 +117,9 @@ class TransferChain(ChainBase):
if mediainfo:
# 补充图片
self.obtain_images(mediainfo)
# 更新自定义媒体类别
if downloadhis.media_category:
mediainfo.category = downloadhis.media_category
else:
# 非MoviePilot下载的任务按文件识别
mediainfo = None

View File

@@ -14,7 +14,7 @@ class WordsMatcher(metaclass=Singleton):
def __init__(self):
self.systemconfig = SystemConfigOper()
def prepare(self, title: str) -> Tuple[str, List[str]]:
def prepare(self, title: str, custom_words: List[str] = None) -> Tuple[str, List[str]]:
"""
预处理标题,支持三种格式
1屏蔽词
@@ -23,7 +23,7 @@ class WordsMatcher(metaclass=Singleton):
"""
appley_words = []
# 读取自定义识别词
words: List[str] = self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
words: List[str] = custom_words or self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
for word in words:
if not word or word.startswith("#"):
continue

View File

@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Tuple
from typing import Tuple, List
import regex as re
@@ -10,17 +10,18 @@ from app.log import logger
from app.schemas.types import MediaType
def MetaInfo(title: str, subtitle: str = None) -> MetaBase:
def MetaInfo(title: str, subtitle: str = None, custom_words: List[str] = None) -> MetaBase:
"""
根据标题和副标题识别元数据
:param title: 标题、种子名、文件名
:param subtitle: 副标题、描述
:param custom_words: 自定义识别词列表
:return: MetaAnime、MetaVideo
"""
# 原标题
org_title = title
# 预处理标题
title, apply_words = WordsMatcher().prepare(title)
title, apply_words = WordsMatcher().prepare(title, custom_words=custom_words)
# 获取标题中媒体信息
title, metainfo = find_metainfo(title)
# 判断是否处理文件

View File

@@ -1,11 +1,9 @@
import json
from typing import Any, Self, List, Tuple, Optional, Generator
from sqlalchemy import create_engine, QueuePool, and_, inspect
from sqlalchemy.orm import declared_attr, sessionmaker, Session, scoped_session, as_declarative
from app.core.config import settings
from app.utils.object import ObjectUtils
# 数据库引擎
Engine = create_engine(

View File

@@ -47,6 +47,8 @@ class DownloadHistory(Base):
date = Column(String)
# 附加信息
note = Column(JSON)
# 自定义媒体类别
media_category = Column(String)
@staticmethod
@db_query

View File

@@ -74,6 +74,12 @@ class Subscribe(Base):
search_imdbid = Column(Integer, default=0)
# 是否手动修改过总集数 0否 1是
manual_total_episode = Column(Integer, default=0)
# 自定义识别词
custom_words = Column(String)
# 自定义媒体类别
media_category = Column(String)
# 过滤规则组
filter_groups = Column(JSON, default=list)
@staticmethod
@db_query

View File

@@ -326,10 +326,16 @@ class Monitor(metaclass=Singleton):
download_hash = download_history.download_hash
# 识别媒体信息
if download_history and download_history.tmdbid:
if download_history and (download_history.tmdbid or download_history.doubanid):
# 下载记录中已存在识别信息
mediainfo: MediaInfo = self.mediaChain.recognize_media(mtype=MediaType(download_history.type),
tmdbid=download_history.tmdbid,
doubanid=download_history.doubanid)
if mediainfo:
# 更新自定义媒体类别
if download_history.media_category:
mediainfo.category = download_history.media_category
else:
mediainfo: MediaInfo = self.mediaChain.recognize_by_meta(file_meta)
if not mediainfo:

View File

@@ -1,7 +1,6 @@
import json
from typing import Optional, List, Dict, Any
from pydantic import BaseModel, validator
from pydantic import BaseModel
class Subscribe(BaseModel):
@@ -65,17 +64,12 @@ class Subscribe(BaseModel):
search_imdbid: Optional[int] = 0
# 时间
date: Optional[str] = None
@validator('sites', pre=True)
def parse_json_fields(cls, value):
if value:
if isinstance(value, str):
try:
return json.loads(value)
except json.JSONDecodeError:
return []
return value
return []
# 自定义识别词
custom_words: Optional[str] = None
# 自定义媒体类别
media_category: Optional[str] = None
# 过滤规则组
filter_groups: Optional[List[str]] = []
class Config:
orm_mode = True

View File

@@ -1,7 +1,6 @@
import json
from typing import Optional
from pydantic import BaseModel, validator
from pydantic import BaseModel
# Shared properties
@@ -23,17 +22,6 @@ class UserBase(BaseModel):
# 个性化设置
settings: Optional[dict] = {}
@validator('permissions', 'settings', pre=True)
def parse_json_fields(cls, value):
if value:
if isinstance(value, str):
try:
return json.loads(value)
except json.JSONDecodeError:
return {}
return value
return {}
class Config:
orm_mode = True