feat：支持订阅绑定类别和自定义识别词

2026-03-20 03:57:30 +08:00 · 2024-10-09 15:21:32 +08:00
parent e31df15b5e
commit 7ea01c1109
13 changed files with 191 additions and 124 deletions
--- a/app/chain/download.py
+++ b/app/chain/download.py
@@ -207,7 +207,8 @@ class DownloadChain(ChainBase):
                        save_path: str = None,
                        userid: Union[str, int] = None,
                        username: str = None,
-                        downloader: str = None) -> Optional[str]:
+                        downloader: str = None,
+                        media_category: str = None) -> Optional[str]:
        """
        下载及发送通知
        :param context: 资源上下文
@@ -219,6 +220,7 @@ class DownloadChain(ChainBase):
        :param userid: 用户ID
        :param username: 调用下载的用户名/插件名
        :param downloader: 下载器
+        :param media_category: 自定义媒体类别
        """
        _torrent = context.torrent_info
        _media = context.media_info
@@ -318,7 +320,8 @@ class DownloadChain(ChainBase):
                userid=userid,
                username=username,
                channel=channel.value if channel else None,
-                date=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                date=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
+                media_category=media_category
            )

            # 登记下载文件
@@ -382,7 +385,8 @@ class DownloadChain(ChainBase):
                       channel: MessageChannel = None,
                       source: str = None,
                       userid: str = None,
-                       username: str = None
+                       username: str = None,
+                       media_category: str = None
                       ) -> Tuple[List[Context], Dict[Union[int, str], Dict[int, NotExistMediaInfo]]]:
        """
        根据缺失数据，自动种子列表中组合择优下载
@@ -393,6 +397,7 @@ class DownloadChain(ChainBase):
        :param source:  通知来源
        :param userid:  用户ID
        :param username: 调用下载的用户名/插件名
+        :param media_category: 自定义媒体类别
        :return: 已经下载的资源列表、剩余未下载到的剧集 no_exists[tmdb_id/douban_id] = {season: NotExistMediaInfo}
        """
        # 已下载的项目
@@ -461,7 +466,8 @@ class DownloadChain(ChainBase):
            if context.media_info.type == MediaType.MOVIE:
                logger.info(f"开始下载电影 {context.torrent_info.title} ...")
                if self.download_single(context, save_path=save_path, channel=channel,
-                                        source=source, userid=userid, username=username):
+                                        source=source, userid=userid, username=username,
+                                        media_category=media_category):
                    # 下载成功
                    logger.info(f"{context.torrent_info.title} 添加下载成功")
                    downloaded_list.append(context)
@@ -543,14 +549,16 @@ class DownloadChain(ChainBase):
                                        channel=channel,
                                        source=source,
                                        userid=userid,
-                                        username=username
+                                        username=username,
+                                        media_category=media_category
                                    )
                            else:
                                # 下载
                                logger.info(f"开始下载 {torrent.title} ...")
                                download_id = self.download_single(context, save_path=save_path,
                                                                   channel=channel, source=source,
-                                                                   userid=userid, username=username)
+                                                                   userid=userid, username=username,
+                                                                   media_category=media_category)

                            if download_id:
                                # 下载成功
@@ -618,7 +626,8 @@ class DownloadChain(ChainBase):
                                logger.info(f"开始下载 {meta.title} ...")
                                download_id = self.download_single(context, save_path=save_path,
                                                                   channel=channel, source=source,
-                                                                   userid=userid, username=username)
+                                                                   userid=userid, username=username,
+                                                                   media_category=media_category)
                                if download_id:
                                    # 下载成功
                                    logger.info(f"{meta.title} 添加下载成功")
@@ -704,7 +713,8 @@ class DownloadChain(ChainBase):
                                channel=channel,
                                source=source,
                                userid=userid,
-                                username=username
+                                username=username,
+                                media_category=media_category
                            )
                            if not download_id:
                                continue
--- a/app/chain/subscribe.py
+++ b/app/chain/subscribe.py
@@ -1,8 +1,7 @@
-import json
+import copy
 import random
 import time
 from datetime import datetime
-from json import JSONDecodeError
 from typing import Dict, List, Optional, Union, Tuple

 from app.chain import ChainBase
@@ -332,9 +331,11 @@ class SubscribeChain(ChainBase):

            # 优先级过滤规则
            if subscribe.best_version:
-                rule_groups = self.systemconfig.get(SystemConfigKey.BestVersionFilterRuleGroups)
+                rule_groups = subscribe.filter_groups \
+                              or self.systemconfig.get(SystemConfigKey.BestVersionFilterRuleGroups)
            else:
-                rule_groups = self.systemconfig.get(SystemConfigKey.SubscribeFilterRuleGroups)
+                rule_groups = subscribe.filter_groups \
+                              or self.systemconfig.get(SystemConfigKey.SubscribeFilterRuleGroups)

            # 搜索，同时电视剧会过滤掉不需要的剧集
            contexts = self.searchchain.process(mediainfo=mediainfo,
@@ -381,7 +382,8 @@ class SubscribeChain(ChainBase):
                no_exists=no_exists,
                userid=subscribe.username,
                username=subscribe.username,
-                save_path=subscribe.save_path
+                save_path=subscribe.save_path,
+                media_category=subscribe.media_category
            )

            # 判断是否应完成订阅
@@ -478,21 +480,17 @@ class SubscribeChain(ChainBase):
        # 如果订阅未指定站点信息，直接返回默认站点
        if not subscribe.sites:
            return default_sites
-        try:
-            # 尝试解析订阅中的站点数据
-            user_sites = subscribe.sites
-            # 计算 user_sites 和 default_sites 的交集
-            intersection_sites = [site for site in user_sites if site in default_sites]
-            # 如果交集与原始订阅不一致，更新数据库
-            if set(intersection_sites) != set(user_sites):
-                self.subscribeoper.update(subscribe.id, {
-                    "sites": intersection_sites
-                })
-            # 如果交集为空，返回默认站点
-            return intersection_sites if intersection_sites else default_sites
-        except JSONDecodeError:
-            # 如果 JSON 解析失败，返回默认站点
-            return default_sites
+        # Site IDs configured on the subscription (used as-is; no JSON parsing is performed here)
+        user_sites = subscribe.sites
+        # 计算 user_sites 和 default_sites 的交集
+        intersection_sites = [site for site in user_sites if site in default_sites]
+        # 如果交集与原始订阅不一致，更新数据库
+        if set(intersection_sites) != set(user_sites):
+            self.subscribeoper.update(subscribe.id, {
+                "sites": intersection_sites
+            })
+        # 如果交集为空，返回默认站点
+        return intersection_sites if intersection_sites else default_sites

    def get_subscribed_sites(self) -> Optional[List[int]]:
        """
@@ -521,8 +519,6 @@ class SubscribeChain(ChainBase):
        if not torrents:
            logger.warn('没有缓存资源，无法匹配订阅')
            return
-        # 记录重新识别过的种子
-        _recognize_cached = []
        # 所有订阅
        subscribes = self.subscribeoper.list('R')
        # 遍历订阅
@@ -541,12 +537,9 @@ class SubscribeChain(ChainBase):
            # 订阅的站点域名列表
            domains = []
            if subscribe.sites:
-                try:
-                    siteids = subscribe.sites
-                    if siteids:
-                        domains = self.siteoper.get_domains_by_ids(siteids)
-                except JSONDecodeError:
-                    pass
+                domains = self.siteoper.get_domains_by_ids(subscribe.sites)
+            # 自定义识别词
+            custom_words = subscribe.custom_words.split("\n") if subscribe.custom_words else []
            # 识别媒体信息
            mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type,
                                                        tmdbid=subscribe.tmdbid,
@@ -612,34 +605,62 @@ class SubscribeChain(ChainBase):
                    continue
                logger.debug(f'开始匹配站点：{domain}，共缓存了 {len(contexts)} 个种子...')
                for context in contexts:
-                    # 检查是否匹配
-                    torrent_meta = context.meta_info
-                    torrent_mediainfo = context.media_info
+                    # 提取信息
+                    torrent_meta = copy.deepcopy(context.meta_info)
+                    torrent_mediainfo = copy.deepcopy(context.media_info)
                    torrent_info = context.torrent_info

-                    # 先判断是否有没识别的种子
-                    if not torrent_mediainfo or (not torrent_mediainfo.tmdb_id and not torrent_mediainfo.douban_id):
-                        _cache_key = f"{torrent_info.title}_{torrent_info.description}"
-                        if _cache_key not in _recognize_cached:
-                            _recognize_cached.append(_cache_key)
+                    # 不在订阅站点范围的不处理
+                    sub_sites = self.get_sub_sites(subscribe)
+                    if sub_sites and torrent_info.site not in sub_sites:
+                        logger.debug(f"{torrent_info.site_name} - {torrent_info.title} 不符合订阅站点要求")
+                        continue
+
+                    # 匹配订阅参数
+                    if not self.torrenthelper.filter_torrent(torrent_info=torrent_info,
+                                                             filter_params=self.get_params(subscribe)):
+                        continue
+
+                    # 先判断是否有没识别的种子，有则重新识别；如果订阅有自定义识别词，则不使用预识别的信息
+                    if not torrent_mediainfo \
+                            or (not torrent_mediainfo.tmdb_id and not torrent_mediainfo.douban_id) \
+                            or subscribe.custom_words:
+                        if not subscribe.custom_words:
                            logger.info(
-                                f'{torrent_info.site_name} - {torrent_info.title} 订阅缓存为未识别状态，尝试重新识别...')
-                            # 重新识别（不使用缓存）
+                                f'{torrent_info.site_name} - {torrent_info.title} 订阅缓存为未识别状态，'
+                                f'尝试重新识别媒体信息...')
+                        else:
+                            logger.info(
+                                f'{torrent_info.site_name} - {torrent_info.title} 因订阅存在自定义识别词，'
+                                f'正在重新识别元数据和媒体信息...')
+                            # 重新识别元数据
+                            torrent_meta = MetaInfo(title=torrent_info.title, subtitle=torrent_info.description,
+                                                    custom_words=custom_words)
+                        # 重新识别媒体信息
+                        if subscribe.custom_words:
+                            torrent_mediainfo = self.recognize_media(meta=torrent_meta)
+                        else:
+                            # 不使用识别缓存
                            torrent_mediainfo = self.recognize_media(meta=torrent_meta, cache=False)
-                            if not torrent_mediainfo:
-                                logger.warn(
-                                    f'{torrent_info.site_name} - {torrent_info.title} 重新识别失败，尝试通过标题匹配...')
-                                if self.torrenthelper.match_torrent(mediainfo=mediainfo,
-                                                                    torrent_meta=torrent_meta,
-                                                                    torrent=torrent_info):
-                                    # 匹配成功
-                                    logger.info(
-                                        f'{mediainfo.title_year} 通过标题匹配到资源：{torrent_info.site_name} - {torrent_info.title}')
-                                    # 更新缓存
+                            if torrent_mediainfo:
+                                # 更新种子缓存
+                                context.media_info = torrent_mediainfo
+                        if not torrent_mediainfo:
+                            # 通过标题匹配兜底
+                            logger.warn(
+                                f'{torrent_info.site_name} - {torrent_info.title} 重新识别失败，尝试通过标题匹配...')
+                            if self.torrenthelper.match_torrent(mediainfo=mediainfo,
+                                                                torrent_meta=torrent_meta,
+                                                                torrent=torrent_info):
+                                # 匹配成功
+                                logger.info(
+                                    f'{mediainfo.title_year} 通过标题匹配到可用资源：{torrent_info.site_name} - {torrent_info.title}')
+                                if not subscribe.custom_words:
+                                    # 更新种子缓存
                                    torrent_mediainfo = mediainfo
                                    context.media_info = mediainfo
-                                else:
-                                    continue
+                            else:
+                                continue

                    # 直接比对媒体信息
                    if torrent_mediainfo and (torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id):
@@ -652,30 +673,10 @@ class SubscribeChain(ChainBase):
                                and torrent_mediainfo.douban_id != mediainfo.douban_id:
                            continue
                        logger.info(
-                            f'{mediainfo.title_year} 通过媒体信ID匹配到资源：{torrent_info.site_name} - {torrent_info.title}')
+                            f'{mediainfo.title_year} 通过媒体信ID匹配到可用资源：{torrent_info.site_name} - {torrent_info.title}')
                    else:
                        continue

-                    # 过滤规则
-                    if subscribe.best_version:
-                        rule_groups = self.systemconfig.get(SystemConfigKey.BestVersionFilterRuleGroups)
-                    else:
-                        rule_groups = self.systemconfig.get(SystemConfigKey.SubscribeFilterRuleGroups)
-                    result: List[TorrentInfo] = self.filter_torrents(
-                        rule_groups=rule_groups,
-                        torrent_list=[torrent_info],
-                        mediainfo=torrent_mediainfo)
-                    if result is not None and not result:
-                        # 不符合过滤规则
-                        logger.debug(f"{torrent_info.title} 不匹配过滤规则")
-                        continue
-
-                    # 不在订阅站点范围的不处理
-                    sub_sites = self.get_sub_sites(subscribe)
-                    if sub_sites and torrent_info.site not in sub_sites:
-                        logger.debug(f"{torrent_info.site_name} - {torrent_info.title} 不符合订阅站点要求")
-                        continue
-
                    # 如果是电视剧
                    if torrent_mediainfo.type == MediaType.TV:
                        # 有多季的不要
@@ -714,6 +715,22 @@ class SubscribeChain(ChainBase):
                                    logger.debug(f'{subscribe.name} 正在洗版，{torrent_info.title} 不是整季')
                                    continue

+                    # 优先级过滤规则
+                    if subscribe.best_version:
+                        rule_groups = subscribe.filter_groups \
+                                      or self.systemconfig.get(SystemConfigKey.BestVersionFilterRuleGroups)
+                    else:
+                        rule_groups = subscribe.filter_groups \
+                                      or self.systemconfig.get(SystemConfigKey.SubscribeFilterRuleGroups)
+                    result: List[TorrentInfo] = self.filter_torrents(
+                        rule_groups=rule_groups,
+                        torrent_list=[torrent_info],
+                        mediainfo=torrent_mediainfo)
+                    if result is not None and not result:
+                        # 不符合过滤规则
+                        logger.debug(f"{torrent_info.title} 不匹配过滤规则")
+                        continue
+
                    # 洗版时，优先级小于已下载优先级的不要
                    if subscribe.best_version:
                        if subscribe.current_priority \
@@ -721,11 +738,6 @@ class SubscribeChain(ChainBase):
                            logger.info(f'{subscribe.name} 正在洗版，{torrent_info.title} 优先级低于或等于已下载优先级')
                            continue

-                    # 匹配订阅参数
-                    if not self.torrenthelper.filter_torrent(torrent_info=torrent_info,
-                                                             filter_params=self.get_params(subscribe)):
-                        continue
-
                    # 匹配成功
                    logger.info(f'{mediainfo.title_year} 匹配成功：{torrent_info.title}')
                    _match_context.append(context)
@@ -743,7 +755,8 @@ class SubscribeChain(ChainBase):
                                                                 no_exists=no_exists,
                                                                 userid=subscribe.username,
                                                                 username=subscribe.username,
-                                                                 save_path=subscribe.save_path)
+                                                                 save_path=subscribe.save_path,
+                                                                 media_category=subscribe.media_category)
            # 判断是否要完成订阅
            self.finish_subscribe_or_not(subscribe=subscribe, meta=meta, mediainfo=mediainfo,
                                         downloads=downloads, lefts=lefts)
@@ -1191,8 +1204,6 @@ class SubscribeChain(ChainBase):
        download_his = self.downloadhis.get_by_mediaid(tmdbid=subscribe.tmdbid, doubanid=subscribe.doubanid)
        if download_his:
            for his in download_his:
-                # 种子链接
-                torrent_url = f"【{his.torrent_site}】{his.torrent_name}"
                # 查询下载文件
                files = self.downloadhis.get_files_by_hash(his.hash)
                if files:
--- a/app/chain/transfer.py
+++ b/app/chain/transfer.py
@@ -1,4 +1,3 @@
-import json
 import re
 import threading
 from pathlib import Path
@@ -118,6 +117,9 @@ class TransferChain(ChainBase):
                    if mediainfo:
                        # 补充图片
                        self.obtain_images(mediainfo)
+                        # 更新自定义媒体类别
+                        if downloadhis.media_category:
+                            mediainfo.category = downloadhis.media_category
                else:
                    # 非MoviePilot下载的任务，按文件识别
                    mediainfo = None
--- a/app/core/meta/words.py
+++ b/app/core/meta/words.py
@@ -14,7 +14,7 @@ class WordsMatcher(metaclass=Singleton):
    def __init__(self):
        self.systemconfig = SystemConfigOper()

-    def prepare(self, title: str) -> Tuple[str, List[str]]:
+    def prepare(self, title: str, custom_words: List[str] = None) -> Tuple[str, List[str]]:
        """
        预处理标题，支持三种格式
        1：屏蔽词
@@ -23,7 +23,7 @@ class WordsMatcher(metaclass=Singleton):
        """
        appley_words = []
        # 读取自定义识别词
-        words: List[str] = self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
+        words: List[str] = custom_words or self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
        for word in words:
            if not word or word.startswith("#"):
                continue
--- a/app/core/metainfo.py
+++ b/app/core/metainfo.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Tuple
+from typing import Tuple, List

 import regex as re

@@ -10,17 +10,18 @@ from app.log import logger
 from app.schemas.types import MediaType


-def MetaInfo(title: str, subtitle: str = None) -> MetaBase:
+def MetaInfo(title: str, subtitle: str = None, custom_words: List[str] = None) -> MetaBase:
    """
    根据标题和副标题识别元数据
    :param title: 标题、种子名、文件名
    :param subtitle: 副标题、描述
+    :param custom_words: 自定义识别词列表
    :return: MetaAnime、MetaVideo
    """
    # 原标题
    org_title = title
    # 预处理标题
-    title, apply_words = WordsMatcher().prepare(title)
+    title, apply_words = WordsMatcher().prepare(title, custom_words=custom_words)
    # 获取标题中媒体信息
    title, metainfo = find_metainfo(title)
    # 判断是否处理文件
--- a/app/db/init.py
+++ b/app/db/init.py
@@ -1,11 +1,9 @@
-import json
 from typing import Any, Self, List, Tuple, Optional, Generator

 from sqlalchemy import create_engine, QueuePool, and_, inspect
 from sqlalchemy.orm import declared_attr, sessionmaker, Session, scoped_session, as_declarative

 from app.core.config import settings
-from app.utils.object import ObjectUtils

 # 数据库引擎
 Engine = create_engine(
--- a/app/db/models/downloadhistory.py
+++ b/app/db/models/downloadhistory.py
@@ -47,6 +47,8 @@ class DownloadHistory(Base):
    date = Column(String)
    # 附加信息
    note = Column(JSON)
+    # 自定义媒体类别
+    media_category = Column(String)

    @staticmethod
    @db_query
--- a/app/db/models/subscribe.py
+++ b/app/db/models/subscribe.py
@@ -74,6 +74,12 @@ class Subscribe(Base):
    search_imdbid = Column(Integer, default=0)
    # 是否手动修改过总集数 0否 1是
    manual_total_episode = Column(Integer, default=0)
+    # 自定义识别词
+    custom_words = Column(String)
+    # 自定义媒体类别
+    media_category = Column(String)
+    # 过滤规则组
+    filter_groups = Column(JSON, default=list)

    @staticmethod
    @db_query
--- a/app/monitor.py
+++ b/app/monitor.py
@@ -326,10 +326,16 @@ class Monitor(metaclass=Singleton):
                    download_hash = download_history.download_hash

                # 识别媒体信息
-                if download_history and download_history.tmdbid:
+                if download_history and (download_history.tmdbid or download_history.doubanid):
+                    # 下载记录中已存在识别信息
                    mediainfo: MediaInfo = self.mediaChain.recognize_media(mtype=MediaType(download_history.type),
                                                                           tmdbid=download_history.tmdbid,
                                                                           doubanid=download_history.doubanid)
+                    if mediainfo:
+                        # 更新自定义媒体类别
+                        if download_history.media_category:
+                            mediainfo.category = download_history.media_category
+
                else:
                    mediainfo: MediaInfo = self.mediaChain.recognize_by_meta(file_meta)
                if not mediainfo:
--- a/app/schemas/subscribe.py
+++ b/app/schemas/subscribe.py
@@ -1,7 +1,6 @@
-import json
 from typing import Optional, List, Dict, Any

-from pydantic import BaseModel, validator
+from pydantic import BaseModel


 class Subscribe(BaseModel):
@@ -65,17 +64,12 @@ class Subscribe(BaseModel):
    search_imdbid: Optional[int] = 0
    # 时间
    date: Optional[str] = None
-
-    @validator('sites', pre=True)
-    def parse_json_fields(cls, value):
-        if value:
-            if isinstance(value, str):
-                try:
-                    return json.loads(value)
-                except json.JSONDecodeError:
-                    return []
-            return value
-        return []
+    # 自定义识别词
+    custom_words: Optional[str] = None
+    # 自定义媒体类别
+    media_category: Optional[str] = None
+    # 过滤规则组
+    filter_groups: Optional[List[str]] = []

    class Config:
        orm_mode = True
--- a/app/schemas/user.py
+++ b/app/schemas/user.py
@@ -1,7 +1,6 @@
-import json
 from typing import Optional

-from pydantic import BaseModel, validator
+from pydantic import BaseModel


 # Shared properties
@@ -23,17 +22,6 @@ class UserBase(BaseModel):
    # 个性化设置
    settings: Optional[dict] = {}

-    @validator('permissions', 'settings', pre=True)
-    def parse_json_fields(cls, value):
-        if value:
-            if isinstance(value, str):
-                try:
-                    return json.loads(value)
-                except json.JSONDecodeError:
-                    return {}
-            return value
-        return {}
-
    class Config:
        orm_mode = True