mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-04-27 12:12:24 +08:00
feat: tmdbid优先识别,同ID电影/电视剧通过元数据自动消歧
当名称中包含 {tmdbid=xxx} 时,优先使用tmdbid直接查询TMDB,不再回退到标题搜索。
当同一tmdbid同时存在电影和电视剧时,通过标题、年份、类型等元数据自动消歧。
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -403,16 +403,16 @@ class ChainBase(metaclass=ABCMeta):
|
||||
:return: 识别的媒体信息,包括剧集信息
|
||||
"""
|
||||
# 识别用名中含指定信息情形
|
||||
if not mtype and meta and meta.type in [MediaType.TV, MediaType.MOVIE]:
|
||||
mtype = meta.type
|
||||
if not tmdbid and hasattr(meta, "tmdbid"):
|
||||
tmdbid = meta.tmdbid
|
||||
if not doubanid and hasattr(meta, "doubanid"):
|
||||
doubanid = meta.doubanid
|
||||
# 有tmdbid时不使用其它ID
|
||||
# 有tmdbid时,不使用meta推断的类型(由消歧逻辑决定),也不使用其它ID
|
||||
if tmdbid:
|
||||
doubanid = None
|
||||
bangumiid = None
|
||||
elif not mtype and meta and meta.type in [MediaType.TV, MediaType.MOVIE]:
|
||||
mtype = meta.type
|
||||
with fresh(not cache):
|
||||
return self.run_module(
|
||||
"recognize_media",
|
||||
@@ -447,16 +447,16 @@ class ChainBase(metaclass=ABCMeta):
|
||||
:return: 识别的媒体信息,包括剧集信息
|
||||
"""
|
||||
# 识别用名中含指定信息情形
|
||||
if not mtype and meta and meta.type in [MediaType.TV, MediaType.MOVIE]:
|
||||
mtype = meta.type
|
||||
if not tmdbid and hasattr(meta, "tmdbid"):
|
||||
tmdbid = meta.tmdbid
|
||||
if not doubanid and hasattr(meta, "doubanid"):
|
||||
doubanid = meta.doubanid
|
||||
# 有tmdbid时不使用其它ID
|
||||
# 有tmdbid时,不使用meta推断的类型(由消歧逻辑决定),也不使用其它ID
|
||||
if tmdbid:
|
||||
doubanid = None
|
||||
bangumiid = None
|
||||
elif not mtype and meta and meta.type in [MediaType.TV, MediaType.MOVIE]:
|
||||
mtype = meta.type
|
||||
async with async_fresh(not cache):
|
||||
return await self.async_run_module(
|
||||
"async_recognize_media",
|
||||
|
||||
@@ -102,7 +102,7 @@ class TheMovieDbModule(_ModuleBase):
|
||||
if meta and not tmdbid and settings.RECOGNIZE_SOURCE != "themoviedb":
|
||||
return False
|
||||
|
||||
if meta and not meta.name:
|
||||
if meta and not meta.name and not tmdbid:
|
||||
logger.warn("识别媒体信息时未提供元数据名称")
|
||||
return False
|
||||
|
||||
@@ -118,6 +118,98 @@ class TheMovieDbModule(_ModuleBase):
|
||||
# 使用中英文名分别识别,去重去空,但要保持顺序
|
||||
return list(dict.fromkeys([k for k in [meta.cn_name, zh_name, meta.en_name] if k]))
|
||||
|
||||
def _get_info_by_tmdbid(self, tmdbid: int, mtype: Optional[MediaType],
                        meta: Optional[MetaBase]) -> Optional[dict]:
    """
    Look up media details by tmdbid.

    With a known type the lookup is a single query. Without one, both the
    TV and the movie endpoints are queried; if both return data the pair is
    handed to _disambiguate_by_meta to pick one.

    :param tmdbid: TMDB id to look up
    :param mtype: media type, or None when unknown
    :param meta: parsed metadata used for disambiguation, may be None
    :return: the detail dict, or None when nothing was found or the
             TV/movie ambiguity could not be resolved
    """
    # Known type: one direct query, no disambiguation needed.
    if mtype:
        return self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid)

    # Unknown type: ask for both kinds (TV first, then movie).
    tv_detail = self.tmdb.get_info(mtype=MediaType.TV, tmdbid=tmdbid)
    movie_detail = self.tmdb.get_info(mtype=MediaType.MOVIE, tmdbid=tmdbid)

    # At most one side exists, so there is nothing to disambiguate.
    if not (tv_detail and movie_detail):
        return tv_detail or movie_detail or None

    # Both exist: let the metadata decide.
    chosen = self._disambiguate_by_meta(tv_detail, movie_detail, meta)
    if not chosen:
        logger.warn(f"无法判断tmdb_id:{tmdbid} 是电影还是电视剧")
        return None
    return chosen
|
||||
|
||||
async def _async_get_info_by_tmdbid(self, tmdbid: int, mtype: Optional[MediaType],
                                    meta: Optional[MetaBase]) -> Optional[dict]:
    """
    Look up media details by tmdbid (async variant).

    With a known type the lookup is a single awaited query. Without one,
    the TV and movie endpoints are awaited one after the other; if both
    return data the pair is handed to _disambiguate_by_meta to pick one.

    :param tmdbid: TMDB id to look up
    :param mtype: media type, or None when unknown
    :param meta: parsed metadata used for disambiguation, may be None
    :return: the detail dict, or None when nothing was found or the
             TV/movie ambiguity could not be resolved
    """
    # Known type: one direct query, no disambiguation needed.
    if mtype:
        return await self.tmdb.async_get_info(mtype=mtype, tmdbid=tmdbid)

    # Unknown type: ask for both kinds (TV first, then movie).
    tv_detail = await self.tmdb.async_get_info(mtype=MediaType.TV, tmdbid=tmdbid)
    movie_detail = await self.tmdb.async_get_info(mtype=MediaType.MOVIE, tmdbid=tmdbid)

    # At most one side exists, so there is nothing to disambiguate.
    if not (tv_detail and movie_detail):
        return tv_detail or movie_detail or None

    # Both exist: let the metadata decide.
    chosen = self._disambiguate_by_meta(tv_detail, movie_detail, meta)
    if not chosen:
        logger.warn(f"无法判断tmdb_id:{tmdbid} 是电影还是电视剧")
        return None
    return chosen
|
||||
|
||||
@staticmethod
def _disambiguate_by_meta(info_tv: dict, info_movie: dict,
                          meta: Optional[MetaBase]) -> Optional[dict]:
    """
    Choose between a TV entry and a movie entry that share one tmdbid.

    Each candidate is scored against the parsed metadata:
      * +2 when one of the metadata names (cn_name / en_name) contains, or
        is contained in, one of the candidate's titles (case-insensitive);
      * +1 when the first four characters of the candidate's
        release_date / first_air_date equal meta.year.
    The higher score wins. On a tie meta.type is used as the tie-breaker.

    :param info_tv: detail dict of the TV candidate
    :param info_movie: detail dict of the movie candidate
    :param meta: parsed metadata used as evidence, may be None
    :return: the chosen detail dict, or None when meta is missing or the
             evidence is inconclusive
    """
    if not meta:
        return None

    # The metadata side is identical for both candidates, so normalise once.
    meta_names = [n.strip().casefold() for n in (meta.cn_name, meta.en_name)
                  if isinstance(n, str) and n.strip()]
    # str() tolerates a year stored as int as well as str.
    meta_year = str(meta.year) if meta.year else ""

    def _collect_titles(info: dict) -> set:
        """Gather every usable title of a candidate, case-folded."""
        raw = [info.get(key) for key in ('title', 'name', 'original_title', 'original_name')]
        raw.extend(info.get('names') or [])
        # Blank entries must be dropped: "" is a substring of every string
        # and would hand the candidate a free title match. Non-string
        # entries would raise TypeError in the substring test below.
        return {t.strip().casefold() for t in raw
                if isinstance(t, str) and t.strip()}

    def _match_score(info: dict) -> int:
        """Score how well one candidate agrees with the metadata."""
        score = 0
        # Title match (counted once, however many names/titles agree)
        titles = _collect_titles(info)
        if any(name in title or title in name
               for name in meta_names for title in titles):
            score += 2
        # Year match
        if meta_year:
            release_date = info.get('release_date') or info.get('first_air_date') or ''
            if str(release_date)[:4] == meta_year:
                score += 1
        return score

    score_tv = _match_score(info_tv)
    score_movie = _match_score(info_movie)

    if score_tv > score_movie:
        logger.info(f"通过元数据消歧,tmdb_id:{info_tv.get('id')} 识别为电视剧")
        return info_tv
    if score_movie > score_tv:
        logger.info(f"通过元数据消歧,tmdb_id:{info_movie.get('id')} 识别为电影")
        return info_movie

    # Equal scores: fall back to the type hint carried by the metadata.
    if meta.type == MediaType.TV:
        logger.info(f"通过媒体类型提示消歧,tmdb_id:{info_tv.get('id')} 识别为电视剧")
        return info_tv
    if meta.type == MediaType.MOVIE:
        logger.info(f"通过媒体类型提示消歧,tmdb_id:{info_movie.get('id')} 识别为电影")
        return info_movie

    return None
|
||||
|
||||
def _search_by_name(self, name: str, meta: MetaBase, group_seasons: List[dict]) -> dict:
|
||||
"""
|
||||
根据名称搜索媒体信息
|
||||
@@ -404,9 +496,9 @@ class TheMovieDbModule(_ModuleBase):
|
||||
info = None
|
||||
# 缓存没有或者强制不使用缓存
|
||||
if tmdbid:
|
||||
# 直接查询详情
|
||||
info = self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid)
|
||||
if not info and meta:
|
||||
# 直接查询详情,支持同ID电影/电视剧消歧
|
||||
info = self._get_info_by_tmdbid(tmdbid=tmdbid, mtype=mtype, meta=meta)
|
||||
if not info and meta and not tmdbid:
|
||||
# 准备搜索名称
|
||||
names = self._prepare_search_names(meta)
|
||||
for name in names:
|
||||
@@ -422,7 +514,10 @@ class TheMovieDbModule(_ModuleBase):
|
||||
info = self.tmdb.get_info(mtype=info.get("media_type"),
|
||||
tmdbid=info.get("id"))
|
||||
elif not info:
|
||||
logger.error("识别媒体信息时未提供元数据或唯一且有效的tmdbid")
|
||||
if tmdbid:
|
||||
logger.warn(f"tmdb_id:{tmdbid} 无法确定媒体类型,识别失败")
|
||||
else:
|
||||
logger.error("识别媒体信息时未提供元数据或唯一且有效的tmdbid")
|
||||
return None
|
||||
|
||||
# 保存到缓存
|
||||
@@ -485,9 +580,9 @@ class TheMovieDbModule(_ModuleBase):
|
||||
info = None
|
||||
# 缓存没有或者强制不使用缓存
|
||||
if tmdbid:
|
||||
# 直接查询详情
|
||||
info = await self.tmdb.async_get_info(mtype=mtype, tmdbid=tmdbid)
|
||||
if not info and meta:
|
||||
# 直接查询详情,支持同ID电影/电视剧消歧
|
||||
info = await self._async_get_info_by_tmdbid(tmdbid=tmdbid, mtype=mtype, meta=meta)
|
||||
if not info and meta and not tmdbid:
|
||||
# 准备搜索名称
|
||||
names = self._prepare_search_names(meta)
|
||||
for name in names:
|
||||
@@ -503,7 +598,10 @@ class TheMovieDbModule(_ModuleBase):
|
||||
info = await self.tmdb.async_get_info(mtype=info.get("media_type"),
|
||||
tmdbid=info.get("id"))
|
||||
elif not info:
|
||||
logger.error("识别媒体信息时未提供元数据或唯一且有效的tmdbid")
|
||||
if tmdbid:
|
||||
logger.warn(f"tmdb_id:{tmdbid} 无法确定媒体类型,识别失败")
|
||||
else:
|
||||
logger.error("识别媒体信息时未提供元数据或唯一且有效的tmdbid")
|
||||
return None
|
||||
|
||||
# 保存到缓存
|
||||
|
||||
150
tests/test_tmdb_recognize.py
Normal file
150
tests/test_tmdb_recognize.py
Normal file
@@ -0,0 +1,150 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import asyncio
|
||||
from unittest import TestCase
|
||||
|
||||
from app.core.metainfo import MetaInfo
|
||||
from app.chain import ChainBase
|
||||
from app.modules.themoviedb import TheMovieDbModule
|
||||
from app.schemas.types import MediaType
|
||||
|
||||
|
||||
class TmdbRecognizeModuleTest(TestCase):
    """
    Recognition tests at the TMDB module layer.

    The module-layer async_recognize_media does not pull tmdbid out of
    meta.tmdbid by itself; that extraction is done in the ChainBase layer,
    so every test here passes the tmdbid argument explicitly.
    """

    @classmethod
    def setUpClass(cls):
        cls.module = TheMovieDbModule()
        cls.module.init_module()

    @classmethod
    def tearDownClass(cls):
        cls.module.stop()

    def _run(self, coro):
        """Drive a coroutine to completion and return its result."""
        return asyncio.run(coro)

    def _recognize(self, meta, **extra):
        """Call the module with meta's own tmdbid, bypassing the cache."""
        coro = self.module.async_recognize_media(
            meta=meta, tmdbid=meta.tmdbid, cache=False, **extra
        )
        return self._run(coro)

    def test_tmdbid_priority_over_title(self):
        """
        A title carrying {tmdbid=xxx} must be recognised through the tmdbid
        instead of falling back to a title search.
        """
        parsed = MetaInfo(title="空之境界 {tmdbid=938416}")
        self.assertEqual(parsed.tmdbid, 938416)
        self.assertEqual(parsed.cn_name, "空之境界")

        media = self._recognize(parsed)
        self.assertIsNotNone(media, "应能识别到媒体信息")
        self.assertEqual(media.tmdb_id, 938416)

    def test_tmdbid_disambiguation_tv_vs_movie(self):
        """
        When one tmdbid exists both as a movie and as a TV show, the
        metadata must disambiguate.
        tmdbid=23155 is both the movie "空之境界 第五章 矛盾螺旋" and the
        TV show "TV Land Top 10"; a title containing "空之境界" should
        resolve to the movie.
        """
        parsed = MetaInfo(title="空之境界 第五章 矛盾螺旋 (2008) {tmdbid=23155}")
        self.assertEqual(parsed.tmdbid, 23155)

        media = self._recognize(parsed)
        self.assertIsNotNone(media, "同ID存在电影和电视剧时应能通过元数据消歧")
        self.assertEqual(media.tmdb_id, 23155)
        self.assertEqual(media.type, MediaType.MOVIE)

    def test_tmdbid_with_explicit_type(self):
        """
        When a tmdbid comes together with an explicit type, the lookup
        must use that type directly.
        """
        parsed = MetaInfo(title="空之境界 {tmdbid=23155}")

        media = self._recognize(parsed, mtype=MediaType.TV)
        self.assertIsNotNone(media)
        self.assertEqual(media.tmdb_id, 23155)
        self.assertEqual(media.type, MediaType.TV)

    def test_tmdbid_only_movie_exists(self):
        """
        When the tmdbid exists only as a movie it must be recognised as a
        movie even if meta.type was inferred as TV.
        tmdbid=496891 exists only as the movie "少女与战车 最终章 ~第2话~".
        """
        parsed = MetaInfo(title="少女与战车 最终章 ~第2话~ (2019) {tmdbid=496891}")
        self.assertEqual(parsed.tmdbid, 496891)

        media = self._recognize(parsed)
        self.assertIsNotNone(media, "仅存在电影时应正确识别")
        self.assertEqual(media.tmdb_id, 496891)
        self.assertEqual(media.type, MediaType.MOVIE)
|
||||
|
||||
|
||||
class TmdbRecognizeChainTest(TestCase):
    """
    End-to-end recognition tests at the ChainBase layer.

    Covers the full path from extracting meta.tmdbid to the module-level
    recognition.
    """

    @classmethod
    def setUpClass(cls):
        cls.chain = ChainBase()

    def _run(self, coro):
        """Drive a coroutine to completion and return its result."""
        return asyncio.run(coro)

    def _recognize(self, meta):
        """Run the chain's async recognition for meta, bypassing the cache."""
        coro = self.chain.async_recognize_media(meta=meta, cache=False)
        return self._run(coro)

    def test_chain_tmdbid_movie(self):
        """
        Through ChainBase, a tmdbid that maps to a movie must be
        recognised as that movie.
        """
        parsed = MetaInfo(title="空之境界 第五章 矛盾螺旋 (2008) {tmdbid=23155}")

        media = self._recognize(parsed)
        self.assertIsNotNone(media)
        self.assertEqual(media.tmdb_id, 23155)
        self.assertEqual(media.type, MediaType.MOVIE)

    def test_chain_tmdbid_ignores_inferred_type(self):
        """
        When a tmdbid is present the type inferred from meta must not be
        used. "第2话" makes meta.type come out as TV, yet tmdbid=496891
        exists only as a movie.
        """
        parsed = MetaInfo(title="少女与战车 最终章 ~第2话~ (2019) {tmdbid=496891}")
        self.assertEqual(parsed.type, MediaType.TV, "meta.type应被推断为TV")
        self.assertEqual(parsed.tmdbid, 496891)

        media = self._recognize(parsed)
        self.assertIsNotNone(media, "有tmdbid时不应因meta.type推断错误而识别失败")
        self.assertEqual(media.tmdb_id, 496891)
        self.assertEqual(media.type, MediaType.MOVIE)

    def test_chain_no_tmdbid_uses_inferred_type(self):
        """
        Without a tmdbid, the type inferred from meta is used as usual for
        the title search.
        """
        parsed = MetaInfo(title="进击的巨人 S01E01")
        self.assertEqual(parsed.type, MediaType.TV)

        media = self._recognize(parsed)
        self.assertIsNotNone(media)
        self.assertEqual(media.type, MediaType.TV)
|
||||
Reference in New Issue
Block a user