优化find_metainfo函数,增加对Emby格式ID标签的支持,并添加相应的测试用例以验证不同ID格式的识别。

This commit is contained in:
jxxghp
2025-05-18 12:55:25 +08:00
parent 959d4da1f8
commit bedcd94020

View File

@@ -120,41 +120,69 @@ def find_metainfo(title: str) -> Tuple[str, dict]:
return title, metainfo
# 从标题中提取媒体信息 格式为{[tmdbid=xxx;type=xxx;s=xxx;e=xxx]}
results = re.findall(r'(?<={\[)[\W\w]+(?=]})', title)
if not results:
return title, metainfo
for result in results:
# 查找tmdbid信息
tmdbid = re.findall(r'(?<=tmdbid=)\d+', result)
if tmdbid and tmdbid[0].isdigit():
metainfo['tmdbid'] = tmdbid[0]
# 查找豆瓣id信息
doubanid = re.findall(r'(?<=doubanid=)\d+', result)
if doubanid and doubanid[0].isdigit():
metainfo['doubanid'] = doubanid[0]
# 查找媒体类型
mtype = re.findall(r'(?<=type=)\w+', result)
if mtype:
if mtype[0] == "movies":
metainfo['type'] = MediaType.MOVIE
elif mtype[0] == "tv":
metainfo['type'] = MediaType.TV
# 查找季信息
begin_season = re.findall(r'(?<=s=)\d+', result)
if begin_season and begin_season[0].isdigit():
metainfo['begin_season'] = int(begin_season[0])
end_season = re.findall(r'(?<=s=\d+-)\d+', result)
if end_season and end_season[0].isdigit():
metainfo['end_season'] = int(end_season[0])
# 查找集信息
begin_episode = re.findall(r'(?<=e=)\d+', result)
if begin_episode and begin_episode[0].isdigit():
metainfo['begin_episode'] = int(begin_episode[0])
end_episode = re.findall(r'(?<=e=\d+-)\d+', result)
if end_episode and end_episode[0].isdigit():
metainfo['end_episode'] = int(end_episode[0])
# 去除title中该部分
if tmdbid or mtype or begin_season or end_season or begin_episode or end_episode:
title = title.replace(f"{{[{result}]}}", '')
if results:
for result in results:
# 查找tmdbid信息
tmdbid = re.findall(r'(?<=tmdbid=)\d+', result)
if tmdbid and tmdbid[0].isdigit():
metainfo['tmdbid'] = tmdbid[0]
# 查找豆瓣id信息
doubanid = re.findall(r'(?<=doubanid=)\d+', result)
if doubanid and doubanid[0].isdigit():
metainfo['doubanid'] = doubanid[0]
# 查找媒体类型
mtype = re.findall(r'(?<=type=)\w+', result)
if mtype:
if mtype[0] == "movies":
metainfo['type'] = MediaType.MOVIE
elif mtype[0] == "tv":
metainfo['type'] = MediaType.TV
# 查找季信息
begin_season = re.findall(r'(?<=s=)\d+', result)
if begin_season and begin_season[0].isdigit():
metainfo['begin_season'] = int(begin_season[0])
end_season = re.findall(r'(?<=s=\d+-)\d+', result)
if end_season and end_season[0].isdigit():
metainfo['end_season'] = int(end_season[0])
# 查找集信息
begin_episode = re.findall(r'(?<=e=)\d+', result)
if begin_episode and begin_episode[0].isdigit():
metainfo['begin_episode'] = int(begin_episode[0])
end_episode = re.findall(r'(?<=e=\d+-)\d+', result)
if end_episode and end_episode[0].isdigit():
metainfo['end_episode'] = int(end_episode[0])
# 去除title中该部分
if tmdbid or mtype or begin_season or end_season or begin_episode or end_episode:
title = title.replace(f"{{[{result}]}}", '')
# 支持Emby格式的ID标签
# 1. [tmdbid=xxxx] 或 [tmdbid-xxxx] 格式
tmdb_match = re.search(r'\[tmdbid[=\-](\d+)\]', title)
if tmdb_match:
metainfo['tmdbid'] = tmdb_match.group(1)
title = re.sub(r'\[tmdbid[=\-](\d+)\]', '', title).strip()
# 2. [tmdb=xxxx] 或 [tmdb-xxxx] 格式
if not metainfo['tmdbid']:
tmdb_match = re.search(r'\[tmdb[=\-](\d+)\]', title)
if tmdb_match:
metainfo['tmdbid'] = tmdb_match.group(1)
title = re.sub(r'\[tmdb[=\-](\d+)\]', '', title).strip()
# 3. {tmdbid=xxxx} 或 {tmdbid-xxxx} 格式
if not metainfo['tmdbid']:
tmdb_match = re.search(r'\{tmdbid[=\-](\d+)\}', title)
if tmdb_match:
metainfo['tmdbid'] = tmdb_match.group(1)
title = re.sub(r'\{tmdbid[=\-](\d+)\}', '', title).strip()
# 4. {tmdb=xxxx} 或 {tmdb-xxxx} 格式
if not metainfo['tmdbid']:
tmdb_match = re.search(r'\{tmdb[=\-](\d+)\}', title)
if tmdb_match:
metainfo['tmdbid'] = tmdb_match.group(1)
title = re.sub(r'\{tmdb[=\-](\d+)\}', '', title).strip()
# 计算季集总数
if metainfo.get('begin_season') and metainfo.get('end_season'):
if metainfo['begin_season'] > metainfo['end_season']:
@@ -169,3 +197,67 @@ def find_metainfo(title: str) -> Tuple[str, dict]:
elif metainfo.get('begin_episode') and not metainfo.get('end_episode'):
metainfo['total_episode'] = 1
return title, metainfo
def test_find_metainfo():
    """
    Run find_metainfo over sample titles covering each Emby-style tmdb ID tag.

    For every sample the original title, the cleaned title, the expected
    tmdbid, the tmdbid that was actually found, and a pass/fail verdict are
    printed. Nothing is asserted, so this is a manual inspection helper
    rather than an automated test.
    """
    samples = (
        # square brackets, "tmdbid" key, "=" separator
        ("The Vampire Diaries (2009) [tmdbid=18165]", "18165"),
        # square brackets, "tmdbid" key, "-" separator
        ("Inception (2010) [tmdbid-27205]", "27205"),
        # square brackets, "tmdb" key, "=" separator
        ("Breaking Bad (2008) [tmdb=1396]", "1396"),
        # square brackets, "tmdb" key, "-" separator
        ("Interstellar (2014) [tmdb-157336]", "157336"),
        # curly braces, "tmdbid" key, "=" separator
        ("Stranger Things (2016) {tmdbid=66732}", "66732"),
        # curly braces, "tmdbid" key, "-" separator
        ("The Matrix (1999) {tmdbid-603}", "603"),
        # curly braces, "tmdb" key, "=" separator
        ("Game of Thrones (2011) {tmdb=1399}", "1399"),
        # curly braces, "tmdb" key, "-" separator
        ("Avatar (2009) {tmdb-19995}", "19995"),
    )
    separator = "-" * 50
    for raw_title, wanted_id in samples:
        stripped_title, info = find_metainfo(raw_title)
        got_id = info.get('tmdbid')
        # Decide the verdict up front so the report line stays simple.
        if got_id == wanted_id:
            verdict = '通过'
        else:
            verdict = '失败'
        print(f"原标题: {raw_title}")
        print(f"清理后标题: {stripped_title}")
        print(f"期望的tmdbid: {wanted_id}")
        print(f"识别的tmdbid: {got_id}")
        print(f"结果: {verdict}")
        print(separator)
def test_meta_info_path():
    """
    Feed sample media paths to MetaInfoPath and print the tmdbid it reports.

    The samples place the ID tag on different ancestor directories of the
    media file. Nothing is asserted; the output is meant to be inspected
    by hand.
    """
    sample_paths = (
        # tag on the directory directly above an episode-style file name
        Path("/movies/The Vampire Diaries (2009) [tmdbid=18165]/The.Vampire.Diaries.S01E01.1080p.mkv"),
        # tag on the directory directly above a movie file, "-" separator
        Path("/movies/Inception (2010) [tmdbid-27205]/Inception.2010.1080p.mkv"),
        # tag two levels above the file, with a "Season 1" directory between
        Path("/movies/Breaking Bad (2008) [tmdb=1396]/Season 1/Breaking.Bad.S01E01.1080p.mkv"),
        # curly-brace tag two levels above the file
        Path("/tv/Game of Thrones (2011) {tmdb=1399}/Season 1/Game.of.Thrones.S01E01.1080p.mkv"),
    )
    separator = "-" * 50
    for media_path in sample_paths:
        parsed = MetaInfoPath(media_path)
        print(f"测试路径: {media_path}")
        print(f"识别结果: tmdbid={parsed.tmdbid}")
        print(separator)
if __name__ == "__main__":
    # Run the manual check functions when this file is executed as a script.
    # test_find_metainfo()
    test_meta_info_path()