feat:优化字幕和音频文件整理方式

This commit is contained in:
jxxghp
2026-01-20 13:24:35 +08:00
parent 3450a89880
commit 2c62ffe34a
4 changed files with 128 additions and 246 deletions

View File

@@ -167,7 +167,7 @@ def rename(fileitem: schemas.FileItem,
# 重命名目录内文件
if recursive:
transferchain = TransferChain()
media_exts = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIO_TRACK_EXT
media_exts = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
# 递归修改目录内文件(智能识别命名)
sub_files: List[schemas.FileItem] = StorageChain().list_files(fileitem)
if sub_files:

View File

@@ -332,7 +332,8 @@ class JobManager:
return 0
return sum([
task.fileitem.size if task.fileitem.size is not None
else (SystemUtils.get_directory_size(Path(task.fileitem.path)) if task.fileitem.storage == "local" else 0)
else (
SystemUtils.get_directory_size(Path(task.fileitem.path)) if task.fileitem.storage == "local" else 0)
for task in self._job_view[__mediaid__].tasks
if task.state == "completed"
])
@@ -366,8 +367,10 @@ class TransferChain(ChainBase, metaclass=Singleton):
def __init__(self):
super().__init__()
# 可处理的文件后缀
self.all_exts = settings.RMT_MEDIAEXT
# 可处理的文件后缀(视频文件、字幕、音频文件)
self._allowed_exts = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
# 附加文件后缀
self._extra_exts = settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
# 待整理任务队列
self._queue = queue.Queue()
# 文件整理线程
@@ -990,13 +993,19 @@ class TransferChain(ChainBase, metaclass=Singleton):
返回:成功标识,错误信息
"""
def __is_allow_extensions(_ext: str) -> bool:
def __is_allowed_file(_ext: str) -> bool:
"""
判断是否允许的扩展名
"""
return True if not self.all_exts or f".{_ext.lower()}" in self.all_exts else False
return True if f".{_ext.lower()}" in self._allowed_exts else False
def __is_allow_filesize(_size: int, _min_filesize: int) -> bool:
def __is_extra_file(_ext: str) -> bool:
"""
判断是否额外的扩展名
"""
return True if f".{_ext.lower()}" in self._extra_exts else False
def __is_allow_filesize(_ext: str, _size: int, _min_filesize: int) -> bool:
"""
判断是否满足最小文件大小
"""
@@ -1040,9 +1049,13 @@ class TransferChain(ChainBase, metaclass=Singleton):
if formaterHandler:
file_items = [f for f in file_items if formaterHandler.match(f[0].name)]
# 过滤后缀和大小
file_items = [f for f in file_items if f[1] # 蓝光目录不过滤
or __is_allow_extensions(f[0].extension) and __is_allow_filesize(f[0].size, min_filesize)]
# 过滤后缀和大小(蓝光目录、附加文件不过滤大小)
file_items = [f for f in file_items if f[1] or
__is_extra_file(f[0].extension) or
(__is_allowed_file(f[0].extension) and __is_allow_filesize(_ext=f[0].extension,
_size=f[0].size or 0,
_min_filesize=min_filesize))]
if not file_items:
logger.warn(f"{fileitem.path} 没有找到可整理的媒体文件")
return False, f"{fileitem.name} 没有找到可整理的媒体文件"
@@ -1469,7 +1482,7 @@ class TransferChain(ChainBase, metaclass=Singleton):
for file in torrent_files:
file_path = save_path / file.name
# 如果存在未被屏蔽的媒体文件,则不删除种子
if (file_path.suffix in self.all_exts
if (file_path.suffix in self._allowed_exts
and not self._is_blocked_by_exclude_words(file_path.as_posix(), transfer_exclude_words)
and file_path.exists()):
return False

View File

@@ -219,7 +219,7 @@ class ConfigModel(BaseModel):
AUTO_UPDATE_RESOURCE: bool = True
# ==================== 媒体文件格式配置 ====================
# 支持的后缀格式
# 支持的视频文件后缀格式
RMT_MEDIAEXT: list = Field(
default_factory=lambda: ['.mp4', '.mkv', '.ts', '.iso',
'.rmvb', '.avi', '.mov', '.mpeg',
@@ -230,8 +230,6 @@ class ConfigModel(BaseModel):
# 支持的字幕文件后缀格式
RMT_SUBEXT: list = Field(default_factory=lambda: ['.srt', '.ass', '.ssa', '.sup'])
# 支持的音轨文件后缀格式
RMT_AUDIO_TRACK_EXT: list = Field(default_factory=lambda: ['.mka'])
# 音轨文件后缀格式
RMT_AUDIOEXT: list = Field(
default_factory=lambda: ['.aac', '.ac3', '.amr', '.caf', '.cda', '.dsf',
'.dff', '.kar', '.m4a', '.mp1', '.mp2', '.mp3',

View File

@@ -100,6 +100,30 @@ class TransHandler:
:return: TransferInfo、错误信息
"""
def __is_subtitle_file(_fileitem: FileItem) -> bool:
"""
判断是否为字幕文件
:param _fileitem: 文件项
:return: True/False
"""
if not _fileitem.extension:
return False
if f".{_fileitem.extension.lower()}" in settings.RMT_SUBEXT:
return True
return False
def __is_extra_file(_fileitem: FileItem) -> bool:
"""
判断是否为附加文件
:param _fileitem: 文件项
:return: True/False
"""
if not _fileitem.extension:
return False
if f".{_fileitem.extension.lower()}" in (settings.RMT_SUBEXT + settings.RMT_AUDIOEXT):
return True
return False
# 重置结果
self.__reset_result()
@@ -132,10 +156,9 @@ class TransHandler:
return self.result.model_copy()
else:
new_path = target_path / fileitem.name
# 在整理目录前先尝试获取原盘大小避免整理记录出现0字节的情况
# TODO 当前只计算STREAM目录内的文件大小如果需要精确则递归完整目录
# 原盘大小只计算STREAM目录内的文件大小
if stream_fileitem := source_oper.get_item(
Path(fileitem.path) / "BDMV" / "STREAM"
Path(fileitem.path) / "BDMV" / "STREAM"
):
fileitem.size = 0
files = source_oper.list(stream_fileitem) or []
@@ -204,6 +227,12 @@ class TransHandler:
file_ext=f".{fileitem.extension}"
)
)
# 针对字幕文件,文件名中补充额外标识信息
if __is_subtitle_file(fileitem):
new_file = self.__rename_subtitles(new_file)
# 文件目录
folder_path = DirectoryHelper.get_media_root_path(
rename_format, rename_path=new_file
)
@@ -221,8 +250,6 @@ class TransHandler:
new_file = target_path / fileitem.name
folder_path = target_path
# 判断是否要覆盖
overflag = False
# 目标目录
target_diritem = target_oper.get_folder(folder_path)
if not target_diritem:
@@ -234,30 +261,45 @@ class TransHandler:
transfer_type=transfer_type,
need_notify=need_notify)
return self.result.model_copy()
# 目标文件
target_item = target_oper.get_item(new_file)
if target_item:
# 目标文件已存在
target_file = new_file
if target_storage == "local" and new_file.is_symlink():
target_file = new_file.readlink()
if not target_file.exists():
overflag = True
if not overflag:
# 判断是否要覆盖,附加文件强制覆盖
overflag = False
if not __is_extra_file(fileitem):
# 目标文件
target_item = target_oper.get_item(new_file)
if target_item:
# 目标文件已存在
logger.info(
f"目的文件系统中已经存在同名文件 {target_file},当前整理覆盖模式设置为 {overwrite_mode}")
if overwrite_mode == 'always':
# 总是覆盖同名文件
overflag = True
elif overwrite_mode == 'size':
# 存在时大覆盖小
if target_item.size < fileitem.size:
logger.info(f"目标文件文件大小更小,将覆盖:{new_file}")
target_file = new_file
if target_storage == "local" and new_file.is_symlink():
target_file = new_file.readlink()
if not target_file.exists():
overflag = True
else:
if not overflag:
# 目标文件已存在
logger.info(
f"目的文件系统中已经存在同名文件 {target_file},当前整理覆盖模式设置为 {overwrite_mode}")
if overwrite_mode == 'always':
# 总是覆盖同名文件
overflag = True
elif overwrite_mode == 'size':
# 存在时大覆盖小
if target_item.size < fileitem.size:
logger.info(f"目标文件文件大小更小,将覆盖:{new_file}")
overflag = True
else:
self.__set_result(success=False,
message=f"媒体库存在同名文件,且质量更好",
fileitem=fileitem,
target_item=target_item,
target_diritem=target_diritem,
fail_list=[fileitem.path],
transfer_type=transfer_type,
need_notify=need_notify)
return self.result.model_copy()
elif overwrite_mode == 'never':
# 存在不覆盖
self.__set_result(success=False,
message=f"媒体库存在同名文件,且质量更好",
message=f"媒体库存在同名文件,当前覆盖模式为不覆盖",
fileitem=fileitem,
target_item=target_item,
target_diritem=target_diritem,
@@ -265,26 +307,17 @@ class TransHandler:
transfer_type=transfer_type,
need_notify=need_notify)
return self.result.model_copy()
elif overwrite_mode == 'never':
# 存在不覆盖
self.__set_result(success=False,
message=f"媒体库存在同名文件,当前覆盖模式为不覆盖",
fileitem=fileitem,
target_item=target_item,
target_diritem=target_diritem,
fail_list=[fileitem.path],
transfer_type=transfer_type,
need_notify=need_notify)
return self.result.model_copy()
elif overwrite_mode == 'latest':
# 仅保留最新版本
logger.info(f"当前整理覆盖模式设置为仅保留最新版本,将覆盖:{new_file}")
overflag = True
else:
if overwrite_mode == 'latest':
# 文件不存在,但仅保留最新版本
logger.info(f"当前整理覆盖模式设置为 {overwrite_mode},仅保留最新版本,正在删除已有版本文件 ...")
self.__delete_version_files(target_oper, new_file)
elif overwrite_mode == 'latest':
# 仅保留最新版本
logger.info(f"当前整理覆盖模式设置为仅保留最新版本,将覆盖:{new_file}")
overflag = True
else:
if overwrite_mode == 'latest':
# 文件不存在,但仅保留最新版本
logger.info(
f"当前整理覆盖模式设置为 {overwrite_mode},仅保留最新版本,正在删除已有版本文件 ...")
self.__delete_version_files(target_oper, new_file)
# 整理文件
new_item, err_msg = self.__transfer_file(fileitem=fileitem,
mediainfo=mediainfo,
@@ -460,48 +493,10 @@ class TransHandler:
return None, "未知错误"
def __transfer_other_files(self, fileitem: FileItem, target_storage: str,
source_oper: StorageBase, target_oper: StorageBase,
target_file: Path, transfer_type: str) -> Tuple[bool, str]:
@staticmethod
def __rename_subtitles(sub_item: FileItem, new_file: Path) -> Path:
"""
根据文件名整理其他相关文件
:param fileitem: 源文件
:param target_storage: 目标存储
:param source_oper: 源存储操作对象
:param target_oper: 目标存储操作对象
:param target_file: 目标路径
:param transfer_type: 整理方式
"""
# 整理字幕
state, errmsg = self.__transfer_subtitles(fileitem=fileitem,
target_storage=target_storage,
source_oper=source_oper,
target_oper=target_oper,
target_file=target_file,
transfer_type=transfer_type)
if not state:
return False, errmsg
# 整理音轨文件
state, errmsg = self.__transfer_audio_track_files(fileitem=fileitem,
target_storage=target_storage,
source_oper=source_oper,
target_oper=target_oper,
target_file=target_file,
transfer_type=transfer_type)
return state, errmsg
def __transfer_subtitles(self, fileitem: FileItem, target_storage: str,
source_oper: StorageBase, target_oper: StorageBase,
target_file: Path, transfer_type: str) -> Tuple[bool, str]:
"""
根据文件名整理对应字幕文件
:param fileitem: 源文件
:param target_storage: 目标存储
:param source_oper: 源存储操作对象
:param target_oper: 目标存储操作对象
:param target_file: 目标路径
:param transfer_type: 整理方式
重命名字幕文件,补充附加信息
"""
# 字幕正则式
_zhcn_sub_re = r"([.\[(](((zh[-_])?(cn|ch[si]|sg|sc))|zho?" \
@@ -517,145 +512,28 @@ class TransHandler:
r"|(?<![a-z0-9])big5(?![a-z0-9])"
_eng_sub_re = r"[.\[(]eng[.\])]"
# 比对文件名并整理字幕
org_path = Path(fileitem.path)
# 查找上级文件项
parent_item: FileItem = source_oper.get_parent(fileitem)
if not parent_item:
return False, f"{org_path} 上级目录获取失败"
# 字幕文件列表
file_list: List[FileItem] = source_oper.list(parent_item) or []
file_list = [f for f in file_list if f.type == "file" and f.extension
and f".{f.extension.lower()}" in settings.RMT_SUBEXT]
if len(file_list) == 0:
logger.info(f"{parent_item.path} 目录下没有找到字幕文件...")
else:
logger.info(f"字幕文件清单:{[f.name for f in file_list]}")
# 识别文件名
metainfo = MetaInfoPath(org_path)
for sub_item in file_list:
# 识别字幕文件名
sub_file_name = re.sub(_zhtw_sub_re,
".",
re.sub(_zhcn_sub_re,
".",
sub_item.name,
flags=re.I),
flags=re.I)
sub_file_name = re.sub(_eng_sub_re, ".", sub_file_name, flags=re.I)
sub_metainfo = MetaInfoPath(Path(sub_item.path))
# 匹配字幕文件名
if (org_path.stem == Path(sub_file_name).stem) or \
(sub_metainfo.cn_name and sub_metainfo.cn_name == metainfo.cn_name) \
or (sub_metainfo.en_name and sub_metainfo.en_name == metainfo.en_name):
if metainfo.part and metainfo.part != sub_metainfo.part:
continue
if metainfo.season \
and metainfo.season != sub_metainfo.season:
continue
if metainfo.episode \
and metainfo.episode != sub_metainfo.episode:
continue
new_file_type = ""
# 兼容jellyfin字幕识别(多重识别), emby则会识别最后一个后缀
if re.search(_zhcn_sub_re, sub_item.name, re.I):
new_file_type = ".chi.zh-cn"
elif re.search(_zhtw_sub_re, sub_item.name,
re.I):
new_file_type = ".zh-tw"
elif re.search(_eng_sub_re, sub_item.name, re.I):
new_file_type = ".eng"
# 通过对比字幕文件大小 尽量整理所有存在的字幕
file_ext = f".{sub_item.extension}"
new_sub_tag_dict = {
".eng": ".英文",
".chi.zh-cn": ".简体中文",
".zh-tw": ".繁体中文"
}
new_sub_tag_list = [
(".default" + new_file_type if (
(settings.DEFAULT_SUB == "zh-cn" and new_file_type == ".chi.zh-cn") or
(settings.DEFAULT_SUB == "zh-tw" and new_file_type == ".zh-tw") or
(settings.DEFAULT_SUB == "eng" and new_file_type == ".eng")
) else new_file_type) if t == 0 else "%s%s(%s)" % (new_file_type,
new_sub_tag_dict.get(
new_file_type, ""
),
t) for t in range(6)
]
for new_sub_tag in new_sub_tag_list:
new_file: Path = target_file.with_name(target_file.stem + new_sub_tag + file_ext)
# 如果字幕文件不存在, 直接整理字幕, 并跳出循环
try:
logger.debug(f"正在处理字幕:{sub_item.name}")
new_item, errmsg = self.__transfer_command(fileitem=sub_item,
target_storage=target_storage,
source_oper=source_oper,
target_oper=target_oper,
target_file=new_file,
transfer_type=transfer_type)
if new_item:
logger.info(f"字幕 {sub_item.name} 整理完成")
self.__set_result(
subtitle_list=[sub_item.path],
subtitle_list_new=[new_item.path],
)
break
else:
logger.error(f"字幕 {sub_item.name} 整理失败:{errmsg}")
return False, errmsg
except Exception as error:
logger.info(f"字幕 {new_file} 出错了,原因: {str(error)}")
return True, ""
# 原文件后缀
file_ext = f".{sub_item.extension}"
# 新文件后缀
new_file_type = ""
def __transfer_audio_track_files(self, fileitem: FileItem, target_storage: str,
source_oper: StorageBase, target_oper: StorageBase,
target_file: Path, transfer_type: str) -> Tuple[bool, str]:
"""
根据文件名整理对应音轨文件
:param fileitem: 源文件
:param target_storage: 目标存储
:param source_oper: 源存储操作对象
:param target_oper: 目标存储操作对象
:param target_file: 目标路径
:param transfer_type: 整理方式
"""
org_path = Path(fileitem.path)
# 查找上级文件项
parent_item: FileItem = source_oper.get_parent(fileitem)
if not parent_item:
return False, f"{org_path} 上级目录获取失败"
file_list: List[FileItem] = source_oper.list(parent_item)
# 匹配音轨文件
pending_file_list: List[FileItem] = [file for file in file_list
if Path(file.name).stem == org_path.stem
and file.type == "file" and file.extension
and f".{file.extension.lower()}" in settings.RMT_AUDIOEXT]
if len(pending_file_list) == 0:
return True, f"{parent_item.path} 目录下没有找到匹配的音轨文件"
logger.debug("音轨文件清单:" + str(pending_file_list))
for track_file in pending_file_list:
track_ext = f".{track_file.extension}"
new_track_file = target_file.with_name(target_file.stem + track_ext)
try:
logger.info(f"正在整理音轨文件:{track_file}{new_track_file}")
new_item, errmsg = self.__transfer_command(fileitem=track_file,
target_storage=target_storage,
source_oper=source_oper,
target_oper=target_oper,
target_file=new_track_file,
transfer_type=transfer_type)
if new_item:
logger.info(f"音轨文件 {org_path.name} 整理完成")
self.__set_result(
audio_list=[track_file.path],
audio_list_new=[new_item.path],
)
else:
logger.error(f"音轨文件 {org_path.name} 整理失败:{errmsg}")
except Exception as error:
logger.error(f"音轨文件 {org_path.name} 整理失败:{str(error)}")
return True, ""
# 识别字幕语言
if re.search(_zhcn_sub_re, sub_item.name, re.I):
new_file_type = ".chi.zh-cn"
elif re.search(_zhtw_sub_re, sub_item.name, re.I):
new_file_type = ".zh-tw"
elif re.search(_eng_sub_re, sub_item.name, re.I):
new_file_type = ".eng"
# 添加默认字幕标识
if ((settings.DEFAULT_SUB == "zh-cn" and new_file_type == ".chi.zh-cn")
or (settings.DEFAULT_SUB == "zh-tw" and new_file_type == ".zh-tw")
or (settings.DEFAULT_SUB == "eng" and new_file_type == ".eng")):
new_sub_tag = ".default" + new_file_type
else:
new_sub_tag = new_file_type
return new_file.with_name(new_file.stem + new_sub_tag + file_ext)
def __transfer_dir(self, fileitem: FileItem, mediainfo: MediaInfo,
source_oper: StorageBase, target_oper: StorageBase,
@@ -814,13 +692,6 @@ class TransHandler:
file_count=1,
total_size=fileitem.size,
)
# 处理其他相关文件
self.__transfer_other_files(fileitem=fileitem,
target_storage=target_storage,
source_oper=source_oper,
target_oper=target_oper,
target_file=target_file,
transfer_type=transfer_type)
return new_item, errmsg
return None, errmsg