修复 RAR 字幕包下载识别

This commit is contained in:
jxxghp
2026-06-10 08:46:10 +08:00
parent 82694d2d8b
commit cba52c57e6
6 changed files with 224 additions and 20 deletions

View File

@@ -36,6 +36,11 @@ class DownloadChain(ChainBase):
下载处理链
"""
_SUBTITLE_ARCHIVE_FORMATS = {
".zip": "zip",
".rar": "rar",
}
@staticmethod
def _safe_subtitle_file_name(file_name: str, fallback_name: str) -> str:
"""
@@ -51,7 +56,14 @@ class DownloadChain(ChainBase):
"""
判断是否为字幕压缩包。
"""
return Path(file_name).suffix.lower() == ".zip"
return Path(file_name).suffix.lower() in DownloadChain._SUBTITLE_ARCHIVE_FORMATS
@classmethod
def _subtitle_archive_format(cls, file_name: str) -> Optional[str]:
    """
    Resolve the archive format name registered for a subtitle package.

    The file suffix is lower-cased and looked up in ``_SUBTITLE_ARCHIVE_FORMATS``.

    :param file_name: name of the downloaded file
    :return: the mapped format name, or None when the suffix is not registered
    """
    suffix = Path(file_name).suffix.lower()
    return cls._SUBTITLE_ARCHIVE_FORMATS.get(suffix)
@staticmethod
def _is_subtitle_file(file_name: str) -> bool:
@@ -154,7 +166,15 @@ class DownloadChain(ChainBase):
try:
temp_file.write_bytes(response.content)
if self._is_subtitle_archive(file_name):
shutil.unpack_archive(temp_file, temp_extract_dir, format='zip')
try:
SystemUtils.unpack_archive(
temp_file,
temp_extract_dir,
archive_format=self._subtitle_archive_format(file_name),
)
except Exception as err:
logger.error(f"字幕压缩包解压失败:{temp_file} - {str(err)}")
return []
for sub_file in SystemUtils.list_files(temp_extract_dir, settings.RMT_SUBEXT):
uploaded_path = self._upload_subtitle_file(
storage_chain=storage_chain,

View File

@@ -28,6 +28,11 @@ class SubtitleModule(_ModuleBase):
字幕下载模块
"""
_SUBTITLE_ARCHIVE_FORMATS = {
".zip": "zip",
".rar": "rar",
}
# 站点详情页字幕下载元素识别XPATH
_SITE_SUBTITLE_XPATH = [
'//td[@class="rowhead"][text()="字幕"]/following-sibling::td//a[not(@class)]',
@@ -233,40 +238,52 @@ class SubtitleModule(_ModuleBase):
ua=torrent.site_ua,
proxies=settings.PROXY if torrent.site_proxy else None,
)
settings.TEMP_PATH.mkdir(parents=True, exist_ok=True)
for sublink in sublink_list:
logger.info(f"找到字幕下载链接:{sublink},开始下载...")
# 下载
ret = request.get_res(sublink)
if ret and ret.status_code == 200:
# 保存ZIP
file_name = TorrentHelper.get_url_filename(ret, sublink)
if not file_name:
logger.warn(f"链接不是字幕文件:{sublink}")
continue
if file_name.lower().endswith(".zip"):
# ZIP包
zip_file = settings.TEMP_PATH / file_name
archive_format = self._SUBTITLE_ARCHIVE_FORMATS.get(Path(file_name).suffix.lower())
if archive_format:
archive_file = settings.TEMP_PATH / file_name
# 保存
zip_file.write_bytes(ret.content)
archive_file.write_bytes(ret.content)
# 解压路径
zip_path = zip_file.with_name(zip_file.stem)
# 解压文件
shutil.unpack_archive(zip_file, zip_path, format='zip')
# 遍历转移文件
for sub_file in SystemUtils.list_files(zip_path, settings.RMT_SUBEXT):
target_sub_file = Path(working_dir_item.path) / Path(sub_file.name)
if storageChain.get_file_item(storage, target_sub_file):
logger.info(f"字幕文件已存在:{target_sub_file}")
continue
logger.info(f"转移字幕 {sub_file}{target_sub_file} ...")
storageChain.upload_file(working_dir_item, sub_file)
archive_path = archive_file.with_name(archive_file.stem)
try:
# 解压文件
SystemUtils.unpack_archive(
archive_file,
archive_path,
archive_format=archive_format,
)
# 遍历转移文件
for sub_file in SystemUtils.list_files(archive_path, settings.RMT_SUBEXT):
target_sub_file = Path(working_dir_item.path) / Path(sub_file.name)
if storageChain.get_file_item(storage, target_sub_file):
logger.info(f"字幕文件已存在:{target_sub_file}")
continue
logger.info(f"转移字幕 {sub_file}{target_sub_file} ...")
storageChain.upload_file(working_dir_item, sub_file)
except Exception as err:
logger.error(f"字幕压缩包解压失败:{archive_file} - {str(err)}")
# 删除临时文件
try:
shutil.rmtree(zip_path)
zip_file.unlink()
if archive_path.exists():
shutil.rmtree(archive_path)
if archive_file.exists():
archive_file.unlink()
except Exception as err:
logger.error(f"删除临时文件失败:{str(err)}")
else:
if Path(file_name).suffix.lower() not in settings.RMT_SUBEXT:
logger.warn(f"链接不是支持的字幕文件:{sublink} - {file_name}")
continue
sub_file = settings.TEMP_PATH / file_name
# 保存
sub_file.write_bytes(ret.content)

View File

@@ -262,6 +262,84 @@ class SystemUtils:
_scan_directory(directory, recursive)
return files
@staticmethod
def unpack_archive(archive_file: Path, extract_dir: Path, archive_format: Optional[str] = None) -> None:
    """
    Extract an archive, adding RAR support on top of ``shutil.unpack_archive``.

    RAR archives are handed to the class's private RAR extractor; every other
    format is delegated to ``shutil.unpack_archive``, whose exceptions propagate
    unchanged.

    :param archive_file: archive to extract (a plain string path is tolerated)
    :param extract_dir: destination directory (a plain string path is tolerated)
    :param archive_format: format name such as "zip" or "rar", matched
                           case-insensitively; when omitted or empty the format
                           is inferred from the file suffix
    """
    # Coerce so that callers passing str paths do not fail on `.suffix`.
    archive_file = Path(archive_file)
    extract_dir = Path(extract_dir)
    # Normalise the format name; an empty value means "infer from the suffix".
    archive_format = archive_format.strip().lower() if archive_format else None
    if archive_format:
        is_rar = archive_format == "rar"
    else:
        is_rar = archive_file.suffix.lower() == ".rar"
    if is_rar:
        SystemUtils.__unpack_rar_archive(archive_file, extract_dir)
        return
    shutil.unpack_archive(archive_file, extract_dir, format=archive_format)
@staticmethod
def __unpack_rar_archive(archive_file: Path, extract_dir: Path) -> None:
    """
    Extract a RAR archive by calling an external extraction tool.

    Tools are probed with ``shutil.which`` in the order unar, unrar, 7z, bsdtar.
    Every tool that is found is tried in that order until one exits with
    status 0; each attempt is limited to 120 seconds.

    :param archive_file: RAR archive to extract
    :param extract_dir: destination directory, created if it does not exist
    :raises RuntimeError: when none of the tools is installed, or when every
                          installed tool fails (the message lists each tool's error)
    """
    extract_dir.mkdir(parents=True, exist_ok=True)
    source = archive_file.as_posix()
    target = extract_dir.as_posix()
    # Candidate command lines in preference order.
    # NOTE(review): unrar receives the destination with a trailing slash, presumably
    # so it is treated as a directory - confirm against the unrar manual.
    candidates = [
        ["unar", "-quiet", "-force-overwrite", "-output-directory", target, source],
        ["unrar", "x", "-o+", "-idq", source, f"{target}/"],
        ["7z", "x", "-y", f"-o{target}", source],
        ["bsdtar", "-xf", source, "-C", target],
    ]
    commands = [command for command in candidates if shutil.which(command[0])]
    if not commands:
        raise RuntimeError("未找到可用的 RAR 解压工具,请安装 unar、unrar、7z 或 bsdtar")
    errors = []
    for command in commands:
        tool = command[0]
        try:
            result = subprocess.run(
                command,
                check=False,
                text=True,
                # Tool output may contain file names in a legacy encoding; never let
                # decoding of diagnostics turn a successful extraction into a failure.
                errors="replace",
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=120,
            )
        except Exception as err:
            # Best effort: record the failure (timeout, exec error, ...) and try the next tool.
            errors.append(f"{tool}: {err}")
            continue
        if result.returncode == 0:
            return
        output = (result.stderr or result.stdout or "").strip()
        errors.append(f"{tool}: {output or f'返回码 {result.returncode}'}")
    raise RuntimeError(f"RAR 压缩包解压失败:{'; '.join(errors)}")
@staticmethod
def exits_files(directory: Path, extensions: list, min_filesize: int = 0, recursive: bool = True) -> bool:
"""

View File

@@ -42,6 +42,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
netcat-openbsd \
lsof \
nano \
unar \
libjemalloc2 \
&& dpkg-reconfigure --frontend noninteractive tzdata \
&& curl https://rclone.org/install.sh | bash \

View File

@@ -9,6 +9,7 @@
- **Python 3.11 或更高版本**
- **pip** (Python 包管理器)
- **Git** (用于版本控制)
- **RAR 解压工具**:本地开发如需测试或使用 `.rar` 字幕包解压,请安装 `unar`、`unrar`、`7z` 或 `bsdtar` 之一;Docker 镜像会内置 `unar`。
Rust 加速扩展通过 `moviepilot-rust` PyPI 包安装,主项目本地开发不再需要 Rust toolchain。需要修改或发布 Rust 扩展时,请在 `MoviePilot-Rust` 仓库中构建。

View File

@@ -85,6 +85,20 @@ class _FakeSubtitleResponse:
"""
content = b"subtitle-content"
headers = {}
class _FakeSubtitleResponseWithHeader:
    """
    Fake subtitle API response whose headers carry a download file name.
    """

    # Placeholder payload; these bytes are not a real RAR archive.
    content = b"archive-content"
    # Content-Disposition header announcing a ".rar" attachment name.
    headers = {
        "content-disposition":
            'attachment; filename='
            '"Hypnosis_AKA_Saimin_(1999)_480i_JAPANESE_NTSC_DVD_REMUX_MPEG-2_DD_2.0-MeeSta.rar"',
    }
def test_download_single_submits_download_added_to_background(monkeypatch):
@@ -176,6 +190,79 @@ def test_save_subtitle_response_creates_missing_temp_directory(monkeypatch, tmp_
assert storage_chain.uploaded_files
def test_save_subtitle_response_accepts_rar_filename_from_header(monkeypatch, tmp_path):
    """
    A PHP download link must be recognised as a RAR subtitle archive from the file
    name in the response header, instead of being rejected because of the URL suffix.
    """
    archive_stem = "Hypnosis_AKA_Saimin_(1999)_480i_JAPANESE_NTSC_DVD_REMUX_MPEG-2_DD_2.0-MeeSta"
    temp_path = tmp_path / "temp"
    extracted_subtitle = temp_path / archive_stem / "Hypnosis_AKA_Saimin_(1999).srt"
    storage_chain = _FakeSubtitleStorageChain()

    def fake_unpack_archive(archive_file, extract_dir, archive_format=None):
        # Stand-in extractor: checks the RAR arguments, then drops one subtitle on disk.
        assert archive_format == "rar"
        assert archive_file.suffix == ".rar"
        extract_dir.mkdir(parents=True, exist_ok=True)
        extracted_subtitle.write_text("subtitle", encoding="utf-8")

    fake_settings = SimpleNamespace(TEMP_PATH=temp_path, RMT_SUBEXT=settings.RMT_SUBEXT)
    monkeypatch.setattr(download_module, "settings", fake_settings)
    monkeypatch.setattr(download_module, "StorageChain", lambda: storage_chain)
    monkeypatch.setattr(download_module.SystemUtils, "unpack_archive", fake_unpack_archive)

    # Bypass __init__ so that only the method under test runs.
    chain = DownloadChain.__new__(DownloadChain)
    saved_files = chain._save_subtitle_response(
        subtitle=SubtitleInfo(
            title="Hypnosis",
            enclosure="https://audiences.me/downloadsubs.php?torrentid=666519&subid=2195",
        ),
        response=_FakeSubtitleResponseWithHeader(),
        target_dir=Path("/downloads"),
    )

    assert saved_files == ["/downloads/Hypnosis_AKA_Saimin_(1999).srt"]
    assert storage_chain.uploaded_files == [extracted_subtitle]
def test_save_subtitle_response_rejects_unsupported_filename_from_header(monkeypatch, tmp_path):
    """
    When the file name in the response header is neither a subtitle nor a supported
    archive, saving must still be refused.
    """
    temp_path = tmp_path / "temp"
    storage_chain = _FakeSubtitleStorageChain()
    # An HTML error page served as an attachment.
    html_response = SimpleNamespace(
        content=b"<html>error</html>",
        headers={"content-disposition": 'attachment; filename="error.html"'},
    )

    fake_settings = SimpleNamespace(TEMP_PATH=temp_path, RMT_SUBEXT=settings.RMT_SUBEXT)
    monkeypatch.setattr(download_module, "settings", fake_settings)
    monkeypatch.setattr(download_module, "StorageChain", lambda: storage_chain)

    # Bypass __init__ so that only the method under test runs.
    chain = DownloadChain.__new__(DownloadChain)
    saved_files = chain._save_subtitle_response(
        subtitle=SubtitleInfo(
            title="Hypnosis",
            enclosure="https://audiences.me/downloadsubs.php?torrentid=666519&subid=2195",
        ),
        response=html_response,
        target_dir=Path("/downloads"),
    )

    assert saved_files == []
    assert storage_chain.uploaded_files == []
class _FakeBatchTorrentHelper:
"""
为批量下载测试提供稳定排序和种子文件集数解析。