支持馒头站点的字幕下载

This commit is contained in:
景大侠
2025-12-29 20:09:46 +08:00
parent 41ddf77a5b
commit 7f33b0b1b8
2 changed files with 96 additions and 73 deletions

View File

@@ -8,9 +8,13 @@ from lxml import etree
from app.chain.storage import StorageChain
from app.core.config import settings
from app.core.context import Context
from app.db.site_oper import SiteOper
from app.helper.sites import SitesHelper # noqa
from app.helper.torrent import TorrentHelper
from app.log import logger
from app.modules import _ModuleBase
from app.modules.indexer.spider.mtorrent import MTorrentSpider
from app.schemas import TorrentInfo
from app.schemas.file import FileURI
from app.schemas.types import ModuleType, OtherModulesType
from app.utils.http import RequestUtils
@@ -65,6 +69,52 @@ class SubtitleModule(_ModuleBase):
def test(self):
pass
def _get_subtitle_links(self, torrent: TorrentInfo):
    """
    Collect the subtitle download links for a torrent.

    Sites whose indexer declares the "mTorrent" parser are queried through
    MTorrentSpider. Every other site has its detail page fetched and scanned
    with the XPath expressions in ``self._SITE_SUBTITLE_XPATH``.

    :param torrent: torrent whose ``site``, ``page_url``, ``site_cookie`` and
                    ``site_ua`` attributes are read
    :return: for page-parsed sites, a list of subtitle URLs (possibly empty),
             or None when the page could not be fetched; for mTorrent sites,
             whatever ``MTorrentSpider.get_subtitle_links`` returns
    """
    # Sites that are accessed through an API need special handling
    if torrent.site is not None:
        site = SiteOper().get(torrent.site)
        # The site record may be missing; in that case fall back to page
        # parsing instead of failing on ``site.domain``.
        indexer = SitesHelper().get_indexer(site.domain) if site else None
        if indexer and indexer.get("parser") == "mTorrent":
            return MTorrentSpider(indexer).get_subtitle_links(torrent.page_url)
        # TODO: other sites that are accessed through an API

    # Ordinary sites: fetch the detail page and parse its HTML
    request = RequestUtils(cookies=torrent.site_cookie, ua=torrent.site_ua)
    res = request.get_res(torrent.page_url)
    if res is None:
        logger.warn(f"无法打开链接：{torrent.page_url}")
        return None
    if not res or res.status_code != 200:
        logger.warn(f"连接 {torrent.page_url} 失败，状态码：{res.status_code}")
        return None
    if not res.text:
        logger.warn(f"读取页面代码失败：{torrent.page_url}")
        return []

    html = etree.HTML(res.text)
    # NOTE(review): the parser may yield no tree for degenerate markup;
    # treat that like an unreadable page rather than calling .xpath on None.
    if html is None:
        logger.warn(f"读取页面代码失败：{torrent.page_url}")
        return []
    try:
        # Loop-invariant: the base URL only depends on the page URL
        base_url = StringUtils.get_base_url(torrent.page_url)
        sublink_list = []
        for xpath in self._SITE_SUBTITLE_XPATH:
            for sublink in html.xpath(xpath) or []:
                if not sublink:
                    continue
                # Prefix links that do not start with "http" with the base URL
                if not sublink.startswith("http"):
                    if sublink.startswith("/"):
                        sublink = "%s%s" % (base_url, sublink)
                    else:
                        sublink = "%s/%s" % (base_url, sublink)
                sublink_list.append(sublink)
        return sublink_list
    finally:
        # Drop the reference to the parsed tree as soon as we are done with it
        del html
def download_added(self, context: Context, download_dir: Path, torrent_content: Union[str, bytes] = None):
"""
添加下载任务成功后,从站点下载字幕,保存到下载目录
@@ -117,83 +167,56 @@ class SubtitleModule(_ModuleBase):
logger.error(f"下载目录不存在,无法保存字幕:{download_dir / folder_name}")
return
# 读取网站代码
sublink_list = self._get_subtitle_links(torrent)
if not sublink_list:
logger.warn(f"{torrent.page_url} 页面未找到字幕下载链接")
return
# 下载所有字幕文件
request = RequestUtils(cookies=torrent.site_cookie, ua=torrent.site_ua)
res = request.get_res(torrent.page_url)
if res and res.status_code == 200:
if not res.text:
logger.warn(f"读取页面代码失败:{torrent.page_url}")
return
html = etree.HTML(res.text)
try:
sublink_list = []
for xpath in self._SITE_SUBTITLE_XPATH:
sublinks = html.xpath(xpath)
if sublinks:
for sublink in sublinks:
if not sublink:
continue
if not sublink.startswith("http"):
base_url = StringUtils.get_base_url(torrent.page_url)
if sublink.startswith("/"):
sublink = "%s%s" % (base_url, sublink)
else:
sublink = "%s/%s" % (base_url, sublink)
sublink_list.append(sublink)
finally:
if html is not None:
del html
# 下载所有字幕文件
for sublink in sublink_list:
logger.info(f"找到字幕下载链接:{sublink},开始下载...")
# 下载
ret = request.get_res(sublink)
if ret and ret.status_code == 200:
# 保存ZIP
file_name = TorrentHelper.get_url_filename(ret, sublink)
if not file_name:
logger.warn(f"链接不是字幕文件:{sublink}")
continue
if file_name.lower().endswith(".zip"):
# ZIP包
zip_file = settings.TEMP_PATH / file_name
# 保存
zip_file.write_bytes(ret.content)
# 解压路径
zip_path = zip_file.with_name(zip_file.stem)
# 解压文件
shutil.unpack_archive(zip_file, zip_path, format='zip')
# 遍历转移文件
for sub_file in SystemUtils.list_files(zip_path, settings.RMT_SUBEXT):
target_sub_file = Path(working_dir_item.path) / Path(sub_file.name)
if storageChain.get_file_item(storage, target_sub_file):
logger.info(f"字幕文件已存在:{target_sub_file}")
continue
logger.info(f"转移字幕 {sub_file}{target_sub_file} ...")
storageChain.upload_file(working_dir_item, sub_file)
# 删除临时文件
try:
shutil.rmtree(zip_path)
zip_file.unlink()
except Exception as err:
logger.error(f"删除临时文件失败:{str(err)}")
else:
sub_file = settings.TEMP_PATH / file_name
# 保存
sub_file.write_bytes(ret.content)
for sublink in sublink_list:
logger.info(f"找到字幕下载链接:{sublink},开始下载...")
# 下载
ret = request.get_res(sublink)
if ret and ret.status_code == 200:
# 保存ZIP
file_name = TorrentHelper.get_url_filename(ret, sublink)
if not file_name:
logger.warn(f"链接不是字幕文件:{sublink}")
continue
if file_name.lower().endswith(".zip"):
# ZIP包
zip_file = settings.TEMP_PATH / file_name
# 保存
zip_file.write_bytes(ret.content)
# 解压路径
zip_path = zip_file.with_name(zip_file.stem)
# 解压文件
shutil.unpack_archive(zip_file, zip_path, format='zip')
# 遍历转移文件
for sub_file in SystemUtils.list_files(zip_path, settings.RMT_SUBEXT):
target_sub_file = Path(working_dir_item.path) / Path(sub_file.name)
if storageChain.get_file_item(storage, target_sub_file):
logger.info(f"字幕文件已存在:{target_sub_file}")
continue
logger.info(f"转移字幕 {sub_file}{target_sub_file} ...")
storageChain.upload_file(working_dir_item, sub_file)
# 删除临时文件
try:
shutil.rmtree(zip_path)
zip_file.unlink()
except Exception as err:
logger.error(f"删除临时文件失败:{str(err)}")
else:
logger.error(f"下载字幕文件失败:{sublink}")
continue
if sublink_list:
logger.info(f"{torrent.page_url} 页面字幕下载完成")
sub_file = settings.TEMP_PATH / file_name
# 保存
sub_file.write_bytes(ret.content)
target_sub_file = Path(working_dir_item.path) / Path(sub_file.name)
if storageChain.get_file_item(storage, target_sub_file):
logger.info(f"字幕文件已存在:{target_sub_file}")
continue
logger.info(f"转移字幕 {sub_file}{target_sub_file} ...")
storageChain.upload_file(working_dir_item, sub_file)
else:
logger.warn(f"{torrent.page_url} 页面未找到字幕下载链接")
elif res is not None:
logger.warn(f"连接 {torrent.page_url} 失败,状态码:{res.status_code}")
else:
logger.warn(f"无法打开链接:{torrent.page_url}")
logger.error(f"下载字幕文件失败:{sublink}")
continue
logger.info(f"{torrent.page_url} 页面字幕下载完成")