mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-04-02 10:21:48 +08:00
优化AliPan类的文件上传功能,增加多线程分片上传和动态分片计算,提升上传效率和进度监控。
This commit is contained in:
@@ -3,8 +3,11 @@ import hashlib
|
||||
import secrets
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
from queue import Queue
|
||||
from typing import List, Dict, Optional, Tuple, Union
|
||||
import io
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
@@ -50,6 +53,11 @@ class AliPan(StorageBase, metaclass=Singleton):
|
||||
# CID和路径缓存
|
||||
_id_cache: Dict[str, Tuple[str, str]] = {}
|
||||
|
||||
# 最大线程数
|
||||
MAX_WORKERS = 10
|
||||
# 最大分片大小(1GB)
|
||||
MAX_PART_SIZE = 1024 * 1024 * 1024
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.session = requests.Session()
|
||||
@@ -628,22 +636,77 @@ class AliPan(StorageBase, metaclass=Singleton):
|
||||
raise Exception(resp.get("message"))
|
||||
return resp
|
||||
|
||||
def _calc_parts(self, file_size: int) -> Tuple[int, int]:
|
||||
"""
|
||||
计算最优分片大小和线程数
|
||||
"""
|
||||
# 根据文件大小计算合适的分片数
|
||||
if file_size <= 100 * 1024 * 1024: # 小于100MB
|
||||
return 10 * 1024 * 1024, min(3, self.MAX_WORKERS) # 10MB分片
|
||||
elif file_size <= 1024 * 1024 * 1024: # 小于1GB
|
||||
return 100 * 1024 * 1024, min(5, self.MAX_WORKERS) # 100MB分片
|
||||
else:
|
||||
# 文件较大,使用较大分片
|
||||
part_size = min(file_size // self.MAX_WORKERS, self.MAX_PART_SIZE)
|
||||
return part_size, min(file_size // part_size + 1, self.MAX_WORKERS)
|
||||
|
||||
def _log_progress(self, desc: str, total: int) -> tqdm:
|
||||
"""
|
||||
创建一个可以输出到日志的进度条
|
||||
"""
|
||||
class TqdmToLogger(io.StringIO):
|
||||
def write(s, buf):
|
||||
buf = buf.strip('\r\n\t ')
|
||||
if buf:
|
||||
logger.info(buf)
|
||||
|
||||
return tqdm(
|
||||
total=total,
|
||||
unit='B',
|
||||
unit_scale=True,
|
||||
desc=desc,
|
||||
file=TqdmToLogger(),
|
||||
mininterval=1.0,
|
||||
maxinterval=5.0,
|
||||
miniters=1
|
||||
)
|
||||
|
||||
def _upload_part(self, upload_url: str, data: bytes, part_num: int,
|
||||
progress_queue: Queue) -> Tuple[int, str]:
|
||||
"""
|
||||
上传单个分片
|
||||
"""
|
||||
try:
|
||||
response = requests.put(upload_url, data=data)
|
||||
if response and response.status_code == 200:
|
||||
# 将上传进度放入队列
|
||||
progress_queue.put(len(data))
|
||||
logger.info(f"【阿里云盘】分片 {part_num} 上传完成")
|
||||
return part_num, response.headers.get('ETag', '')
|
||||
else:
|
||||
raise Exception(f"上传失败: {response.status_code if response else 'No Response'}")
|
||||
except Exception as e:
|
||||
logger.error(f"【阿里云盘】分片 {part_num} 上传失败: {str(e)}")
|
||||
raise
|
||||
|
||||
def upload(self, target_dir: schemas.FileItem, local_path: Path,
|
||||
new_name: Optional[str] = None) -> Optional[schemas.FileItem]:
|
||||
"""
|
||||
文件上传:分片、支持秒传
|
||||
文件上传:多线程分片、支持秒传
|
||||
"""
|
||||
target_name = new_name or local_path.name
|
||||
target_path = Path(target_dir.path) / target_name
|
||||
file_size = local_path.stat().st_size
|
||||
|
||||
# 1. 创建文件并检查秒传
|
||||
chunk_size = 100 * 1024 * 1024 # 分片大小 100M
|
||||
# 1. 计算分片大小和线程数
|
||||
part_size, workers = self._calc_parts(file_size)
|
||||
|
||||
# 2. 创建文件并检查秒传
|
||||
create_res = self._create_file(drive_id=target_dir.drive_id,
|
||||
parent_file_id=target_dir.fileid,
|
||||
file_name=target_name,
|
||||
file_path=local_path,
|
||||
chunk_size=chunk_size)
|
||||
parent_file_id=target_dir.fileid,
|
||||
file_name=target_name,
|
||||
file_path=local_path,
|
||||
chunk_size=part_size)
|
||||
if create_res.get('rapid_upload', False):
|
||||
logger.info(f"【阿里云盘】{target_name} 秒传完成!")
|
||||
return self.get_item(target_path)
|
||||
@@ -652,93 +715,89 @@ class AliPan(StorageBase, metaclass=Singleton):
|
||||
logger.info(f"【阿里云盘】{target_name} 已存在")
|
||||
return self.get_item(target_path)
|
||||
|
||||
# 2. 准备分片上传参数
|
||||
# 3. 准备分片上传参数
|
||||
file_id = create_res.get('file_id')
|
||||
if not file_id:
|
||||
logger.warn(f"【阿里云盘】创建 {target_name} 文件失败!")
|
||||
return None
|
||||
upload_id = create_res.get('upload_id')
|
||||
part_info_list = create_res.get('part_info_list')
|
||||
uploaded_parts = set()
|
||||
uploaded_parts = {}
|
||||
|
||||
# 3. 获取已上传分片
|
||||
# 4. 获取已上传分片
|
||||
uploaded_info = self._list_uploaded_parts(drive_id=target_dir.drive_id, file_id=file_id, upload_id=upload_id)
|
||||
for part in uploaded_info.get('uploaded_parts', []):
|
||||
uploaded_parts.add(part['part_number'])
|
||||
uploaded_parts[part['part_number']] = part.get('etag', '')
|
||||
|
||||
# 4. 初始化进度条
|
||||
logger.info(f"【阿里云盘】开始上传: {local_path} -> {target_path},分片数:{len(part_info_list)}")
|
||||
progress_bar = tqdm(
|
||||
total=file_size,
|
||||
unit='B',
|
||||
unit_scale=True,
|
||||
desc="上传进度",
|
||||
ascii=True
|
||||
)
|
||||
# 5. 初始化进度条
|
||||
logger.info(f"【阿里云盘】开始上传: {local_path} -> {target_path},"
|
||||
f"分片大小:{StringUtils.str_filesize(part_size)},线程数:{workers}")
|
||||
progress_bar = self._log_progress(f"【阿里云盘】{target_name} 上传进度", file_size)
|
||||
|
||||
# 5. 分片上传循环
|
||||
with open(local_path, 'rb') as f:
|
||||
for part_info in part_info_list:
|
||||
part_num = part_info['part_number']
|
||||
# 6. 创建进度队列
|
||||
progress_queue = Queue()
|
||||
|
||||
# 计算分片参数
|
||||
start = (part_num - 1) * chunk_size
|
||||
end = min(start + chunk_size, file_size)
|
||||
current_chunk_size = end - start
|
||||
# 7. 创建线程池
|
||||
with ThreadPoolExecutor(max_workers=workers) as pool:
|
||||
futures = []
|
||||
|
||||
# 提交上传任务
|
||||
with open(local_path, 'rb') as f:
|
||||
for part_info in part_info_list:
|
||||
part_num = part_info['part_number']
|
||||
|
||||
# 跳过已上传的分片
|
||||
if part_num in uploaded_parts:
|
||||
start = (part_num - 1) * part_size
|
||||
end = min(start + part_size, file_size)
|
||||
progress_bar.update(end - start)
|
||||
continue
|
||||
|
||||
# 更新进度条(已存在的分片)
|
||||
if part_num in uploaded_parts:
|
||||
progress_bar.update(current_chunk_size)
|
||||
continue
|
||||
# 准备分片数据
|
||||
start = (part_num - 1) * part_size
|
||||
end = min(start + part_size, file_size)
|
||||
f.seek(start)
|
||||
data = f.read(end - start)
|
||||
|
||||
# 准备分片数据
|
||||
f.seek(start)
|
||||
data = f.read(current_chunk_size)
|
||||
# 提交上传任务
|
||||
future = pool.submit(
|
||||
self._upload_part,
|
||||
part_info['upload_url'],
|
||||
data,
|
||||
part_num,
|
||||
progress_queue
|
||||
)
|
||||
futures.append((part_num, future))
|
||||
|
||||
# 上传分片(带重试逻辑)
|
||||
success = False
|
||||
for attempt in range(3): # 最大重试次数
|
||||
try:
|
||||
# 获取当前上传地址(可能刷新)
|
||||
if attempt > 0:
|
||||
new_urls = self._refresh_upload_urls(drive_id=target_dir.drive_id, file_id=file_id,
|
||||
upload_id=upload_id, part_numbers=[part_num])
|
||||
upload_url = new_urls[0]['upload_url']
|
||||
else:
|
||||
upload_url = part_info['upload_url']
|
||||
# 更新进度条
|
||||
while len(uploaded_parts) < len(part_info_list):
|
||||
try:
|
||||
uploaded = progress_queue.get(timeout=1)
|
||||
progress_bar.update(uploaded)
|
||||
except:
|
||||
pass
|
||||
|
||||
# 执行上传
|
||||
logger.info(
|
||||
f"【阿里云盘】开始 第{attempt + 1}次 上传 {target_name} 分片 {part_num} ...")
|
||||
response = self._upload_part(upload_url=upload_url, data=data)
|
||||
if response is None:
|
||||
continue
|
||||
if response.status_code == 200:
|
||||
success = True
|
||||
break
|
||||
else:
|
||||
logger.warn(
|
||||
f"【阿里云盘】{target_name} 分片 {part_num} 第 {attempt + 1} 次上传失败:{response.text}!")
|
||||
except Exception as e:
|
||||
logger.warn(f"【阿里云盘】{target_name} 分片 {part_num} 上传异常: {str(e)}!")
|
||||
# 等待所有任务完成
|
||||
for part_num, future in futures:
|
||||
try:
|
||||
num, etag = future.result()
|
||||
uploaded_parts[num] = etag
|
||||
except Exception as e:
|
||||
logger.error(f"【阿里云盘】分片上传失败: {str(e)}")
|
||||
progress_bar.close()
|
||||
return None
|
||||
|
||||
# 处理上传结果
|
||||
if success:
|
||||
uploaded_parts.add(part_num)
|
||||
progress_bar.update(current_chunk_size)
|
||||
else:
|
||||
raise Exception(f"【阿里云盘】{target_name} 分片 {part_num} 上传失败!")
|
||||
# 8. 关闭进度条
|
||||
progress_bar.close()
|
||||
|
||||
# 6. 关闭进度条
|
||||
if progress_bar:
|
||||
progress_bar.close()
|
||||
|
||||
# 7. 完成上传
|
||||
# 9. 完成上传
|
||||
result = self._complete_upload(drive_id=target_dir.drive_id, file_id=file_id, upload_id=upload_id)
|
||||
if not result:
|
||||
raise Exception("【阿里云盘】完成上传失败!")
|
||||
if result.get("code"):
|
||||
logger.warn(f"【阿里云盘】{target_name} 上传失败:{result.get('message')}!")
|
||||
return None
|
||||
|
||||
return self.__get_fileitem(result, parent=target_dir.path)
|
||||
|
||||
def download(self, fileitem: schemas.FileItem, path: Path = None) -> Optional[Path]:
|
||||
|
||||
Reference in New Issue
Block a user