perf: optimize renamer with batch database queries and reduced blocking

- Add batch offset lookup to reduce N database connections to 1-3 per cycle
- Add search_by_qb_hashes() and search_ids() for batch queries
- Throttle pending rename cache cleanup to once per minute max
- Use exponential backoff for rename verification (0.1s->0.2s->0.4s)
- Skip verification for subtitle renames to reduce latency

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
EstrellaXD
2026-01-28 13:55:20 +01:00
parent 789c02f7f4
commit 99c8764484
5 changed files with 178 additions and 42 deletions

View File

@@ -365,6 +365,14 @@ class BangumiDatabase:
logger.debug(f"[Database] Find bangumi id: {_id}.")
return bangumi
def search_ids(self, ids: list[int]) -> list[Bangumi]:
    """Fetch every Bangumi row whose primary key appears in *ids*.

    An empty id list short-circuits to an empty result without
    touching the database.
    """
    if not ids:
        return []
    rows = self.session.execute(select(Bangumi).where(Bangumi.id.in_(ids)))
    return list(rows.scalars().all())
def match_poster(self, bangumi_name: str) -> str:
statement = select(Bangumi).where(
func.instr(bangumi_name, Bangumi.official_title) > 0

View File

@@ -36,9 +36,7 @@ class TorrentDatabase:
logger.debug(f"Update {data.name} in database.")
def search(self, _id: int) -> Torrent | None:
result = self.session.execute(
select(Torrent).where(Torrent.id == _id)
)
result = self.session.execute(select(Torrent).where(Torrent.id == _id))
return result.scalar_one_or_none()
def search_all(self) -> list[Torrent]:
@@ -46,9 +44,7 @@ class TorrentDatabase:
return list(result.scalars().all())
def search_rss(self, rss_id: int) -> list[Torrent]:
result = self.session.execute(
select(Torrent).where(Torrent.rss_id == rss_id)
)
result = self.session.execute(select(Torrent).where(Torrent.rss_id == rss_id))
return list(result.scalars().all())
def check_new(self, torrents_list: list[Torrent]) -> list[Torrent]:
@@ -62,16 +58,21 @@ class TorrentDatabase:
def search_by_qb_hash(self, qb_hash: str) -> Torrent | None:
"""Find torrent by qBittorrent hash."""
result = self.session.execute(
select(Torrent).where(Torrent.qb_hash == qb_hash)
)
result = self.session.execute(select(Torrent).where(Torrent.qb_hash == qb_hash))
return result.scalar_one_or_none()
def search_by_qb_hashes(self, qb_hashes: list[str]) -> list[Torrent]:
    """Batch counterpart of search_by_qb_hash: one query for many hashes.

    Returns [] immediately for an empty hash list, avoiding a useless
    database round trip.
    """
    if not qb_hashes:
        return []
    stmt = select(Torrent).where(Torrent.qb_hash.in_(qb_hashes))
    return list(self.session.execute(stmt).scalars().all())
def search_by_url(self, url: str) -> Torrent | None:
"""Find torrent by URL."""
result = self.session.execute(
select(Torrent).where(Torrent.url == url)
)
result = self.session.execute(select(Torrent).where(Torrent.url == url))
return result.scalar_one_or_none()
def update_qb_hash(self, torrent_id: int, qb_hash: str) -> bool:

View File

@@ -193,7 +193,9 @@ class QbDownloader:
data={"hashes": hashes},
)
async def torrents_rename_file(self, torrent_hash, old_path, new_path) -> bool:
async def torrents_rename_file(
self, torrent_hash, old_path, new_path, verify: bool = True
) -> bool:
try:
resp = await self._client.post(
self._url("torrents/renameFile"),
@@ -205,20 +207,31 @@ class QbDownloader:
if resp.status_code != 200:
return False
if not verify:
return True
# Verify the rename actually happened by checking file list
# qBittorrent can return 200 but delay the actual rename (e.g., while seeding)
await asyncio.sleep(0.5) # Brief delay to allow qBittorrent to process
files = await self.torrents_files(torrent_hash)
for f in files:
if f.get("name") == new_path:
return True
if f.get("name") == old_path:
# File still has old name - rename didn't actually happen
logger.debug(
f"[Downloader] Rename API returned 200 but file unchanged: {old_path}"
)
return False
return True # new_path found or old_path not found
# Use exponential backoff: 0.1s, 0.2s, 0.4s (max 3 attempts)
for attempt in range(3):
delay = 0.1 * (2**attempt)
await asyncio.sleep(delay)
files = await self.torrents_files(torrent_hash)
for f in files:
if f.get("name") == new_path:
return True
if f.get("name") == old_path:
# File still has old name - try again
if attempt < 2:
continue
# Final attempt failed
logger.debug(
f"[Downloader] Rename API returned 200 but file unchanged: {old_path}"
)
return False
# new_path found or old_path not found
return True
return True
except (httpx.ConnectError, httpx.RequestError, httpx.TimeoutException) as e:
logger.warning(f"[Downloader] Failed to rename file {old_path}: {e}")
return False

View File

@@ -120,9 +120,11 @@ class DownloadClient(TorrentPath):
async def get_torrent_files(self, torrent_hash: str):
return await self.client.torrents_files(torrent_hash=torrent_hash)
async def rename_torrent_file(self, _hash, old_path, new_path) -> bool:
async def rename_torrent_file(
self, _hash, old_path, new_path, verify: bool = True
) -> bool:
result = await self.client.torrents_rename_file(
torrent_hash=_hash, old_path=old_path, new_path=new_path
torrent_hash=_hash, old_path=old_path, new_path=new_path, verify=verify
)
if result:
logger.info(f"{old_path} >> {new_path}")

View File

@@ -16,6 +16,8 @@ logger = logging.getLogger(__name__)
# This prevents spamming the same rename when qBittorrent returns 200 but doesn't actually rename
_pending_renames: dict[tuple[str, str, str], float] = {}
_PENDING_RENAME_COOLDOWN = 300 # 5 minutes cooldown before retrying same rename
_CLEANUP_INTERVAL = 60 # Clean up pending cache at most once per minute
_last_cleanup_time: float = 0
class Renamer(DownloadClient):
@@ -23,6 +25,23 @@ class Renamer(DownloadClient):
super().__init__()
self._parser = TitleParser()
self.check_pool = {}
self._offset_cache: dict[str, tuple[int, int]] = {}
@staticmethod
def _cleanup_pending_cache():
    """Drop stale entries from the module-level pending-rename cache.

    Throttled via _last_cleanup_time so the scan runs at most once per
    _CLEANUP_INTERVAL seconds. Entries older than twice the cooldown
    window are removed in place (the dict object itself is shared at
    module level, so it must be mutated rather than rebound).
    """
    global _last_cleanup_time
    now = time.time()
    if now - _last_cleanup_time < _CLEANUP_INTERVAL:
        return
    _last_cleanup_time = now
    cutoff = _PENDING_RENAME_COOLDOWN * 2
    stale = [key for key, stamp in _pending_renames.items() if now - stamp > cutoff]
    for key in stale:
        _pending_renames.pop(key, None)
@staticmethod
def print_result(torrent_count, rename_count):
@@ -111,7 +130,10 @@ class Renamer(DownloadClient):
# (qBittorrent can return 200 but delay actual rename while seeding)
pending_key = (_hash, media_path, new_path)
last_attempt = _pending_renames.get(pending_key)
if last_attempt and (time.time() - last_attempt) < _PENDING_RENAME_COOLDOWN:
if (
last_attempt
and (time.time() - last_attempt) < _PENDING_RENAME_COOLDOWN
):
logger.debug(
f"[Renamer] Skipping rename (pending cooldown): {media_path}"
)
@@ -137,14 +159,8 @@ class Renamer(DownloadClient):
# Rename API returned success but file wasn't actually renamed
# Add to pending cache to avoid spamming
_pending_renames[pending_key] = time.time()
# Clean up old entries from cache
current_time = time.time()
expired_keys = [
k for k, v in _pending_renames.items()
if current_time - v > _PENDING_RENAME_COOLDOWN * 2
]
for k in expired_keys:
_pending_renames.pop(k, None)
# Periodic cleanup of expired entries (at most once per minute)
self._cleanup_pending_cache()
else:
logger.warning(f"[Renamer] {media_path} parse failed")
if settings.bangumi_manage.remove_bad_torrent:
@@ -216,8 +232,12 @@ class Renamer(DownloadClient):
season_offset=season_offset,
)
if subtitle_path != new_path:
# Skip verification for subtitles to reduce latency
renamed = await self.rename_torrent_file(
_hash=_hash, old_path=subtitle_path, new_path=new_path
_hash=_hash,
old_path=subtitle_path,
new_path=new_path,
verify=False,
)
if not renamed:
logger.warning(f"[Renamer] {subtitle_path} rename failed")
@@ -249,6 +269,99 @@ class Renamer(DownloadClient):
# Remove trailing slashes
return normalized.rstrip("/")
def _batch_lookup_offsets(
    self, torrents_info: list[dict]
) -> dict[str, tuple[int, int]]:
    """Batch lookup offsets for all torrents in a single database session.

    Resolution order per torrent: (1) stored qb_hash -> bangumi_id link,
    (2) bangumi id parsed from qBittorrent tags, (3) match on torrent
    name, (4) match on the raw then normalized save path, and finally a
    (0, 0) default when nothing matches.

    Args:
        torrents_info: dicts expected to carry "hash", "name" and
            "save_path" keys; "tags" is optional.

    Returns a dict mapping torrent_hash to (episode_offset, season_offset).
    """
    result: dict[str, tuple[int, int]] = {}
    if not torrents_info:
        return result
    try:
        with Database() as db:
            # Collect all hashes for one batch query instead of N
            # per-hash lookups.
            hashes = [info["hash"] for info in torrents_info]
            torrent_records = db.torrent.search_by_qb_hashes(hashes)
            hash_to_bangumi_id = {
                r.qb_hash: r.bangumi_id for r in torrent_records if r.bangumi_id
            }
            # Collect unique bangumi IDs to fetch
            bangumi_ids_to_fetch = set(hash_to_bangumi_id.values())
            # Also collect bangumi IDs embedded in qBittorrent tags
            tag_bangumi_ids = {}
            for info in torrents_info:
                tags = info.get("tags", "")
                bangumi_id = self._parse_bangumi_id_from_tags(tags)
                if bangumi_id:
                    tag_bangumi_ids[info["hash"]] = bangumi_id
                    bangumi_ids_to_fetch.add(bangumi_id)
            # Batch fetch all bangumi records; deleted rows are excluded
            # so they never supply offsets.
            bangumi_map = {}
            if bangumi_ids_to_fetch:
                bangumi_records = db.bangumi.search_ids(list(bangumi_ids_to_fetch))
                bangumi_map = {
                    b.id: b for b in bangumi_records if b and not b.deleted
                }
            # Now resolve offsets for each torrent
            for info in torrents_info:
                torrent_hash = info["hash"]
                torrent_name = info["name"]
                save_path = info["save_path"]
                # 1. Try by qb_hash
                bangumi_id = hash_to_bangumi_id.get(torrent_hash)
                if bangumi_id and bangumi_id in bangumi_map:
                    b = bangumi_map[bangumi_id]
                    result[torrent_hash] = (b.episode_offset, b.season_offset)
                    continue
                # 2. Try by tag
                bangumi_id = tag_bangumi_ids.get(torrent_hash)
                if bangumi_id and bangumi_id in bangumi_map:
                    b = bangumi_map[bangumi_id]
                    result[torrent_hash] = (b.episode_offset, b.season_offset)
                    continue
                # 3. Try by torrent name (individual query, but less common path)
                bangumi = db.bangumi.match_torrent(torrent_name)
                if bangumi:
                    result[torrent_hash] = (
                        bangumi.episode_offset,
                        bangumi.season_offset,
                    )
                    continue
                # 4. Try by save_path (individual query, fallback);
                # raw path first, then the normalized form.
                normalized_save_path = self._normalize_path(save_path)
                bangumi = db.bangumi.match_by_save_path(save_path)
                if not bangumi:
                    bangumi = db.bangumi.match_by_save_path(normalized_save_path)
                if bangumi:
                    result[torrent_hash] = (
                        bangumi.episode_offset,
                        bangumi.season_offset,
                    )
                    continue
                # Default: no offset
                result[torrent_hash] = (0, 0)
    except Exception as e:
        logger.debug(f"[Renamer] Batch offset lookup failed: {e}")
        # NOTE(review): despite the batch failing, there is no retry of
        # individual lookups here — every hash not resolved before the
        # failure simply gets the neutral (0, 0) offsets.
        for info in torrents_info:
            if info["hash"] not in result:
                result[info["hash"]] = (0, 0)
    return result
def _lookup_offsets(
self, torrent_hash: str, torrent_name: str, save_path: str, tags: str = ""
) -> tuple[int, int]:
@@ -331,17 +444,16 @@ class Renamer(DownloadClient):
all_files = await asyncio.gather(
*[self.get_torrent_files(info["hash"]) for info in torrents_info]
)
# Batch lookup all offsets in a single database session
offset_map = self._batch_lookup_offsets(torrents_info)
for info, files in zip(torrents_info, all_files):
torrent_hash = info["hash"]
torrent_name = info["name"]
save_path = info["save_path"]
tags = info.get("tags", "")
media_list, subtitle_list = self.check_files(files)
bangumi_name, season = self._path_to_bangumi(save_path)
# Look up offsets from database (use hash/tags/bangumi_id for accurate matching)
episode_offset, season_offset = self._lookup_offsets(
torrent_hash, torrent_name, save_path, tags
)
# Use pre-fetched offsets
episode_offset, season_offset = offset_map.get(torrent_hash, (0, 0))
kwargs = {
"torrent_name": torrent_name,
"bangumi_name": bangumi_name,