Mirror of https://github.com/EstrellaXD/Auto_Bangumi.git (synced 2026-04-26 11:32:16 +08:00)
perf(database): optimize N+1 queries and add caching
- Replace N individual _is_duplicate() calls with a single batch SELECT query in add_all(), reducing database round-trips
- Replace the O(n*m) nested loop in match_list() with a compiled regex alternation pattern for faster torrent-to-bangumi matching
- Add an LRU cache (512 entries) to torrent_parser() to avoid redundant regex parsing for the same torrent paths
- Extend the bangumi search_all() cache TTL from 60s to 300s (5 minutes); a sketch of this cache pattern follows below

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
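
For context, a minimal sketch of the module-level TTL cache pattern the last bullet retunes. The body of search_all() is not part of this diff, so query_all_bangumi() below is a hypothetical stand-in for the real SELECT; the cache globals match the names the diff shows.

import time

_bangumi_cache: list | None = None
_bangumi_cache_time: float = 0.0
_BANGUMI_CACHE_TTL: float = 300.0  # 5 minutes

def query_all_bangumi(session) -> list:
    # Placeholder for the real "SELECT * FROM bangumi" query.
    return list(session)

def search_all(session) -> list:
    global _bangumi_cache, _bangumi_cache_time
    now = time.time()
    if _bangumi_cache is not None and now - _bangumi_cache_time < _BANGUMI_CACHE_TTL:
        return _bangumi_cache  # fresh enough: skip the database entirely
    _bangumi_cache = query_all_bangumi(session)
    _bangumi_cache_time = now
    return _bangumi_cache

def _invalidate_bangumi_cache() -> None:
    # Writers call this after INSERT/UPDATE/DELETE so readers never serve stale rows.
    global _bangumi_cache
    _bangumi_cache = None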
[file 1: bangumi database module]

@@ -1,4 +1,5 @@
 import logging
+import re
 import time
 from typing import Optional
 
@@ -12,7 +13,7 @@ logger = logging.getLogger(__name__)
 # Module-level TTL cache for search_all results
 _bangumi_cache: list[Bangumi] | None = None
 _bangumi_cache_time: float = 0
-_BANGUMI_CACHE_TTL: float = 60.0  # seconds
+_BANGUMI_CACHE_TTL: float = 300.0  # 5 minutes - extended from 60s to reduce DB queries
 
 
 def _invalidate_bangumi_cache():
@@ -53,11 +54,21 @@ class BangumiDatabase:
         if not datas:
             return 0
 
-        # Get existing title_raw + group_name combinations
-        existing = set()
-        for data in datas:
-            if self._is_duplicate(data):
-                existing.add((data.title_raw, data.group_name))
+        # Batch query: get all existing (title_raw, group_name) combinations in one query
+        # This replaces N individual _is_duplicate() calls with a single SELECT
+        keys_to_check = [(d.title_raw, d.group_name) for d in datas]
+        conditions = [
+            and_(Bangumi.title_raw == tr, Bangumi.group_name == gn)
+            for tr, gn in keys_to_check
+        ]
+        if conditions:
+            statement = select(Bangumi.title_raw, Bangumi.group_name).where(
+                or_(*conditions)
+            )
+            result = self.session.execute(statement)
+            existing = set(result.all())
+        else:
+            existing = set()
 
         # Filter out duplicates
         to_add = [d for d in datas if (d.title_raw, d.group_name) not in existing]
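
The or_() of and_() pairs above folds all duplicate probes into one statement. Below is a self-contained sketch of the same pattern against an in-memory SQLite database; the model here is a hypothetical stand-in for the real Bangumi model in module.models.

from sqlalchemy import Column, Integer, String, and_, create_engine, or_, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Bangumi(Base):
    __tablename__ = "bangumi"
    id = Column(Integer, primary_key=True)
    title_raw = Column(String)
    group_name = Column(String)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Bangumi(title_raw="Title A", group_name="GroupX"))
    session.commit()

    keys_to_check = [("Title A", "GroupX"), ("Title B", "GroupY")]
    statement = select(Bangumi.title_raw, Bangumi.group_name).where(
        or_(*(and_(Bangumi.title_raw == tr, Bangumi.group_name == gn)
              for tr, gn in keys_to_check))
    )
    # One round-trip, regardless of how many keys are probed.
    existing = set(session.execute(statement).all())
    print(existing)  # {('Title A', 'GroupX')}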
@@ -199,24 +210,29 @@ class BangumiDatabase:
         match_datas = self.search_all()
         if not match_datas:
             return torrent_list
-        # Build index for faster lookup
+
+        # Build index for O(1) lookup after regex match
         title_index = {m.title_raw: m for m in match_datas}
 
+        # Build compiled regex pattern for fast substring matching
+        # Sort by length descending so longer (more specific) matches are found first
+        sorted_titles = sorted(title_index.keys(), key=len, reverse=True)
+        # Escape special regex characters and join with alternation
+        pattern = "|".join(re.escape(title) for title in sorted_titles)
+        title_regex = re.compile(pattern)
+
         unmatched = []
         rss_updated = set()
         for torrent in torrent_list:
-            matched = False
-            for title_raw, match_data in title_index.items():
-                if title_raw in torrent.name:
-                    if (
-                        rss_link not in match_data.rss_link
-                        and title_raw not in rss_updated
-                    ):
-                        match_data.rss_link += f",{rss_link}"
-                        match_data.added = False
-                        rss_updated.add(title_raw)
-                    matched = True
-                    break
-            if not matched:
+            match = title_regex.search(torrent.name)
+            if match:
+                title_raw = match.group(0)
+                match_data = title_index[title_raw]
+                if rss_link not in match_data.rss_link and title_raw not in rss_updated:
+                    match_data.rss_link += f",{rss_link}"
+                    match_data.added = False
+                    rss_updated.add(title_raw)
+            else:
                 unmatched.append(torrent)
+        # Batch commit all rss_link updates
         if rss_updated:
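
An aside on the "sort by length descending" comment above: Python's re alternation is first-match, not longest-match, so an unsorted pattern can return a truncated title when one title is a prefix of another. A small demonstration with made-up titles:

import re

titles = ["Mushoku Tensei", "Mushoku Tensei II"]
unsorted_re = re.compile("|".join(re.escape(t) for t in titles))
sorted_re = re.compile(
    "|".join(re.escape(t) for t in sorted(titles, key=len, reverse=True))
)

name = "[Group] Mushoku Tensei II - 05 [1080p].mkv"
print(unsorted_re.search(name).group(0))  # 'Mushoku Tensei'    -- wrong, too short
print(sorted_re.search(name).group(0))    # 'Mushoku Tensei II' -- intended match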
[file 2: torrent parser module]

@@ -1,11 +1,16 @@
 import logging
 import re
+from collections import OrderedDict
 from pathlib import Path
 
 from module.models import EpisodeFile, SubtitleFile
 
 logger = logging.getLogger(__name__)
 
+# LRU cache for torrent_parser results to avoid repeated regex parsing
+_PARSER_CACHE_MAX_SIZE = 512
+_parser_cache: OrderedDict[tuple, EpisodeFile | SubtitleFile | None] = OrderedDict()
+
 PLATFORM = "Unix"
 
 RULES = [
@@ -70,7 +75,31 @@ def torrent_parser(
     torrent_name: str | None = None,
     season: int | None = None,
     file_type: str = "media",
-) -> EpisodeFile | SubtitleFile:
+) -> EpisodeFile | SubtitleFile | None:
+    # Check cache first to avoid repeated regex parsing
+    cache_key = (torrent_path, torrent_name, season, file_type)
+    if cache_key in _parser_cache:
+        # Move to end to mark as recently used
+        _parser_cache.move_to_end(cache_key)
+        return _parser_cache[cache_key]
+
+    result = _torrent_parser_impl(torrent_path, torrent_name, season, file_type)
+
+    # Store in cache with LRU eviction
+    _parser_cache[cache_key] = result
+    if len(_parser_cache) > _PARSER_CACHE_MAX_SIZE:
+        _parser_cache.popitem(last=False)  # Remove oldest item
+
+    return result
+
+
+def _torrent_parser_impl(
+    torrent_path: str,
+    torrent_name: str | None = None,
+    season: int | None = None,
+    file_type: str = "media",
+) -> EpisodeFile | SubtitleFile | None:
+    """Internal implementation of torrent_parser without caching."""
     media_path = get_path_basename(torrent_path)
     match_names = [torrent_name, media_path]
     if torrent_name is None:
@@ -106,6 +135,7 @@ def torrent_parser(
             episode=episode,
             suffix=suffix,
         )
+    return None
 
 
 if __name__ == "__main__":
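
A closing note on the hand-rolled OrderedDict cache: functools.lru_cache(maxsize=512) provides roughly the same get, move-to-end, and evict-oldest behavior out of the box, as sketched below with the hypothetical name torrent_parser_cached. The explicit dict does leave room for selective invalidation later, which lru_cache only supports wholesale via cache_clear().

from functools import lru_cache

@lru_cache(maxsize=512)
def torrent_parser_cached(torrent_path, torrent_name=None, season=None, file_type="media"):
    # Same cache key as the manual version: all four arguments must be hashable.
    return _torrent_parser_impl(torrent_path, torrent_name, season, file_type)

# torrent_parser_cached.cache_clear() drops every cached entry at once.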