From 70c4509682cdb7bebc84897c6ee2cc633d5c28d1 Mon Sep 17 00:00:00 2001 From: InfinityPacer <160988576+InfinityPacer@users.noreply.github.com> Date: Wed, 22 Jan 2025 14:25:30 +0800 Subject: [PATCH 1/2] feat(cache): add `exists` to check key presence in cache backends --- app/core/cache.py | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/app/core/cache.py b/app/core/cache.py index 91f60427..4daf608c 100644 --- a/app/core/cache.py +++ b/app/core/cache.py @@ -35,6 +35,17 @@ class CacheBackend(ABC): """ pass + @abstractmethod + def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool: + """ + 判断缓存键是否存在 + + :param key: 缓存的键 + :param region: 缓存的区 + :return: 存在返回 True,否则返回 False + """ + pass + @abstractmethod def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Any: """ @@ -130,6 +141,19 @@ class CacheToolsBackend(CacheBackend): # 设置缓存值 region_cache[key] = value + def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool: + """ + 判断缓存键是否存在 + + :param key: 缓存的键 + :param region: 缓存的区 + :return: 存在返回 True,否则返回 False + """ + region_cache = self.__get_region_cache(region) + if region_cache is None: + return False + return key in region_cache + def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Any: """ 获取缓存的值 @@ -294,6 +318,21 @@ class RedisBackend(CacheBackend): except Exception as e: logger.error(f"Failed to set key: {key} in region: {region}, error: {e}") + def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool: + """ + 判断缓存键是否存在 + + :param key: 缓存的键 + :param region: 缓存的区 + :return: 存在返回 True,否则返回 False + """ + try: + redis_key = self.get_redis_key(region, key) + return self.client.exists(redis_key) == 1 + except Exception as e: + logger.error(f"Failed to exists key: {key} region: {region}, error: {e}") + return False + def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Optional[Any]: """ 获取缓存的值 @@ -392,7 +431,7 @@ def cached(region: Optional[str] = None, maxsize: int = 1000, ttl: int = 1800, :param maxsize: 缓存的最大条目数,默认值为 1000 :param ttl: 缓存的存活时间,单位秒,默认值为 1800 :param skip_none: 跳过 None 缓存,默认为 True - :param skip_empty: 跳过空值缓存(如 [], {}, "", set()),默认为 False + :param skip_empty: 跳过空值缓存(如 None, [], {}, "", set()),默认为 False :return: 装饰器函数 """ @@ -405,7 +444,7 @@ def cached(region: Optional[str] = None, maxsize: int = 1000, ttl: int = 1800, """ if skip_none and value is None: return False - # if disable_empty and value in [[], {}, "", set()]: + # if skip_empty and value in [None, [], {}, "", set()]: if skip_empty and not value: return False return True From 5c7bd95f6b642966c6a88cafefdf58f70c7a9891 Mon Sep 17 00:00:00 2001 From: InfinityPacer <160988576+InfinityPacer@users.noreply.github.com> Date: Wed, 22 Jan 2025 14:58:56 +0800 Subject: [PATCH 2/2] fix(cache): enhance tmdb match_web rate-limiting and caching --- app/modules/themoviedb/tmdbapi.py | 98 +++++++++++++++++-------------- 1 file changed, 53 insertions(+), 45 deletions(-) diff --git a/app/modules/themoviedb/tmdbapi.py b/app/modules/themoviedb/tmdbapi.py index 178bd4f1..9479c987 100644 --- a/app/modules/themoviedb/tmdbapi.py +++ b/app/modules/themoviedb/tmdbapi.py @@ -8,8 +8,10 @@ from lxml import etree from app.core.cache import cached from app.core.config import settings from app.log import logger +from app.schemas import APIRateLimitException from app.schemas.types import MediaType from app.utils.http import RequestUtils +from app.utils.limit import rate_limit_exponential from app.utils.string import StringUtils from .tmdbv3api import TMDb, Search, Movie, TV, Season, Episode, Discover, Trending, Person, Collection from .tmdbv3api.exceptions import TMDbException @@ -492,6 +494,7 @@ class TmdbApi: return ret_info @cached(maxsize=settings.CACHE_CONF["tmdb"], ttl=settings.CACHE_CONF["meta"]) + @rate_limit_exponential(source="match_tmdb_web", max_wait=1800, enable_logging=True) def match_web(self, name: str, mtype: MediaType) -> Optional[dict]: """ 搜索TMDB网站,直接抓取结果,结果只有一条时才返回 @@ -504,51 +507,56 @@ class TmdbApi: return {} logger.info("正在从TheDbMovie网站查询:%s ..." % name) tmdb_url = "https://www.themoviedb.org/search?query=%s" % quote(name) - res = RequestUtils(timeout=5, ua=settings.USER_AGENT).get_res(url=tmdb_url) - if res and res.status_code == 200: - html_text = res.text - if not html_text: - return None - try: - tmdb_links = [] - html = etree.HTML(html_text) - if mtype == MediaType.TV: - links = html.xpath("//a[@data-id and @data-media-type='tv']/@href") - else: - links = html.xpath("//a[@data-id]/@href") - for link in links: - if not link or (not link.startswith("/tv") and not link.startswith("/movie")): - continue - if link not in tmdb_links: - tmdb_links.append(link) - if len(tmdb_links) == 1: - tmdbinfo = self.get_info( - mtype=MediaType.TV if tmdb_links[0].startswith("/tv") else MediaType.MOVIE, - tmdbid=tmdb_links[0].split("/")[-1]) - if tmdbinfo: - if mtype == MediaType.TV and tmdbinfo.get('media_type') != MediaType.TV: - return {} - if tmdbinfo.get('media_type') == MediaType.MOVIE: - logger.info("%s 从WEB识别到 电影:TMDBID=%s, 名称=%s, 上映日期=%s" % ( - name, - tmdbinfo.get('id'), - tmdbinfo.get('title'), - tmdbinfo.get('release_date'))) - else: - logger.info("%s 从WEB识别到 电视剧:TMDBID=%s, 名称=%s, 首播日期=%s" % ( - name, - tmdbinfo.get('id'), - tmdbinfo.get('name'), - tmdbinfo.get('first_air_date'))) - return tmdbinfo - elif len(tmdb_links) > 1: - logger.info("%s TMDB网站返回数据过多:%s" % (name, len(tmdb_links))) - else: - logger.info("%s TMDB网站未查询到媒体信息!" % name) - except Exception as err: - logger.error(f"从TheDbMovie网站查询出错:{str(err)}") - return None - return None + res = RequestUtils(timeout=5, ua=settings.USER_AGENT, proxies=settings.PROXY).get_res(url=tmdb_url) + if res is None: + return None + if res.status_code == 429: + raise APIRateLimitException("触发TheDbMovie网站限流,获取媒体信息失败") + if res.status_code != 200: + return {} + html_text = res.text + if not html_text: + return {} + try: + tmdb_links = [] + html = etree.HTML(html_text) + if mtype == MediaType.TV: + links = html.xpath("//a[@data-id and @data-media-type='tv']/@href") + else: + links = html.xpath("//a[@data-id]/@href") + for link in links: + if not link or (not link.startswith("/tv") and not link.startswith("/movie")): + continue + if link not in tmdb_links: + tmdb_links.append(link) + if len(tmdb_links) == 1: + tmdbinfo = self.get_info( + mtype=MediaType.TV if tmdb_links[0].startswith("/tv") else MediaType.MOVIE, + tmdbid=tmdb_links[0].split("/")[-1]) + if tmdbinfo: + if mtype == MediaType.TV and tmdbinfo.get('media_type') != MediaType.TV: + return {} + if tmdbinfo.get('media_type') == MediaType.MOVIE: + logger.info("%s 从WEB识别到 电影:TMDBID=%s, 名称=%s, 上映日期=%s" % ( + name, + tmdbinfo.get('id'), + tmdbinfo.get('title'), + tmdbinfo.get('release_date'))) + else: + logger.info("%s 从WEB识别到 电视剧:TMDBID=%s, 名称=%s, 首播日期=%s" % ( + name, + tmdbinfo.get('id'), + tmdbinfo.get('name'), + tmdbinfo.get('first_air_date'))) + return tmdbinfo + elif len(tmdb_links) > 1: + logger.info("%s TMDB网站返回数据过多:%s" % (name, len(tmdb_links))) + else: + logger.info("%s TMDB网站未查询到媒体信息!" % name) + except Exception as err: + logger.error(f"从TheDbMovie网站查询出错:{str(err)}") + return {} + return {} def get_info(self, mtype: MediaType,