From 70c4509682cdb7bebc84897c6ee2cc633d5c28d1 Mon Sep 17 00:00:00 2001
From: InfinityPacer <160988576+InfinityPacer@users.noreply.github.com>
Date: Wed, 22 Jan 2025 14:25:30 +0800
Subject: [PATCH 1/2] feat(cache): add `exists` to check key presence in cache
 backends

---
 app/core/cache.py | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/app/core/cache.py b/app/core/cache.py
index 91f60427..4daf608c 100644
--- a/app/core/cache.py
+++ b/app/core/cache.py
@@ -35,6 +35,17 @@ class CacheBackend(ABC):
         """
         pass
 
+    @abstractmethod
+    def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool:
+        """
+        Check whether a cache key exists (abstract; each backend implements it).
+
+        :param key: the cache key to look up
+        :param region: the cache region to look in
+        :return: True if the key exists, otherwise False
+        """
+        pass
+
     @abstractmethod
     def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Any:
         """
@@ -130,6 +141,19 @@ class CacheToolsBackend(CacheBackend):
         # 设置缓存值
         region_cache[key] = value
 
+    def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool:
+        """
+        Check whether a cache key exists in the given region's cache.
+
+        :param key: the cache key to look up
+        :param region: the cache region to look in
+        :return: True if the key is in the region cache; False if it is not, or if no region cache was returned
+        """
+        region_cache = self.__get_region_cache(region)
+        if region_cache is None:
+            return False
+        return key in region_cache
+
     def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Any:
         """
         获取缓存的值
@@ -294,6 +318,21 @@ class RedisBackend(CacheBackend):
         except Exception as e:
             logger.error(f"Failed to set key: {key} in region: {region}, error: {e}")
 
+    def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool:
+        """
+        Check whether a cache key exists in Redis; any exception is logged and reported as False.
+
+        :param key: the cache key to look up
+        :param region: the cache region, combined with the key by get_redis_key
+        :return: True if the client reports exactly one match for the key, otherwise False
+        """
+        try:
+            redis_key = self.get_redis_key(region, key)
+            return self.client.exists(redis_key) == 1
+        except Exception as e:
+            logger.error(f"Failed to exists key: {key} region: {region}, error: {e}")
+            return False
+
     def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Optional[Any]:
         """
         获取缓存的值
@@ -392,7 +431,7 @@ def cached(region: Optional[str] = None, maxsize: int = 1000, ttl: int = 1800,
     :param maxsize: 缓存的最大条目数，默认值为 1000
     :param ttl: 缓存的存活时间，单位秒，默认值为 1800
     :param skip_none: 跳过 None 缓存，默认为 True
-    :param skip_empty: 跳过空值缓存（如 [], {}, "", set()），默认为 False
+    :param skip_empty: 跳过空值缓存（如 None, [], {}, "", set()），默认为 False
     :return: 装饰器函数
     """
 
@@ -405,7 +444,7 @@ def cached(region: Optional[str] = None, maxsize: int = 1000, ttl: int = 1800,
         """
         if skip_none and value is None:
             return False
-        # if disable_empty and value in [[], {}, "", set()]:
+        # if skip_empty and value in [None, [], {}, "", set()]:
         if skip_empty and not value:
             return False
         return True

From 5c7bd95f6b642966c6a88cafefdf58f70c7a9891 Mon Sep 17 00:00:00 2001
From: InfinityPacer <160988576+InfinityPacer@users.noreply.github.com>
Date: Wed, 22 Jan 2025 14:58:56 +0800
Subject: [PATCH 2/2] fix(cache): enhance tmdb match_web rate-limiting and
 caching

---
 app/modules/themoviedb/tmdbapi.py | 98 +++++++++++++++++--------------
 1 file changed, 53 insertions(+), 45 deletions(-)

diff --git a/app/modules/themoviedb/tmdbapi.py b/app/modules/themoviedb/tmdbapi.py
index 178bd4f1..9479c987 100644
--- a/app/modules/themoviedb/tmdbapi.py
+++ b/app/modules/themoviedb/tmdbapi.py
@@ -8,8 +8,10 @@ from lxml import etree
 from app.core.cache import cached
 from app.core.config import settings
 from app.log import logger
+from app.schemas import APIRateLimitException
 from app.schemas.types import MediaType
 from app.utils.http import RequestUtils
+from app.utils.limit import rate_limit_exponential
 from app.utils.string import StringUtils
 from .tmdbv3api import TMDb, Search, Movie, TV, Season, Episode, Discover, Trending, Person, Collection
 from .tmdbv3api.exceptions import TMDbException
@@ -492,6 +494,7 @@ class TmdbApi:
             return ret_info
 
     @cached(maxsize=settings.CACHE_CONF["tmdb"], ttl=settings.CACHE_CONF["meta"])
+    @rate_limit_exponential(source="match_tmdb_web", max_wait=1800, enable_logging=True)
     def match_web(self, name: str, mtype: MediaType) -> Optional[dict]:
         """
         搜索TMDB网站，直接抓取结果，结果只有一条时才返回
@@ -504,51 +507,56 @@ class TmdbApi:
             return {}
         logger.info("正在从TheDbMovie网站查询：%s ..." % name)
         tmdb_url = "https://www.themoviedb.org/search?query=%s" % quote(name)
-        res = RequestUtils(timeout=5, ua=settings.USER_AGENT).get_res(url=tmdb_url)
-        if res and res.status_code == 200:
-            html_text = res.text
-            if not html_text:
-                return None
-            try:
-                tmdb_links = []
-                html = etree.HTML(html_text)
-                if mtype == MediaType.TV:
-                    links = html.xpath("//a[@data-id and @data-media-type='tv']/@href")
-                else:
-                    links = html.xpath("//a[@data-id]/@href")
-                for link in links:
-                    if not link or (not link.startswith("/tv") and not link.startswith("/movie")):
-                        continue
-                    if link not in tmdb_links:
-                        tmdb_links.append(link)
-                if len(tmdb_links) == 1:
-                    tmdbinfo = self.get_info(
-                        mtype=MediaType.TV if tmdb_links[0].startswith("/tv") else MediaType.MOVIE,
-                        tmdbid=tmdb_links[0].split("/")[-1])
-                    if tmdbinfo:
-                        if mtype == MediaType.TV and tmdbinfo.get('media_type') != MediaType.TV:
-                            return {}
-                        if tmdbinfo.get('media_type') == MediaType.MOVIE:
-                            logger.info("%s 从WEB识别到 电影：TMDBID=%s, 名称=%s, 上映日期=%s" % (
-                                name,
-                                tmdbinfo.get('id'),
-                                tmdbinfo.get('title'),
-                                tmdbinfo.get('release_date')))
-                        else:
-                            logger.info("%s 从WEB识别到 电视剧：TMDBID=%s, 名称=%s, 首播日期=%s" % (
-                                name,
-                                tmdbinfo.get('id'),
-                                tmdbinfo.get('name'),
-                                tmdbinfo.get('first_air_date')))
-                    return tmdbinfo
-                elif len(tmdb_links) > 1:
-                    logger.info("%s TMDB网站返回数据过多：%s" % (name, len(tmdb_links)))
-                else:
-                    logger.info("%s TMDB网站未查询到媒体信息！" % name)
-            except Exception as err:
-                logger.error(f"从TheDbMovie网站查询出错：{str(err)}")
-                return None
-        return None
+        res = RequestUtils(timeout=5, ua=settings.USER_AGENT, proxies=settings.PROXY).get_res(url=tmdb_url)
+        if res is None:
+            return None
+        if res.status_code == 429:
+            raise APIRateLimitException("触发TheDbMovie网站限流，获取媒体信息失败")
+        if res.status_code != 200:
+            return {}
+        html_text = res.text
+        if not html_text:
+            return {}
+        try:
+            tmdb_links = []
+            html = etree.HTML(html_text)
+            if mtype == MediaType.TV:
+                links = html.xpath("//a[@data-id and @data-media-type='tv']/@href")
+            else:
+                links = html.xpath("//a[@data-id]/@href")
+            for link in links:
+                if not link or (not link.startswith("/tv") and not link.startswith("/movie")):
+                    continue
+                if link not in tmdb_links:
+                    tmdb_links.append(link)
+            if len(tmdb_links) == 1:
+                tmdbinfo = self.get_info(
+                    mtype=MediaType.TV if tmdb_links[0].startswith("/tv") else MediaType.MOVIE,
+                    tmdbid=tmdb_links[0].split("/")[-1])
+                if tmdbinfo:
+                    if mtype == MediaType.TV and tmdbinfo.get('media_type') != MediaType.TV:
+                        return {}
+                    if tmdbinfo.get('media_type') == MediaType.MOVIE:
+                        logger.info("%s 从WEB识别到 电影：TMDBID=%s, 名称=%s, 上映日期=%s" % (
+                            name,
+                            tmdbinfo.get('id'),
+                            tmdbinfo.get('title'),
+                            tmdbinfo.get('release_date')))
+                    else:
+                        logger.info("%s 从WEB识别到 电视剧：TMDBID=%s, 名称=%s, 首播日期=%s" % (
+                            name,
+                            tmdbinfo.get('id'),
+                            tmdbinfo.get('name'),
+                            tmdbinfo.get('first_air_date')))
+                return tmdbinfo
+            elif len(tmdb_links) > 1:
+                logger.info("%s TMDB网站返回数据过多：%s" % (name, len(tmdb_links)))
+            else:
+                logger.info("%s TMDB网站未查询到媒体信息！" % name)
+        except Exception as err:
+            logger.error(f"从TheDbMovie网站查询出错：{str(err)}")
+            return {}
+        return {}
 
     def get_info(self,
                  mtype: MediaType,