diff --git a/app/modules/indexer/__init__.py b/app/modules/indexer/__init__.py index 78527596..45f6def0 100644 --- a/app/modules/indexer/__init__.py +++ b/app/modules/indexer/__init__.py @@ -214,7 +214,8 @@ class IndexerModule(_ModuleBase): logger.warn(f"{site.get('name')} 未搜索到数据,共搜索 {search_count} 次,耗时 {seconds} 秒") return [] else: - logger.info(f"{site.get('name')} 搜索完成,共搜索 {search_count} 次,耗时 {seconds} 秒,返回数据:{len(result_array)}") + logger.info( + f"{site.get('name')} 搜索完成,共搜索 {search_count} 次,耗时 {seconds} 秒,返回数据:{len(result_array)}") # TorrentInfo torrents = [TorrentInfo(site=site.get("id"), site_name=site.get("name"), @@ -252,11 +253,40 @@ class IndexerModule(_ModuleBase): try: return _spider.is_error, _spider.get_torrents() finally: - # 显式清理SiteSpider对象 + del _spider + + @staticmethod + async def __async_spider_search(indexer: dict, + search_word: Optional[str] = None, + mtype: MediaType = None, + cat: Optional[str] = None, + page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 异步根据关键字搜索单个站点 + :param: indexer: 站点配置 + :param: search_word: 关键字 + :param: cat: 分类 + :param: page: 页码 + :param: mtype: 媒体类型 + :param: timeout: 超时时间 + :return: 是否发生错误, 种子列表 + """ + _spider = SiteSpider(indexer=indexer, + keyword=search_word, + mtype=mtype, + cat=cat, + page=page) + + try: + result = await _spider.async_get_torrents() + return _spider.is_error, result + finally: del _spider def refresh_torrents(self, site: dict, - keyword: Optional[str] = None, cat: Optional[str] = None, page: Optional[int] = 0) -> Optional[List[TorrentInfo]]: + keyword: Optional[str] = None, + cat: Optional[str] = None, + page: Optional[int] = 0) -> Optional[List[TorrentInfo]]: """ 获取站点最新一页的种子,多个站点需要多线程处理 :param site: 站点 diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py index d213dd00..1ced2441 100644 --- a/app/modules/indexer/spider/__init__.py +++ b/app/modules/indexer/spider/__init__.py @@ -5,13 +5,14 @@ from typing import Any, Optional from typing import List from urllib.parse import quote, urlencode, urlparse, parse_qs +from fastapi.concurrency import run_in_threadpool from jinja2 import Template from pyquery import PyQuery from app.core.config import settings from app.log import logger from app.schemas.types import MediaType -from app.utils.http import RequestUtils +from app.utils.http import RequestUtils, AsyncRequestUtils from app.utils.string import StringUtils @@ -80,13 +81,10 @@ class SiteSpider: self.torrents_info = {} self.torrents_info_array = [] - def get_torrents(self) -> List[dict]: + def __get_search_url(self): """ - 开始请求 + 获取搜索URL """ - if not self.search or not self.domain: - return [] - # 种子搜索相对路径 paths = self.search.get('paths', []) torrentspath = "" @@ -200,6 +198,18 @@ class SiteSpider: # 搜索Url searchurl = self.domain + str(torrentspath).format(**inputs_dict) + return searchurl + + def get_torrents(self) -> List[dict]: + """ + 开始请求 + """ + if not self.search or not self.domain: + return [] + + # 获取搜索URL + searchurl = self.__get_search_url() + logger.info(f"开始请求:{searchurl}") # requests请求 @@ -219,6 +229,36 @@ class SiteSpider: ) ) + async def async_get_torrents(self) -> List[dict]: + """ + 异步请求 + """ + if not self.search or not self.domain: + return [] + + # 获取搜索URL + searchurl = self.__get_search_url() + + logger.info(f"开始异步请求:{searchurl}") + + # httpx请求 + ret = await AsyncRequestUtils( + ua=self.ua, + cookies=self.cookie, + timeout=self._timeout, + referer=self.referer, + proxies=self.proxies + ).get_res(searchurl, allow_redirects=True) + # 解析返回 + return await run_in_threadpool( + self.parse, + RequestUtils.get_decoded_html_content( + ret, + performance_mode=settings.ENCODING_DETECTION_PERFORMANCE_MODE, + confidence_threshold=settings.ENCODING_DETECTION_MIN_CONFIDENCE + ) + ) + def __get_title(self, torrent: Any): # title default text if 'title' not in self.fields: diff --git a/app/modules/indexer/spider/haidan.py b/app/modules/indexer/spider/haidan.py index ab9245b3..75bc5986 100644 --- a/app/modules/indexer/spider/haidan.py +++ b/app/modules/indexer/spider/haidan.py @@ -5,7 +5,7 @@ from app.core.config import settings from app.db.systemconfig_oper import SystemConfigOper from app.log import logger from app.schemas import MediaType -from app.utils.http import RequestUtils +from app.utils.http import RequestUtils, AsyncRequestUtils from app.utils.string import StringUtils @@ -63,9 +63,9 @@ class HaiDanSpider: self._ua = indexer.get('ua') self._timeout = indexer.get('timeout') or 15 - def search(self, keyword: str, mtype: MediaType = None) -> Tuple[bool, List[dict]]: + def __get_params(self, keyword: str, mtype: MediaType = None) -> dict: """ - 搜索 + 获取请求参数 """ def __dict_to_query(_params: dict): @@ -75,11 +75,7 @@ class HaiDanSpider: for key, value in _params.items(): if isinstance(value, list): _params[key] = ','.join(map(str, value)) - return urllib.parse.urlencode(params) - - # 检查cookie - if not self._cookie: - return True, [] + return urllib.parse.urlencode(_params) if not mtype: categories = [] @@ -94,59 +90,112 @@ class HaiDanSpider: else: search_area = '0' - params = { + return __dict_to_query({ "isapi": "1", "search_area": search_area, # 0-标题 1-简介(较慢)3-发种用户名 4-IMDb "search": keyword, "search_mode": "0", # 0-与 1-或 2-精准 "cat": categories - } + }) + + def __parse_result(self, result: dict): + """ + 解析结果 + """ + torrents = [] + data = result.get('data') or {} + for tid, item in data.items(): + category_value = result.get('category') + if category_value in self._tv_category \ + and category_value not in self._movie_category: + category = MediaType.TV.value + elif category_value in self._movie_category: + category = MediaType.MOVIE.value + else: + category = MediaType.UNKNOWN.value + torrent = { + 'title': item.get('name'), + 'description': item.get('small_descr'), + 'enclosure': item.get('url'), + 'pubdate': StringUtils.format_timestamp(item.get('added')), + 'size': int(item.get('size') or '0'), + 'seeders': int(item.get('seeders') or '0'), + 'peers': int(item.get("leechers") or '0'), + 'grabs': int(item.get("times_completed") or '0'), + 'downloadvolumefactor': self.__get_downloadvolumefactor(item.get('sp_state')), + 'uploadvolumefactor': self.__get_uploadvolumefactor(item.get('sp_state')), + 'page_url': self._detailurl % (self._url, item.get('group_id'), tid), + 'labels': [], + 'category': category + } + torrents.append(torrent) + return torrents + + def search(self, keyword: str, mtype: MediaType = None) -> Tuple[bool, List[dict]]: + """ + 搜索 + """ + + # 检查cookie + if not self._cookie: + return True, [] + + # 获取参数 + params_str = self.__get_params(keyword, mtype) + + # 发送请求 res = RequestUtils( cookies=self._cookie, ua=self._ua, proxies=self._proxy, timeout=self._timeout - ).get_res(url=f"{self._searchurl}?{__dict_to_query(params)}") - torrents = [] + ).get_res(url=f"{self._searchurl}?{params_str}") if res and res.status_code == 200: result = res.json() code = result.get('code') if code != 0: logger.warn(f"{self._name} 搜索失败:{result.get('msg')}") return True, [] - data = result.get('data') or {} - for tid, item in data.items(): - category_value = result.get('category') - if category_value in self._tv_category \ - and category_value not in self._movie_category: - category = MediaType.TV.value - elif category_value in self._movie_category: - category = MediaType.MOVIE.value - else: - category = MediaType.UNKNOWN.value - torrent = { - 'title': item.get('name'), - 'description': item.get('small_descr'), - 'enclosure': item.get('url'), - 'pubdate': StringUtils.format_timestamp(item.get('added')), - 'size': int(item.get('size') or '0'), - 'seeders': int(item.get('seeders') or '0'), - 'peers': int(item.get("leechers") or '0'), - 'grabs': int(item.get("times_completed") or '0'), - 'downloadvolumefactor': self.__get_downloadvolumefactor(item.get('sp_state')), - 'uploadvolumefactor': self.__get_uploadvolumefactor(item.get('sp_state')), - 'page_url': self._detailurl % (self._url, item.get('group_id'), tid), - 'labels': [], - 'category': category - } - torrents.append(torrent) + return False, self.__parse_result(result) + elif res is not None: + logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") + return True, [] + else: + logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") + return True, [] + + async def async_search(self, keyword: str, mtype: MediaType = None) -> Tuple[bool, List[dict]]: + """ + 异步搜索 + """ + # 检查cookie + if not self._cookie: + return True, [] + + # 获取参数 + params_str = self.__get_params(keyword, mtype) + + # 发送请求 + res = await AsyncRequestUtils( + cookies=self._cookie, + ua=self._ua, + proxies=self._proxy, + timeout=self._timeout + ).get_res(url=f"{self._searchurl}?{params_str}") + + if res and res.status_code == 200: + result = res.json() + code = result.get('code') + if code != 0: + logger.warn(f"{self._name} 搜索失败:{result.get('msg')}") + return True, [] + return False, self.__parse_result(result) elif res is not None: logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") return True, [] else: logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") return True, [] - return False, torrents def __get_downloadvolumefactor(self, discount: str) -> float: """ diff --git a/app/modules/indexer/spider/hddolby.py b/app/modules/indexer/spider/hddolby.py index a614c6b8..471bba39 100644 --- a/app/modules/indexer/spider/hddolby.py +++ b/app/modules/indexer/spider/hddolby.py @@ -4,7 +4,7 @@ from app.core.config import settings from app.db.systemconfig_oper import SystemConfigOper from app.log import logger from app.schemas import MediaType -from app.utils.http import RequestUtils +from app.utils.http import RequestUtils, AsyncRequestUtils from app.utils.string import StringUtils @@ -73,11 +73,10 @@ class HddolbySpider: self._searchurl = f"https://api.{self._domain_host}/api/v1/torrent/search" self._pageurl = f"{self._domain}details.php?id=%s&hit=1" - def search(self, keyword: str, mtype: MediaType = None, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + def __get_params(self, keyword: str, mtype: MediaType = None, page: Optional[int] = 0) -> dict: """ - 搜索 + 获取请求参数 """ - if mtype == MediaType.TV: categories = self._tv_category elif mtype == MediaType.MOVIE: @@ -86,7 +85,7 @@ class HddolbySpider: categories = list(set(self._movie_category + self._tv_category)) # 输入参数 - params = { + return { "keyword": keyword, "page_number": page, "page_size": 100, @@ -94,6 +93,84 @@ class HddolbySpider: "visible": 1, } + def __parse_result(self, results: List[dict]) -> List[dict]: + """ + 解析搜索结果 + """ + torrents = [] + if not results: + return [] + + for result in results: + """ + { + "id": 120202, + "promotion_time_type": 0, + "promotion_until": "0000-00-00 00:00:00", + "category": 402, + "medium": 6, + "codec": 1, + "standard": 2, + "team": 10, + "audiocodec": 14, + "leechers": 0, + "seeders": 1, + "name": "[DBY] Lost S06 2010 Complete 1080p Netflix WEB-DL AVC DDP5.1-DBTV", + "small_descr": "lost ", + "times_completed": 0, + "size": 33665425886, + "added": "2025-02-18 19:47:56", + "url": 0, + "hr": 0, + "tmdb_type": "tv", + "tmdb_id": 4607, + "imdb_id": null, + "tags": "gf" + } + """ + # 类别 + category_value = result.get('category') + if category_value in self._tv_category: + category = MediaType.TV.value + elif category_value in self._movie_category: + category = MediaType.MOVIE.value + else: + category = MediaType.UNKNOWN.value + # 标签 + torrentLabelIds = result.get('tags', "").split(";") or [] + torrentLabels = [] + for labelId in torrentLabelIds: + if self._labels.get(labelId) is not None: + torrentLabels.append(self._labels.get(labelId)) + # 种子信息 + torrent = { + 'title': result.get('name'), + 'description': result.get('small_descr'), + 'enclosure': self.__get_download_url(result.get('id'), result.get('downhash')), + 'pubdate': result.get('added'), + 'size': result.get('size'), + 'seeders': result.get('seeders'), + 'peers': result.get('leechers'), + 'grabs': result.get('times_completed'), + 'downloadvolumefactor': self.__get_downloadvolumefactor(result.get('promotion_time_type')), + 'uploadvolumefactor': self.__get_uploadvolumefactor(result.get('promotion_time_type')), + 'freedate': result.get('promotion_until'), + 'page_url': self._pageurl % result.get('id'), + 'labels': torrentLabels, + 'category': category + } + torrents.append(torrent) + return torrents + + def search(self, keyword: str, mtype: MediaType = None, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 搜索 + """ + + # 准备参数 + params = self.__get_params(keyword, mtype, page) + + # 发送请求 res = RequestUtils( headers={ "Content-Type": "application/json", @@ -105,75 +182,44 @@ class HddolbySpider: referer=f"{self._domain}", timeout=self._timeout ).post_res(url=self._searchurl, json=params) - torrents = [] if res and res.status_code == 200: results = res.json().get('data', []) or [] - for result in results: - """ - { - "id": 120202, - "promotion_time_type": 0, - "promotion_until": "0000-00-00 00:00:00", - "category": 402, - "medium": 6, - "codec": 1, - "standard": 2, - "team": 10, - "audiocodec": 14, - "leechers": 0, - "seeders": 1, - "name": "[DBY] Lost S06 2010 Complete 1080p Netflix WEB-DL AVC DDP5.1-DBTV", - "small_descr": "lost ", - "times_completed": 0, - "size": 33665425886, - "added": "2025-02-18 19:47:56", - "url": 0, - "hr": 0, - "tmdb_type": "tv", - "tmdb_id": 4607, - "imdb_id": null, - "tags": "gf" - } - """ - # 类别 - category_value = result.get('category') - if category_value in self._tv_category: - category = MediaType.TV.value - elif category_value in self._movie_category: - category = MediaType.MOVIE.value - else: - category = MediaType.UNKNOWN.value - # 标签 - torrentLabelIds = result.get('tags', "").split(";") or [] - torrentLabels = [] - for labelId in torrentLabelIds: - if self._labels.get(labelId) is not None: - torrentLabels.append(self._labels.get(labelId)) - # 种子信息 - torrent = { - 'title': result.get('name'), - 'description': result.get('small_descr'), - 'enclosure': self.__get_download_url(result.get('id'), result.get('downhash')), - 'pubdate': result.get('added'), - 'size': result.get('size'), - 'seeders': result.get('seeders'), - 'peers': result.get('leechers'), - 'grabs': result.get('times_completed'), - 'downloadvolumefactor': self.__get_downloadvolumefactor(result.get('promotion_time_type')), - 'uploadvolumefactor': self.__get_uploadvolumefactor(result.get('promotion_time_type')), - 'freedate': result.get('promotion_until'), - 'page_url': self._pageurl % result.get('id'), - 'labels': torrentLabels, - 'category': category - } - torrents.append(torrent) + return False, self.__parse_result(results) + elif res is not None: + logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") + return True, [] + else: + logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") + return True, [] + + async def async_search(self, keyword: str, mtype: MediaType = None, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 异步搜索 + """ + # 准备参数 + params = self.__get_params(keyword, mtype, page) + + # 发送请求 + res = await AsyncRequestUtils( + headers={ + "Content-Type": "application/json", + "Accept": "application/json, text/plain, */*", + "x-api-key": self._apikey + }, + cookies=self._cookie, + proxies=self._proxy, + referer=f"{self._domain}", + timeout=self._timeout + ).post_res(url=self._searchurl, json=params) + if res and res.status_code == 200: + results = res.json().get('data', []) or [] + return False, self.__parse_result(results) elif res is not None: logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") return True, [] else: logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") return True, [] - return False, torrents @staticmethod def __get_downloadvolumefactor(discount: int) -> float: diff --git a/app/modules/indexer/spider/mtorrent.py b/app/modules/indexer/spider/mtorrent.py index e872f4df..533da7f9 100644 --- a/app/modules/indexer/spider/mtorrent.py +++ b/app/modules/indexer/spider/mtorrent.py @@ -7,7 +7,7 @@ from app.core.config import settings from app.db.systemconfig_oper import SystemConfigOper from app.log import logger from app.schemas import MediaType -from app.utils.http import RequestUtils +from app.utils.http import RequestUtils, AsyncRequestUtils from app.utils.string import StringUtils @@ -65,6 +65,71 @@ class MTorrentSpider: self._token = indexer.get('token') self._timeout = indexer.get('timeout') or 15 + def __get_params(self, keyword: str, mtype: MediaType = None, page: Optional[int] = 0) -> dict: + """ + 获取请求参数 + """ + if not mtype: + categories = [] + elif mtype == MediaType.TV: + categories = self._tv_category + else: + categories = self._movie_category + return { + "keyword": keyword, + "categories": categories, + "pageNumber": int(page) + 1, + "pageSize": self._size, + "visible": 1 + } + + def __parse_result(self, results: List[dict]): + """ + 解析搜索结果 + """ + torrents = [] + if not results: + return torrents + + for result in results: + category_value = result.get('category') + if category_value in self._tv_category \ + and category_value not in self._movie_category: + category = MediaType.TV.value + elif category_value in self._movie_category: + category = MediaType.MOVIE.value + else: + category = MediaType.UNKNOWN.value + # 处理馒头新版标签 + labels = [] + labels_new = result.get('labelsNew') + if labels_new: + # 新版标签本身就是list + labels = labels_new + else: + # 旧版标签 + labels_value = self._labels.get(result.get('labels') or "0") or "" + if labels_value: + labels = labels_value.split() + torrent = { + 'title': result.get('name'), + 'description': result.get('smallDescr'), + 'enclosure': self.__get_download_url(result.get('id')), + 'pubdate': StringUtils.format_timestamp(result.get('createdDate')), + 'size': int(result.get('size') or '0'), + 'seeders': int(result.get('status', {}).get("seeders") or '0'), + 'peers': int(result.get('status', {}).get("leechers") or '0'), + 'grabs': int(result.get('status', {}).get("timesCompleted") or '0'), + 'downloadvolumefactor': self.__get_downloadvolumefactor(result.get('status', {}).get("discount")), + 'uploadvolumefactor': self.__get_uploadvolumefactor(result.get('status', {}).get("discount")), + 'page_url': self._pageurl % (self._url, result.get('id')), + 'imdbid': self.__find_imdbid(result.get('imdb')), + 'labels': labels, + 'category': category + } + torrents.append(torrent) + return torrents + def search(self, keyword: str, mtype: MediaType = None, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: """ 搜索 @@ -73,19 +138,10 @@ class MTorrentSpider: if not self._apikey: return True, [] - if not mtype: - categories = [] - elif mtype == MediaType.TV: - categories = self._tv_category - else: - categories = self._movie_category - params = { - "keyword": keyword, - "categories": categories, - "pageNumber": int(page) + 1, - "pageSize": self._size, - "visible": 1 - } + # 获取请求参数 + params = self.__get_params(keyword, mtype, page) + + # 发送请求 res = RequestUtils( headers={ "Content-Type": "application/json", @@ -96,53 +152,47 @@ class MTorrentSpider: referer=f"{self._domain}browse", timeout=self._timeout ).post_res(url=self._searchurl, json=params) - torrents = [] if res and res.status_code == 200: results = res.json().get('data', {}).get("data") or [] - for result in results: - category_value = result.get('category') - if category_value in self._tv_category \ - and category_value not in self._movie_category: - category = MediaType.TV.value - elif category_value in self._movie_category: - category = MediaType.MOVIE.value - else: - category = MediaType.UNKNOWN.value - # 处理馒头新版标签 - labels = [] - labels_new = result.get( 'labelsNew' ) - if labels_new: - # 新版标签本身就是list - labels = labels_new - else: - # 旧版标签 - labels_value = self._labels.get(result.get('labels') or "0") or "" - if labels_value: - labels = labels_value.split() - torrent = { - 'title': result.get('name'), - 'description': result.get('smallDescr'), - 'enclosure': self.__get_download_url(result.get('id')), - 'pubdate': StringUtils.format_timestamp(result.get('createdDate')), - 'size': int(result.get('size') or '0'), - 'seeders': int(result.get('status', {}).get("seeders") or '0'), - 'peers': int(result.get('status', {}).get("leechers") or '0'), - 'grabs': int(result.get('status', {}).get("timesCompleted") or '0'), - 'downloadvolumefactor': self.__get_downloadvolumefactor(result.get('status', {}).get("discount")), - 'uploadvolumefactor': self.__get_uploadvolumefactor(result.get('status', {}).get("discount")), - 'page_url': self._pageurl % (self._url, result.get('id')), - 'imdbid': self.__find_imdbid(result.get('imdb')), - 'labels': labels, - 'category': category - } - torrents.append(torrent) + return False, self.__parse_result(results) + elif res is not None: + logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") + return True, [] + else: + logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") + return True, [] + + async def async_search(self, keyword: str, mtype: MediaType = None, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 搜索 + """ + # 检查ApiKey + if not self._apikey: + return True, [] + + # 获取请求参数 + params = self.__get_params(keyword, mtype, page) + + # 发送请求 + res = await AsyncRequestUtils( + headers={ + "Content-Type": "application/json", + "User-Agent": f"{self._ua}", + "x-api-key": self._apikey + }, + proxies=self._proxy, + referer=f"{self._domain}browse", + timeout=self._timeout + ).post_res(url=self._searchurl, json=params) + if res and res.status_code == 200: + results = res.json().get('data', {}).get("data") or [] + return False, self.__parse_result(results) elif res is not None: logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") return True, [] else: logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") return True, [] - return False, torrents @staticmethod def __find_imdbid(imdb: str) -> str: diff --git a/app/modules/indexer/spider/tnode.py b/app/modules/indexer/spider/tnode.py index 4ea51099..bb1417fa 100644 --- a/app/modules/indexer/spider/tnode.py +++ b/app/modules/indexer/spider/tnode.py @@ -1,23 +1,18 @@ import re from typing import Tuple, List, Optional +from app.core.cache import cached from app.core.config import settings from app.log import logger -from app.utils.http import RequestUtils +from app.utils.http import RequestUtils, AsyncRequestUtils +from app.utils.singleton import Singleton from app.utils.string import StringUtils -class TNodeSpider: - _indexerid = None - _domain = None - _name = "" - _proxy = None - _cookie = None - _ua = None - _token = None +class TNodeSpider(metaclass=Singleton): _size = 100 _timeout = 15 - _searchurl = "%sapi/torrent/advancedSearch" + _baseurl = "%sapi/torrent/advancedSearch" _downloadurl = "%sapi/torrent/download/%s" _pageurl = "%storrent/info/%s" @@ -25,19 +20,16 @@ class TNodeSpider: if indexer: self._indexerid = indexer.get('id') self._domain = indexer.get('domain') - self._searchurl = self._searchurl % self._domain + self._searchurl = self._baseurl % self._domain self._name = indexer.get('name') if indexer.get('proxy'): self._proxy = settings.PROXY self._cookie = indexer.get('cookie') self._ua = indexer.get('ua') self._timeout = indexer.get('timeout') or 15 - self.init_config() - def init_config(self): - self.__get_token() - - def __get_token(self): + @cached(region="indexer_spider", maxsize=1, ttl=60 * 60 * 24, skip_empty=True) + def __get_token(self) -> Optional[str]: if not self._domain: return res = RequestUtils(ua=self._ua, @@ -47,14 +39,29 @@ class TNodeSpider: if res and res.status_code == 200: csrf_token = re.search(r'', res.text) if csrf_token: - self._token = csrf_token.group(1) + return csrf_token.group(1) + return None - def search(self, keyword: str, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: - if not self._token: - logger.warn(f"{self._name} 未获取到token,无法搜索") - return True, [] + @cached(region="indexer_spider", maxsize=1, ttl=60 * 60 * 24, skip_empty=True) + async def __async_get_token(self) -> Optional[str]: + if not self._domain: + return + res = await AsyncRequestUtils(ua=self._ua, + cookies=self._cookie, + proxies=self._proxy, + timeout=self._timeout).get_res(url=self._domain) + if res and res.status_code == 200: + csrf_token = re.search(r'', res.text) + if csrf_token: + _token = csrf_token.group(1) + return None + + def __get_params(self, keyword: str = None, page: Optional[int] = 0) -> dict: + """ + 获取搜索参数 + """ search_type = "imdbid" if (keyword and keyword.startswith('tt')) else "title" - params = { + return { "page": int(page) + 1, "size": self._size, "type": search_type, @@ -69,9 +76,51 @@ class TNodeSpider: "resolution": [], "group": [] } + + def __parse_result(self, results: List[dict]) -> List[dict]: + """ + 解析搜索结果 + """ + torrents = [] + if not results: + return torrents + + for result in results: + torrent = { + 'title': result.get('title'), + 'description': result.get('subtitle'), + 'enclosure': self._downloadurl % (self._domain, result.get('id')), + 'pubdate': StringUtils.format_timestamp(result.get('upload_time')), + 'size': result.get('size'), + 'seeders': result.get('seeding'), + 'peers': result.get('leeching'), + 'grabs': result.get('complete'), + 'downloadvolumefactor': result.get('downloadRate'), + 'uploadvolumefactor': result.get('uploadRate'), + 'page_url': self._pageurl % (self._domain, result.get('id')), + 'imdbid': result.get('imdb') + } + torrents.append(torrent) + + return torrents + + def search(self, keyword: str, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 搜索 + """ + # 获取token + _token = self.__get_token() + if not _token: + logger.warn(f"{self._name} 未获取到token,无法搜索") + return True, [] + + # 获取请求参数 + params = self.__get_params(keyword, page) + + # 发送请求 res = RequestUtils( headers={ - 'X-CSRF-TOKEN': self._token, + 'X-CSRF-TOKEN': _token, "Content-Type": "application/json; charset=utf-8", "User-Agent": f"{self._ua}" }, @@ -79,29 +128,46 @@ class TNodeSpider: proxies=self._proxy, timeout=self._timeout ).post_res(url=self._searchurl, json=params) - torrents = [] if res and res.status_code == 200: results = res.json().get('data', {}).get("torrents") or [] - for result in results: - torrent = { - 'title': result.get('title'), - 'description': result.get('subtitle'), - 'enclosure': self._downloadurl % (self._domain, result.get('id')), - 'pubdate': StringUtils.format_timestamp(result.get('upload_time')), - 'size': result.get('size'), - 'seeders': result.get('seeding'), - 'peers': result.get('leeching'), - 'grabs': result.get('complete'), - 'downloadvolumefactor': result.get('downloadRate'), - 'uploadvolumefactor': result.get('uploadRate'), - 'page_url': self._pageurl % (self._domain, result.get('id')), - 'imdbid': result.get('imdb') - } - torrents.append(torrent) + return False, self.__parse_result(results) + elif res is not None: + logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") + return True, [] + else: + logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") + return True, [] + + async def async_search(self, keyword: str, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 异步搜索 + """ + # 获取token + _token = await self.__async_get_token() + if not _token: + logger.warn(f"{self._name} 未获取到token,无法搜索") + return True, [] + + # 获取请求参数 + params = self.__get_params(keyword, page) + + # 发送请求 + res = await AsyncRequestUtils( + headers={ + 'X-CSRF-TOKEN': _token, + "Content-Type": "application/json; charset=utf-8", + "User-Agent": f"{self._ua}" + }, + cookies=self._cookie, + proxies=self._proxy, + timeout=self._timeout + ).post_res(url=self._searchurl, json=params) + if res and res.status_code == 200: + results = res.json().get('data', {}).get("torrents") or [] + return False, self.__parse_result(results) elif res is not None: logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") return True, [] else: logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") return True, [] - return False, torrents diff --git a/app/modules/indexer/spider/torrentleech.py b/app/modules/indexer/spider/torrentleech.py index 41098647..657e7671 100644 --- a/app/modules/indexer/spider/torrentleech.py +++ b/app/modules/indexer/spider/torrentleech.py @@ -3,7 +3,7 @@ from urllib.parse import quote from app.core.config import settings from app.log import logger -from app.utils.http import RequestUtils +from app.utils.http import RequestUtils, AsyncRequestUtils from app.utils.string import StringUtils @@ -23,8 +23,37 @@ class TorrentLeech: self._proxy = settings.PROXY self._timeout = indexer.get('timeout') or 15 - def search(self, keyword: str, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + def __parse_result(self, results: List[dict]) -> List[dict]: + """ + 解析搜索结果 + """ + torrents = [] + if not results: + return torrents + for result in results: + torrent = { + 'title': result.get('name'), + 'enclosure': self._downloadurl % (self._indexer.get('domain'), + result.get('fid'), + result.get('filename')), + 'pubdate': StringUtils.format_timestamp(result.get('addedTimestamp')), + 'size': result.get('size'), + 'seeders': result.get('seeders'), + 'peers': result.get('leechers'), + 'grabs': result.get('completed'), + 'downloadvolumefactor': result.get('download_multiplier'), + 'uploadvolumefactor': 1, + 'page_url': self._pageurl % (self._indexer.get('domain'), result.get('fid')), + 'imdbid': result.get('imdbID') + } + torrents.append(torrent) + return torrents + + def search(self, keyword: str, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 搜索种子 + """ if StringUtils.is_chinese(keyword): # 不支持中文 return True, [] @@ -33,6 +62,7 @@ class TorrentLeech: url = self._searchurl % (self._indexer.get('domain'), quote(keyword)) else: url = self._browseurl % (self._indexer.get('domain'), int(page) + 1) + res = RequestUtils( headers={ "Content-Type": "application/json; charset=utf-8", @@ -42,24 +72,9 @@ class TorrentLeech: proxies=self._proxy, timeout=self._timeout ).get_res(url) - torrents = [] if res and res.status_code == 200: results = res.json().get('torrentList') or [] - for result in results: - torrent = { - 'title': result.get('name'), - 'enclosure': self._downloadurl % (self._indexer.get('domain'), result.get('fid'), result.get('filename')), - 'pubdate': StringUtils.format_timestamp(result.get('addedTimestamp')), - 'size': result.get('size'), - 'seeders': result.get('seeders'), - 'peers': result.get('leechers'), - 'grabs': result.get('completed'), - 'downloadvolumefactor': result.get('download_multiplier'), - 'uploadvolumefactor': 1, - 'page_url': self._pageurl % (self._indexer.get('domain'), result.get('fid')), - 'imdbid': result.get('imdbID') - } - torrents.append(torrent) + return False, self.__parse_result(results) elif res is not None: logger.warn(f"{self._indexer.get('name')} 搜索失败,错误码:{res.status_code}") return True, [] @@ -67,4 +82,34 @@ class TorrentLeech: logger.warn(f"{self._indexer.get('name')} 搜索失败,无法连接 {self._indexer.get('domain')}") return True, [] - return False, torrents + async def async_search(self, keyword: str, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 异步搜索种子 + """ + if StringUtils.is_chinese(keyword): + # 不支持中文 + return True, [] + + if keyword: + url = self._searchurl % (self._indexer.get('domain'), quote(keyword)) + else: + url = self._browseurl % (self._indexer.get('domain'), int(page) + 1) + + res = await AsyncRequestUtils( + headers={ + "Content-Type": "application/json; charset=utf-8", + "User-Agent": f"{self._indexer.get('ua')}", + }, + cookies=self._indexer.get('cookie'), + proxies=self._proxy, + timeout=self._timeout + ).get_res(url) + if res and res.status_code == 200: + results = res.json().get('torrentList') or [] + return False, self.__parse_result(results) + elif res is not None: + logger.warn(f"{self._indexer.get('name')} 搜索失败,错误码:{res.status_code}") + return True, [] + else: + logger.warn(f"{self._indexer.get('name')} 搜索失败,无法连接 {self._indexer.get('domain')}") + return True, [] diff --git a/app/modules/indexer/spider/yema.py b/app/modules/indexer/spider/yema.py index ae6266ff..ee6278e9 100644 --- a/app/modules/indexer/spider/yema.py +++ b/app/modules/indexer/spider/yema.py @@ -4,7 +4,7 @@ from app.core.config import settings from app.db.systemconfig_oper import SystemConfigOper from app.log import logger from app.schemas import MediaType -from app.utils.http import RequestUtils +from app.utils.http import RequestUtils, AsyncRequestUtils from app.utils.string import StringUtils @@ -57,9 +57,9 @@ class YemaSpider: self._ua = indexer.get('ua') self._timeout = indexer.get('timeout') or 15 - def search(self, keyword: str, mtype: MediaType = None, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + def __get_params(self, keyword: str = None, page: Optional[int] = 0) -> dict: """ - 搜索 + 获取搜索参数 """ params = { "pageParam": { @@ -69,16 +69,63 @@ class YemaSpider: }, "sorter": {} } - # 新接口可不传 categoryId 参数 - # if mtype == MediaType.MOVIE: - # params.update({ - # "categoryId": self._movie_category, - # }) - # pass if keyword: params.update({ "keyword": keyword, }) + return params + + def __parse_result(self, results: List[dict]) -> List[dict]: + """ + 解析搜索结果 + """ + torrents = [] + if not results: + return torrents + + for result in results: + category_value = result.get('categoryId') + if category_value in self._tv_category: + category = MediaType.TV.value + elif category_value in self._movie_category: + category = MediaType.MOVIE.value + else: + category = MediaType.UNKNOWN.value + pass + + torrentLabelIds = result.get('tagList', []) or [] + torrentLabels = [] + for labelId in torrentLabelIds: + if self._labels.get(labelId) is not None: + torrentLabels.append(self._labels.get(labelId)) + pass + pass + torrent = { + 'title': result.get('showName'), + 'description': result.get('shortDesc'), + 'enclosure': self.__get_download_url(result.get('id')), + 'pubdate': StringUtils.unify_datetime_str(result.get('listingTime')), + 'size': result.get('fileSize'), + 'seeders': result.get('seedNum'), + 'peers': result.get('leechNum'), + 'grabs': result.get('completedNum'), + 'downloadvolumefactor': self.__get_downloadvolumefactor(result.get('downloadPromotion')), + 'uploadvolumefactor': self.__get_uploadvolumefactor(result.get('uploadPromotion')), + 'freedate': StringUtils.unify_datetime_str(result.get('downloadPromotionEndTime')), + 'page_url': self._pageurl % (self._domain, result.get('id')), + 'labels': torrentLabels, + 'category': category + } + torrents.append(torrent) + + return torrents + + def search(self, keyword: str, + mtype: MediaType = None, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 搜索 + """ + res = RequestUtils( headers={ "Content-Type": "application/json", @@ -89,52 +136,43 @@ class YemaSpider: proxies=self._proxy, referer=f"{self._domain}", timeout=self._timeout - ).post_res(url=self._searchurl, json=params) - torrents = [] + ).post_res(url=self._searchurl, json=self.__get_params(keyword, page)) if res and res.status_code == 200: results = res.json().get('data', []) or [] - for result in results: - category_value = result.get('categoryId') - if category_value in self._tv_category: - category = MediaType.TV.value - elif category_value in self._movie_category: - category = MediaType.MOVIE.value - else: - category = MediaType.UNKNOWN.value - pass - - torrentLabelIds = result.get('tagList', []) or [] - torrentLabels = [] - for labelId in torrentLabelIds: - if self._labels.get(labelId) is not None: - torrentLabels.append(self._labels.get(labelId)) - pass - pass - torrent = { - 'title': result.get('showName'), - 'description': result.get('shortDesc'), - 'enclosure': self.__get_download_url(result.get('id')), - # 使用上架时间,而不是用户发布时间,上架时间即其他用户可见时间 - 'pubdate': StringUtils.unify_datetime_str(result.get('listingTime')), - 'size': result.get('fileSize'), - 'seeders': result.get('seedNum'), - 'peers': result.get('leechNum'), - 'grabs': result.get('completedNum'), - 'downloadvolumefactor': self.__get_downloadvolumefactor(result.get('downloadPromotion')), - 'uploadvolumefactor': self.__get_uploadvolumefactor(result.get('uploadPromotion')), - 'freedate': StringUtils.unify_datetime_str(result.get('downloadPromotionEndTime')), - 'page_url': self._pageurl % (self._domain, result.get('id')), - 'labels': torrentLabels, - 'category': category - } - torrents.append(torrent) + return False, self.__parse_result(results) + elif res is not None: + logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") + return True, [] + else: + logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") + return True, [] + + async def async_search(self, keyword: str, + mtype: MediaType = None, page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + """ + 异步搜索 + """ + res = await AsyncRequestUtils( + headers={ + "Content-Type": "application/json", + "User-Agent": f"{self._ua}", + "Accept": "application/json, text/plain, */*" + }, + cookies=self._cookie, + proxies=self._proxy, + referer=f"{self._domain}", + timeout=self._timeout + ).post_res(url=self._searchurl, json=self.__get_params(keyword, page)) + + if res and res.status_code == 200: + results = res.json().get('data', []) or [] + return False, self.__parse_result(results) elif res is not None: logger.warn(f"{self._name} 搜索失败,错误码:{res.status_code}") return True, [] else: logger.warn(f"{self._name} 搜索失败,无法连接 {self._domain}") return True, [] - return False, torrents @staticmethod def __get_downloadvolumefactor(discount: str) -> float: