feat:支持FlareSolverr

This commit is contained in:
jxxghp
2025-08-11 21:14:46 +08:00
parent 7e3e0e1178
commit 90f74d8d2b
4 changed files with 110 additions and 11 deletions

View File

@@ -697,7 +697,7 @@ class SiteChain(ChainBase):
username=username,
password=password,
two_step_code=two_step_code,
proxies=settings.PROXY_HOST if site_info.proxy else None
proxies=settings.PROXY_SERVER if site_info.proxy else None
)
if result:
cookie, ua, msg = result

View File

@@ -8,6 +8,7 @@ import sys
import threading
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type
from urllib.parse import urlparse
from dotenv import set_key
from pydantic import BaseModel, BaseSettings, validator, Field
@@ -319,6 +320,10 @@ class ConfigModel(BaseModel):
RCLONE_SNAPSHOT_CHECK_FOLDER_MODTIME = True
# 对OpenList进行快照对比时是否检查文件夹的修改时间
OPENLIST_SNAPSHOT_CHECK_FOLDER_MODTIME = True
# 仿真类型playwright 或 flaresolverr
BROWSER_EMULATION: str = "playwright"
# FlareSolverr 服务地址,例如 http://127.0.0.1:8191
FLARESOLVERR_URL: Optional[str] = None
class Settings(BaseSettings, ConfigModel, LogConfigModel):
@@ -615,9 +620,22 @@ class Settings(BaseSettings, ConfigModel, LogConfigModel):
@property
def PROXY_SERVER(self):
if self.PROXY_HOST:
return {
"server": self.PROXY_HOST
}
try:
parsed = urlparse(self.PROXY_HOST)
if not parsed.scheme:
return {"server": self.PROXY_HOST}
host = parsed.hostname or ""
port = f":{parsed.port}" if parsed.port else ""
server = f"{parsed.scheme}://{host}{port}"
proxy = {"server": server}
if parsed.username:
proxy["username"] = parsed.username
if parsed.password:
proxy["password"] = parsed.password
return proxy
except Exception as err:
logger.error(f"解析代理服务器地址 '{self.PROXY_HOST}' 时出错: {err}")
return {"server": self.PROXY_HOST}
return None
@property

View File

@@ -3,7 +3,9 @@ from typing import Callable, Any, Optional
from cf_clearance import sync_cf_retry, sync_stealth
from playwright.sync_api import sync_playwright, Page
from app.core.config import settings
from app.log import logger
from app.utils.http import RequestUtils, cookie_parse
class PlaywrightHelper:
@@ -19,6 +21,53 @@ class PlaywrightHelper:
page.goto(url)
return sync_cf_retry(page)[0]
@staticmethod
def __fs_cookie_str(cookies: list) -> str:
if not cookies:
return ""
return "; ".join([f"{c.get('name')}={c.get('value')}" for c in cookies if c and c.get('name') is not None])
@staticmethod
def __flaresolverr_request(url: str,
cookies: Optional[str] = None,
proxy_url: Optional[str] = None,
timeout: Optional[int] = 30) -> Optional[dict]:
"""
调用 FlareSolverr 解决 Cloudflare 并返回 solution 结果
参考: https://github.com/FlareSolverr/FlareSolverr
"""
if not settings.FLARESOLVERR_URL:
logger.warn("未配置 FLARESOLVERR_URL无法使用 FlareSolverr")
return None
payload = {
"cmd": "request.get",
"url": url,
"maxTimeout": max(10, int(timeout or 30)) * 1000,
}
# 将 cookies 以数组形式传递给 FlareSolverr
if cookies:
try:
payload["cookies"] = cookie_parse(cookies, array=True)
except Exception as e:
logger.debug(f"解析 cookies 失败,忽略: {str(e)}")
if proxy_url:
payload["proxy"] = {"url": proxy_url}
try:
fs_api = settings.FLARESOLVERR_URL.rstrip("/") + "/v1"
data = RequestUtils(content_type="application/json").post_json(url=fs_api, json=payload)
if not data:
logger.error("FlareSolverr 返回空响应")
return None
if data.get("status") != "ok":
logger.error(f"FlareSolverr 调用失败: {data.get('message')}")
return None
return data.get("solution")
except Exception as e:
logger.error(f"调用 FlareSolverr 失败: {str(e)}")
return None
def action(self, url: str,
callback: Callable,
cookies: Optional[str] = None,
@@ -43,15 +92,34 @@ class PlaywrightHelper:
context = None
page = None
try:
# 如果配置使用 FlareSolverr先通过其获取清除后的 cookies 与 UA
fs_cookie_header = None
fs_ua = None
if settings.BROWSER_EMULATION == "flaresolverr":
proxy_url = None
if proxies and isinstance(proxies, dict):
proxy_url = proxies.get("server")
proxy_url = proxy_url or settings.PROXY_HOST
solution = self.__flaresolverr_request(url=url, cookies=cookies,
proxy_url=proxy_url, timeout=timeout)
if solution:
fs_cookie_header = self.__fs_cookie_str(solution.get("cookies", []))
fs_ua = solution.get("userAgent")
browser = playwright[self.browser_type].launch(headless=headless)
context = browser.new_context(user_agent=ua, proxy=proxies)
context = browser.new_context(user_agent=fs_ua or ua, proxy=proxies)
page = context.new_page()
if cookies:
page.set_extra_http_headers({"cookie": cookies})
# 优先使用 FlareSolverr 返回,其次使用入参
merged_cookie = fs_cookie_header or cookies
if merged_cookie:
page.set_extra_http_headers({"cookie": merged_cookie})
if not self.__pass_cloudflare(url, page):
logger.warn("cloudflare challenge fail")
if settings.BROWSER_EMULATION == "playwright":
if not self.__pass_cloudflare(url, page):
logger.warn("cloudflare challenge fail")
else:
page.goto(url)
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
# 回调函数
@@ -87,6 +155,19 @@ class PlaywrightHelper:
:param timeout: 超时时间
"""
source = None
# 如果配置为 FlareSolverr则直接调用获取页面源码
if settings.BROWSER_EMULATION == "flaresolverr":
try:
proxy_url = None
if proxies and isinstance(proxies, dict):
proxy_url = proxies.get("server")
proxy_url = proxy_url or settings.PROXY_HOST
solution = self.__flaresolverr_request(url=url, cookies=cookies,
proxy_url=proxy_url, timeout=timeout)
if solution:
return solution.get("response")
except Exception as e:
logger.error(f"FlareSolverr 获取源码失败: {str(e)}")
try:
with sync_playwright() as playwright:
browser = None

View File

@@ -2,7 +2,7 @@ import re
import sys
from contextlib import contextmanager, asynccontextmanager
from pathlib import Path
from typing import Any, Optional, Union
from typing import Any, Optional, Tuple, Union
import chardet
import httpx
@@ -395,7 +395,7 @@ class RequestUtils:
return None
@staticmethod
def parse_cache_control(header: str) -> (str, int):
def parse_cache_control(header: str) -> Tuple[str, Optional[int]]:
"""
解析 Cache-Control 头,返回 cache_directive 和 max_age
:param header: Cache-Control 头部的字符串