fix: bind browser sessions to dedicated worker threads

This commit is contained in:
jxxghp
2026-06-22 22:18:12 +08:00
parent 5c649ff1d1
commit dc773337d3
2 changed files with 314 additions and 32 deletions

View File

@@ -2,6 +2,7 @@ import ipaddress
import threading
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from typing import Any, Callable, Optional, Protocol
from urllib.parse import urlparse
@@ -144,8 +145,11 @@ class BrowserSessionHelper:
PRIVATE_HOST_SUFFIXES = (".localhost", ".local", ".lan", ".home", ".internal")
PRIVATE_HOSTNAMES = {"localhost", "ip6-localhost", "ip6-loopback"}
REF_ATTRIBUTE = "data-moviepilot-agent-ref"
SESSION_WORKER_NAME_PREFIX = "browser-session"
_sessions: dict[str, _BrowserSessionState] = {}
_session_executors: dict[str, ThreadPoolExecutor] = {}
_session_thread_ids: dict[str, int] = {}
_sessions_lock = threading.RLock()
def __init__(self, headless: bool = True, viewport: Optional[dict[str, int]] = None):
@@ -211,7 +215,9 @@ class BrowserSessionHelper:
关闭所有 Agent 浏览器会话。
"""
with cls._sessions_lock:
session_keys = list(cls._sessions.keys())
session_keys = list(
set(cls._sessions.keys()) | set(cls._session_executors.keys())
)
for session_key in session_keys:
cls.close_session(session_key)
@@ -223,12 +229,27 @@ class BrowserSessionHelper:
:param session_key: 会话标识
:return: 找到并关闭会话时返回 True
"""
with cls._sessions_lock:
session = cls._sessions.pop(session_key, None)
if not session:
return False
cls._close_session_state(session)
return True
if cls._is_current_session_thread(session_key):
closed = cls._close_session_in_thread(session_key)
cls._shutdown_session_executor(session_key, wait=False)
return closed
executor = cls._get_existing_session_executor(session_key)
if executor:
future = executor.submit(
cls._run_session_task,
session_key,
cls._close_session_in_thread,
session_key,
)
try:
return future.result()
finally:
cls._shutdown_session_executor(session_key)
closed = cls._close_session_in_thread(session_key)
cls._shutdown_session_executor(session_key)
return closed
def with_session(
self,
@@ -249,6 +270,34 @@ class BrowserSessionHelper:
:return: 回调函数返回值
"""
self._prune_sessions()
return self._run_in_session_thread(
session_key,
self._with_session_in_thread,
session_key,
callback,
user_agent=user_agent,
cookies=cookies,
timeout=timeout,
)
def _with_session_in_thread(
self,
session_key: str,
callback: Callable[[_BrowserSessionState], Any],
user_agent: Optional[str] = None,
cookies: Optional[str] = None,
timeout: Optional[int] = 30,
) -> Any:
"""
在会话专属线程内获取浏览器会话并执行回调。
:param session_key: 会话标识
:param callback: 使用浏览器会话执行操作的回调函数
:param user_agent: 新建会话时使用的 User-Agent
:param cookies: 本次操作要注入的 Cookie 请求头
:param timeout: 默认操作超时时间,单位秒
:return: 回调函数返回值
"""
session = self._get_or_create_session(
session_key=session_key,
user_agent=user_agent,
@@ -263,6 +312,164 @@ class BrowserSessionHelper:
session.active_page.set_extra_http_headers({"cookie": cookies})
return callback(session)
@classmethod
def _run_in_session_thread(
cls,
session_key: str,
callback: Callable[..., Any],
*args: Any,
**kwargs: Any,
) -> Any:
"""
将浏览器同步 API 调用投递到当前会话固定的单线程执行器。
:param session_key: 会话标识
:param callback: 需要在会话线程中运行的回调
:param args: 回调位置参数
:param kwargs: 回调关键字参数
:return: 回调返回值
"""
if cls._is_current_session_thread(session_key):
return callback(*args, **kwargs)
for _ in range(2):
executor = cls._get_session_executor(session_key)
try:
future = executor.submit(
cls._run_session_task,
session_key,
callback,
*args,
**kwargs,
)
except RuntimeError:
cls._discard_session_executor(session_key, executor)
continue
try:
return future.result()
except Exception:
with cls._sessions_lock:
has_session = session_key in cls._sessions
if not has_session:
cls._shutdown_session_executor(session_key)
raise
raise RuntimeError("浏览器会话线程已关闭")
@classmethod
def _run_session_task(
cls,
session_key: str,
callback: Callable[..., Any],
*args: Any,
**kwargs: Any,
) -> Any:
"""
记录当前会话工作线程后执行实际任务。
:param session_key: 会话标识
:param callback: 需要执行的回调
:param args: 回调位置参数
:param kwargs: 回调关键字参数
:return: 回调返回值
"""
with cls._sessions_lock:
cls._session_thread_ids[session_key] = threading.get_ident()
return callback(*args, **kwargs)
@classmethod
def _is_current_session_thread(cls, session_key: str) -> bool:
"""
判断当前代码是否已运行在指定会话的固定线程内。
:param session_key: 会话标识
:return: 当前线程是会话工作线程时返回 True
"""
with cls._sessions_lock:
thread_id = cls._session_thread_ids.get(session_key)
return thread_id == threading.get_ident()
@classmethod
def _get_session_executor(cls, session_key: str) -> ThreadPoolExecutor:
"""
获取或创建指定会话的单线程执行器。
:param session_key: 会话标识
:return: 会话专属执行器
"""
with cls._sessions_lock:
executor = cls._session_executors.get(session_key)
if executor:
return executor
executor = ThreadPoolExecutor(
max_workers=1,
thread_name_prefix=cls.SESSION_WORKER_NAME_PREFIX,
)
cls._session_executors[session_key] = executor
return executor
@classmethod
def _get_existing_session_executor(
cls, session_key: str
) -> Optional[ThreadPoolExecutor]:
"""
获取指定会话已存在的执行器。
:param session_key: 会话标识
:return: 已存在的执行器,不存在时返回 None
"""
with cls._sessions_lock:
return cls._session_executors.get(session_key)
@classmethod
def _discard_session_executor(
cls,
session_key: str,
executor: ThreadPoolExecutor,
) -> None:
"""
丢弃已经关闭的会话执行器。
:param session_key: 会话标识
:param executor: 需要从缓存中移除的执行器
"""
with cls._sessions_lock:
if cls._session_executors.get(session_key) is executor:
cls._session_executors.pop(session_key, None)
@classmethod
def _shutdown_session_executor(
cls,
session_key: str,
wait: bool = True,
) -> None:
"""
关闭并移除指定会话的执行器。
:param session_key: 会话标识
:param wait: 是否等待工作线程退出
"""
with cls._sessions_lock:
executor = cls._session_executors.pop(session_key, None)
cls._session_thread_ids.pop(session_key, None)
if executor:
executor.shutdown(wait=wait, cancel_futures=True)
@classmethod
def _close_session_in_thread(cls, session_key: str) -> bool:
"""
在会话固定线程内关闭并移除浏览器会话。
:param session_key: 会话标识
:return: 找到并关闭会话时返回 True
"""
with cls._sessions_lock:
session = cls._sessions.pop(session_key, None)
if not session:
return False
cls._close_session_state(session)
return True
def open_tab(
self,
session: _BrowserSessionState,
@@ -479,24 +686,29 @@ class BrowserSessionHelper:
if session:
return session
context = self._launch_context(
headless=self.headless,
user_agent=user_agent,
viewport=self.viewport,
)
page = context.new_page()
if cookies:
page.set_extra_http_headers({"cookie": cookies})
session = _BrowserSessionState(
session_key=session_key,
context=context,
pages=[page],
user_agent=user_agent,
cookies=cookies,
)
context = self._launch_context(
headless=self.headless,
user_agent=user_agent,
viewport=self.viewport,
)
page = context.new_page()
if cookies:
page.set_extra_http_headers({"cookie": cookies})
session = _BrowserSessionState(
session_key=session_key,
context=context,
pages=[page],
user_agent=user_agent,
cookies=cookies,
)
with self._sessions_lock:
existing_session = self._sessions.get(session_key)
if existing_session:
self._close_session_state(session)
return existing_session
self._sessions[session_key] = session
self._enforce_session_limit()
return session
self._enforce_session_limit(protect_session_key=session_key)
return session
@classmethod
def _prune_sessions(cls) -> None:
@@ -511,14 +723,29 @@ class BrowserSessionHelper:
cls.close_session(session_key)
@classmethod
def _enforce_session_limit(cls) -> None:
while len(cls._sessions) > cls.MAX_SESSIONS:
oldest_key = min(
cls._sessions,
key=lambda key: cls._sessions[key].last_used_at,
)
session = cls._sessions.pop(oldest_key)
cls._close_session_state(session)
def _enforce_session_limit(cls, protect_session_key: Optional[str] = None) -> None:
"""
清理超过数量上限的旧会话。
:param protect_session_key: 本次刚创建、需要优先保留的会话标识
"""
while True:
with cls._sessions_lock:
if len(cls._sessions) <= cls.MAX_SESSIONS:
return
candidate_keys = [
session_key
for session_key in cls._sessions
if session_key != protect_session_key
]
if not candidate_keys:
return
oldest_key = min(
candidate_keys,
key=lambda key: cls._sessions[key].last_used_at,
)
if not cls.close_session(oldest_key):
return
@staticmethod
def _close_session_state(session: _BrowserSessionState) -> None:

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import json
import threading
from concurrent.futures import ThreadPoolExecutor
from typing import Optional
from unittest.mock import patch
@@ -45,6 +47,7 @@ class _FakePage:
self.clicks = []
self.fills = []
self.selects = []
self.close_thread_id = None
def set_extra_http_headers(self, headers: dict[str, str]) -> None:
"""记录额外请求头。"""
@@ -124,6 +127,7 @@ class _FakePage:
def close(self) -> None:
"""记录页面关闭状态。"""
self.close_thread_id = threading.get_ident()
self.closed = True
@@ -133,6 +137,7 @@ class _FakeContext:
def __init__(self, pages: Optional[list[_FakePage]] = None) -> None:
self.pages = pages or [_FakePage()]
self.closed = False
self.close_thread_id = None
def new_page(self) -> _FakePage:
"""返回或创建模拟页面。"""
@@ -146,6 +151,7 @@ class _FakeContext:
def close(self) -> None:
"""记录上下文关闭状态。"""
self.close_thread_id = threading.get_ident()
self.closed = True
@@ -250,6 +256,55 @@ def test_browser_session_helper_reuses_page_within_session():
assert not context.closed
def test_browser_session_helper_runs_same_session_on_one_worker_thread():
"""同一 session_key 的浏览器操作应固定在同一个工作线程。"""
page = _FakePage()
context = _FakeContext([page])
helper = BrowserSessionHelper()
caller_thread_ids = set()
session_thread_ids = []
barrier = threading.Barrier(2)
def _run_from_caller_thread() -> int:
"""从外部调用线程进入同一个浏览器会话。"""
caller_thread_ids.add(threading.get_ident())
barrier.wait(timeout=1)
return helper.with_session(
"session-1",
lambda _session: threading.get_ident(),
)
with patch.object(BrowserSessionHelper, "_launch_context", return_value=context):
with ThreadPoolExecutor(max_workers=2) as executor:
futures = [
executor.submit(_run_from_caller_thread),
executor.submit(_run_from_caller_thread),
]
session_thread_ids = [future.result(timeout=1) for future in futures]
assert len(caller_thread_ids) == 2
assert len(set(session_thread_ids)) == 1
assert session_thread_ids[0] not in caller_thread_ids
def test_browser_session_helper_closes_session_on_worker_thread():
"""关闭会话时应在创建浏览器对象的工作线程内释放资源。"""
page = _FakePage()
context = _FakeContext([page])
helper = BrowserSessionHelper()
with patch.object(BrowserSessionHelper, "_launch_context", return_value=context):
session_thread_id = helper.with_session(
"session-1",
lambda _session: threading.get_ident(),
)
closed = BrowserSessionHelper.close_session("session-1")
assert closed is True
assert page.close_thread_id == session_thread_id
assert context.close_thread_id == session_thread_id
def test_browse_webpage_returns_snapshot_with_refs_after_goto():
"""goto 后应返回包含可交互元素 ref 的页面快照。"""
page = _FakePage()