mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-03 00:03:50 +08:00
- 新增 JobsMiddleware 中间件,支持通过 JOB.md 文件管理长期/重复性任务 - 智能体可创建一次性(once)和重复性(recurring)任务,自动跟踪执行状态 - 新增心跳唤醒机制,定时调度器周期性唤醒智能体检查并执行待处理任务 - 新增 AI_AGENT_JOB_INTERVAL 配置项控制检查间隔,默认24小时 - 每次心跳使用独立会话,执行完毕后清理资源
351 lines
11 KiB
Python
351 lines
11 KiB
Python
import re
|
||
from collections.abc import Awaitable, Callable
|
||
from typing import Annotated, NotRequired, TypedDict
|
||
|
||
import yaml # noqa
|
||
from anyio import Path as AsyncPath
|
||
from langchain.agents.middleware.types import (
|
||
AgentMiddleware,
|
||
AgentState,
|
||
ContextT,
|
||
ModelRequest,
|
||
ModelResponse,
|
||
PrivateStateAttr, # noqa
|
||
ResponseT,
|
||
)
|
||
from langchain_core.runnables import RunnableConfig
|
||
from langgraph.runtime import Runtime
|
||
|
||
from app.agent.middleware.utils import append_to_system_message
|
||
from app.log import logger
|
||
|
||
# JOB.md 文件最大限制为 1MB
|
||
MAX_JOB_FILE_SIZE = 1 * 1024 * 1024
|
||
|
||
|
||
class JobMetadata(TypedDict):
|
||
"""Job 元数据。"""
|
||
|
||
path: str
|
||
"""JOB.md 文件路径。"""
|
||
|
||
id: str
|
||
"""Job 标识符(目录名)。"""
|
||
|
||
name: str
|
||
"""Job 名称。"""
|
||
|
||
description: str
|
||
"""Job 描述。"""
|
||
|
||
schedule: str
|
||
"""调度类型: once(一次性)/ recurring(重复性)。"""
|
||
|
||
status: str
|
||
"""当前状态: pending / in_progress / completed / cancelled。"""
|
||
|
||
last_run: str | None
|
||
"""上次执行时间。"""
|
||
|
||
|
||
class JobsState(AgentState):
|
||
"""jobs 中间件状态。"""
|
||
|
||
jobs_metadata: NotRequired[Annotated[list[JobMetadata], PrivateStateAttr]]
|
||
"""已加载的 job 元数据列表,不传播给父 agent。"""
|
||
|
||
|
||
class JobsStateUpdate(TypedDict):
|
||
"""jobs 中间件状态更新项。"""
|
||
|
||
jobs_metadata: list[JobMetadata]
|
||
"""待合并的 job 元数据列表。"""
|
||
|
||
|
||
def _parse_job_metadata(
|
||
content: str,
|
||
job_path: str,
|
||
job_id: str,
|
||
) -> JobMetadata | None:
|
||
"""从 JOB.md 内容中解析 YAML 前言并验证元数据。"""
|
||
if len(content) > MAX_JOB_FILE_SIZE:
|
||
logger.warning(
|
||
"Skipping %s: content too large (%d bytes)", job_path, len(content)
|
||
)
|
||
return None
|
||
|
||
# 匹配 --- 分隔的 YAML 前言
|
||
frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n"
|
||
match = re.match(frontmatter_pattern, content, re.DOTALL)
|
||
if not match:
|
||
logger.warning("Skipping %s: no valid YAML frontmatter found", job_path)
|
||
return None
|
||
frontmatter_str = match.group(1)
|
||
|
||
# 解析 YAML
|
||
try:
|
||
frontmatter_data = yaml.safe_load(frontmatter_str)
|
||
except yaml.YAMLError as e:
|
||
logger.warning("Invalid YAML in %s: %s", job_path, e)
|
||
return None
|
||
|
||
if not isinstance(frontmatter_data, dict):
|
||
logger.warning("Skipping %s: frontmatter is not a mapping", job_path)
|
||
return None
|
||
|
||
# Job 名称和描述
|
||
name = str(frontmatter_data.get("name", "")).strip()
|
||
description = str(frontmatter_data.get("description", "")).strip()
|
||
if not name:
|
||
logger.warning("Skipping %s: missing required 'name'", job_path)
|
||
return None
|
||
|
||
# 调度类型
|
||
schedule = str(frontmatter_data.get("schedule", "once")).strip().lower()
|
||
if schedule not in ("once", "recurring"):
|
||
schedule = "once"
|
||
|
||
# 状态
|
||
status = str(frontmatter_data.get("status", "pending")).strip().lower()
|
||
if status not in ("pending", "in_progress", "completed", "cancelled"):
|
||
status = "pending"
|
||
|
||
# 上次执行时间
|
||
last_run = str(frontmatter_data.get("last_run", "")).strip() or None
|
||
|
||
return JobMetadata(
|
||
id=job_id,
|
||
name=name,
|
||
description=description,
|
||
path=job_path,
|
||
schedule=schedule,
|
||
status=status,
|
||
last_run=last_run,
|
||
)
|
||
|
||
|
||
async def _alist_jobs(source_path: AsyncPath) -> list[JobMetadata]:
|
||
"""异步列出指定路径下的所有任务。
|
||
|
||
扫描包含 JOB.md 的目录并解析其元数据。
|
||
"""
|
||
jobs: list[JobMetadata] = []
|
||
|
||
if not await source_path.exists():
|
||
return []
|
||
|
||
# 查找所有任务目录(包含 JOB.md 的目录)
|
||
job_dirs: list[AsyncPath] = []
|
||
async for path in source_path.iterdir():
|
||
if await path.is_dir() and await (path / "JOB.md").is_file():
|
||
job_dirs.append(path)
|
||
|
||
if not job_dirs:
|
||
return []
|
||
|
||
# 解析 JOB.md
|
||
for job_path in job_dirs:
|
||
job_md_path = job_path / "JOB.md"
|
||
|
||
job_content = await job_md_path.read_text(encoding="utf-8")
|
||
|
||
# 解析元数据
|
||
job_metadata = _parse_job_metadata(
|
||
content=job_content,
|
||
job_path=str(job_md_path),
|
||
job_id=job_path.name,
|
||
)
|
||
if job_metadata:
|
||
jobs.append(job_metadata)
|
||
|
||
return jobs
|
||
|
||
|
||
JOBS_SYSTEM_PROMPT = """
|
||
<jobs_system>
|
||
You have a **scheduled jobs** system that allows you to track and execute long-running or recurring tasks.
|
||
|
||
**Jobs Location:** `{jobs_location}`
|
||
|
||
**Current Jobs:**
|
||
|
||
{jobs_list}
|
||
|
||
**Job File Format:**
|
||
|
||
Each job is a directory containing a `JOB.md` file with YAML frontmatter followed by task details:
|
||
|
||
```markdown
|
||
---
|
||
name: 任务名称(简短中文描述)
|
||
description: 任务的详细描述,说明要做什么
|
||
schedule: once 或 recurring
|
||
status: pending / in_progress / completed / cancelled
|
||
last_run: "YYYY-MM-DD HH:MM"(上次执行时间,可选)
|
||
---
|
||
# 任务详情
|
||
|
||
## 目标
|
||
详细描述这个任务要完成的目标。
|
||
|
||
## 执行日志
|
||
记录每次执行的情况和结果。
|
||
|
||
- **2024-01-15 10:00** - 执行了XXX操作,结果:成功/失败
|
||
- **2024-01-16 10:00** - 继续执行XXX...
|
||
```
|
||
|
||
**Job Lifecycle Rules:**
|
||
|
||
1. **Creating a Job**: When a user asks you to do something periodically or at a later time:
|
||
- Create a new directory under the jobs location, directory name is the `job-id` (lowercase, hyphens, 1-64 chars)
|
||
- Write a `JOB.md` file with proper frontmatter and detailed task description
|
||
- Set `schedule: once` for one-time tasks, `schedule: recurring` for repeating tasks (e.g., daily sign-in, weekly checks)
|
||
- Set initial `status: pending`
|
||
|
||
2. **Executing a Job**: When you work on a job:
|
||
- Update `status: in_progress` in the frontmatter
|
||
- Execute the required actions using your tools
|
||
- Log the execution result in the "执行日志" section with timestamp
|
||
- Update `last_run` in frontmatter to current time
|
||
|
||
3. **Completing a Job**:
|
||
- For `schedule: once` tasks: set `status: completed` after successful execution
|
||
- For `schedule: recurring` tasks: keep `status: pending` after execution, only update `last_run` time. The job stays active for the next scheduled run.
|
||
- Set `status: cancelled` if the user explicitly asks to cancel/stop a task
|
||
|
||
4. **Heartbeat Check**: You will be periodically woken up to check pending jobs. When woken up:
|
||
- Read the jobs directory to find all active jobs (status: pending or in_progress)
|
||
- Skip jobs with `status: completed` or `status: cancelled`
|
||
- For `schedule: recurring` jobs, check `last_run` to determine if it's time to run again
|
||
- Execute pending jobs and update their status/logs accordingly
|
||
|
||
**Important Notes:**
|
||
- Each job MUST have its own separate directory and JOB.md file to avoid conflicts
|
||
- Always update the frontmatter fields (status, last_run) when executing a job
|
||
- Keep execution logs concise but informative
|
||
- For recurring jobs, maintain a rolling log (keep recent entries, you can summarize/remove old entries to keep the file manageable)
|
||
- When creating jobs, make the description detailed enough that you can understand and execute the task in future sessions without additional context
|
||
|
||
**When to Create Jobs:**
|
||
- User says "每天帮我..." / "定期..." / "定时..." / "提醒我..." / "以后每次..."
|
||
- User requests a task that should be done repeatedly
|
||
- User asks for monitoring or periodic checking of something
|
||
|
||
**When NOT to Create Jobs:**
|
||
- User asks for an immediate one-time action (just do it now)
|
||
- Simple questions or conversations
|
||
- Tasks that are already handled by MoviePilot's built-in scheduler services
|
||
</jobs_system>
|
||
"""
|
||
|
||
|
||
class JobsMiddleware(AgentMiddleware[JobsState, ContextT, ResponseT]): # noqa
|
||
"""加载并向系统提示词注入 Agent Jobs 的中间件。
|
||
|
||
扫描 jobs 目录下的 JOB.md 文件,解析元数据并注入到系统提示词中,
|
||
使智能体了解当前的长期任务及其状态。
|
||
"""
|
||
|
||
state_schema = JobsState
|
||
|
||
def __init__(self, *, sources: list[str]) -> None:
|
||
"""初始化 Jobs 中间件。"""
|
||
self.sources = sources
|
||
self.system_prompt_template = JOBS_SYSTEM_PROMPT
|
||
|
||
@staticmethod
|
||
def _format_jobs_list(jobs: list[JobMetadata]) -> str:
|
||
"""格式化任务元数据列表用于系统提示词。"""
|
||
if not jobs:
|
||
return "(No active jobs. You can create jobs when users request periodic or scheduled tasks.)"
|
||
|
||
lines = []
|
||
for job in jobs:
|
||
status_emoji = {
|
||
"pending": "⏳",
|
||
"in_progress": "🔄",
|
||
"completed": "✅",
|
||
"cancelled": "❌",
|
||
}.get(job["status"], "❓")
|
||
|
||
schedule_label = (
|
||
"recurring (重复)"
|
||
if job["schedule"] == "recurring"
|
||
else "once (一次性)"
|
||
)
|
||
desc_line = (
|
||
f"- {status_emoji} **{job['id']}**: {job['name']}"
|
||
f" [{schedule_label}] - {job['description']}"
|
||
)
|
||
if job.get("last_run"):
|
||
desc_line += f" (上次执行: {job['last_run']})"
|
||
lines.append(desc_line)
|
||
lines.append(f" -> Read `{job['path']}` for full details")
|
||
|
||
return "\n".join(lines)
|
||
|
||
def modify_request(self, request: ModelRequest[ContextT]) -> ModelRequest[ContextT]:
|
||
"""将任务文档注入模型请求的系统消息中。"""
|
||
jobs_metadata = request.state.get("jobs_metadata", []) # noqa
|
||
|
||
# 过滤:只展示活跃任务(pending / in_progress / recurring)
|
||
active_jobs = [
|
||
j
|
||
for j in jobs_metadata
|
||
if j["status"] in ("pending", "in_progress")
|
||
or (j["schedule"] == "recurring" and j["status"] not in ("cancelled",))
|
||
]
|
||
|
||
jobs_list = self._format_jobs_list(active_jobs)
|
||
jobs_location = self.sources[0] if self.sources else ""
|
||
|
||
jobs_section = self.system_prompt_template.format(
|
||
jobs_location=jobs_location,
|
||
jobs_list=jobs_list,
|
||
)
|
||
|
||
new_system_message = append_to_system_message(
|
||
request.system_message, jobs_section
|
||
)
|
||
|
||
return request.override(system_message=new_system_message)
|
||
|
||
async def abefore_agent( # noqa
|
||
self, state: JobsState, runtime: Runtime, config: RunnableConfig
|
||
) -> JobsStateUpdate | None:
|
||
"""在 Agent 执行前异步加载任务元数据。
|
||
|
||
每个会话仅加载一次。若 state 中已有则跳过。
|
||
"""
|
||
# 如果 state 中已存在元数据则跳过
|
||
if "jobs_metadata" in state:
|
||
return None
|
||
|
||
all_jobs: list[JobMetadata] = []
|
||
|
||
# 遍历源加载任务
|
||
for source_path_str in self.sources:
|
||
source_path = AsyncPath(source_path_str)
|
||
if not await source_path.exists():
|
||
await source_path.mkdir(parents=True, exist_ok=True)
|
||
continue
|
||
source_jobs = await _alist_jobs(source_path)
|
||
all_jobs.extend(source_jobs)
|
||
|
||
return JobsStateUpdate(jobs_metadata=all_jobs)
|
||
|
||
async def awrap_model_call(
|
||
self,
|
||
request: ModelRequest[ContextT],
|
||
handler: Callable[
|
||
[ModelRequest[ContextT]], Awaitable[ModelResponse[ResponseT]]
|
||
],
|
||
) -> ModelResponse[ResponseT]:
|
||
"""在模型调用时注入任务文档。"""
|
||
modified_request = self.modify_request(request)
|
||
return await handler(modified_request)
|
||
|
||
|
||
__all__ = ["JobMetadata", "JobsMiddleware"]
|