Files
MoviePilot/app/agent/middleware/jobs.py
jxxghp 94c75eb1c7 feat: 智能体增加定时任务(Jobs)管理和心跳唤醒机制
- 新增 JobsMiddleware 中间件,支持通过 JOB.md 文件管理长期/重复性任务
- 智能体可创建一次性(once)和重复性(recurring)任务,自动跟踪执行状态
- 新增心跳唤醒机制,定时调度器周期性唤醒智能体检查并执行待处理任务
- 新增 AI_AGENT_JOB_INTERVAL 配置项控制检查间隔,默认24小时
- 每次心跳使用独立会话,执行完毕后清理资源
2026-03-25 13:02:20 +08:00

351 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
from collections.abc import Awaitable, Callable
from typing import Annotated, NotRequired, TypedDict
import yaml # noqa
from anyio import Path as AsyncPath
from langchain.agents.middleware.types import (
AgentMiddleware,
AgentState,
ContextT,
ModelRequest,
ModelResponse,
PrivateStateAttr, # noqa
ResponseT,
)
from langchain_core.runnables import RunnableConfig
from langgraph.runtime import Runtime
from app.agent.middleware.utils import append_to_system_message
from app.log import logger
# JOB.md 文件最大限制为 1MB
MAX_JOB_FILE_SIZE = 1 * 1024 * 1024
class JobMetadata(TypedDict):
"""Job 元数据。"""
path: str
"""JOB.md 文件路径。"""
id: str
"""Job 标识符(目录名)。"""
name: str
"""Job 名称。"""
description: str
"""Job 描述。"""
schedule: str
"""调度类型: once一次性/ recurring重复性"""
status: str
"""当前状态: pending / in_progress / completed / cancelled。"""
last_run: str | None
"""上次执行时间。"""
class JobsState(AgentState):
"""jobs 中间件状态。"""
jobs_metadata: NotRequired[Annotated[list[JobMetadata], PrivateStateAttr]]
"""已加载的 job 元数据列表,不传播给父 agent。"""
class JobsStateUpdate(TypedDict):
"""jobs 中间件状态更新项。"""
jobs_metadata: list[JobMetadata]
"""待合并的 job 元数据列表。"""
def _parse_job_metadata(
content: str,
job_path: str,
job_id: str,
) -> JobMetadata | None:
"""从 JOB.md 内容中解析 YAML 前言并验证元数据。"""
if len(content) > MAX_JOB_FILE_SIZE:
logger.warning(
"Skipping %s: content too large (%d bytes)", job_path, len(content)
)
return None
# 匹配 --- 分隔的 YAML 前言
frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n"
match = re.match(frontmatter_pattern, content, re.DOTALL)
if not match:
logger.warning("Skipping %s: no valid YAML frontmatter found", job_path)
return None
frontmatter_str = match.group(1)
# 解析 YAML
try:
frontmatter_data = yaml.safe_load(frontmatter_str)
except yaml.YAMLError as e:
logger.warning("Invalid YAML in %s: %s", job_path, e)
return None
if not isinstance(frontmatter_data, dict):
logger.warning("Skipping %s: frontmatter is not a mapping", job_path)
return None
# Job 名称和描述
name = str(frontmatter_data.get("name", "")).strip()
description = str(frontmatter_data.get("description", "")).strip()
if not name:
logger.warning("Skipping %s: missing required 'name'", job_path)
return None
# 调度类型
schedule = str(frontmatter_data.get("schedule", "once")).strip().lower()
if schedule not in ("once", "recurring"):
schedule = "once"
# 状态
status = str(frontmatter_data.get("status", "pending")).strip().lower()
if status not in ("pending", "in_progress", "completed", "cancelled"):
status = "pending"
# 上次执行时间
last_run = str(frontmatter_data.get("last_run", "")).strip() or None
return JobMetadata(
id=job_id,
name=name,
description=description,
path=job_path,
schedule=schedule,
status=status,
last_run=last_run,
)
async def _alist_jobs(source_path: AsyncPath) -> list[JobMetadata]:
"""异步列出指定路径下的所有任务。
扫描包含 JOB.md 的目录并解析其元数据。
"""
jobs: list[JobMetadata] = []
if not await source_path.exists():
return []
# 查找所有任务目录(包含 JOB.md 的目录)
job_dirs: list[AsyncPath] = []
async for path in source_path.iterdir():
if await path.is_dir() and await (path / "JOB.md").is_file():
job_dirs.append(path)
if not job_dirs:
return []
# 解析 JOB.md
for job_path in job_dirs:
job_md_path = job_path / "JOB.md"
job_content = await job_md_path.read_text(encoding="utf-8")
# 解析元数据
job_metadata = _parse_job_metadata(
content=job_content,
job_path=str(job_md_path),
job_id=job_path.name,
)
if job_metadata:
jobs.append(job_metadata)
return jobs
JOBS_SYSTEM_PROMPT = """
<jobs_system>
You have a **scheduled jobs** system that allows you to track and execute long-running or recurring tasks.
**Jobs Location:** `{jobs_location}`
**Current Jobs:**
{jobs_list}
**Job File Format:**
Each job is a directory containing a `JOB.md` file with YAML frontmatter followed by task details:
```markdown
---
name: 任务名称(简短中文描述)
description: 任务的详细描述,说明要做什么
schedule: once 或 recurring
status: pending / in_progress / completed / cancelled
last_run: "YYYY-MM-DD HH:MM"(上次执行时间,可选)
---
# 任务详情
## 目标
详细描述这个任务要完成的目标。
## 执行日志
记录每次执行的情况和结果。
- **2024-01-15 10:00** - 执行了XXX操作结果成功/失败
- **2024-01-16 10:00** - 继续执行XXX...
```
**Job Lifecycle Rules:**
1. **Creating a Job**: When a user asks you to do something periodically or at a later time:
- Create a new directory under the jobs location, directory name is the `job-id` (lowercase, hyphens, 1-64 chars)
- Write a `JOB.md` file with proper frontmatter and detailed task description
- Set `schedule: once` for one-time tasks, `schedule: recurring` for repeating tasks (e.g., daily sign-in, weekly checks)
- Set initial `status: pending`
2. **Executing a Job**: When you work on a job:
- Update `status: in_progress` in the frontmatter
- Execute the required actions using your tools
- Log the execution result in the "执行日志" section with timestamp
- Update `last_run` in frontmatter to current time
3. **Completing a Job**:
- For `schedule: once` tasks: set `status: completed` after successful execution
- For `schedule: recurring` tasks: keep `status: pending` after execution, only update `last_run` time. The job stays active for the next scheduled run.
- Set `status: cancelled` if the user explicitly asks to cancel/stop a task
4. **Heartbeat Check**: You will be periodically woken up to check pending jobs. When woken up:
- Read the jobs directory to find all active jobs (status: pending or in_progress)
- Skip jobs with `status: completed` or `status: cancelled`
- For `schedule: recurring` jobs, check `last_run` to determine if it's time to run again
- Execute pending jobs and update their status/logs accordingly
**Important Notes:**
- Each job MUST have its own separate directory and JOB.md file to avoid conflicts
- Always update the frontmatter fields (status, last_run) when executing a job
- Keep execution logs concise but informative
- For recurring jobs, maintain a rolling log (keep recent entries, you can summarize/remove old entries to keep the file manageable)
- When creating jobs, make the description detailed enough that you can understand and execute the task in future sessions without additional context
**When to Create Jobs:**
- User says "每天帮我..." / "定期..." / "定时..." / "提醒我..." / "以后每次..."
- User requests a task that should be done repeatedly
- User asks for monitoring or periodic checking of something
**When NOT to Create Jobs:**
- User asks for an immediate one-time action (just do it now)
- Simple questions or conversations
- Tasks that are already handled by MoviePilot's built-in scheduler services
</jobs_system>
"""
class JobsMiddleware(AgentMiddleware[JobsState, ContextT, ResponseT]): # noqa
"""加载并向系统提示词注入 Agent Jobs 的中间件。
扫描 jobs 目录下的 JOB.md 文件,解析元数据并注入到系统提示词中,
使智能体了解当前的长期任务及其状态。
"""
state_schema = JobsState
def __init__(self, *, sources: list[str]) -> None:
"""初始化 Jobs 中间件。"""
self.sources = sources
self.system_prompt_template = JOBS_SYSTEM_PROMPT
@staticmethod
def _format_jobs_list(jobs: list[JobMetadata]) -> str:
"""格式化任务元数据列表用于系统提示词。"""
if not jobs:
return "(No active jobs. You can create jobs when users request periodic or scheduled tasks.)"
lines = []
for job in jobs:
status_emoji = {
"pending": "",
"in_progress": "🔄",
"completed": "",
"cancelled": "",
}.get(job["status"], "")
schedule_label = (
"recurring (重复)"
if job["schedule"] == "recurring"
else "once (一次性)"
)
desc_line = (
f"- {status_emoji} **{job['id']}**: {job['name']}"
f" [{schedule_label}] - {job['description']}"
)
if job.get("last_run"):
desc_line += f" (上次执行: {job['last_run']})"
lines.append(desc_line)
lines.append(f" -> Read `{job['path']}` for full details")
return "\n".join(lines)
def modify_request(self, request: ModelRequest[ContextT]) -> ModelRequest[ContextT]:
"""将任务文档注入模型请求的系统消息中。"""
jobs_metadata = request.state.get("jobs_metadata", []) # noqa
# 过滤只展示活跃任务pending / in_progress / recurring
active_jobs = [
j
for j in jobs_metadata
if j["status"] in ("pending", "in_progress")
or (j["schedule"] == "recurring" and j["status"] not in ("cancelled",))
]
jobs_list = self._format_jobs_list(active_jobs)
jobs_location = self.sources[0] if self.sources else ""
jobs_section = self.system_prompt_template.format(
jobs_location=jobs_location,
jobs_list=jobs_list,
)
new_system_message = append_to_system_message(
request.system_message, jobs_section
)
return request.override(system_message=new_system_message)
async def abefore_agent( # noqa
self, state: JobsState, runtime: Runtime, config: RunnableConfig
) -> JobsStateUpdate | None:
"""在 Agent 执行前异步加载任务元数据。
每个会话仅加载一次。若 state 中已有则跳过。
"""
# 如果 state 中已存在元数据则跳过
if "jobs_metadata" in state:
return None
all_jobs: list[JobMetadata] = []
# 遍历源加载任务
for source_path_str in self.sources:
source_path = AsyncPath(source_path_str)
if not await source_path.exists():
await source_path.mkdir(parents=True, exist_ok=True)
continue
source_jobs = await _alist_jobs(source_path)
all_jobs.extend(source_jobs)
return JobsStateUpdate(jobs_metadata=all_jobs)
async def awrap_model_call(
self,
request: ModelRequest[ContextT],
handler: Callable[
[ModelRequest[ContextT]], Awaitable[ModelResponse[ResponseT]]
],
) -> ModelResponse[ResponseT]:
"""在模型调用时注入任务文档。"""
modified_request = self.modify_request(request)
return await handler(modified_request)
__all__ = ["JobMetadata", "JobsMiddleware"]