Files
MoviePilot/app/agent/tools/impl/send_voice_message.py
jxxghp e5f97cd299 feat(agent): add voice message support with TTS/STT for Telegram and WeChat
- Integrate voice message handling: detect and extract audio references from Telegram and WeChat messages, route to agent with voice reply preference.
- Add voice provider abstraction and OpenAI-based TTS/STT implementation.
- Implement agent tool `send_voice_message` for generating and sending voice replies, with fallback to text if voice is unavailable.
- Extend agent prompt and context to support voice reply instructions.
- Update notification and message schemas to support audio fields.
- Add Telegram and WeChat voice sending logic, including audio file conversion and temporary media upload for WeChat.
- Add tests for voice helper and agent voice routing.
2026-04-12 12:30:02 +08:00

97 lines
3.4 KiB
Python

"""发送语音消息工具。"""
import asyncio
from typing import Optional, Type
from pydantic import BaseModel, Field
from app.agent.tools.base import MoviePilotTool, ToolChain
from app.core.config import settings
from app.helper.voice import VoiceHelper
from app.helper.service import ServiceConfigHelper
from app.log import logger
from app.schemas import Notification, NotificationType
from app.schemas.types import MessageChannel
class SendVoiceMessageInput(BaseModel):
"""发送语音消息工具输入。"""
explanation: str = Field(
...,
description="Clear explanation of why a voice reply is the best fit in the current context",
)
message: str = Field(
...,
description="The spoken content to send back to the user",
)
class SendVoiceMessageTool(MoviePilotTool):
name: str = "send_voice_message"
description: str = (
"Send a voice reply to the current user. Prefer this when the user sent a voice message "
"or when spoken playback is more natural. On channels without voice support or when TTS "
"is unavailable, it automatically falls back to sending the same content as plain text."
)
args_schema: Type[BaseModel] = SendVoiceMessageInput
require_admin: bool = False
def get_tool_message(self, **kwargs) -> Optional[str]:
message = kwargs.get("message") or ""
if len(message) > 40:
message = message[:40] + "..."
return f"正在发送语音回复: {message}"
def _supports_real_voice_reply(self) -> bool:
channel = self._channel or ""
if channel == MessageChannel.Telegram.value:
return True
if channel != MessageChannel.Wechat.value:
return False
for config in ServiceConfigHelper.get_notification_configs():
if config.name != self._source:
continue
return (config.config or {}).get("WECHAT_MODE", "app") != "bot"
return False
async def run(self, message: str, **kwargs) -> str:
if not message:
return "语音回复内容不能为空"
voice_path = None
used_voice = False
channel = self._channel or ""
if self._supports_real_voice_reply() and VoiceHelper.is_available("tts"):
voice_file = await asyncio.to_thread(VoiceHelper.synthesize_speech, message)
if voice_file:
voice_path = str(voice_file)
used_voice = True
logger.info(
"执行工具: %s, channel=%s, use_voice=%s, text_len=%s",
self.name,
channel,
used_voice,
len(message),
)
await ToolChain().async_post_message(
Notification(
channel=self._channel,
source=self._source,
mtype=NotificationType.Agent,
userid=self._user_id,
username=self._username,
text=message,
voice_path=voice_path,
voice_caption=message if settings.AI_VOICE_REPLY_WITH_TEXT else None,
)
)
self._agent_context["user_reply_sent"] = True
self._agent_context["reply_mode"] = "voice" if used_voice else "text_fallback"
if used_voice:
return "语音回复已发送"
return "当前未使用语音通道,已自动回退为文字回复"