mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-02-02 18:22:39 +08:00
fix search_web tool
This commit is contained in:
@@ -1,11 +1,10 @@
|
||||
"""搜索网络内容工具"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from typing import Optional, Type
|
||||
from typing import Optional, Type, List, Dict
|
||||
|
||||
from duckduckgo_search import DDGS
|
||||
import httpx
|
||||
from ddgs import DDGS
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.agent.tools.base import MoviePilotTool
|
||||
@@ -20,7 +19,8 @@ class SearchWebInput(BaseModel):
|
||||
"""搜索网络内容工具的输入参数模型"""
|
||||
explanation: str = Field(..., description="Clear explanation of why this tool is being used in the current context")
|
||||
query: str = Field(..., description="The search query string to search for on the web")
|
||||
max_results: Optional[int] = Field(5, description="Maximum number of search results to return (default: 5, max: 10)")
|
||||
max_results: Optional[int] = Field(5,
|
||||
description="Maximum number of search results to return (default: 5, max: 10)")
|
||||
|
||||
|
||||
class SearchWebTool(MoviePilotTool):
|
||||
@@ -37,149 +37,137 @@ class SearchWebTool(MoviePilotTool):
|
||||
async def run(self, query: str, max_results: Optional[int] = 5, **kwargs) -> str:
|
||||
"""
|
||||
执行网络搜索
|
||||
|
||||
Args:
|
||||
query: 搜索查询字符串
|
||||
max_results: 最大返回结果数(默认5,最大10)
|
||||
|
||||
Returns:
|
||||
格式化的搜索结果JSON字符串
|
||||
"""
|
||||
logger.info(f"执行工具: {self.name}, 参数: query={query}, max_results={max_results}")
|
||||
|
||||
try:
|
||||
# 限制最大结果数
|
||||
max_results = min(max(1, max_results or 5), 10)
|
||||
|
||||
# 使用 duckduckgo-search 库进行搜索
|
||||
search_results = await self._search_duckduckgo(query, max_results)
|
||||
|
||||
if not search_results:
|
||||
results = []
|
||||
|
||||
# 1. 优先使用 Tavily (如果配置了 API Key)
|
||||
if settings.TAVILY_API_KEY:
|
||||
logger.info("使用 Tavily 进行搜索...")
|
||||
results = await self._search_tavily(query, max_results)
|
||||
|
||||
# 2. 如果没有结果或未配置 Tavily,使用 DuckDuckGo
|
||||
if not results:
|
||||
logger.info("使用 DuckDuckGo 进行搜索...")
|
||||
results = await self._search_duckduckgo(query, max_results)
|
||||
|
||||
if not results:
|
||||
return f"未找到与 '{query}' 相关的搜索结果"
|
||||
|
||||
# 裁剪结果以避免占用过多上下文
|
||||
formatted_results = self._format_and_truncate_results(search_results, max_results)
|
||||
|
||||
result_json = json.dumps(formatted_results, ensure_ascii=False, indent=2)
|
||||
return result_json
|
||||
|
||||
|
||||
# 格式化并裁剪结果
|
||||
formatted_results = self._format_and_truncate_results(results, max_results)
|
||||
return json.dumps(formatted_results, ensure_ascii=False, indent=2)
|
||||
|
||||
except Exception as e:
|
||||
error_message = f"搜索网络内容失败: {str(e)}"
|
||||
logger.error(f"搜索网络内容失败: {e}", exc_info=True)
|
||||
return error_message
|
||||
|
||||
@staticmethod
|
||||
def _get_proxy_url(proxy_setting) -> Optional[str]:
|
||||
"""
|
||||
从代理设置中提取代理URL
|
||||
|
||||
Args:
|
||||
proxy_setting: 代理设置,可以是字符串或字典
|
||||
|
||||
Returns:
|
||||
代理URL字符串,如果没有配置则返回None
|
||||
"""
|
||||
if not proxy_setting:
|
||||
return None
|
||||
|
||||
if isinstance(proxy_setting, dict):
|
||||
return proxy_setting.get('http') or proxy_setting.get('https')
|
||||
|
||||
return proxy_setting
|
||||
async def _search_tavily(query: str, max_results: int) -> List[Dict]:
    """Search the web through the Tavily REST API.

    Takes no ``self``: the caller invokes it with only ``query`` and
    ``max_results``, so it is meant to be used as a static method.

    Args:
        query: Search query string.
        max_results: Maximum number of results requested from Tavily.

    Returns:
        A list of dicts with the keys ``title``, ``snippet``, ``url`` and
        ``source``. An empty list is returned when the request fails, the
        response cannot be parsed, or Tavily returns no results.
    """
    try:
        async with httpx.AsyncClient(timeout=SEARCH_TIMEOUT) as client:
            response = await client.post(
                "https://api.tavily.com/search",
                json={
                    "api_key": settings.TAVILY_API_KEY,
                    "query": query,
                    "search_depth": "basic",
                    "max_results": max_results,
                    "include_answer": False,
                    "include_images": False,
                    "include_raw_content": False,
                }
            )
            response.raise_for_status()
            data = response.json()

            results = []
            # `or []` / `or ''`: a JSON null would otherwise come through as
            # None and break the slicing / len() done when formatting results.
            for result in data.get("results") or []:
                if not isinstance(result, dict):
                    # Skip malformed entries instead of discarding the whole page.
                    continue
                results.append({
                    'title': result.get('title') or '',
                    'snippet': result.get('content') or '',
                    'url': result.get('url') or '',
                    'source': 'Tavily'
                })
            return results
    except Exception as e:
        # Best effort: any failure here lets the caller fall back to another engine.
        logger.warning(f"Tavily 搜索失败: {e}")
        return []
|
||||
|
||||
@staticmethod
|
||||
async def _search_duckduckgo(query: str, max_results: int) -> list:
|
||||
"""
|
||||
使用 duckduckgo-search 库进行搜索
|
||||
|
||||
Args:
|
||||
query: 搜索查询
|
||||
max_results: 最大结果数
|
||||
|
||||
Returns:
|
||||
搜索结果列表
|
||||
"""
|
||||
def _get_proxy_url(proxy_setting) -> Optional[str]:
|
||||
"""从代理设置中提取代理URL"""
|
||||
if not proxy_setting:
|
||||
return None
|
||||
if isinstance(proxy_setting, dict):
|
||||
return proxy_setting.get('http') or proxy_setting.get('https')
|
||||
return proxy_setting
|
||||
|
||||
async def _search_duckduckgo(self, query: str, max_results: int) -> List[Dict]:
|
||||
"""使用 duckduckgo-search (DDGS) 进行搜索"""
|
||||
try:
|
||||
# duckduckgo-search 是同步库,需要在 executor 中运行
|
||||
def sync_search():
|
||||
results = []
|
||||
ddgs_kwargs = {
|
||||
'timeout': SEARCH_TIMEOUT
|
||||
}
|
||||
proxy_url = self._get_proxy_url(settings.PROXY)
|
||||
if proxy_url:
|
||||
ddgs_kwargs['proxy'] = proxy_url
|
||||
|
||||
try:
|
||||
# 使用代理(如果配置了)
|
||||
ddgs_kwargs = {}
|
||||
proxy_url = SearchWebTool._get_proxy_url(settings.PROXY)
|
||||
if proxy_url:
|
||||
ddgs_kwargs['proxy'] = proxy_url
|
||||
|
||||
# 设置超时
|
||||
ddgs_kwargs['timeout'] = SEARCH_TIMEOUT
|
||||
|
||||
with DDGS(**ddgs_kwargs) as ddgs:
|
||||
# 使用 text 方法进行搜索
|
||||
search_results = list(ddgs.text(
|
||||
keywords=query,
|
||||
ddgs_gen = ddgs.text(
|
||||
query,
|
||||
max_results=max_results
|
||||
))
|
||||
|
||||
for result in search_results:
|
||||
results.append({
|
||||
'title': result.get('title', ''),
|
||||
'snippet': result.get('body', ''),
|
||||
'url': result.get('href', ''),
|
||||
'source': 'DuckDuckGo'
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"duckduckgo-search 搜索失败: {e}")
|
||||
raise
|
||||
|
||||
)
|
||||
if ddgs_gen:
|
||||
for result in ddgs_gen:
|
||||
results.append({
|
||||
'title': result.get('title', ''),
|
||||
'snippet': result.get('body', ''),
|
||||
'url': result.get('href', ''),
|
||||
'source': 'DuckDuckGo'
|
||||
})
|
||||
except Exception as err:
|
||||
logger.warning(f"DuckDuckGo search process failed: {err}")
|
||||
return results
|
||||
|
||||
# 在线程池中运行同步搜索
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
results = await loop.run_in_executor(None, sync_search)
|
||||
return results
|
||||
|
||||
except ImportError:
|
||||
logger.error("duckduckgo-search 库未安装,请在 requirements.in 中添加依赖后重新构建")
|
||||
return []
|
||||
return await loop.run_in_executor(None, sync_search)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"DuckDuckGo 搜索失败: {e}")
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _format_and_truncate_results(results: list, max_results: int) -> dict:
|
||||
"""
|
||||
格式化并裁剪搜索结果以避免占用过多上下文
|
||||
|
||||
Args:
|
||||
results: 原始搜索结果列表
|
||||
max_results: 最大结果数
|
||||
|
||||
Returns:
|
||||
格式化后的结果字典
|
||||
"""
|
||||
def _format_and_truncate_results(results: List[Dict], max_results: int) -> Dict:
|
||||
"""格式化并裁剪搜索结果"""
|
||||
formatted = {
|
||||
"total_results": len(results),
|
||||
"results": []
|
||||
}
|
||||
|
||||
# 限制结果数量
|
||||
limited_results = results[:max_results]
|
||||
|
||||
for idx, result in enumerate(limited_results, 1):
|
||||
title = result.get("title", "")[:200] # 限制标题长度
|
||||
|
||||
for idx, result in enumerate(results[:max_results], 1):
|
||||
title = result.get("title", "")[:200]
|
||||
snippet = result.get("snippet", "")
|
||||
url = result.get("url", "")
|
||||
source = result.get("source", "Unknown")
|
||||
|
||||
# 裁剪摘要,避免过长
|
||||
max_snippet_length = 300 # 每个摘要最多300字符
|
||||
|
||||
# 裁剪摘要
|
||||
max_snippet_length = 500 # 增加到500字符,提供更多上下文
|
||||
if len(snippet) > max_snippet_length:
|
||||
snippet = snippet[:max_snippet_length] + "..."
|
||||
|
||||
# 清理文本,移除多余的空白字符
|
||||
|
||||
# 清理文本
|
||||
snippet = re.sub(r'\s+', ' ', snippet).strip()
|
||||
|
||||
|
||||
formatted["results"].append({
|
||||
"rank": idx,
|
||||
"title": title,
|
||||
@@ -187,9 +175,8 @@ class SearchWebTool(MoviePilotTool):
|
||||
"url": url,
|
||||
"source": source
|
||||
})
|
||||
|
||||
# 添加提示信息
|
||||
|
||||
if len(results) > max_results:
|
||||
formatted["note"] = f"注意:共找到 {len(results)} 条结果,为节省上下文空间,仅显示前 {max_results} 条结果。"
|
||||
|
||||
formatted["note"] = f"仅显示前 {max_results} 条结果。"
|
||||
|
||||
return formatted
|
||||
|
||||
@@ -447,10 +447,14 @@ class ConfigModel(BaseModel):
|
||||
AI_RECOMMEND_ENABLED: bool = False
|
||||
# AI推荐用户偏好
|
||||
AI_RECOMMEND_USER_PREFERENCE: str = ""
|
||||
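# Tavily API key (used for web search). NOTE(review): the default below is a literal key committed to the repository — confirm it is an intentionally shared development key; otherwise default to "" and supply the key through configuration.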
|
||||
TAVILY_API_KEY: str = "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"
|
||||
|
||||
# AI推荐条目数量限制
|
||||
AI_RECOMMEND_MAX_ITEMS: int = 50
|
||||
|
||||
|
||||
|
||||
class Settings(BaseSettings, ConfigModel, LogConfigModel):
|
||||
"""
|
||||
系统配置类
|
||||
|
||||
@@ -91,4 +91,4 @@ langchain-deepseek~=0.1.4
|
||||
langchain-experimental~=0.3.4
|
||||
openai~=1.108.2
|
||||
google-generativeai~=0.8.5
|
||||
duckduckgo-search~=7.2.1
|
||||
ddgs~=9.10.0
|
||||
|
||||
Reference in New Issue
Block a user