Reimplemented SearchWebTool using the duckduckgo-search library

Co-authored-by: jxxghp <51039935+jxxghp@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-01-23 15:20:06 +00:00
parent 8f17b52466
commit ca18705d88
2 changed files with 51 additions and 63 deletions

View File

@@ -9,7 +9,6 @@ from pydantic import BaseModel, Field
from app.agent.tools.base import MoviePilotTool
from app.core.config import settings
from app.log import logger
from app.utils.http import AsyncRequestUtils
class SearchWebInput(BaseModel):
@@ -47,8 +46,8 @@ class SearchWebTool(MoviePilotTool):
# 限制最大结果数
max_results = min(max(1, max_results or 5), 10)
# 使用DuckDuckGo API进行搜索
search_results = await self._search_duckduckgo_api(query, max_results)
# 使用 duckduckgo-search 库进行搜索
search_results = await self._search_duckduckgo(query, max_results)
if not search_results:
return f"未找到与 '{query}' 相关的搜索结果"
@@ -65,9 +64,9 @@ class SearchWebTool(MoviePilotTool):
return error_message
@staticmethod
async def _search_duckduckgo_api(query: str, max_results: int) -> list:
async def _search_duckduckgo(query: str, max_results: int) -> list:
"""
使用DuckDuckGo API进行搜索
使用 duckduckgo-search 库进行搜索
Args:
query: 搜索查询
@@ -77,71 +76,59 @@ class SearchWebTool(MoviePilotTool):
搜索结果列表
"""
try:
# DuckDuckGo Instant Answer API
api_url = "https://api.duckduckgo.com/"
params = {
"q": query,
"format": "json",
"no_html": "1",
"skip_disambig": "1"
}
from duckduckgo_search import DDGS
import asyncio
# 使用代理(如果配置了)
http_utils = AsyncRequestUtils(
proxies=settings.PROXY,
timeout=10
)
data = await http_utils.get_json(api_url, params=params)
results = []
if data:
# 处理AbstractText摘要
if data.get("AbstractText"):
results.append({
"title": data.get("Heading", query),
"snippet": data.get("AbstractText", ""),
"url": data.get("AbstractURL", ""),
"source": "DuckDuckGo Abstract"
})
# 处理RelatedTopics相关主题
related_topics = data.get("RelatedTopics", [])
for topic in related_topics[:max_results - len(results)]:
if isinstance(topic, dict):
text = topic.get("Text", "")
first_url = topic.get("FirstURL", "")
if text and first_url:
# 提取标题(通常在" - "之前)
title = text.split(" - ")[0] if " - " in text else text[:100]
snippet = text
# duckduckgo-search 是同步库,需要在 executor 中运行
def sync_search():
results = []
try:
# 使用代理(如果配置了)
ddgs_kwargs = {}
if settings.PROXY:
# duckduckgo-search 支持代理配置
if isinstance(settings.PROXY, dict):
proxy_url = settings.PROXY.get('http') or settings.PROXY.get('https')
else:
proxy_url = settings.PROXY
if proxy_url:
ddgs_kwargs['proxy'] = proxy_url
# 设置超时
ddgs_kwargs['timeout'] = 20
with DDGS(**ddgs_kwargs) as ddgs:
# 使用 text 方法进行搜索
search_results = ddgs.text(
keywords=query,
max_results=max_results
)
for result in search_results:
results.append({
"title": title.strip(),
"snippet": snippet,
"url": first_url,
"source": "DuckDuckGo Related"
'title': result.get('title', ''),
'snippet': result.get('body', ''),
'url': result.get('href', ''),
'source': 'DuckDuckGo'
})
except Exception as e:
logger.warning(f"duckduckgo-search 搜索失败: {e}")
raise
# 处理Results搜索结果
api_results = data.get("Results", [])
for result in api_results[:max_results - len(results)]:
if isinstance(result, dict):
title = result.get("Text", "")
url = result.get("FirstURL", "")
if title and url:
results.append({
"title": title,
"snippet": result.get("Text", ""),
"url": url,
"source": "DuckDuckGo Results"
})
return results
return results[:max_results]
# 在线程池中运行同步搜索
loop = asyncio.get_event_loop()
results = await loop.run_in_executor(None, sync_search)
return results
except ImportError:
logger.error("duckduckgo-search 库未安装,请运行: pip install duckduckgo-search")
return []
except Exception as e:
logger.warning(f"DuckDuckGo API搜索失败: {e}")
logger.warning(f"DuckDuckGo 搜索失败: {e}")
return []
@staticmethod

View File

@@ -91,3 +91,4 @@ langchain-deepseek~=0.1.4
langchain-experimental~=0.3.4
openai~=1.108.2
google-generativeai~=0.8.5
duckduckgo-search~=7.2.1