From ca18705d88d3ffacecc9ef103d370f11b5f83998 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 23 Jan 2026 15:20:06 +0000 Subject: [PATCH] Reimplemented SearchWebTool using duckduckgo-search library Co-authored-by: jxxghp <51039935+jxxghp@users.noreply.github.com> --- app/agent/tools/impl/search_web.py | 113 +++++++++++++---------------- requirements.in | 1 + 2 files changed, 51 insertions(+), 63 deletions(-) diff --git a/app/agent/tools/impl/search_web.py b/app/agent/tools/impl/search_web.py index 2642b6d2..9a0e41c0 100644 --- a/app/agent/tools/impl/search_web.py +++ b/app/agent/tools/impl/search_web.py @@ -9,7 +9,6 @@ from pydantic import BaseModel, Field from app.agent.tools.base import MoviePilotTool from app.core.config import settings from app.log import logger -from app.utils.http import AsyncRequestUtils class SearchWebInput(BaseModel): @@ -47,8 +46,8 @@ class SearchWebTool(MoviePilotTool): # 限制最大结果数 max_results = min(max(1, max_results or 5), 10) - # 使用DuckDuckGo API进行搜索 - search_results = await self._search_duckduckgo_api(query, max_results) + # 使用 duckduckgo-search 库进行搜索 + search_results = await self._search_duckduckgo(query, max_results) if not search_results: return f"未找到与 '{query}' 相关的搜索结果" @@ -65,9 +64,9 @@ class SearchWebTool(MoviePilotTool): return error_message @staticmethod - async def _search_duckduckgo_api(query: str, max_results: int) -> list: + async def _search_duckduckgo(query: str, max_results: int) -> list: """ - 使用DuckDuckGo API进行搜索 + 使用 duckduckgo-search 库进行搜索 Args: query: 搜索查询 @@ -77,71 +76,59 @@ class SearchWebTool(MoviePilotTool): 搜索结果列表 """ try: - # DuckDuckGo Instant Answer API - api_url = "https://api.duckduckgo.com/" - params = { - "q": query, - "format": "json", - "no_html": "1", - "skip_disambig": "1" - } + from duckduckgo_search import DDGS + import asyncio - # 使用代理(如果配置了) - http_utils = AsyncRequestUtils( - proxies=settings.PROXY, - timeout=10 - ) - - data = await http_utils.get_json(api_url, params=params) - - results = [] - - if data: - # 处理AbstractText(摘要) - if data.get("AbstractText"): - results.append({ - "title": data.get("Heading", query), - "snippet": data.get("AbstractText", ""), - "url": data.get("AbstractURL", ""), - "source": "DuckDuckGo Abstract" - }) - - # 处理RelatedTopics(相关主题) - related_topics = data.get("RelatedTopics", []) - for topic in related_topics[:max_results - len(results)]: - if isinstance(topic, dict): - text = topic.get("Text", "") - first_url = topic.get("FirstURL", "") - if text and first_url: - # 提取标题(通常在" - "之前) - title = text.split(" - ")[0] if " - " in text else text[:100] - snippet = text - + # duckduckgo-search 是同步库,需要在 executor 中运行 + def sync_search(): + results = [] + try: + # 使用代理(如果配置了) + ddgs_kwargs = {} + if settings.PROXY: + # duckduckgo-search 支持代理配置 + if isinstance(settings.PROXY, dict): + proxy_url = settings.PROXY.get('http') or settings.PROXY.get('https') + else: + proxy_url = settings.PROXY + + if proxy_url: + ddgs_kwargs['proxy'] = proxy_url + + # 设置超时 + ddgs_kwargs['timeout'] = 20 + + with DDGS(**ddgs_kwargs) as ddgs: + # 使用 text 方法进行搜索 + search_results = ddgs.text( + keywords=query, + max_results=max_results + ) + + for result in search_results: results.append({ - "title": title.strip(), - "snippet": snippet, - "url": first_url, - "source": "DuckDuckGo Related" + 'title': result.get('title', ''), + 'snippet': result.get('body', ''), + 'url': result.get('href', ''), + 'source': 'DuckDuckGo' }) + + except Exception as e: + logger.warning(f"duckduckgo-search 搜索失败: {e}") + raise - # 处理Results(搜索结果) - api_results = data.get("Results", []) - for result in api_results[:max_results - len(results)]: - if isinstance(result, dict): - title = result.get("Text", "") - url = result.get("FirstURL", "") - if title and url: - results.append({ - "title": title, - "snippet": result.get("Text", ""), - "url": url, - "source": "DuckDuckGo Results" - }) + return results - return results[:max_results] + # 在线程池中运行同步搜索 + loop = asyncio.get_event_loop() + results = await loop.run_in_executor(None, sync_search) + return results + except ImportError: + logger.error("duckduckgo-search 库未安装,请运行: pip install duckduckgo-search") + return [] except Exception as e: - logger.warning(f"DuckDuckGo API搜索失败: {e}") + logger.warning(f"DuckDuckGo 搜索失败: {e}") return [] @staticmethod diff --git a/requirements.in b/requirements.in index 6eb374f3..451e4cda 100644 --- a/requirements.in +++ b/requirements.in @@ -91,3 +91,4 @@ langchain-deepseek~=0.1.4 langchain-experimental~=0.3.4 openai~=1.108.2 google-generativeai~=0.8.5 +duckduckgo-search~=7.2.1