chore: refactor searXNG tool (#7220)

This commit is contained in:
非法操作 2024-08-13 15:34:29 +08:00 committed by GitHub
parent 13d061911b
commit b3743a9ae5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 2609 additions and 160 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,54 @@
[uwsgi]
# uWSGI configuration for serving the SearXNG web application (searx.webapp).
# User and group the uWSGI processes run as
uid = searxng
gid = searxng
# Number of worker processes (usually the CPU count)
# default value: %k (= number of CPU cores, see Dockerfile)
workers = %k
# Number of threads per worker
# default value: 4 (see Dockerfile)
threads = 4
# Permissions set on the created socket (666 = read/write for everyone)
chmod-socket = 666
# Plugin to use and interpreter config
single-interpreter = true
master = true
plugin = python3
lazy-apps = true
# NOTE(review): uWSGI documents enable-threads as a boolean flag; the value 4
# looks like it was meant to be `true` - confirm before changing.
enable-threads = 4
# WSGI module to import
module = searx.webapp
# Python module search path and working directory
pythonpath = /usr/local/searxng/
chdir = /usr/local/searxng/searx/
# Automatically set process names to something meaningful
auto-procname = true
# Disable request logging for privacy, but keep logging requests that end in
# a 5xx response
disable-logging = true
log-5xx = true
# Set the max size of a request in bytes (request body excluded)
buffer-size = 8192
# No keep-alive: ask clients to close the connection after each response
# See https://github.com/searx/searx-docker/issues/24
add-header = Connection: close
# Follow the SIGTERM convention (exit on SIGTERM)
# See https://github.com/searxng/searxng/issues/3427
die-on-term
# uWSGI serves the static files itself
static-map = /static=/usr/local/searxng/searx/static
# Static files expire after one day (86400 seconds)
static-expires = /* 86400
static-gzip-all = True
# Number of offload threads to spawn
offload-threads = 4

View File

@ -17,8 +17,7 @@ class SearXNGProvider(BuiltinToolProviderController):
tool_parameters={ tool_parameters={
"query": "SearXNG", "query": "SearXNG",
"limit": 1, "limit": 1,
"search_type": "page", "search_type": "general"
"result_type": "link"
}, },
) )
except Exception as e: except Exception as e:

View File

@ -6,7 +6,7 @@ identity:
zh_Hans: SearXNG zh_Hans: SearXNG
description: description:
en_US: A free internet metasearch engine. en_US: A free internet metasearch engine.
zh_Hans: 开源互联网元搜索引擎 zh_Hans: 开源免费的互联网元搜索引擎
icon: icon.svg icon: icon.svg
tags: tags:
- search - search
@ -18,9 +18,6 @@ credentials_for_provider:
label: label:
en_US: SearXNG base URL en_US: SearXNG base URL
zh_Hans: SearXNG base URL zh_Hans: SearXNG base URL
help:
en_US: Please input your SearXNG base URL
zh_Hans: 请输入您的 SearXNG base URL
placeholder: placeholder:
en_US: Please input your SearXNG base URL en_US: Please input your SearXNG base URL
zh_Hans: 请输入您的 SearXNG base URL zh_Hans: 请输入您的 SearXNG base URL

View File

@ -1,4 +1,3 @@
import json
from typing import Any from typing import Any
import requests import requests
@ -7,90 +6,11 @@ from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool from core.tools.tool.builtin_tool import BuiltinTool
class SearXNGSearchResults(dict):
"""Wrapper for search results."""
def __init__(self, data: str):
super().__init__(json.loads(data))
self.__dict__ = self
@property
def results(self) -> Any:
return self.get("results", [])
class SearXNGSearchTool(BuiltinTool): class SearXNGSearchTool(BuiltinTool):
""" """
Tool for performing a search using SearXNG engine. Tool for performing a search using SearXNG engine.
""" """
SEARCH_TYPE: dict[str, str] = {
"page": "general",
"news": "news",
"image": "images",
# "video": "videos",
# "file": "files"
}
LINK_FILED: dict[str, str] = {
"page": "url",
"news": "url",
"image": "img_src",
# "video": "iframe_src",
# "file": "magnetlink"
}
TEXT_FILED: dict[str, str] = {
"page": "content",
"news": "content",
"image": "img_src",
# "video": "iframe_src",
# "file": "magnetlink"
}
def _invoke_query(self, user_id: str, host: str, query: str, search_type: str, result_type: str, topK: int = 5) -> list[dict]:
"""Run query and return the results."""
search_type = search_type.lower()
if search_type not in self.SEARCH_TYPE.keys():
search_type= "page"
response = requests.get(host, params={
"q": query,
"format": "json",
"categories": self.SEARCH_TYPE[search_type]
})
if response.status_code != 200:
raise Exception(f'Error {response.status_code}: {response.text}')
search_results = SearXNGSearchResults(response.text).results[:topK]
if result_type == 'link':
results = []
if search_type == "page" or search_type == "news":
for r in search_results:
results.append(self.create_text_message(
text=f'{r["title"]}: {r.get(self.LINK_FILED[search_type], "")}'
))
elif search_type == "image":
for r in search_results:
results.append(self.create_image_message(
image=r.get(self.LINK_FILED[search_type], "")
))
else:
for r in search_results:
results.append(self.create_link_message(
link=r.get(self.LINK_FILED[search_type], "")
))
return results
else:
text = ''
for i, r in enumerate(search_results):
text += f'{i+1}: {r["title"]} - {r.get(self.TEXT_FILED[search_type], "")}\n'
return self.create_text_message(text=self.summary(user_id=user_id, content=text))
def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]: def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
""" """
Invoke the SearXNG search tool. Invoke the SearXNG search tool.
@ -103,23 +23,21 @@ class SearXNGSearchTool(BuiltinTool):
ToolInvokeMessage | list[ToolInvokeMessage]: The result of the tool invocation. ToolInvokeMessage | list[ToolInvokeMessage]: The result of the tool invocation.
""" """
host = self.runtime.credentials.get('searxng_base_url', None) host = self.runtime.credentials.get('searxng_base_url')
if not host: if not host:
raise Exception('SearXNG api is required') raise Exception('SearXNG api is required')
query = tool_parameters.get('query')
if not query:
return self.create_text_message('Please input query')
num_results = min(tool_parameters.get('num_results', 5), 20)
search_type = tool_parameters.get('search_type', 'page') or 'page'
result_type = tool_parameters.get('result_type', 'text') or 'text'
return self._invoke_query( response = requests.get(host, params={
user_id=user_id, "q": tool_parameters.get('query'),
host=host, "format": "json",
query=query, "categories": tool_parameters.get('search_type', 'general')
search_type=search_type, })
result_type=result_type,
topK=num_results if response.status_code != 200:
) raise Exception(f'Error {response.status_code}: {response.text}')
res = response.json().get("results", [])
if not res:
return self.create_text_message(f"No results found, get response: {response.content}")
return [self.create_json_message(item) for item in res]

View File

@ -1,13 +1,13 @@
identity: identity:
name: searxng_search name: searxng_search
author: Tice author: Junytang
label: label:
en_US: SearXNG Search en_US: SearXNG Search
zh_Hans: SearXNG 搜索 zh_Hans: SearXNG 搜索
description: description:
human: human:
en_US: Perform searches on SearXNG and get results. en_US: SearXNG is a free internet metasearch engine which aggregates results from more than 70 search services.
zh_Hans: 在 SearXNG 上进行搜索并获取结果。 zh_Hans: SearXNG 是一个免费的互联网元搜索引擎,它从70多个不同的搜索服务中聚合搜索结果。
llm: Perform searches on SearXNG and get results. llm: Perform searches on SearXNG and get results.
parameters: parameters:
- name: query - name: query
@ -16,9 +16,6 @@ parameters:
label: label:
en_US: Query string en_US: Query string
zh_Hans: 查询语句 zh_Hans: 查询语句
human_description:
en_US: The search query.
zh_Hans: 搜索查询语句。
llm_description: Key words for searching llm_description: Key words for searching
form: llm form: llm
- name: search_type - name: search_type
@ -27,63 +24,46 @@ parameters:
label: label:
en_US: search type en_US: search type
zh_Hans: 搜索类型 zh_Hans: 搜索类型
pt_BR: search type default: general
human_description:
en_US: search type for page, news or image.
zh_Hans: 选择搜索的类型:网页,新闻,图片。
pt_BR: search type for page, news or image.
default: Page
options: options:
- value: Page - value: general
label: label:
en_US: Page en_US: General
zh_Hans: 网页 zh_Hans: 综合
pt_BR: Page - value: images
- value: News label:
en_US: Images
zh_Hans: 图片
- value: videos
label:
en_US: Videos
zh_Hans: 视频
- value: news
label: label:
en_US: News en_US: News
zh_Hans: 新闻 zh_Hans: 新闻
pt_BR: News - value: map
- value: Image
label: label:
en_US: Image en_US: Map
zh_Hans: 图片 zh_Hans: 地图
pt_BR: Image - value: music
form: form
- name: num_results
type: number
required: true
label:
en_US: Number of query results
zh_Hans: 返回查询数量
human_description:
en_US: The number of query results.
zh_Hans: 返回查询结果的数量。
form: form
default: 5
min: 1
max: 20
- name: result_type
type: select
required: true
label:
en_US: result type
zh_Hans: 结果类型
pt_BR: result type
human_description:
en_US: return a list of links or texts.
zh_Hans: 返回一个连接列表还是纯文本内容。
pt_BR: return a list of links or texts.
default: text
options:
- value: link
label: label:
en_US: Link en_US: Music
zh_Hans: 链接 zh_Hans: 音乐
pt_BR: Link - value: it
- value: text
label: label:
en_US: Text en_US: IT
zh_Hans: 文本 zh_Hans: 信息技术
pt_BR: Text - value: science
label:
en_US: Science
zh_Hans: 科学
- value: files
label:
en_US: Files
zh_Hans: 文件
- value: social_media
label:
en_US: Social Media
zh_Hans: 社交媒体
form: form form: form