Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrated SearXNG search as built-in tool #3363

Merged
merged 7 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/core/tools/provider/_position.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
- google
- bing
- duckduckgo
- searxng
- dalle
- azuredalle
- wikipedia
Expand Down
56 changes: 56 additions & 0 deletions api/core/tools/provider/builtin/searxng/_assets/icon.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
25 changes: 25 additions & 0 deletions api/core/tools/provider/builtin/searxng/searxng.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from typing import Any

from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.searxng.tools.searxng_search import SearXNGSearchTool
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController


class SearXNGProvider(BuiltinToolProviderController):
    """Built-in tool provider for SearXNG."""

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        """
        Validate the credentials by running one small probe search.

        Args:
            credentials (dict[str, Any]): The provider credentials to check.

        Raises:
            ToolProviderCredentialValidationError: If the probe search raises
                for any reason. The original exception is attached as the cause.
        """
        try:
            SearXNGSearchTool().fork_tool_runtime(
                meta={
                    "credentials": credentials,
                }
            ).invoke(
                user_id='',
                tool_parameters={
                    "query": "SearXNG",
                    # The search tool reads ``num_results``; a ``limit`` key is ignored by it.
                    "num_results": 1,
                    "search_type": "page",
                    "result_type": "link"
                },
            )
        except Exception as e:
            # Chain the cause so the underlying failure stays visible in tracebacks.
            raise ToolProviderCredentialValidationError(str(e)) from e
24 changes: 24 additions & 0 deletions api/core/tools/provider/builtin/searxng/searxng.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
identity:
author: Junytang
name: searxng
label:
en_US: SearXNG
zh_Hans: SearXNG
description:
en_US: A free internet metasearch engine.
zh_Hans: 开源互联网元搜索引擎
icon: icon.svg
credentials_for_provider:
searxng_base_url:
type: secret-input
required: true
label:
en_US: SearXNG base URL
zh_Hans: SearXNG base URL
help:
en_US: Please input your SearXNG base URL
zh_Hans: 请输入您的 SearXNG base URL
placeholder:
en_US: Please input your SearXNG base URL
zh_Hans: 请输入您的 SearXNG base URL
url: https://docs.dify.ai/tutorials/tool-configuration/searxng
crazywoola marked this conversation as resolved.
Show resolved Hide resolved
124 changes: 124 additions & 0 deletions api/core/tools/provider/builtin/searxng/tools/searxng_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import json
from typing import Any

import requests

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool


class SearXNGSearchResults(dict):
    """Dictionary built from the JSON text of a search response."""

    def __init__(self, data: str):
        # Decode the JSON text first, then seed the underlying dict with it.
        payload = json.loads(data)
        super().__init__(payload)
        # Use the mapping itself as the instance namespace so keys are
        # also readable as attributes.
        self.__dict__ = self

    @property
    def results(self) -> Any:
        """Return the ``results`` entry, or an empty list when the key is absent."""
        try:
            return self["results"]
        except KeyError:
            return []


class SearXNGSearchTool(BuiltinTool):
    """
    Tool for performing a search using SearXNG engine.
    """

    # Maps the tool's ``search_type`` option to the ``categories`` request parameter.
    SEARCH_TYPE = {
        "page": "general",
        "news": "news",
        "image": "images",
        # "video": "videos",
        # "file": "files"
    }
    # Result key that holds the link for each search type.
    LINK_FILED = {
        "page": "url",
        "news": "url",
        "image": "img_src",
        # "video": "iframe_src",
        # "file": "magnetlink"
    }
    # Result key that holds the text for each search type.
    TEXT_FILED = {
        "page": "content",
        "news": "content",
        "image": "img_src",
        # "video": "iframe_src",
        # "file": "magnetlink"
    }

    def _invoke_query(self, user_id: str, host: str, query: str, search_type: str, result_type: str,
                      topK: int = 5, timeout: float = 10.0) -> ToolInvokeMessage | list[ToolInvokeMessage]:
        """
        Run the query against the SearXNG instance and build the tool messages.

        Args:
            user_id (str): The ID of the user invoking the tool (passed to ``summary``).
            host (str): URL the search request is sent to.
            query (str): The search query.
            search_type (str): One of the ``SEARCH_TYPE`` keys, case-insensitive;
                unknown values fall back to ``"page"``.
            result_type (str): ``"link"`` returns one message per result; any other
                value returns a single summarized text message.
            topK (int): Maximum number of results to use.
            timeout (float): Seconds to wait for the HTTP request before giving up.

        Returns:
            ToolInvokeMessage | list[ToolInvokeMessage]: A list of messages for
            ``result_type == "link"``, otherwise a single text message.

        Raises:
            Exception: If the response status code is not 200.
        """

        search_type = search_type.lower()
        if search_type not in self.SEARCH_TYPE:
            search_type = "page"

        # Without a timeout an unreachable instance would block the caller indefinitely.
        response = requests.get(
            host,
            params={
                "q": query,
                "format": "json",
                "categories": self.SEARCH_TYPE[search_type],
            },
            timeout=timeout,
        )

        if response.status_code != 200:
            raise Exception(f'Error {response.status_code}: {response.text}')

        # ``results`` may be present but null in the payload; treat that as no results.
        search_results = (SearXNGSearchResults(response.text).results or [])[:topK]

        if result_type == 'link':
            link_field = self.LINK_FILED[search_type]
            if search_type in ("page", "news"):
                return [
                    self.create_text_message(text=f'{r.get("title", "")}: {r.get(link_field, "")}')
                    for r in search_results
                ]
            if search_type == "image":
                # Skip results without an image source rather than emitting empty image messages.
                return [
                    self.create_image_message(image=r[link_field])
                    for r in search_results
                    if r.get(link_field)
                ]
            # Not reachable with the current SEARCH_TYPE keys; kept for the
            # commented-out types above.
            return [
                self.create_link_message(link=r.get(link_field, ""))
                for r in search_results
            ]

        text_field = self.TEXT_FILED[search_type]
        text = ''.join(
            f'{i}: {r.get("title", "")} - {r.get(text_field, "")}\n'
            for i, r in enumerate(search_results, start=1)
        )

        return self.create_text_message(text=self.summary(user_id=user_id, content=text))

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
        """
        Invoke the SearXNG search tool.

        Args:
            user_id (str): The ID of the user invoking the tool.
            tool_parameters (dict[str, Any]): The parameters for the tool invocation.
                Reads ``query``, ``num_results``, ``search_type`` and ``result_type``.

        Returns:
            ToolInvokeMessage | list[ToolInvokeMessage]: The result of the tool invocation.

        Raises:
            Exception: If the ``searxng_base_url`` credential is missing or empty.
        """

        host = self.runtime.credentials.get('searxng_base_url', None)
        if not host:
            raise Exception('SearXNG api is required')

        query = tool_parameters.get('query', None)
        if not query:
            return self.create_text_message('Please input query')

        # ``num_results`` can be missing, None, or a non-integer number; coerce it to
        # an int (it is used as a slice bound) and clamp it to the 1..20 range.
        try:
            num_results = int(tool_parameters.get('num_results') or 5)
        except (TypeError, ValueError):
            num_results = 5
        num_results = max(1, min(num_results, 20))

        search_type = tool_parameters.get('search_type', 'page') or 'page'
        result_type = tool_parameters.get('result_type', 'text') or 'text'

        return self._invoke_query(
            user_id=user_id,
            host=host,
            query=query,
            search_type=search_type,
            result_type=result_type,
            topK=num_results)
89 changes: 89 additions & 0 deletions api/core/tools/provider/builtin/searxng/tools/searxng_search.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
identity:
name: searxng_search
author: Tice
label:
en_US: SearXNG Search
zh_Hans: SearXNG 搜索
description:
human:
en_US: Perform searches on SearXNG and get results.
zh_Hans: 在 SearXNG 上进行搜索并获取结果。
llm: Perform searches on SearXNG and get results.
parameters:
- name: query
type: string
required: true
label:
en_US: Query string
zh_Hans: 查询语句
human_description:
en_US: The search query.
zh_Hans: 搜索查询语句。
llm_description: Key words for searching
form: llm
- name: search_type
type: select
required: true
label:
en_US: search type
zh_Hans: 搜索类型
pt_BR: search type
human_description:
en_US: search type for page, news or image.
zh_Hans: 选择搜索的类型:网页,新闻,图片。
pt_BR: search type for page, news or image.
default: Page
options:
- value: Page
label:
en_US: Page
zh_Hans: 网页
pt_BR: Page
- value: News
label:
en_US: News
zh_Hans: 新闻
pt_BR: News
- value: Image
label:
en_US: Image
zh_Hans: 图片
pt_BR: Image
form: form
- name: num_results
type: number
required: true
label:
en_US: Number of query results
zh_Hans: 返回查询数量
human_description:
en_US: The number of query results.
zh_Hans: 返回查询结果的数量。
form: form
default: 5
min: 1
max: 20
- name: result_type
type: select
required: true
label:
en_US: result type
zh_Hans: 结果类型
pt_BR: result type
human_description:
en_US: return a list of links or texts.
zh_Hans: 返回一个链接列表还是纯文本内容。
pt_BR: return a list of links or texts.
default: text
options:
- value: link
label:
en_US: Link
zh_Hans: 链接
pt_BR: Link
- value: text
label:
en_US: Text
zh_Hans: 文本
pt_BR: Text
form: form