diff --git a/api/core/tools/provider/builtin/jina/jina.yaml b/api/core/tools/provider/builtin/jina/jina.yaml index 6ae3330f40..6fdeeaa40a 100644 --- a/api/core/tools/provider/builtin/jina/jina.yaml +++ b/api/core/tools/provider/builtin/jina/jina.yaml @@ -6,8 +6,8 @@ identity: zh_Hans: JinaReader pt_BR: JinaReader description: - en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost. - zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。 - pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo. + en_US: Convert any URL to an LLM-friendly input or perform searches on the web for grounding information. Experience improved output for your agent and RAG systems at no cost. + zh_Hans: 将任何 URL 转换为 LLM 易读的输入,或在网络上搜索以获取依据信息。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。 + pt_BR: Converta qualquer URL em uma entrada fácil de ler para LLMs ou realize pesquisas na web para obter informações de grounding. Tenha uma experiência melhor para seu agente e sistemas RAG sem custo. 
icon: icon.svg credentials_for_provider: diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.py b/api/core/tools/provider/builtin/jina/tools/jina_reader.py index fd29a00aa5..beb05717ea 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_reader.py +++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.py @@ -23,6 +23,14 @@ class JinaReaderTool(BuiltinTool): 'Accept': 'application/json' } + target_selector = tool_parameters.get('target_selector', None) + if target_selector is not None: + headers['X-Target-Selector'] = target_selector + + wait_for_selector = tool_parameters.get('wait_for_selector', None) + if wait_for_selector is not None: + headers['X-Wait-For-Selector'] = wait_for_selector + response = ssrf_proxy.get( str(URL(self._jina_reader_endpoint + url)), headers=headers, diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml index 38d66292df..73cacb7fde 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml +++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml @@ -25,6 +25,32 @@ parameters: pt_BR: used for linking to webpages llm_description: url for scraping form: llm + - name: target_selector + type: string + required: false + label: + en_US: Target selector + zh_Hans: 目标选择器 + pt_BR: Seletor de destino + human_description: + en_US: css selector for scraping specific elements + zh_Hans: css 选择器用于抓取特定元素 + pt_BR: css selector for scraping specific elements + llm_description: css selector of the target element to scrape + form: form + - name: wait_for_selector + type: string + required: false + label: + en_US: Wait for selector + zh_Hans: 等待选择器 + pt_BR: Aguardar por seletor + human_description: + en_US: css selector for waiting for specific elements + zh_Hans: css 选择器用于等待特定元素 + pt_BR: css selector for waiting for specific elements + llm_description: css selector of the target element to wait for + form: form - name: 
class JinaSearchTool(BuiltinTool):
    """Built-in tool that forwards a search query to the Jina search endpoint.

    The query is appended to ``_jina_search_endpoint`` as the URL path and
    the raw response body is returned as a single text message.
    """

    # Base URL; the percent-encoded query is appended directly after the slash.
    _jina_search_endpoint = 'https://s.jina.ai/'

    def _invoke(
        self,
        user_id: str,
        tool_parameters: dict[str, Any],
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Run a web search for ``tool_parameters['query']``.

        Args:
            user_id: Identifier of the invoking user (not used by this tool).
            tool_parameters: Tool arguments; must contain the key ``'query'``.

        Returns:
            A text message wrapping the response body as returned by the
            endpoint (JSON is requested via the ``Accept`` header).

        Raises:
            KeyError: If ``'query'`` is missing from ``tool_parameters``.
        """
        # Function-scope import keeps this block self-contained without
        # touching the module's import section.
        from urllib.parse import quote

        query = tool_parameters['query']

        headers = {
            'Accept': 'application/json'
        }

        # The query travels as the URL *path*, so it must be percent-encoded
        # in full. Left raw, '?' and '#' are parsed as query-string/fragment
        # delimiters and '/' splits the path, which truncates or alters the
        # search term (e.g. "what is C#?").
        search_url = self._jina_search_endpoint + quote(query, safe='')

        response = ssrf_proxy.get(
            # Already fully encoded above; tell yarl not to re-process it.
            str(URL(search_url, encoded=True)),
            headers=headers,
            timeout=(10, 60)
        )

        return self.create_text_message(response.text)