From 5b3133f9fca6721f45b016130811e1174149be5d Mon Sep 17 00:00:00 2001
From: Yeuoly <45712896+Yeuoly@users.noreply.github.com>
Date: Sun, 14 Apr 2024 22:03:19 +0800
Subject: [PATCH] feat: jina reader (#3468)

---
 api/core/tools/provider/_position.yaml       |  1 +
 .../provider/builtin/jina/_assets/icon.svg   |  4 ++
 api/core/tools/provider/builtin/jina/jina.py | 20 +++++++++
 .../tools/provider/builtin/jina/jina.yaml    | 13 ++++++
 .../builtin/jina/tools/jina_reader.py        | 43 +++++++++++++++++++
 .../builtin/jina/tools/jina_reader.yaml      | 41 ++++++++++++++++++
 6 files changed, 122 insertions(+)
 create mode 100644 api/core/tools/provider/builtin/jina/_assets/icon.svg
 create mode 100644 api/core/tools/provider/builtin/jina/jina.py
 create mode 100644 api/core/tools/provider/builtin/jina/jina.yaml
 create mode 100644 api/core/tools/provider/builtin/jina/tools/jina_reader.py
 create mode 100644 api/core/tools/provider/builtin/jina/tools/jina_reader.yaml

diff --git a/api/core/tools/provider/_position.yaml b/api/core/tools/provider/_position.yaml
index ae8f7cccf7..414bd7e38c 100644
--- a/api/core/tools/provider/_position.yaml
+++ b/api/core/tools/provider/_position.yaml
@@ -13,6 +13,7 @@
 - pubmed
 - stablediffusion
 - webscraper
+- jina
 - model.zhipuai
 - aippt
 - youtube
diff --git a/api/core/tools/provider/builtin/jina/_assets/icon.svg b/api/core/tools/provider/builtin/jina/_assets/icon.svg
new file mode 100644
index 0000000000..2e1b00fa52
--- /dev/null
+++ b/api/core/tools/provider/builtin/jina/_assets/icon.svg
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/api/core/tools/provider/builtin/jina/jina.py b/api/core/tools/provider/builtin/jina/jina.py
new file mode 100644
index 0000000000..ed1de6f6c1
--- /dev/null
+++ b/api/core/tools/provider/builtin/jina/jina.py
@@ -0,0 +1,20 @@
+from typing import Any
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class JinaProvider(BuiltinToolProviderController):
+    """Builtin tool provider for Jina Reader."""
+
+    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
+        """Validate provider credentials.
+
+        jina.yaml declares no provider credentials, so there is nothing to
+        check yet; the try/except keeps the error wrapping in place for when
+        a real check is added.
+        """
+        try:
+            pass
+        except Exception as e:
+            raise ToolProviderCredentialValidationError(str(e)) from e
diff --git a/api/core/tools/provider/builtin/jina/jina.yaml b/api/core/tools/provider/builtin/jina/jina.yaml
new file mode 100644
index 0000000000..6ae3330f40
--- /dev/null
+++ b/api/core/tools/provider/builtin/jina/jina.yaml
@@ -0,0 +1,13 @@
+identity:
+  author: Dify
+  name: jina
+  label:
+    en_US: JinaReader
+    zh_Hans: JinaReader
+    pt_BR: JinaReader
+  description:
+    en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost.
+    zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。
+    pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo.
+  icon: icon.svg
+credentials_for_provider:
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.py b/api/core/tools/provider/builtin/jina/tools/jina_reader.py
new file mode 100644
index 0000000000..322265cefe
--- /dev/null
+++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.py
@@ -0,0 +1,43 @@
+from typing import Any, Union
+
+from yarl import URL
+
+from core.helper import ssrf_proxy
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class JinaReaderTool(BuiltinTool):
+    """Tool that fetches a web page through the Jina Reader endpoint."""
+
+    _jina_reader_endpoint = 'https://r.jina.ai/'
+
+    def _invoke(self,
+                user_id: str,
+                tool_parameters: dict[str, Any],
+                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """
+        Fetch ``tool_parameters['url']`` through Jina Reader.
+
+        :param user_id: forwarded to ``self.summary`` when a summary is requested
+        :param tool_parameters: must contain ``url``; ``summary`` is optional
+        :return: text message holding the response body, or its summary
+            when ``summary`` is truthy
+        """
+        url = tool_parameters['url']
+
+        headers = {
+            'Accept': 'text/event-stream'
+        }
+
+        response = ssrf_proxy.get(
+            # the target URL is appended to the reader endpoint
+            str(URL(self._jina_reader_endpoint + url)),
+            headers=headers,
+            timeout=(10, 60)
+        )
+
+        if tool_parameters.get('summary', False):
+            return self.create_text_message(self.summary(user_id, response.text))
+
+        return self.create_text_message(response.text)
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
new file mode 100644
index 0000000000..38d66292df
--- /dev/null
+++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
@@ -0,0 +1,41 @@
+identity:
+  name: jina_reader
+  author: Dify
+  label:
+    en_US: JinaReader
+    zh_Hans: JinaReader
+    pt_BR: JinaReader
+description:
+  human:
+    en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost.
+    zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。
+    pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo.
+  llm: A tool for scraping webpages. Input should be a URL.
+parameters:
+  - name: url
+    type: string
+    required: true
+    label:
+      en_US: URL
+      zh_Hans: 网页链接
+      pt_BR: URL
+    human_description:
+      en_US: used for linking to webpages
+      zh_Hans: 用于链接到网页
+      pt_BR: used for linking to webpages
+    llm_description: url for scraping
+    form: llm
+  - name: summary
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Enable summary
+      zh_Hans: 是否启用摘要
+      pt_BR: Habilitar resumo
+    human_description:
+      en_US: Enable summary for the output
+      zh_Hans: 为输出启用摘要
+      pt_BR: Habilitar resumo para a saída
+    llm_description: enable summary
+    form: form