feat: add new features to enhance image and link handling in Jina tool (#5517)

2025-08-11 16:18:59 +08:00 · 2024-06-24 01:06:26 +08:00 · 2024-06-24 01:06:26 +08:00 · dcec9d7bb7
commit dcec9d7bb7
parent ea29007bc0
4 changed files with 107 additions and 5 deletions
--- a/api/core/tools/provider/builtin/jina/tools/jina_reader.py
+++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.py
@ -12,8 +12,8 @@ class JinaReaderTool(BuiltinTool):

    def _invoke(self,
                user_id: str,
-               tool_parameters: dict[str, Any], 
-        ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+                tool_parameters: dict[str, Any],
+                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
            invoke tools
        """
@ -34,6 +34,15 @@ class JinaReaderTool(BuiltinTool):
        if wait_for_selector is not None and wait_for_selector != '':
            headers['X-Wait-For-Selector'] = wait_for_selector

+        if tool_parameters.get('image_caption', False):
+            headers['X-With-Generated-Alt'] = 'true'
+
+        if tool_parameters.get('gather_all_links_at_the_end', False):
+            headers['X-With-Links-Summary'] = 'true'
+
+        if tool_parameters.get('gather_all_images_at_the_end', False):
+            headers['X-With-Images-Summary'] = 'true'
+
        proxy_server = tool_parameters.get('proxy_server', None)
        if proxy_server is not None and proxy_server != '':
            headers['X-Proxy-Url'] = proxy_server
--- a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
@ -51,6 +51,48 @@ parameters:
      pt_BR: css selector for waiting for specific elements
    llm_description: css selector of the target element to wait for
    form: form
+  - name: image_caption
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Image caption
+      zh_Hans: 图片说明
+      pt_BR: Legenda da imagem
+    human_description:
+      en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
+      zh_Hans: "为指定 URL 上的所有图像添加标题，为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
+      pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
+    llm_description: Captions all images at the specified URL
+    form: form
+  - name: gather_all_links_at_the_end
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Gather all links at the end
+      zh_Hans: 将所有链接集中到最后
+      pt_BR: Coletar todos os links ao final
+    human_description:
+      en_US: A "Buttons & Links" section will be created at the end. This helps downstream LLMs or web agents navigate the page or take further actions.
+      zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
+      pt_BR: A "Buttons & Links" section will be created at the end. This helps downstream LLMs or web agents navigate the page or take further actions.
+    llm_description: Gather all links at the end
+    form: form
+  - name: gather_all_images_at_the_end
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Gather all images at the end
+      zh_Hans: 将所有图片集中到最后
+      pt_BR: Coletar todas as imagens ao final
+    human_description:
+      en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+      zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果，从而提高推理能力。
+      pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+    llm_description: Gather all images at the end
+    form: form
  - name: proxy_server
    type: string
    required: false
--- a/api/core/tools/provider/builtin/jina/tools/jina_search.py
+++ b/api/core/tools/provider/builtin/jina/tools/jina_search.py
@ -24,6 +24,15 @@ class JinaSearchTool(BuiltinTool):
        if 'api_key' in self.runtime.credentials and self.runtime.credentials.get('api_key'):
            headers['Authorization'] = "Bearer " + self.runtime.credentials.get('api_key')

+        if tool_parameters.get('image_caption', False):
+            headers['X-With-Generated-Alt'] = 'true'
+
+        if tool_parameters.get('gather_all_links_at_the_end', False):
+            headers['X-With-Links-Summary'] = 'true'
+
+        if tool_parameters.get('gather_all_images_at_the_end', False):
+            headers['X-With-Images-Summary'] = 'true'
+
        proxy_server = tool_parameters.get('proxy_server', None)
        if proxy_server is not None and proxy_server != '':
            headers['X-Proxy-Url'] = proxy_server
--- a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
@ -22,6 +22,48 @@ parameters:
      zh_Hans: 在网络上搜索信息
    llm_description: simple question to ask on the web
    form: llm
+  - name: image_caption
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Image caption
+      zh_Hans: 图片说明
+      pt_BR: Legenda da imagem
+    human_description:
+      en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
+      zh_Hans: "为指定 URL 上的所有图像添加标题，为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
+      pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
+    llm_description: Captions all images at the specified URL
+    form: form
+  - name: gather_all_links_at_the_end
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Gather all links at the end
+      zh_Hans: 将所有链接集中到最后
+      pt_BR: Coletar todos os links ao final
+    human_description:
+      en_US: A "Buttons & Links" section will be created at the end. This helps downstream LLMs or web agents navigate the page or take further actions.
+      zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
+      pt_BR: A "Buttons & Links" section will be created at the end. This helps downstream LLMs or web agents navigate the page or take further actions.
+    llm_description: Gather all links at the end
+    form: form
+  - name: gather_all_images_at_the_end
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Gather all images at the end
+      zh_Hans: 将所有图片集中到最后
+      pt_BR: Coletar todas as imagens ao final
+    human_description:
+      en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+      zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果，从而提高推理能力。
+      pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+    llm_description: Gather all images at the end
+    form: form
  - name: proxy_server
    type: string
    required: false