feat: add new features to enhance image and link handling in Jina tool (#5517)

This commit is contained in:
Xiao Ley 2024-06-24 01:06:26 +08:00 committed by GitHub
parent ea29007bc0
commit dcec9d7bb7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 107 additions and 5 deletions

View File

@ -12,8 +12,8 @@ class JinaReaderTool(BuiltinTool):
def _invoke(self,
user_id: str,
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
"""
invoke tools
"""
@ -34,6 +34,15 @@ class JinaReaderTool(BuiltinTool):
if wait_for_selector is not None and wait_for_selector != '':
headers['X-Wait-For-Selector'] = wait_for_selector
if tool_parameters.get('image_caption', False):
headers['X-With-Generated-Alt'] = 'true'
if tool_parameters.get('gather_all_links_at_the_end', False):
headers['X-With-Links-Summary'] = 'true'
if tool_parameters.get('gather_all_images_at_the_end', False):
headers['X-With-Images-Summary'] = 'true'
proxy_server = tool_parameters.get('proxy_server', None)
if proxy_server is not None and proxy_server != '':
headers['X-Proxy-Url'] = proxy_server

View File

@ -51,6 +51,48 @@ parameters:
pt_BR: css selector for waiting for specific elements
llm_description: css selector of the target element to wait for
form: form
- name: image_caption
type: boolean
required: false
default: false
label:
en_US: Image caption
zh_Hans: 图片说明
pt_BR: Legenda da imagem
human_description:
en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
llm_description: Captions all images at the specified URL
form: form
- name: gather_all_links_at_the_end
type: boolean
required: false
default: false
label:
en_US: Gather all links at the end
zh_Hans: 将所有链接集中到最后
pt_BR: Coletar todos os links ao final
human_description:
en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigate the page or take further actions.
zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigate the page or take further actions.
llm_description: Gather all links at the end
form: form
- name: gather_all_images_at_the_end
type: boolean
required: false
default: false
label:
en_US: Gather all images at the end
zh_Hans: 将所有图片集中到最后
pt_BR: Coletar todas as imagens ao final
human_description:
en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。
pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
llm_description: Gather all images at the end
form: form
- name: proxy_server
type: string
required: false

View File

@ -24,6 +24,15 @@ class JinaSearchTool(BuiltinTool):
if 'api_key' in self.runtime.credentials and self.runtime.credentials.get('api_key'):
headers['Authorization'] = "Bearer " + self.runtime.credentials.get('api_key')
if tool_parameters.get('image_caption', False):
headers['X-With-Generated-Alt'] = 'true'
if tool_parameters.get('gather_all_links_at_the_end', False):
headers['X-With-Links-Summary'] = 'true'
if tool_parameters.get('gather_all_images_at_the_end', False):
headers['X-With-Images-Summary'] = 'true'
proxy_server = tool_parameters.get('proxy_server', None)
if proxy_server is not None and proxy_server != '':
headers['X-Proxy-Url'] = proxy_server

View File

@ -22,6 +22,48 @@ parameters:
zh_Hans: 在网络上搜索信息
llm_description: simple question to ask on the web
form: llm
- name: image_caption
type: boolean
required: false
default: false
label:
en_US: Image caption
zh_Hans: 图片说明
pt_BR: Legenda da imagem
human_description:
en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
llm_description: Captions all images at the specified URL
form: form
- name: gather_all_links_at_the_end
type: boolean
required: false
default: false
label:
en_US: Gather all links at the end
zh_Hans: 将所有链接集中到最后
pt_BR: Coletar todos os links ao final
human_description:
en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigate the page or take further actions.
zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigate the page or take further actions.
llm_description: Gather all links at the end
form: form
- name: gather_all_images_at_the_end
type: boolean
required: false
default: false
label:
en_US: Gather all images at the end
zh_Hans: 将所有图片集中到最后
pt_BR: Coletar todas as imagens ao final
human_description:
en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。
pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
llm_description: Gather all images at the end
form: form
- name: proxy_server
type: string
required: false