class TavilyExtract:
    """
    A class for extracting content from web pages using the Tavily Extract API.

    Args:
        api_key (str): The API key for accessing the Tavily Extract API.

    Methods:
        extract_content: Retrieves extracted content from the Tavily Extract API.
    """

    # Upper bound (in seconds) for a single HTTP round trip, so that a stalled
    # connection cannot block the calling worker indefinitely.
    REQUEST_TIMEOUT = 60

    def __init__(self, api_key: str) -> None:
        self.api_key = api_key

    def extract_content(self, params: dict[str, Any]) -> dict:
        """
        Retrieves extracted content from the Tavily Extract API.

        The caller's ``params`` dict is never modified; in particular the API
        key is only added to the outgoing payload, not to ``params``.

        Args:
            params (Dict[str, Any]): The extraction parameters. Must contain 'urls'.

        Returns:
            dict: The decoded JSON body of the API response.

        Raises:
            ValueError: If 'urls' is missing or has an unsupported type.
            requests.HTTPError: If the API answers with an error status code.
            requests.RequestException: On connection failures or when the
                request exceeds ``REQUEST_TIMEOUT``.
        """
        processed_params = self._process_params(params)

        response = requests.post(
            f"{TAVILY_API_URL}/extract",
            json=processed_params,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def _process_params(self, params: dict[str, Any]) -> dict:
        """
        Processes and validates the extraction parameters.

        Only 'urls' and 'api_key' are forwarded; every other key in ``params``
        is ignored.

        Args:
            params (Dict[str, Any]): The extraction parameters. 'urls' may be a
                comma- and/or whitespace-separated string, or a list/tuple of
                strings.

        Returns:
            dict: A new dict with 'urls' (list of non-empty, stripped strings)
            and 'api_key'.

        Raises:
            ValueError: If 'urls' is missing or is not a string, list or tuple.
        """
        if "urls" not in params:
            raise ValueError("The 'urls' parameter is required.")

        urls = params["urls"]
        if isinstance(urls, str):
            # Commas and any whitespace are both accepted as separators.
            candidates = urls.replace(",", " ").split()
        elif isinstance(urls, (list, tuple)):
            candidates = [url.strip() for url in urls if isinstance(url, str)]
        else:
            # Previously the value was dropped silently, producing a request
            # without any URLs; fail early with a clear message instead.
            raise ValueError("The 'urls' parameter must be a string or a list of strings.")

        return {
            "urls": [url for url in candidates if url],
            # A key supplied in params still takes precedence, but an empty or
            # None value falls back to the key configured on this instance.
            "api_key": params.get("api_key") or self.api_key,
        }
+ + Returns: + ToolInvokeMessage | list[ToolInvokeMessage]: The result of the Tavily Extract tool invocation. + """ + urls = tool_parameters.get("urls", "") + api_key = self.runtime.credentials.get("tavily_api_key") + if not api_key: + return self.create_text_message( + "Tavily API key is missing. Please set the 'tavily_api_key' in credentials." + ) + if not urls: + return self.create_text_message("Please input at least one URL to extract.") + + tavily_extract = TavilyExtract(api_key) + try: + raw_results = tavily_extract.extract_content(tool_parameters) + except requests.HTTPError as e: + return self.create_text_message(f"Error occurred while extracting content: {str(e)}") + + if not raw_results.get("results"): + return self.create_text_message("No content could be extracted from the provided URLs.") + else: + # Always return JSON message with all data + json_message = self.create_json_message(raw_results) + + # Create text message based on user-selected parameters + text_message_content = self._format_results_as_text(raw_results) + text_message = self.create_text_message(text=text_message_content) + + return [json_message, text_message] + + def _format_results_as_text(self, raw_results: dict) -> str: + """ + Formats the raw extraction results into a markdown text based on user-selected parameters. + + Args: + raw_results (dict): The raw extraction results. + + Returns: + str: The formatted markdown text. 
+ """ + output_lines = [] + + for idx, result in enumerate(raw_results.get("results", []), 1): + url = result.get("url", "") + raw_content = result.get("raw_content", "") + + output_lines.append(f"## Extracted Content {idx}: {url}\n") + output_lines.append(f"**Raw Content:**\n{raw_content}\n") + output_lines.append("---\n") + + if raw_results.get("failed_results"): + output_lines.append("## Failed URLs:\n") + for failed in raw_results["failed_results"]: + url = failed.get("url", "") + error = failed.get("error", "Unknown error") + output_lines.append(f"- {url}: {error}\n") + + return "\n".join(output_lines) diff --git a/api/core/tools/provider/builtin/tavily/tools/tavily_extract.yaml b/api/core/tools/provider/builtin/tavily/tools/tavily_extract.yaml new file mode 100644 index 0000000000..a04da73b54 --- /dev/null +++ b/api/core/tools/provider/builtin/tavily/tools/tavily_extract.yaml @@ -0,0 +1,23 @@ +identity: + name: tavily_extract + author: Kalo Chin + label: + en_US: Tavily Extract + zh_Hans: Tavily Extract +description: + human: + en_US: A web extraction tool built specifically for AI agents (LLMs), delivering raw content from web pages. + zh_Hans: 专为人工智能代理 (LLM) 构建的网页提取工具,提供网页的原始内容。 + llm: A tool for extracting raw content from web pages, designed for AI agents (LLMs). +parameters: + - name: urls + type: string + required: true + label: + en_US: URLs + zh_Hans: URLs + human_description: + en_US: A comma-separated list of URLs to extract content from. + zh_Hans: 要从中提取内容的 URL 的逗号分隔列表。 + llm_description: A comma-separated list of URLs to extract content from. 
+ form: llm diff --git a/api/core/tools/provider/builtin/tavily/tools/tavily_search.py b/api/core/tools/provider/builtin/tavily/tools/tavily_search.py index ca6d8633e4..ea41ea3ca3 100644 --- a/api/core/tools/provider/builtin/tavily/tools/tavily_search.py +++ b/api/core/tools/provider/builtin/tavily/tools/tavily_search.py @@ -17,8 +17,6 @@ class TavilySearch: Methods: raw_results: Retrieves raw search results from the Tavily Search API. - results: Retrieves cleaned search results from the Tavily Search API. - clean_results: Cleans the raw search results. """ def __init__(self, api_key: str) -> None: @@ -35,63 +33,62 @@ class TavilySearch: dict: The raw search results. """ + # Ensure required parameters are set params["api_key"] = self.api_key - if ( - "exclude_domains" in params - and isinstance(params["exclude_domains"], str) - and params["exclude_domains"] != "None" - ): - params["exclude_domains"] = params["exclude_domains"].split() - else: - params["exclude_domains"] = [] - if ( - "include_domains" in params - and isinstance(params["include_domains"], str) - and params["include_domains"] != "None" - ): - params["include_domains"] = params["include_domains"].split() - else: - params["include_domains"] = [] - response = requests.post(f"{TAVILY_API_URL}/search", json=params) + # Process parameters to ensure correct types + processed_params = self._process_params(params) + + response = requests.post(f"{TAVILY_API_URL}/search", json=processed_params) response.raise_for_status() return response.json() - def results(self, params: dict[str, Any]) -> list[dict]: + def _process_params(self, params: dict[str, Any]) -> dict: """ - Retrieves cleaned search results from the Tavily Search API. + Processes and validates the search parameters. Args: params (Dict[str, Any]): The search parameters. Returns: - list: The cleaned search results. - + dict: The processed parameters. 
""" - raw_search_results = self.raw_results(params) - return self.clean_results(raw_search_results["results"]) + processed_params = {} - def clean_results(self, results: list[dict]) -> list[dict]: - """ - Cleans the raw search results. + for key, value in params.items(): + if value is None or value == "None": + continue + if key in ["include_domains", "exclude_domains"]: + if isinstance(value, str): + # Split the string by commas or spaces and strip whitespace + processed_params[key] = [domain.strip() for domain in value.replace(",", " ").split()] + elif key in ["include_images", "include_image_descriptions", "include_answer", "include_raw_content"]: + # Ensure boolean type + if isinstance(value, str): + processed_params[key] = value.lower() == "true" + else: + processed_params[key] = bool(value) + elif key in ["max_results", "days"]: + if isinstance(value, str): + processed_params[key] = int(value) + else: + processed_params[key] = value + elif key in ["search_depth", "topic", "query", "api_key"]: + processed_params[key] = value + else: + # Unrecognized parameter + pass - Args: - results (list): The raw search results. + # Set defaults if not present + processed_params.setdefault("search_depth", "basic") + processed_params.setdefault("topic", "general") + processed_params.setdefault("max_results", 5) - Returns: - list: The cleaned search results. + # If topic is 'news', ensure 'days' is set + if processed_params.get("topic") == "news": + processed_params.setdefault("days", 3) - """ - clean_results = [] - for result in results: - clean_results.append( - { - "url": result["url"], - "content": result["content"], - } - ) - # return clean results as a string - return "\n".join([f"{res['url']}\n{res['content']}" for res in clean_results]) + return processed_params class TavilySearchTool(BuiltinTool): @@ -111,14 +108,88 @@ class TavilySearchTool(BuiltinTool): ToolInvokeMessage | list[ToolInvokeMessage]: The result of the Tavily search tool invocation. 
""" query = tool_parameters.get("query", "") - - api_key = self.runtime.credentials["tavily_api_key"] + api_key = self.runtime.credentials.get("tavily_api_key") + if not api_key: + return self.create_text_message( + "Tavily API key is missing. Please set the 'tavily_api_key' in credentials." + ) if not query: - return self.create_text_message("Please input query") + return self.create_text_message("Please input a query.") + tavily_search = TavilySearch(api_key) - results = tavily_search.results(tool_parameters) - print(results) - if not results: - return self.create_text_message(f"No results found for '{query}' in Tavily") + try: + raw_results = tavily_search.raw_results(tool_parameters) + except requests.HTTPError as e: + return self.create_text_message(f"Error occurred while searching: {str(e)}") + + if not raw_results.get("results"): + return self.create_text_message(f"No results found for '{query}' in Tavily.") else: - return self.create_text_message(text=results) + # Always return JSON message with all data + json_message = self.create_json_message(raw_results) + + # Create text message based on user-selected parameters + text_message_content = self._format_results_as_text(raw_results, tool_parameters) + text_message = self.create_text_message(text=text_message_content) + + return [json_message, text_message] + + def _format_results_as_text(self, raw_results: dict, tool_parameters: dict[str, Any]) -> str: + """ + Formats the raw results into a markdown text based on user-selected parameters. + + Args: + raw_results (dict): The raw search results. + tool_parameters (dict): The tool parameters selected by the user. + + Returns: + str: The formatted markdown text. 
+ """ + output_lines = [] + + # Include answer if requested + if tool_parameters.get("include_answer", False) and raw_results.get("answer"): + output_lines.append(f"**Answer:** {raw_results['answer']}\n") + + # Include images if requested + if tool_parameters.get("include_images", False) and raw_results.get("images"): + output_lines.append("**Images:**\n") + for image in raw_results["images"]: + if tool_parameters.get("include_image_descriptions", False) and "description" in image: + output_lines.append(f"![{image['description']}]({image['url']})\n") + else: + output_lines.append(f"![]({image['url']})\n") + + # Process each result + if "results" in raw_results: + for idx, result in enumerate(raw_results["results"], 1): + title = result.get("title", "No Title") + url = result.get("url", "") + content = result.get("content", "") + published_date = result.get("published_date", "") + score = result.get("score", "") + + output_lines.append(f"### Result {idx}: [{title}]({url})\n") + + # Include published date if available and topic is 'news' + if tool_parameters.get("topic") == "news" and published_date: + output_lines.append(f"**Published Date:** {published_date}\n") + + output_lines.append(f"**URL:** {url}\n") + + # Include score (relevance) + if score: + output_lines.append(f"**Relevance Score:** {score}\n") + + # Include content + if content: + output_lines.append(f"**Content:**\n{content}\n") + + # Include raw content if requested + if tool_parameters.get("include_raw_content", False) and result.get("raw_content"): + output_lines.append(f"**Raw Content:**\n{result['raw_content']}\n") + + # Add a separator + output_lines.append("---\n") + + return "\n".join(output_lines) diff --git a/api/core/tools/provider/builtin/tavily/tools/tavily_search.yaml b/api/core/tools/provider/builtin/tavily/tools/tavily_search.yaml index 88426056af..14b2829701 100644 --- a/api/core/tools/provider/builtin/tavily/tools/tavily_search.yaml +++ 
b/api/core/tools/provider/builtin/tavily/tools/tavily_search.yaml @@ -2,28 +2,24 @@ identity: name: tavily_search author: Yash Parmar label: - en_US: TavilySearch - zh_Hans: TavilySearch - pt_BR: TavilySearch + en_US: Tavily Search + zh_Hans: Tavily Search description: human: - en_US: A tool for search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. + en_US: A search engine tool built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. zh_Hans: 专为人工智能代理 (LLM) 构建的搜索引擎工具,可快速提供实时、准确和真实的结果。 - pt_BR: A tool for search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. llm: A tool for search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. parameters: - name: query type: string required: true label: - en_US: Query string - zh_Hans: 查询语句 - pt_BR: Query string + en_US: Query + zh_Hans: 查询 human_description: - en_US: used for searching - zh_Hans: 用于搜索网页内容 - pt_BR: used for searching - llm_description: key words for searching + en_US: The search query you want to execute with Tavily. + zh_Hans: 您想用 Tavily 执行的搜索查询。 + llm_description: The search query. form: llm - name: search_depth type: select @@ -31,122 +27,118 @@ parameters: label: en_US: Search Depth zh_Hans: 搜索深度 - pt_BR: Search Depth human_description: - en_US: The depth of search results - zh_Hans: 搜索结果的深度 - pt_BR: The depth of search results + en_US: The depth of the search. + zh_Hans: 搜索的深度。 form: form options: - value: basic label: en_US: Basic zh_Hans: 基本 - pt_BR: Basic - value: advanced label: en_US: Advanced zh_Hans: 高级 - pt_BR: Advanced default: basic + - name: topic + type: select + required: false + label: + en_US: Topic + zh_Hans: 主题 + human_description: + en_US: The category of the search. 
+ zh_Hans: 搜索的类别。 + form: form + options: + - value: general + label: + en_US: General + zh_Hans: 一般 + - value: news + label: + en_US: News + zh_Hans: 新闻 + default: general + - name: days + type: number + required: false + label: + en_US: Days + zh_Hans: 天数 + human_description: + en_US: The number of days back from the current date to include in the search results (only applicable when "topic" is "news"). + zh_Hans: 从当前日期起向前追溯的天数,以包含在搜索结果中(仅当“topic”为“news”时适用)。 + form: form + min: 1 + default: 3 + - name: max_results + type: number + required: false + label: + en_US: Max Results + zh_Hans: 最大结果数 + human_description: + en_US: The maximum number of search results to return. + zh_Hans: 要返回的最大搜索结果数。 + form: form + min: 1 + max: 20 + default: 5 - name: include_images type: boolean required: false label: en_US: Include Images zh_Hans: 包含图片 - pt_BR: Include Images human_description: - en_US: Include images in the search results - zh_Hans: 在搜索结果中包含图片 - pt_BR: Include images in the search results + en_US: Include a list of query-related images in the response. + zh_Hans: 在响应中包含与查询相关的图片列表。 form: form - options: - - value: 'true' - label: - en_US: 'Yes' - zh_Hans: 是 - pt_BR: 'Yes' - - value: 'false' - label: - en_US: 'No' - zh_Hans: 否 - pt_BR: 'No' - default: 'false' + default: false + - name: include_image_descriptions + type: boolean + required: false + label: + en_US: Include Image Descriptions + zh_Hans: 包含图片描述 + human_description: + en_US: When include_images is True, adds descriptive text for each image. + zh_Hans: 当 include_images 为 True 时,为每个图像添加描述文本。 + form: form + default: false - name: include_answer type: boolean required: false label: en_US: Include Answer zh_Hans: 包含答案 - pt_BR: Include Answer human_description: - en_US: Include answers in the search results - zh_Hans: 在搜索结果中包含答案 - pt_BR: Include answers in the search results + en_US: Include a short answer to the original query in the response. 
+ zh_Hans: 在响应中包含对原始查询的简短回答。 form: form - options: - - value: 'true' - label: - en_US: 'Yes' - zh_Hans: 是 - pt_BR: 'Yes' - - value: 'false' - label: - en_US: 'No' - zh_Hans: 否 - pt_BR: 'No' - default: 'false' + default: false - name: include_raw_content type: boolean required: false label: en_US: Include Raw Content zh_Hans: 包含原始内容 - pt_BR: Include Raw Content human_description: - en_US: Include raw content in the search results - zh_Hans: 在搜索结果中包含原始内容 - pt_BR: Include raw content in the search results + en_US: Include the cleaned and parsed HTML content of each search result. + zh_Hans: 包含每个搜索结果的已清理和解析的HTML内容。 form: form - options: - - value: 'true' - label: - en_US: 'Yes' - zh_Hans: 是 - pt_BR: 'Yes' - - value: 'false' - label: - en_US: 'No' - zh_Hans: 否 - pt_BR: 'No' - default: 'false' - - name: max_results - type: number - required: false - label: - en_US: Max Results - zh_Hans: 最大结果 - pt_BR: Max Results - human_description: - en_US: The number of maximum search results to return - zh_Hans: 返回的最大搜索结果数 - pt_BR: The number of maximum search results to return - form: form - min: 1 - max: 20 - default: 5 + default: false - name: include_domains type: string required: false label: en_US: Include Domains zh_Hans: 包含域 - pt_BR: Include Domains human_description: - en_US: A list of domains to specifically include in the search results - zh_Hans: 在搜索结果中特别包含的域名列表 - pt_BR: A list of domains to specifically include in the search results + en_US: A comma-separated list of domains to specifically include in the search results. 
+ zh_Hans: 要在搜索结果中特别包含的域的逗号分隔列表。 form: form - name: exclude_domains type: string @@ -154,9 +146,7 @@ parameters: label: en_US: Exclude Domains zh_Hans: 排除域 - pt_BR: Exclude Domains human_description: - en_US: A list of domains to specifically exclude from the search results - zh_Hans: 从搜索结果中特别排除的域名列表 - pt_BR: A list of domains to specifically exclude from the search results + en_US: A comma-separated list of domains to specifically exclude from the search results. + zh_Hans: 要从搜索结果中特别排除的域的逗号分隔列表。 form: form