Add 2 Firecrawl tools: Scrape and Search (#6016)

Co-authored-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
ahasasjeb 2024-07-06 09:45:39 +08:00 committed by GitHub
parent b217ee414f
commit ab847c81fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 98 additions and 0 deletions

View File

@ -0,0 +1,26 @@
import json
from typing import Any, Union
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.provider.builtin.firecrawl.firecrawl_appx import FirecrawlApp
from core.tools.tool.builtin_tool import BuiltinTool
class ScrapeTool(BuiltinTool):
    """Built-in tool that scrapes a single URL through ``FirecrawlApp``."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Scrape ``tool_parameters['url']`` and return the outcome as a text message.

        Args:
            user_id: Not used by this method.
            tool_parameters: Must contain the ``url`` key.

        Returns:
            A text message holding the scrape result: dict results are
            rendered as indented JSON (non-ASCII characters kept as-is),
            anything else via ``str()``. A falsy result yields the text
            "Scrape request failed." instead.
        """
        credentials = self.runtime.credentials
        client = FirecrawlApp(
            api_key=credentials['firecrawl_api_key'],
            base_url=credentials['base_url'],
        )
        scraped = client.scrape_url(url=tool_parameters['url'], wait=True)

        # Nothing usable came back: report the failure instead of an empty body.
        if not scraped:
            return self.create_text_message("Scrape request failed.")

        rendered = (
            json.dumps(scraped, ensure_ascii=False, indent=4)
            if isinstance(scraped, dict)
            else str(scraped)
        )
        return self.create_text_message(rendered)

View File

@ -0,0 +1,23 @@
# Manifest for the "scrape" tool: identity, descriptions and parameters.
# NOTE(review): key nesting (indentation) is not visible in this view; the
# comments below describe each key by its position in the file only.
identity:
# Tool identifier and author.
name: scrape
author: ahasasjeb
# Localized display label (English / Simplified Chinese).
label:
en_US: Scrape
zh_Hans: 抓取
description:
# Description aimed at human users, one entry per locale.
human:
en_US: Extract data from a single URL.
zh_Hans: 从单个URL抓取数据。
# Description aimed at the LLM.
# NOTE(review): ScrapeTool JSON-encodes dict results before returning them,
# so the "Markdown format" wording may not match the actual output - confirm.
llm: This tool is designed to scrape URL and output the content in Markdown format.
parameters:
# Required string parameter; ScrapeTool reads it as tool_parameters['url'].
- name: url
type: string
required: true
label:
en_US: URL to scrape
zh_Hans: 要抓取的URL
human_description:
en_US: The URL of the website to scrape and extract data from.
zh_Hans: 要抓取并提取数据的网站URL。
llm_description: The URL of the website that needs to be crawled. This is a required parameter.
# presumably "llm" means the value is filled in by the model - TODO confirm
form: llm

View File

@ -0,0 +1,26 @@
import json
from typing import Any, Union
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.provider.builtin.firecrawl.firecrawl_appx import FirecrawlApp
from core.tools.tool.builtin_tool import BuiltinTool
class SearchTool(BuiltinTool):
    """Built-in tool that runs a keyword search through ``FirecrawlApp``."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Search for ``tool_parameters['keyword']`` and return the outcome as a text message.

        Args:
            user_id: Not used by this method.
            tool_parameters: Must contain the ``keyword`` key, which is
                passed to the search call as ``query``.

        Returns:
            A text message holding the search result: dict results are
            rendered as indented JSON (non-ASCII characters kept as-is),
            anything else via ``str()``. A falsy result yields the text
            "Search request failed." instead.
        """
        runtime_credentials = self.runtime.credentials
        firecrawl = FirecrawlApp(
            api_key=runtime_credentials['firecrawl_api_key'],
            base_url=runtime_credentials['base_url'],
        )
        hits = firecrawl.search(query=tool_parameters['keyword'], wait=True)

        # An empty/falsy response is reported as a failed request.
        if not hits:
            return self.create_text_message("Search request failed.")

        if isinstance(hits, dict):
            body = json.dumps(hits, ensure_ascii=False, indent=4)
        else:
            body = str(hits)
        return self.create_text_message(body)

View File

@ -0,0 +1,23 @@
# Manifest for the "search" tool: identity, descriptions and parameters.
# NOTE(review): key nesting (indentation) is not visible in this view; the
# comments below describe each key by its position in the file only.
identity:
# Tool identifier and author.
name: search
author: ahasasjeb
# Localized display label (English / Simplified Chinese).
label:
en_US: Search
zh_Hans: 搜索
description:
# Description aimed at human users, one entry per locale.
human:
en_US: Search, and output in Markdown format
zh_Hans: 搜索并且以Markdown格式输出
# Description aimed at the LLM.
# NOTE(review): SearchTool JSON-encodes dict results before returning them,
# so the "Markdown format" wording may not match the actual output - confirm.
llm: This tool can perform online searches and convert the results to Markdown format.
parameters:
# Required string parameter; SearchTool reads it as tool_parameters['keyword']
# and forwards it to the search call as `query`.
- name: keyword
type: string
required: true
label:
# NOTE(review): lower-case label, unlike the other labels in this manifest.
en_US: keyword
zh_Hans: 关键词
human_description:
en_US: Input keywords to use Firecrawl API for search.
zh_Hans: 输入关键词即可使用Firecrawl API进行搜索。
# NOTE(review): reads as an instruction rather than a description of the
# parameter's value - consider rewording.
llm_description: Efficiently extract keywords from user text.
# presumably "llm" means the value is filled in by the model - TODO confirm
form: llm