mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-11 16:08:58 +08:00
Add 2 firecrawl tools : Scrape and Search (#6016)
Co-authored-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
parent
b217ee414f
commit
ab847c81fa
26
api/core/tools/provider/builtin/firecrawl/tools/scrape.py
Normal file
26
api/core/tools/provider/builtin/firecrawl/tools/scrape.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import json
|
||||||
|
from typing import Any, Union
|
||||||
|
|
||||||
|
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||||
|
from core.tools.provider.builtin.firecrawl.firecrawl_appx import FirecrawlApp
|
||||||
|
from core.tools.tool.builtin_tool import BuiltinTool
|
||||||
|
|
||||||
|
|
||||||
|
class ScrapeTool(BuiltinTool):
    """Built-in tool that scrapes a single URL through a ``FirecrawlApp`` client."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Scrape ``tool_parameters['url']`` and wrap the outcome in a text message.

        :param user_id: identifier of the invoking user (not used by this tool)
        :param tool_parameters: invocation parameters; must contain ``url``
        :return: a text message holding the scrape result, or a fixed failure
                 message when the client returns a falsy result
        """
        credentials = self.runtime.credentials
        client = FirecrawlApp(
            api_key=credentials['firecrawl_api_key'],
            base_url=credentials['base_url'],
        )

        scraped = client.scrape_url(url=tool_parameters['url'], wait=True)

        # A falsy result (None, empty dict, empty string, ...) is reported as a failure.
        if not scraped:
            return self.create_text_message("Scrape request failed.")

        # Dicts are rendered as indented JSON with non-ASCII characters kept readable;
        # anything else falls back to its plain string form.
        if isinstance(scraped, dict):
            rendered = json.dumps(scraped, ensure_ascii=False, indent=4)
        else:
            rendered = str(scraped)

        return self.create_text_message(rendered)
|
23
api/core/tools/provider/builtin/firecrawl/tools/scrape.yaml
Normal file
23
api/core/tools/provider/builtin/firecrawl/tools/scrape.yaml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
identity:
|
||||||
|
name: scrape
|
||||||
|
author: ahasasjeb
|
||||||
|
label:
|
||||||
|
en_US: Scrape
|
||||||
|
zh_Hans: 抓取
|
||||||
|
description:
|
||||||
|
human:
|
||||||
|
en_US: Extract data from a single URL.
|
||||||
|
zh_Hans: 从单个URL抓取数据。
|
||||||
|
llm: This tool is designed to scrape a URL and output the content in Markdown format.
|
||||||
|
parameters:
|
||||||
|
- name: url
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
label:
|
||||||
|
en_US: URL to scrape
|
||||||
|
zh_Hans: 要抓取的URL
|
||||||
|
human_description:
|
||||||
|
en_US: The URL of the website to scrape and extract data from.
|
||||||
|
zh_Hans: 要抓取并提取数据的网站URL。
|
||||||
|
llm_description: The URL of the website that needs to be crawled. This is a required parameter.
|
||||||
|
form: llm
|
26
api/core/tools/provider/builtin/firecrawl/tools/search.py
Normal file
26
api/core/tools/provider/builtin/firecrawl/tools/search.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import json
|
||||||
|
from typing import Any, Union
|
||||||
|
|
||||||
|
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||||
|
from core.tools.provider.builtin.firecrawl.firecrawl_appx import FirecrawlApp
|
||||||
|
from core.tools.tool.builtin_tool import BuiltinTool
|
||||||
|
|
||||||
|
|
||||||
|
class SearchTool(BuiltinTool):
    """Built-in tool that runs a keyword search through a ``FirecrawlApp`` client."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Search for ``tool_parameters['keyword']`` and wrap the outcome in a text message.

        :param user_id: identifier of the invoking user (not used by this tool)
        :param tool_parameters: invocation parameters; must contain ``keyword``
        :return: a text message holding the search result, or a fixed failure
                 message when the client returns a falsy result
        """
        app = FirecrawlApp(api_key=self.runtime.credentials['firecrawl_api_key'], base_url=self.runtime.credentials['base_url'])

        crawl_result = app.search(
            query=tool_parameters['keyword'],
            wait=True
        )

        # Bail out before serializing: an empty/None result means nothing usable came back.
        if not crawl_result:
            return self.create_text_message("Search request failed.")

        # Serialize lists as well as dicts. A list of hits passed through str()
        # would be emitted as a Python repr (single quotes, None/True) instead
        # of valid JSON.
        if isinstance(crawl_result, (dict, list)):
            result_message = json.dumps(crawl_result, ensure_ascii=False, indent=4)
        else:
            result_message = str(crawl_result)

        return self.create_text_message(result_message)
|
23
api/core/tools/provider/builtin/firecrawl/tools/search.yaml
Normal file
23
api/core/tools/provider/builtin/firecrawl/tools/search.yaml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
identity:
|
||||||
|
name: search
|
||||||
|
author: ahasasjeb
|
||||||
|
label:
|
||||||
|
en_US: Search
|
||||||
|
zh_Hans: 搜索
|
||||||
|
description:
|
||||||
|
human:
|
||||||
|
en_US: Search, and output in Markdown format
|
||||||
|
zh_Hans: 搜索,并且以Markdown格式输出
|
||||||
|
llm: This tool can perform online searches and convert the results to Markdown format.
|
||||||
|
parameters:
|
||||||
|
- name: keyword
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
label:
|
||||||
|
en_US: keyword
|
||||||
|
zh_Hans: 关键词
|
||||||
|
human_description:
|
||||||
|
en_US: Input keywords to use Firecrawl API for search.
|
||||||
|
zh_Hans: 输入关键词即可使用Firecrawl API进行搜索。
|
||||||
|
llm_description: Efficiently extract keywords from user text.
|
||||||
|
form: llm
|
Loading…
x
Reference in New Issue
Block a user