mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-16 19:16:03 +08:00
Add query_prefix + Return TED Transcript URL for Downstream Scraping Tasks (#11090)
This commit is contained in:
parent
60c1549771
commit
af2461cccc
@ -18,6 +18,12 @@ class DuckDuckGoImageSearchTool(BuiltinTool):
|
|||||||
"size": tool_parameters.get("size"),
|
"size": tool_parameters.get("size"),
|
||||||
"max_results": tool_parameters.get("max_results"),
|
"max_results": tool_parameters.get("max_results"),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Add query_prefix handling
|
||||||
|
query_prefix = tool_parameters.get("query_prefix", "").strip()
|
||||||
|
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
|
||||||
|
query_dict["keywords"] = final_query
|
||||||
|
|
||||||
response = DDGS().images(**query_dict)
|
response = DDGS().images(**query_dict)
|
||||||
markdown_result = "\n\n"
|
markdown_result = "\n\n"
|
||||||
json_result = []
|
json_result = []
|
||||||
|
@ -86,3 +86,14 @@ parameters:
|
|||||||
en_US: The size of the image to be searched.
|
en_US: The size of the image to be searched.
|
||||||
zh_Hans: 要搜索的图片的大小
|
zh_Hans: 要搜索的图片的大小
|
||||||
form: form
|
form: form
|
||||||
|
- name: query_prefix
|
||||||
|
label:
|
||||||
|
en_US: Query Prefix
|
||||||
|
zh_Hans: 查询前缀
|
||||||
|
type: string
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
form: form
|
||||||
|
human_description:
|
||||||
|
en_US: Specific Search e.g. "site:unsplash.com"
|
||||||
|
zh_Hans: 定向搜索 e.g. "site:unsplash.com"
|
||||||
|
@ -7,7 +7,7 @@ from core.tools.entities.tool_entities import ToolInvokeMessage
|
|||||||
from core.tools.tool.builtin_tool import BuiltinTool
|
from core.tools.tool.builtin_tool import BuiltinTool
|
||||||
|
|
||||||
SUMMARY_PROMPT = """
|
SUMMARY_PROMPT = """
|
||||||
User's query:
|
User's query:
|
||||||
{query}
|
{query}
|
||||||
|
|
||||||
Here are the news results:
|
Here are the news results:
|
||||||
@ -30,6 +30,12 @@ class DuckDuckGoNewsSearchTool(BuiltinTool):
|
|||||||
"safesearch": "moderate",
|
"safesearch": "moderate",
|
||||||
"region": "wt-wt",
|
"region": "wt-wt",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Add query_prefix handling
|
||||||
|
query_prefix = tool_parameters.get("query_prefix", "").strip()
|
||||||
|
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
|
||||||
|
query_dict["keywords"] = final_query
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = list(DDGS().news(**query_dict))
|
response = list(DDGS().news(**query_dict))
|
||||||
if not response:
|
if not response:
|
||||||
|
@ -69,3 +69,14 @@ parameters:
|
|||||||
en_US: Whether to pass the news results to llm for summarization.
|
en_US: Whether to pass the news results to llm for summarization.
|
||||||
zh_Hans: 是否需要将新闻结果传给大模型总结
|
zh_Hans: 是否需要将新闻结果传给大模型总结
|
||||||
form: form
|
form: form
|
||||||
|
- name: query_prefix
|
||||||
|
label:
|
||||||
|
en_US: Query Prefix
|
||||||
|
zh_Hans: 查询前缀
|
||||||
|
type: string
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
form: form
|
||||||
|
human_description:
|
||||||
|
en_US: Specific Search e.g. "site:msn.com"
|
||||||
|
zh_Hans: 定向搜索 e.g. "site:msn.com"
|
||||||
|
@ -7,7 +7,7 @@ from core.tools.entities.tool_entities import ToolInvokeMessage
|
|||||||
from core.tools.tool.builtin_tool import BuiltinTool
|
from core.tools.tool.builtin_tool import BuiltinTool
|
||||||
|
|
||||||
SUMMARY_PROMPT = """
|
SUMMARY_PROMPT = """
|
||||||
User's query:
|
User's query:
|
||||||
{query}
|
{query}
|
||||||
|
|
||||||
Here is the search engine result:
|
Here is the search engine result:
|
||||||
@ -26,7 +26,12 @@ class DuckDuckGoSearchTool(BuiltinTool):
|
|||||||
query = tool_parameters.get("query")
|
query = tool_parameters.get("query")
|
||||||
max_results = tool_parameters.get("max_results", 5)
|
max_results = tool_parameters.get("max_results", 5)
|
||||||
require_summary = tool_parameters.get("require_summary", False)
|
require_summary = tool_parameters.get("require_summary", False)
|
||||||
response = DDGS().text(query, max_results=max_results)
|
|
||||||
|
# Add query_prefix handling
|
||||||
|
query_prefix = tool_parameters.get("query_prefix", "").strip()
|
||||||
|
final_query = f"{query_prefix} {query}".strip()
|
||||||
|
|
||||||
|
response = DDGS().text(final_query, max_results=max_results)
|
||||||
if require_summary:
|
if require_summary:
|
||||||
results = "\n".join([res.get("body") for res in response])
|
results = "\n".join([res.get("body") for res in response])
|
||||||
results = self.summary_results(user_id=user_id, content=results, query=query)
|
results = self.summary_results(user_id=user_id, content=results, query=query)
|
||||||
|
@ -39,3 +39,14 @@ parameters:
|
|||||||
en_US: Whether to pass the search results to llm for summarization.
|
en_US: Whether to pass the search results to llm for summarization.
|
||||||
zh_Hans: 是否需要将搜索结果传给大模型总结
|
zh_Hans: 是否需要将搜索结果传给大模型总结
|
||||||
form: form
|
form: form
|
||||||
|
- name: query_prefix
|
||||||
|
label:
|
||||||
|
en_US: Query Prefix
|
||||||
|
zh_Hans: 查询前缀
|
||||||
|
type: string
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
form: form
|
||||||
|
human_description:
|
||||||
|
en_US: Specific Search e.g. "site:wikipedia.org"
|
||||||
|
zh_Hans: 定向搜索 e.g. "site:wikipedia.org"
|
||||||
|
@ -24,7 +24,7 @@ max-width: 100%; border-radius: 8px;">
|
|||||||
|
|
||||||
def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]:
|
def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]:
|
||||||
query_dict = {
|
query_dict = {
|
||||||
"keywords": tool_parameters.get("query"),
|
"keywords": tool_parameters.get("query"), # LLM's query
|
||||||
"region": tool_parameters.get("region", "wt-wt"),
|
"region": tool_parameters.get("region", "wt-wt"),
|
||||||
"safesearch": tool_parameters.get("safesearch", "moderate"),
|
"safesearch": tool_parameters.get("safesearch", "moderate"),
|
||||||
"timelimit": tool_parameters.get("timelimit"),
|
"timelimit": tool_parameters.get("timelimit"),
|
||||||
@ -40,6 +40,12 @@ max-width: 100%; border-radius: 8px;">
|
|||||||
# Get proxy URL from parameters
|
# Get proxy URL from parameters
|
||||||
proxy_url = tool_parameters.get("proxy_url", "").strip()
|
proxy_url = tool_parameters.get("proxy_url", "").strip()
|
||||||
|
|
||||||
|
query_prefix = tool_parameters.get("query_prefix", "").strip()
|
||||||
|
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
|
||||||
|
|
||||||
|
# Update the keywords in query_dict with the final_query
|
||||||
|
query_dict["keywords"] = final_query
|
||||||
|
|
||||||
response = DDGS().videos(**query_dict)
|
response = DDGS().videos(**query_dict)
|
||||||
|
|
||||||
# Create HTML result with embedded iframes
|
# Create HTML result with embedded iframes
|
||||||
@ -51,9 +57,13 @@ max-width: 100%; border-radius: 8px;">
|
|||||||
embed_html = res.get("embed_html", "")
|
embed_html = res.get("embed_html", "")
|
||||||
description = res.get("description", "")
|
description = res.get("description", "")
|
||||||
content_url = res.get("content", "")
|
content_url = res.get("content", "")
|
||||||
|
transcript_url = None
|
||||||
|
|
||||||
# Handle TED.com videos
|
# Handle TED.com videos
|
||||||
if not embed_html and "ted.com/talks" in content_url:
|
if "ted.com/talks" in content_url:
|
||||||
|
# Create transcript URL
|
||||||
|
transcript_url = f"{content_url}/transcript"
|
||||||
|
# Create embed URL
|
||||||
embed_url = content_url.replace("www.ted.com", "embed.ted.com")
|
embed_url = content_url.replace("www.ted.com", "embed.ted.com")
|
||||||
if proxy_url:
|
if proxy_url:
|
||||||
embed_url = f"{proxy_url}{embed_url}"
|
embed_url = f"{proxy_url}{embed_url}"
|
||||||
@ -68,8 +78,14 @@ max-width: 100%; border-radius: 8px;">
|
|||||||
|
|
||||||
markdown_result += f"{title}\n\n"
|
markdown_result += f"{title}\n\n"
|
||||||
markdown_result += f"{embed_html}\n\n"
|
markdown_result += f"{embed_html}\n\n"
|
||||||
|
if description:
|
||||||
|
markdown_result += f"{description}\n\n"
|
||||||
markdown_result += "---\n\n"
|
markdown_result += "---\n\n"
|
||||||
|
|
||||||
json_result.append(self.create_json_message(res))
|
# Add transcript_url to the JSON result if available
|
||||||
|
result_dict = res.copy()
|
||||||
|
if transcript_url:
|
||||||
|
result_dict["transcript_url"] = transcript_url
|
||||||
|
json_result.append(self.create_json_message(result_dict))
|
||||||
|
|
||||||
return [self.create_text_message(markdown_result)] + json_result
|
return [self.create_text_message(markdown_result)] + json_result
|
||||||
|
@ -95,3 +95,14 @@ parameters:
|
|||||||
en_US: Proxy URL
|
en_US: Proxy URL
|
||||||
zh_Hans: 视频代理地址
|
zh_Hans: 视频代理地址
|
||||||
form: form
|
form: form
|
||||||
|
- name: query_prefix
|
||||||
|
label:
|
||||||
|
en_US: Query Prefix
|
||||||
|
zh_Hans: 查询前缀
|
||||||
|
type: string
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
form: form
|
||||||
|
human_description:
|
||||||
|
en_US: Specific Search e.g. "site:www.ted.com"
|
||||||
|
zh_Hans: 定向搜索 e.g. "site:www.ted.com"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user