mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-16 20:35:59 +08:00
Add query_prefix + Return TED Transcript URL for Downstream Scraping Tasks (#11090)
This commit is contained in:
parent
60c1549771
commit
af2461cccc
@ -18,6 +18,12 @@ class DuckDuckGoImageSearchTool(BuiltinTool):
|
||||
"size": tool_parameters.get("size"),
|
||||
"max_results": tool_parameters.get("max_results"),
|
||||
}
|
||||
|
||||
# Add query_prefix handling
|
||||
query_prefix = tool_parameters.get("query_prefix", "").strip()
|
||||
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
|
||||
query_dict["keywords"] = final_query
|
||||
|
||||
response = DDGS().images(**query_dict)
|
||||
markdown_result = "\n\n"
|
||||
json_result = []
|
||||
|
@ -86,3 +86,14 @@ parameters:
|
||||
en_US: The size of the image to be searched.
|
||||
zh_Hans: 要搜索的图片的大小
|
||||
form: form
|
||||
- name: query_prefix
|
||||
label:
|
||||
en_US: Query Prefix
|
||||
zh_Hans: 查询前缀
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
form: form
|
||||
human_description:
|
||||
en_US: Specific Search e.g. "site:unsplash.com"
|
||||
zh_Hans: 定向搜索 e.g. "site:unsplash.com"
|
||||
|
@ -30,6 +30,12 @@ class DuckDuckGoNewsSearchTool(BuiltinTool):
|
||||
"safesearch": "moderate",
|
||||
"region": "wt-wt",
|
||||
}
|
||||
|
||||
# Add query_prefix handling
|
||||
query_prefix = tool_parameters.get("query_prefix", "").strip()
|
||||
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
|
||||
query_dict["keywords"] = final_query
|
||||
|
||||
try:
|
||||
response = list(DDGS().news(**query_dict))
|
||||
if not response:
|
||||
|
@ -69,3 +69,14 @@ parameters:
|
||||
en_US: Whether to pass the news results to llm for summarization.
|
||||
zh_Hans: 是否需要将新闻结果传给大模型总结
|
||||
form: form
|
||||
- name: query_prefix
|
||||
label:
|
||||
en_US: Query Prefix
|
||||
zh_Hans: 查询前缀
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
form: form
|
||||
human_description:
|
||||
en_US: Specific Search e.g. "site:msn.com"
|
||||
zh_Hans: 定向搜索 e.g. "site:msn.com"
|
||||
|
@ -26,7 +26,12 @@ class DuckDuckGoSearchTool(BuiltinTool):
|
||||
query = tool_parameters.get("query")
|
||||
max_results = tool_parameters.get("max_results", 5)
|
||||
require_summary = tool_parameters.get("require_summary", False)
|
||||
response = DDGS().text(query, max_results=max_results)
|
||||
|
||||
# Add query_prefix handling
|
||||
query_prefix = tool_parameters.get("query_prefix", "").strip()
|
||||
final_query = f"{query_prefix} {query}".strip()
|
||||
|
||||
response = DDGS().text(final_query, max_results=max_results)
|
||||
if require_summary:
|
||||
results = "\n".join([res.get("body") for res in response])
|
||||
results = self.summary_results(user_id=user_id, content=results, query=query)
|
||||
|
@ -39,3 +39,14 @@ parameters:
|
||||
en_US: Whether to pass the search results to llm for summarization.
|
||||
zh_Hans: 是否需要将搜索结果传给大模型总结
|
||||
form: form
|
||||
- name: query_prefix
|
||||
label:
|
||||
en_US: Query Prefix
|
||||
zh_Hans: 查询前缀
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
form: form
|
||||
human_description:
|
||||
en_US: Specific Search e.g. "site:wikipedia.org"
|
||||
zh_Hans: 定向搜索 e.g. "site:wikipedia.org"
|
||||
|
@ -24,7 +24,7 @@ max-width: 100%; border-radius: 8px;">
|
||||
|
||||
def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]:
|
||||
query_dict = {
|
||||
"keywords": tool_parameters.get("query"),
|
||||
"keywords": tool_parameters.get("query"), # LLM's query
|
||||
"region": tool_parameters.get("region", "wt-wt"),
|
||||
"safesearch": tool_parameters.get("safesearch", "moderate"),
|
||||
"timelimit": tool_parameters.get("timelimit"),
|
||||
@ -40,6 +40,12 @@ max-width: 100%; border-radius: 8px;">
|
||||
# Get proxy URL from parameters
|
||||
proxy_url = tool_parameters.get("proxy_url", "").strip()
|
||||
|
||||
query_prefix = tool_parameters.get("query_prefix", "").strip()
|
||||
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
|
||||
|
||||
# Update the keywords in query_dict with the final_query
|
||||
query_dict["keywords"] = final_query
|
||||
|
||||
response = DDGS().videos(**query_dict)
|
||||
|
||||
# Create HTML result with embedded iframes
|
||||
@ -51,9 +57,13 @@ max-width: 100%; border-radius: 8px;">
|
||||
embed_html = res.get("embed_html", "")
|
||||
description = res.get("description", "")
|
||||
content_url = res.get("content", "")
|
||||
transcript_url = None
|
||||
|
||||
# Handle TED.com videos
|
||||
if not embed_html and "ted.com/talks" in content_url:
|
||||
if "ted.com/talks" in content_url:
|
||||
# Create transcript URL
|
||||
transcript_url = f"{content_url}/transcript"
|
||||
# Create embed URL
|
||||
embed_url = content_url.replace("www.ted.com", "embed.ted.com")
|
||||
if proxy_url:
|
||||
embed_url = f"{proxy_url}{embed_url}"
|
||||
@ -68,8 +78,14 @@ max-width: 100%; border-radius: 8px;">
|
||||
|
||||
markdown_result += f"{title}\n\n"
|
||||
markdown_result += f"{embed_html}\n\n"
|
||||
if description:
|
||||
markdown_result += f"{description}\n\n"
|
||||
markdown_result += "---\n\n"
|
||||
|
||||
json_result.append(self.create_json_message(res))
|
||||
# Add transcript_url to the JSON result if available
|
||||
result_dict = res.copy()
|
||||
if transcript_url:
|
||||
result_dict["transcript_url"] = transcript_url
|
||||
json_result.append(self.create_json_message(result_dict))
|
||||
|
||||
return [self.create_text_message(markdown_result)] + json_result
|
||||
|
@ -95,3 +95,14 @@ parameters:
|
||||
en_US: Proxy URL
|
||||
zh_Hans: 视频代理地址
|
||||
form: form
|
||||
- name: query_prefix
|
||||
label:
|
||||
en_US: Query Prefix
|
||||
zh_Hans: 查询前缀
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
form: form
|
||||
human_description:
|
||||
en_US: Specific Search e.g. "site:www.ted.com"
|
||||
zh_Hans: 定向搜索 e.g. "site:www.ted.com"
|
||||
|
Loading…
x
Reference in New Issue
Block a user