Rename the DuckDuckGoSearch component (module, classes, component_name) to DuckDuckGo (#1496)

### What problem does this PR solve?


### Type of change

- [x] Refactoring
This commit is contained in:
Kevin Hu 2024-07-12 19:20:12 +08:00 committed by GitHub
parent 4eeb535946
commit eecec7b119
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 63 additions and 63 deletions

View File

@ -10,7 +10,7 @@ from .message import Message, MessageParam
from .rewrite import RewriteQuestion, RewriteQuestionParam from .rewrite import RewriteQuestion, RewriteQuestionParam
from .keyword import KeywordExtract, KeywordExtractParam from .keyword import KeywordExtract, KeywordExtractParam
from .baidu import Baidu, BaiduParam from .baidu import Baidu, BaiduParam
from .duckduckgosearch import DuckDuckGoSearch, DuckDuckGoSearchParam from .duckduckgo import DuckDuckGo, DuckDuckGoParam
def component_class(class_name): def component_class(class_name):

View File

@ -1,62 +1,62 @@
# #
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import random import random
from abc import ABC from abc import ABC
from functools import partial from functools import partial
from duckduckgo_search import DDGS from duckduckgo_search import DDGS
import pandas as pd import pandas as pd
from graph.component.base import ComponentBase, ComponentParamBase from graph.component.base import ComponentBase, ComponentParamBase
class DuckDuckGoParam(ComponentParamBase):
    """
    Parameters for the DuckDuckGo component.

    Attributes are plain instance fields set in ``__init__``:
    ``top_n`` is passed as ``max_results`` to the search call and
    ``channel`` picks between the "text" and "news" search endpoints.
    """

    def __init__(self):
        super().__init__()
        # Which DDGS endpoint the component queries: "text" or "news".
        self.channel = "text"
        # Upper bound on the number of hits requested from DuckDuckGo.
        self.top_n = 10

    def check(self):
        """
        Validate the configured values.

        Delegates to ``check_positive_integer`` and ``check_valid_value``
        (presumably inherited from ComponentParamBase -- not visible here).
        """
        allowed_channels = ["text", "news"]
        self.check_positive_integer(self.top_n, "Top N")
        self.check_valid_value(self.channel, "Web Search or News", allowed_channels)
class DuckDuckGo(ComponentBase, ABC):
    """
    Component that runs a DuckDuckGo search for the upstream input text and
    returns the hits as a single-column ("content") pandas DataFrame.
    """
    component_name = "DuckDuckGo"

    def _run(self, history, **kwargs):
        """
        Search DuckDuckGo with the component's input and format the results.

        The input's "content" entries are joined with " - " to form the query.
        Each hit becomes one row whose "content" is an HTML anchor (link +
        title) followed by the hit's body text.

        Returns:
            ``DuckDuckGo.be_output(...)`` when there is no query text,
            otherwise a DataFrame with one "content" row per hit.

        Raises:
            ValueError: if the configured channel is neither "text" nor "news".
        """
        import logging

        ans = self.get_input()
        ans = " - ".join(ans["content"]) if "content" in ans else ""
        if not ans:
            # The visible param class defines no `no` attribute; fall back to
            # an empty answer instead of raising AttributeError when absent.
            return DuckDuckGo.be_output(getattr(self._param, "no", ""))

        # `channel` is a parameter (like `top_n`), so read it from self._param.
        channel = self._param.channel
        if channel not in ("text", "news"):
            raise ValueError("Unsupported DuckDuckGo channel: %r" % (channel,))

        # Text hits:  {'title': '', 'href': '', 'body': ''}
        # News hits:  {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
        link_key = "href" if channel == "text" else "url"

        with DDGS() as ddgs:
            search = ddgs.text if channel == "text" else ddgs.news
            # NOTE(review): title/body/link are inserted into HTML unescaped;
            # confirm downstream consumers do not render this as trusted markup.
            duck_res = [
                {"content": '<a href="' + i[link_key] + '">' + i["title"] + '</a> ' + i["body"]}
                for i in search(ans, max_results=self._param.top_n)
            ]

        df = pd.DataFrame(duck_res)
        # Debug-level log replaces the stray print() that wrote to stdout.
        logging.debug("DuckDuckGo %s results:\n%s", channel, df)
        return df