diff --git a/graph/component/__init__.py b/graph/component/__init__.py
index 3f1d32fea..8257701b4 100644
--- a/graph/component/__init__.py
+++ b/graph/component/__init__.py
@@ -14,6 +14,8 @@ from .duckduckgo import DuckDuckGo, DuckDuckGoParam
 from .wikipedia import Wikipedia, WikipediaParam
 from .pubmed import PubMed, PubMedParam
 from .arxiv import ArXiv, ArXivParam
+from .google import Google, GoogleParam
+from .bing import Bing, BingParam
 
 
 def component_class(class_name):
diff --git a/graph/component/bing.py b/graph/component/bing.py
new file mode 100644
index 000000000..128358816
--- /dev/null
+++ b/graph/component/bing.py
@@ -0,0 +1,110 @@
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from abc import ABC
+import requests
+import pandas as pd
+from graph.settings import DEBUG
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class BingParam(ComponentParamBase):
+    """
+    Define the Bing component parameters.
+
+    top_n: maximum number of search results to keep.
+    channel: "Webpages" (Bing Web Search) or "News" (Bing News Search).
+    api_key: Bing subscription key, sent as the "Ocp-Apim-Subscription-Key" header.
+    country: country code, sent as the "cc" query parameter.
+    language: language code, sent as the "Accept-Language" header.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.top_n = 10
+        self.channel = "Webpages"
+        self.api_key = "YOUR_ACCESS_KEY"
+        self.country = "CN"
+        self.language = "en"
+
+    def check(self):
+        """Validate each parameter against the values this component accepts."""
+        self.check_positive_integer(self.top_n, "Top N")
+        self.check_valid_value(self.channel, "Bing Web Search or Bing News", ["Webpages", "News"])
+        self.check_empty(self.api_key, "Bing subscription key")
+        self.check_valid_value(self.country, "Bing Country",
+                               ['AR', 'AU', 'AT', 'BE', 'BR', 'CA', 'CL', 'DK', 'FI', 'FR', 'DE', 'HK', 'IN', 'ID',
+                                'IT', 'JP', 'KR', 'MY', 'MX', 'NL', 'NZ', 'NO', 'CN', 'PL', 'PT', 'PH', 'RU', 'SA',
+                                'ZA', 'ES', 'SE', 'CH', 'TW', 'TR', 'GB', 'US'])
+        # NOTE(review): 'ns', 'nt', 'gb', 'br' and 'pt' look like truncated forms of Bing's
+        # 'zh-hans', 'zh-hant', 'en-gb', 'pt-br' and 'pt-pt'. They stay accepted so existing
+        # configurations keep validating; the full codes are accepted as well.
+        self.check_valid_value(self.language, "Bing Languages",
+                               ['ar', 'eu', 'bn', 'bg', 'ca', 'ns', 'nt', 'hr', 'cs', 'da', 'nl', 'en', 'gb', 'et',
+                                'fi', 'fr', 'gl', 'de', 'gu', 'he', 'hi', 'hu', 'is', 'it', 'jp', 'kn', 'ko', 'lv',
+                                'lt', 'ms', 'ml', 'mr', 'nb', 'pl', 'br', 'pt', 'pa', 'ro', 'ru', 'sr', 'sk', 'sl',
+                                'es', 'sv', 'ta', 'te', 'th', 'tr', 'uk', 'vi',
+                                'zh-hans', 'zh-hant', 'en-gb', 'pt-br', 'pt-pt'])
+
+
+class Bing(ComponentBase, ABC):
+    """Search component backed by the Bing Web Search and Bing News Search v7 APIs."""
+    component_name = "Bing"
+
+    def _run(self, history, **kwargs):
+        """
+        Search Bing for the joined "content" of the component input.
+
+        Returns a DataFrame with one "content" row per hit (at most top_n rows), the
+        be_output("") value when there is no query or no hit, or an "**ERROR**: ..."
+        output when the request fails.
+        """
+        ans = self.get_input()
+        ans = " - ".join(ans["content"]) if "content" in ans else ""
+        if not ans:
+            return Bing.be_output("")
+
+        is_news = self._param.channel == "News"
+        url = ("https://api.bing.microsoft.com/v7.0/news/search" if is_news
+               else "https://api.bing.microsoft.com/v7.0/search")
+        try:
+            headers = {"Ocp-Apim-Subscription-Key": self._param.api_key, 'Accept-Language': self._param.language}
+            # "count" asks Bing for top_n hits; 50 is the documented Web Search maximum.
+            params = {"q": ans, "textDecorations": True, "textFormat": "HTML", "cc": self._param.country,
+                      "answerCount": 1, "promote": self._param.channel,
+                      "count": min(self._param.top_n, 50)}
+            # requests waits indefinitely unless a timeout is given.
+            response = requests.get(url, headers=headers, params=params, timeout=30)
+            response.raise_for_status()
+            search_results = response.json()
+            if is_news:
+                # /news/search returns its hits in a top-level "value"; a nested "news"
+                # answer is still read when present.
+                hits = search_results.get("news", search_results).get("value", [])
+            else:
+                # A response without a "webPages" answer means no web hit, not a failure.
+                hits = search_results.get("webPages", {}).get("value", [])
+            text_key = "description" if is_news else "snippet"
+            bing_res = [{"content": i["name"] + ' ' + i.get(text_key, "")}
+                        for i in hits[:self._param.top_n]]
+        except Exception as e:
+            return Bing.be_output("**ERROR**: " + str(e))
+
+        if not bing_res:
+            return Bing.be_output("")
+
+        df = pd.DataFrame(bing_res)
+        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        return df
diff --git a/graph/component/google.py b/graph/component/google.py
new file mode 100644
index 000000000..eb7cd50de
--- /dev/null
+++ b/graph/component/google.py
@@ -0,0 +1,112 @@
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from abc import ABC
+from serpapi import GoogleSearch
+import pandas as pd
+from graph.settings import DEBUG
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class GoogleParam(ComponentParamBase):
+    """
+    Define the Google component parameters.
+
+    top_n: number of results requested, sent to SerpApi as "num".
+    api_key: SerpApi API key.
+    country: country code, sent as "gl".
+    language: language code, sent as "hl".
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.top_n = 10
+        self.api_key = "xxx"
+        self.country = "cn"
+        self.language = "en"
+
+    def check(self):
+        """Validate each parameter against the values this component accepts."""
+        self.check_positive_integer(self.top_n, "Top N")
+        self.check_empty(self.api_key, "SerpApi API key")
+        self.check_valid_value(self.country, "Google Country",
+                               ['af', 'al', 'dz', 'as', 'ad', 'ao', 'ai', 'aq', 'ag', 'ar', 'am', 'aw', 'au', 'at',
+                                'az', 'bs', 'bh', 'bd', 'bb', 'by', 'be', 'bz', 'bj', 'bm', 'bt', 'bo', 'ba', 'bw',
+                                'bv', 'br', 'io', 'bn', 'bg', 'bf', 'bi', 'kh', 'cm', 'ca', 'cv', 'ky', 'cf', 'td',
+                                'cl', 'cn', 'cx', 'cc', 'co', 'km', 'cg', 'cd', 'ck', 'cr', 'ci', 'hr', 'cu', 'cy',
+                                'cz', 'dk', 'dj', 'dm', 'do', 'ec', 'eg', 'sv', 'gq', 'er', 'ee', 'et', 'fk', 'fo',
+                                'fj', 'fi', 'fr', 'gf', 'pf', 'tf', 'ga', 'gm', 'ge', 'de', 'gh', 'gi', 'gr', 'gl',
+                                'gd', 'gp', 'gu', 'gt', 'gn', 'gw', 'gy', 'ht', 'hm', 'va', 'hn', 'hk', 'hu', 'is',
+                                'in', 'id', 'ir', 'iq', 'ie', 'il', 'it', 'jm', 'jp', 'jo', 'kz', 'ke', 'ki', 'kp',
+                                'kr', 'kw', 'kg', 'la', 'lv', 'lb', 'ls', 'lr', 'ly', 'li', 'lt', 'lu', 'mo', 'mk',
+                                'mg', 'mw', 'my', 'mv', 'ml', 'mt', 'mh', 'mq', 'mr', 'mu', 'yt', 'mx', 'fm', 'md',
+                                'mc', 'mn', 'ms', 'ma', 'mz', 'mm', 'na', 'nr', 'np', 'nl', 'an', 'nc', 'nz', 'ni',
+                                'ne', 'ng', 'nu', 'nf', 'mp', 'no', 'om', 'pk', 'pw', 'ps', 'pa', 'pg', 'py', 'pe',
+                                'ph', 'pn', 'pl', 'pt', 'pr', 'qa', 're', 'ro', 'ru', 'rw', 'sh', 'kn', 'lc', 'pm',
+                                'vc', 'ws', 'sm', 'st', 'sa', 'sn', 'rs', 'sc', 'sl', 'sg', 'sk', 'si', 'sb', 'so',
+                                'za', 'gs', 'es', 'lk', 'sd', 'sr', 'sj', 'sz', 'se', 'ch', 'sy', 'tw', 'tj', 'tz',
+                                'th', 'tl', 'tg', 'tk', 'to', 'tt', 'tn', 'tr', 'tm', 'tc', 'tv', 'ug', 'ua', 'ae',
+                                'uk', 'gb', 'us', 'um', 'uy', 'uz', 'vu', 've', 'vn', 'vg', 'vi', 'wf', 'eh', 'ye',
+                                'zm', 'zw'])
+        self.check_valid_value(self.language, "Google languages",
+                               ['af', 'ak', 'sq', 'ws', 'am', 'ar', 'hy', 'az', 'eu', 'be', 'bem', 'bn', 'bh',
+                                'xx-bork', 'bs', 'br', 'bg', 'bt', 'km', 'ca', 'chr', 'ny', 'zh-cn', 'zh-tw', 'co',
+                                'hr', 'cs', 'da', 'nl', 'xx-elmer', 'en', 'eo', 'et', 'ee', 'fo', 'tl', 'fi', 'fr',
+                                'fy', 'gaa', 'gl', 'ka', 'de', 'el', 'kl', 'gn', 'gu', 'xx-hacker', 'ht', 'ha', 'haw',
+                                'iw', 'hi', 'hu', 'is', 'ig', 'id', 'ia', 'ga', 'it', 'ja', 'jw', 'kn', 'kk', 'rw',
+                                'rn', 'xx-klingon', 'kg', 'ko', 'kri', 'ku', 'ckb', 'ky', 'lo', 'la', 'lv', 'ln', 'lt',
+                                'loz', 'lg', 'ach', 'mk', 'mg', 'ms', 'ml', 'mt', 'mv', 'mi', 'mr', 'mfe', 'mo', 'mn',
+                                'sr-me', 'my', 'ne', 'pcm', 'nso', 'no', 'nn', 'oc', 'or', 'om', 'ps', 'fa',
+                                'xx-pirate', 'pl', 'pt', 'pt-br', 'pt-pt', 'pa', 'qu', 'ro', 'rm', 'nyn', 'ru', 'gd',
+                                'sr', 'sh', 'st', 'tn', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'es', 'es-419', 'su',
+                                'sw', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'ti', 'to', 'lua', 'tum', 'tr', 'tk', 'tw',
+                                'ug', 'uk', 'ur', 'uz', 'vu', 'vi', 'cy', 'wo', 'xh', 'yi', 'yo', 'zu']
+                               )
+
+
+class Google(ComponentBase, ABC):
+    """Search component that queries Google through SerpApi."""
+    component_name = "Google"
+
+    def _run(self, history, **kwargs):
+        """
+        Search Google for the joined "content" of the component input.
+
+        Returns a DataFrame with one "content" row per organic result, the be_output("")
+        value when there is no query or no result, or a generic "**ERROR**" output on failure.
+        """
+        ans = self.get_input()
+        ans = " - ".join(ans["content"]) if "content" in ans else ""
+        if not ans:
+            return Google.be_output("")
+
+        try:
+            client = GoogleSearch(
+                {"engine": "google", "q": ans, "api_key": self._param.api_key, "gl": self._param.country,
+                 "hl": self._param.language, "num": self._param.top_n})
+            # An organic result without a "snippet" must not turn the whole search
+            # into an error, so the snippet defaults to an empty string.
+            google_res = [{"content": i["title"] + ' ' + i.get("snippet", "")} for i in
+                          client.get_dict()["organic_results"]]
+        except Exception:
+            # Every failure is reported with the same generic message.
+            return Google.be_output("**ERROR**: Existing Unavailable Parameters!")
+
+        if not google_res:
+            return Google.be_output("")
+
+        df = pd.DataFrame(google_res)
+        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        return df
diff --git a/requirements.txt b/requirements.txt
index 1752b7759..fa57cfded 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,6 +23,7 @@ Flask==3.0.3
 Flask_Cors==4.0.1
 Flask_Login==0.6.3
 flask_session==0.8.0
+google_search_results==2.4.2
 groq==0.9.0
 hanziconv==0.3.2
 html_text==0.6.2
diff --git a/requirements_arm.txt b/requirements_arm.txt
index 1a8f6a137..c5f440b00 100644
--- a/requirements_arm.txt
+++ b/requirements_arm.txt
@@ -154,3 +154,4 @@ wikipedia==1.4.0
 Bio==1.7.1
 arxiv==2.1.3
 pypdf==4.3.0
+google_search_results==2.4.2
diff --git a/requirements_dev.txt b/requirements_dev.txt
index f518cab64..aa030b574 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -139,3 +139,4 @@ wikipedia==1.4.0
 Bio==1.7.1
 arxiv==2.1.3
 pypdf==4.3.0
+google_search_results==2.4.2