Fix component exception (#1603)

### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
H 2024-07-19 13:36:45 +08:00 committed by GitHub
parent c0090a1b4f
commit 915354bec9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 61 additions and 48 deletions

View File

@@ -47,18 +47,21 @@ class ArXiv(ComponentBase, ABC):
if not ans: if not ans:
return ArXiv.be_output("") return ArXiv.be_output("")
sort_choices = {"relevance": arxiv.SortCriterion.Relevance, try:
"lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate, sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
'submittedDate': arxiv.SortCriterion.SubmittedDate} "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
arxiv_client = arxiv.Client() 'submittedDate': arxiv.SortCriterion.SubmittedDate}
search = arxiv.Search( arxiv_client = arxiv.Client()
query=ans, search = arxiv.Search(
max_results=self._param.top_n, query=ans,
sort_by=sort_choices[self._param.sort_by] max_results=self._param.top_n,
) sort_by=sort_choices[self._param.sort_by]
arxiv_res = [ )
{"content": 'Title: ' + i.title + '\nPdf_Url: <a href="' + i.pdf_url + '"></a> \nSummary: ' + i.summary} for arxiv_res = [
i in list(arxiv_client.results(search))] {"content": 'Title: ' + i.title + '\nPdf_Url: <a href="' + i.pdf_url + '"></a> \nSummary: ' + i.summary} for
i in list(arxiv_client.results(search))]
except Exception as e:
return ArXiv.be_output("**ERROR**: " + str(e))
if not arxiv_res: if not arxiv_res:
return ArXiv.be_output("") return ArXiv.be_output("")

View File

@@ -45,16 +45,20 @@ class Baidu(ComponentBase, ABC):
if not ans: if not ans:
return Baidu.be_output("") return Baidu.be_output("")
url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n) try:
headers = { url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'} headers = {
response = requests.get(url=url, headers=headers) 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'}
response = requests.get(url=url, headers=headers)
url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text) url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text) title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text) body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for url, title, body in zip(url_res, title_res, body_res)] baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for
del body_res, url_res, title_res url, title, body in zip(url_res, title_res, body_res)]
del body_res, url_res, title_res
except Exception as e:
return Baidu.be_output("**ERROR**: " + str(e))
if not baidu_res: if not baidu_res:
return Baidu.be_output("") return Baidu.be_output("")

View File

@@ -46,16 +46,19 @@ class DuckDuckGo(ComponentBase, ABC):
if not ans: if not ans:
return DuckDuckGo.be_output("") return DuckDuckGo.be_output("")
if self._param.channel == "text": try:
with DDGS() as ddgs: if self._param.channel == "text":
# {'title': '', 'href': '', 'body': ''} with DDGS() as ddgs:
duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i in # {'title': '', 'href': '', 'body': ''}
ddgs.text(ans, max_results=self._param.top_n)] duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i
elif self._param.channel == "news": in ddgs.text(ans, max_results=self._param.top_n)]
with DDGS() as ddgs: elif self._param.channel == "news":
# {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''} with DDGS() as ddgs:
duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i in # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
ddgs.news(ans, max_results=self._param.top_n)] duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i
in ddgs.news(ans, max_results=self._param.top_n)]
except Exception as e:
return DuckDuckGo.be_output("**ERROR**: " + str(e))
if not duck_res: if not duck_res:
return DuckDuckGo.be_output("") return DuckDuckGo.be_output("")

View File

@@ -46,14 +46,18 @@ class PubMed(ComponentBase, ABC):
if not ans: if not ans:
return PubMed.be_output("") return PubMed.be_output("")
Entrez.email = self._param.email try:
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList'] Entrez.email = self._param.email
pubmedcnt = ET.fromstring( pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8")) pubmedcnt = ET.fromstring(
pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find( Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
"ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find( pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
"MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + child.find("MedlineCitation").find( "ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
"Article").find("Abstract").find("AbstractText").text} for child in pubmedcnt.findall("PubmedArticle")] "MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + child.find(
"MedlineCitation").find("Article").find("Abstract").find("AbstractText").text} for child in
pubmedcnt.findall("PubmedArticle")]
except Exception as e:
return PubMed.be_output("**ERROR**: " + str(e))
if not pubmed_res: if not pubmed_res:
return PubMed.be_output("") return PubMed.be_output("")

View File

@@ -51,16 +51,15 @@ class Wikipedia(ComponentBase, ABC):
if not ans: if not ans:
return Wikipedia.be_output("") return Wikipedia.be_output("")
wiki_res = [] try:
wikipedia.set_lang(self._param.language) wiki_res = []
wiki_engine = wikipedia wikipedia.set_lang(self._param.language)
for wiki_key in wiki_engine.search(ans, results=self._param.top_n): wiki_engine = wikipedia
try: for wiki_key in wiki_engine.search(ans, results=self._param.top_n):
page = wiki_engine.page(title=wiki_key, auto_suggest=False) page = wiki_engine.page(title=wiki_key, auto_suggest=False)
wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary}) wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
except Exception as e: except Exception as e:
print(e) return Wikipedia.be_output("**ERROR**: " + str(e))
pass
if not wiki_res: if not wiki_res:
return Wikipedia.be_output("") return Wikipedia.be_output("")