Mirror of https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
Fix component exception (#1603)
### What problem does this PR solve?

Exceptions raised inside the ArXiv, Baidu, DuckDuckGo, PubMed, and Wikipedia components previously propagated to the caller; each component now catches them and returns the error text as its output (`**ERROR**: ...`).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
parent c0090a1b4f
commit 915354bec9
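All five diffs below apply the same fix: the component's search logic is wrapped in a try/except, and any exception is returned through `be_output("**ERROR**: " + str(e))` so the failure shows up as component output instead of propagating. Below is a standalone sketch of that pattern, using a hypothetical `fetch` callable rather than the actual ragflow component API.

```python
# Standalone sketch of the error-handling pattern introduced by this commit.
# `fetch` stands in for whatever may raise (HTTP call, SDK client, XML parsing);
# it is a hypothetical placeholder, not part of the ragflow codebase.
from typing import Callable, List


def run_component(fetch: Callable[[str], List[str]], query: str) -> str:
    if not query:
        return ""                       # empty input -> empty output, as before
    try:
        results = fetch(query)          # the part each component now guards
    except Exception as e:
        return "**ERROR**: " + str(e)   # surface the failure as normal output
    return "\n".join(results) if results else ""


if __name__ == "__main__":
    def broken_fetch(q: str) -> List[str]:
        raise RuntimeError("upstream service unavailable")

    print(run_component(broken_fetch, "ragflow"))
    # -> **ERROR**: upstream service unavailable
```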
```diff
@@ -47,18 +47,21 @@ class ArXiv(ComponentBase, ABC):
         if not ans:
             return ArXiv.be_output("")
 
-        sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
-                        "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
-                        'submittedDate': arxiv.SortCriterion.SubmittedDate}
-        arxiv_client = arxiv.Client()
-        search = arxiv.Search(
-            query=ans,
-            max_results=self._param.top_n,
-            sort_by=sort_choices[self._param.sort_by]
-        )
-        arxiv_res = [
-            {"content": 'Title: ' + i.title + '\nPdf_Url: <a href="' + i.pdf_url + '"></a> \nSummary: ' + i.summary} for
-            i in list(arxiv_client.results(search))]
+        try:
+            sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
+                            "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
+                            'submittedDate': arxiv.SortCriterion.SubmittedDate}
+            arxiv_client = arxiv.Client()
+            search = arxiv.Search(
+                query=ans,
+                max_results=self._param.top_n,
+                sort_by=sort_choices[self._param.sort_by]
+            )
+            arxiv_res = [
+                {"content": 'Title: ' + i.title + '\nPdf_Url: <a href="' + i.pdf_url + '"></a> \nSummary: ' + i.summary} for
+                i in list(arxiv_client.results(search))]
+        except Exception as e:
+            return ArXiv.be_output("**ERROR**: " + str(e))
 
         if not arxiv_res:
             return ArXiv.be_output("")
```
```diff
@@ -45,16 +45,20 @@ class Baidu(ComponentBase, ABC):
         if not ans:
             return Baidu.be_output("")
 
-        url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'}
-        response = requests.get(url=url, headers=headers)
+        try:
+            url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'}
+            response = requests.get(url=url, headers=headers)
 
-        url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
-        title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
-        body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
-        baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for url, title, body in zip(url_res, title_res, body_res)]
-        del body_res, url_res, title_res
+            url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
+            title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
+            body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
+            baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for
+                         url, title, body in zip(url_res, title_res, body_res)]
+            del body_res, url_res, title_res
+        except Exception as e:
+            return Baidu.be_output("**ERROR**: " + str(e))
 
         if not baidu_res:
             return Baidu.be_output("")
```
```diff
@@ -46,16 +46,19 @@ class DuckDuckGo(ComponentBase, ABC):
         if not ans:
             return DuckDuckGo.be_output("")
 
-        if self._param.channel == "text":
-            with DDGS() as ddgs:
-                # {'title': '', 'href': '', 'body': ''}
-                duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i in
-                            ddgs.text(ans, max_results=self._param.top_n)]
-        elif self._param.channel == "news":
-            with DDGS() as ddgs:
-                # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
-                duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i in
-                            ddgs.news(ans, max_results=self._param.top_n)]
+        try:
+            if self._param.channel == "text":
+                with DDGS() as ddgs:
+                    # {'title': '', 'href': '', 'body': ''}
+                    duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i
+                                in ddgs.text(ans, max_results=self._param.top_n)]
+            elif self._param.channel == "news":
+                with DDGS() as ddgs:
+                    # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
+                    duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i
+                                in ddgs.news(ans, max_results=self._param.top_n)]
+        except Exception as e:
+            return DuckDuckGo.be_output("**ERROR**: " + str(e))
 
         if not duck_res:
             return DuckDuckGo.be_output("")
```
```diff
@@ -46,14 +46,18 @@ class PubMed(ComponentBase, ABC):
         if not ans:
             return PubMed.be_output("")
 
-        Entrez.email = self._param.email
-        pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
-        pubmedcnt = ET.fromstring(
-            Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
-        pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
-            "ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
-            "MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + child.find("MedlineCitation").find(
-            "Article").find("Abstract").find("AbstractText").text} for child in pubmedcnt.findall("PubmedArticle")]
+        try:
+            Entrez.email = self._param.email
+            pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
+            pubmedcnt = ET.fromstring(
+                Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
+            pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
+                "ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
+                "MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + child.find(
+                "MedlineCitation").find("Article").find("Abstract").find("AbstractText").text} for child in
+                pubmedcnt.findall("PubmedArticle")]
+        except Exception as e:
+            return PubMed.be_output("**ERROR**: " + str(e))
 
         if not pubmed_res:
             return PubMed.be_output("")
```
```diff
@@ -51,16 +51,15 @@ class Wikipedia(ComponentBase, ABC):
         if not ans:
             return Wikipedia.be_output("")
 
-        wiki_res = []
-        wikipedia.set_lang(self._param.language)
-        wiki_engine = wikipedia
-        for wiki_key in wiki_engine.search(ans, results=self._param.top_n):
-            try:
-                page = wiki_engine.page(title=wiki_key, auto_suggest=False)
-                wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
-            except Exception as e:
-                print(e)
-                pass
+        try:
+            wiki_res = []
+            wikipedia.set_lang(self._param.language)
+            wiki_engine = wikipedia
+            for wiki_key in wiki_engine.search(ans, results=self._param.top_n):
+                page = wiki_engine.page(title=wiki_key, auto_suggest=False)
+                wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
+        except Exception as e:
+            return Wikipedia.be_output("**ERROR**: " + str(e))
 
         if not wiki_res:
             return Wikipedia.be_output("")
```