mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 02:49:00 +08:00
Fix component PubMed (#2195)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
ad09d4bb24
commit
1d2c081710
@ -15,6 +15,7 @@
|
||||
#
|
||||
from abc import ABC
|
||||
from Bio import Entrez
|
||||
import re
|
||||
import pandas as pd
|
||||
import xml.etree.ElementTree as ET
|
||||
from agent.settings import DEBUG
|
||||
@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC):
|
||||
try:
|
||||
Entrez.email = self._param.email
|
||||
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
|
||||
pubmedcnt = ET.fromstring(
|
||||
Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
|
||||
pubmed_res = []
|
||||
for child in pubmedcnt.findall("PubmedArticle"):
|
||||
if child.find("MedlineCitation").find("Article").find("ArticleTitle").text:
|
||||
title_tmp = 'Title:' + child.find("MedlineCitation").find("Article").find("ArticleTitle").text
|
||||
else:
|
||||
title_tmp = 'Title:' + "".join(
|
||||
[childtitle.text for childtitle in
|
||||
child.find("MedlineCitation").find("Article").find("ArticleTitle")])
|
||||
url_tmp = '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find("MedlineCitation").find(
|
||||
"PMID").text + '">' + '</a>'
|
||||
abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find(
|
||||
"AbstractText").text
|
||||
pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp})
|
||||
pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
|
||||
retmode="xml").read().decode(
|
||||
"utf-8")))
|
||||
pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
|
||||
"ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
|
||||
"MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + (
|
||||
child.find("MedlineCitation").find("Article").find("Abstract").find(
|
||||
"AbstractText").text if child.find("MedlineCitation").find(
|
||||
"Article").find("Abstract") else "No abstract available")} for child in
|
||||
pubmedcnt.findall("PubmedArticle")]
|
||||
except Exception as e:
|
||||
return PubMed.be_output("**ERROR**: " + str(e))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user