Fix component PubMed (#2195)

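### What problem does this PR solve?

The PubMed component now strips inline `<b>`/`<i>` tags from the efetch XML response before parsing it, so that article titles and abstracts containing such markup are read as plain text, and it falls back to "No abstract available" when an article has no `Abstract` element.
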
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
H 2024-09-02 18:49:09 +08:00 committed by GitHub
parent ad09d4bb24
commit 1d2c081710
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -15,6 +15,7 @@
# #
from abc import ABC from abc import ABC
from Bio import Entrez from Bio import Entrez
import re
import pandas as pd import pandas as pd
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from agent.settings import DEBUG from agent.settings import DEBUG
@@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC):
try: try:
Entrez.email = self._param.email Entrez.email = self._param.email
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList'] pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
pubmedcnt = ET.fromstring( pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8")) retmode="xml").read().decode(
pubmed_res = [] "utf-8")))
for child in pubmedcnt.findall("PubmedArticle"): pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
if child.find("MedlineCitation").find("Article").find("ArticleTitle").text: "ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
title_tmp = 'Title:' + child.find("MedlineCitation").find("Article").find("ArticleTitle").text "MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + (
else: child.find("MedlineCitation").find("Article").find("Abstract").find(
title_tmp = 'Title:' + "".join( "AbstractText").text if child.find("MedlineCitation").find(
[childtitle.text for childtitle in "Article").find("Abstract") else "No abstract available")} for child in
child.find("MedlineCitation").find("Article").find("ArticleTitle")]) pubmedcnt.findall("PubmedArticle")]
url_tmp = '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find("MedlineCitation").find(
"PMID").text + '">' + '</a>'
abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find(
"AbstractText").text
pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp})
except Exception as e: except Exception as e:
return PubMed.be_output("**ERROR**: " + str(e)) return PubMed.be_output("**ERROR**: " + str(e))