Fix component PubMed (#2195)

### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
H 2024-09-02 18:49:09 +08:00 committed by GitHub
parent ad09d4bb24
commit 1d2c081710
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -15,6 +15,7 @@
#
from abc import ABC
from Bio import Entrez
import re
import pandas as pd
import xml.etree.ElementTree as ET
from agent.settings import DEBUG
@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC):
try:
Entrez.email = self._param.email
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
pubmedcnt = ET.fromstring(
Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
pubmed_res = []
for child in pubmedcnt.findall("PubmedArticle"):
if child.find("MedlineCitation").find("Article").find("ArticleTitle").text:
title_tmp = 'Title:' + child.find("MedlineCitation").find("Article").find("ArticleTitle").text
else:
title_tmp = 'Title:' + "".join(
[childtitle.text for childtitle in
child.find("MedlineCitation").find("Article").find("ArticleTitle")])
url_tmp = '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find("MedlineCitation").find(
"PMID").text + '">' + '</a>'
abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find(
"AbstractText").text
pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp})
pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
retmode="xml").read().decode(
"utf-8")))
pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
"ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
"MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + (
child.find("MedlineCitation").find("Article").find("Abstract").find(
"AbstractText").text if child.find("MedlineCitation").find(
"Article").find("Abstract") else "No abstract available")} for child in
pubmedcnt.findall("PubmedArticle")]
except Exception as e:
return PubMed.be_output("**ERROR**: " + str(e))