From 1d2c0817103c80c2d9b9622df7515458bf5e492f Mon Sep 17 00:00:00 2001 From: H <43509927+guoyuhao2330@users.noreply.github.com> Date: Mon, 2 Sep 2024 18:49:09 +0800 Subject: [PATCH] Fix component PubMed (#2195) ### What problem does this PR solve? ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- agent/component/pubmed.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/agent/component/pubmed.py b/agent/component/pubmed.py index 19cbdecc0..ff97ec88b 100644 --- a/agent/component/pubmed.py +++ b/agent/component/pubmed.py @@ -15,6 +15,7 @@ # from abc import ABC from Bio import Entrez +import re import pandas as pd import xml.etree.ElementTree as ET from agent.settings import DEBUG @@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC): try: Entrez.email = self._param.email pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList'] - pubmedcnt = ET.fromstring( - Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8")) - pubmed_res = [] - for child in pubmedcnt.findall("PubmedArticle"): - if child.find("MedlineCitation").find("Article").find("ArticleTitle").text: - title_tmp = 'Title:' + child.find("MedlineCitation").find("Article").find("ArticleTitle").text - else: - title_tmp = 'Title:' + "".join( - [childtitle.text for childtitle in - child.find("MedlineCitation").find("Article").find("ArticleTitle")]) - url_tmp = '\nUrl:' + '' - abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find( - "AbstractText").text - pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp}) + pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids), + retmode="xml").read().decode( + "utf-8"))) + pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find( + "ArticleTitle").text + '\nUrl:' + '\n' + 'Abstract:' + ( + child.find("MedlineCitation").find("Article").find("Abstract").find( + "AbstractText").text if child.find("MedlineCitation").find( + "Article").find("Abstract") else "No abstract available")} for child in + pubmedcnt.findall("PubmedArticle")] except Exception as e: return PubMed.be_output("**ERROR**: " + str(e))