From 1d2c0817103c80c2d9b9622df7515458bf5e492f Mon Sep 17 00:00:00 2001
From: H <43509927+guoyuhao2330@users.noreply.github.com>
Date: Mon, 2 Sep 2024 18:49:09 +0800
Subject: [PATCH] Fix component PubMed (#2195)
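### What problem does this PR solve?
Strip inline `<b>`/`<i>` tags from the PubMed `efetch` XML before parsing, so that title and abstract text are read as plain text, and fall back to "No abstract available" instead of raising when an article has no `Abstract` element.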
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---
agent/component/pubmed.py | 26 +++++++++++---------------
1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/agent/component/pubmed.py b/agent/component/pubmed.py
index 19cbdecc0..ff97ec88b 100644
--- a/agent/component/pubmed.py
+++ b/agent/component/pubmed.py
@@ -15,6 +15,7 @@
#
from abc import ABC
from Bio import Entrez
+import re
import pandas as pd
import xml.etree.ElementTree as ET
from agent.settings import DEBUG
@@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC):
try:
Entrez.email = self._param.email
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
- pubmedcnt = ET.fromstring(
- Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
- pubmed_res = []
- for child in pubmedcnt.findall("PubmedArticle"):
- if child.find("MedlineCitation").find("Article").find("ArticleTitle").text:
- title_tmp = 'Title:' + child.find("MedlineCitation").find("Article").find("ArticleTitle").text
- else:
- title_tmp = 'Title:' + "".join(
- [childtitle.text for childtitle in
- child.find("MedlineCitation").find("Article").find("ArticleTitle")])
- url_tmp = '\nUrl:' + ''
- abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find(
- "AbstractText").text
- pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp})
+ pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
+ retmode="xml").read().decode(
+ "utf-8")))
+ pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
+ "ArticleTitle").text + '\nUrl:' + '\n' + 'Abstract:' + (
+ child.find("MedlineCitation").find("Article").find("Abstract").find(
+ "AbstractText").text if child.find("MedlineCitation").find(
+ "Article").find("Abstract") else "No abstract available")} for child in
+ pubmedcnt.findall("PubmedArticle")]
except Exception as e:
return PubMed.be_output("**ERROR**: " + str(e))