diff --git a/agent/component/pubmed.py b/agent/component/pubmed.py
index 19cbdecc0..ff97ec88b 100644
--- a/agent/component/pubmed.py
+++ b/agent/component/pubmed.py
@@ -15,6 +15,7 @@
#
from abc import ABC
from Bio import Entrez
+import re
import pandas as pd
import xml.etree.ElementTree as ET
from agent.settings import DEBUG
@@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC):
try:
Entrez.email = self._param.email
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
- pubmedcnt = ET.fromstring(
- Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
- pubmed_res = []
- for child in pubmedcnt.findall("PubmedArticle"):
- if child.find("MedlineCitation").find("Article").find("ArticleTitle").text:
- title_tmp = 'Title:' + child.find("MedlineCitation").find("Article").find("ArticleTitle").text
- else:
- title_tmp = 'Title:' + "".join(
- [childtitle.text for childtitle in
- child.find("MedlineCitation").find("Article").find("ArticleTitle")])
- url_tmp = '\nUrl:' + ''
- abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find(
- "AbstractText").text
- pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp})
+ pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
+ retmode="xml").read().decode(
+ "utf-8")))
+ pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
+ "ArticleTitle").text + '\nUrl:' + '\n' + 'Abstract:' + (
+ child.find("MedlineCitation").find("Article").find("Abstract").find(
+ "AbstractText").text if child.find("MedlineCitation").find(
+ "Article").find("Abstract") else "No abstract available")} for child in
+ pubmedcnt.findall("PubmedArticle")]
except Exception as e:
return PubMed.be_output("**ERROR**: " + str(e))