mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 16:59:02 +08:00
Fix component PubMed (#2195)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
ad09d4bb24
commit
1d2c081710
@@ -15,6 +15,7 @@
|
|||||||
#
|
#
|
||||||
from abc import ABC
|
from abc import ABC
|
||||||
from Bio import Entrez
|
from Bio import Entrez
|
||||||
|
import re
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from agent.settings import DEBUG
|
from agent.settings import DEBUG
|
||||||
@@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC):
|
|||||||
try:
|
try:
|
||||||
Entrez.email = self._param.email
|
Entrez.email = self._param.email
|
||||||
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
|
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
|
||||||
pubmedcnt = ET.fromstring(
|
pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
|
||||||
Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
|
retmode="xml").read().decode(
|
||||||
pubmed_res = []
|
"utf-8")))
|
||||||
for child in pubmedcnt.findall("PubmedArticle"):
|
pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
|
||||||
if child.find("MedlineCitation").find("Article").find("ArticleTitle").text:
|
"ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
|
||||||
title_tmp = 'Title:' + child.find("MedlineCitation").find("Article").find("ArticleTitle").text
|
"MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + (
|
||||||
else:
|
child.find("MedlineCitation").find("Article").find("Abstract").find(
|
||||||
title_tmp = 'Title:' + "".join(
|
"AbstractText").text if child.find("MedlineCitation").find(
|
||||||
[childtitle.text for childtitle in
|
"Article").find("Abstract") else "No abstract available")} for child in
|
||||||
child.find("MedlineCitation").find("Article").find("ArticleTitle")])
|
pubmedcnt.findall("PubmedArticle")]
|
||||||
url_tmp = '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find("MedlineCitation").find(
|
|
||||||
"PMID").text + '">' + '</a>'
|
|
||||||
abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find(
|
|
||||||
"AbstractText").text
|
|
||||||
pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp})
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return PubMed.be_output("**ERROR**: " + str(e))
|
return PubMed.be_output("**ERROR**: " + str(e))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user