From bfa8d342b33771f6a3f15270d26060588907c1a8 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 17 Mar 2025 13:07:13 +0800 Subject: [PATCH] Fix: retrieval debug mode issue. (#6150) ### What problem does this PR solve? #6139 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- agent/component/base.py | 2 +- agent/component/retrieval.py | 3 +-- deepdoc/parser/pdf_parser.py | 1 + rag/app/naive.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/component/base.py b/agent/component/base.py index 4aaf601f1..45e77bed4 100644 --- a/agent/component/base.py +++ b/agent/component/base.py @@ -521,7 +521,7 @@ class ComponentBase(ABC): if u.lower().find("answer") >= 0: for r, c in self._canvas.history[::-1]: if r == "user": - upstream_outs.append(pd.DataFrame([{"content": f"USER:{c}", "component_id": u}])) + upstream_outs.append(pd.DataFrame([{"content": c, "component_id": u}])) break break if self.component_name.lower().find("answer") >= 0 and self.get_component_name(u) in ["relevant"]: diff --git a/agent/component/retrieval.py b/agent/component/retrieval.py index c960725e6..10717250d 100644 --- a/agent/component/retrieval.py +++ b/agent/component/retrieval.py @@ -57,8 +57,7 @@ class Retrieval(ComponentBase, ABC): query = self.get_input() query = str(query["content"][0]) if "content" in query else "" lines = query.split('\n') - user_queries = [line.split("USER:", 1)[1] for line in lines if line.startswith("USER:")] - query = user_queries[-1] if user_queries else "" + query = lines[-1] if lines else "" kbs = KnowledgebaseService.get_by_ids(self._param.kb_ids) if not kbs: return Retrieval.be_output("") diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index b9e2bee65..c65c17525 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -41,6 +41,7 @@ LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber" if LOCK_KEY_pdfplumber not in sys.modules: sys.modules[LOCK_KEY_pdfplumber] = threading.Lock() + class RAGFlowPdfParser: def __init__(self, parallel_devices: int | None = None): """ diff --git a/rag/app/naive.py b/rag/app/naive.py index 670a1f701..a01cf5eea 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -197,7 +197,7 @@ class Markdown(MarkdownParser): def chunk(filename, binary=None, from_page=0, to_page=100000, - lang="Chinese", parallel_devices=None, callback=None, **kwargs): + lang="Chinese", callback=None, parallel_devices=None, **kwargs): """ Supported file formats are docx, pdf, excel, txt. This method apply the naive ways to chunk files.