From f556f0239cede7c4dee94083ba17c907ddd447a3 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 14 Jan 2025 13:16:05 +0800 Subject: [PATCH] Fix dify retrieval issue. (#4473) ### What problem does this PR solve? #4464 #4469 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/sdk/dify_retrieval.py | 2 +- rag/app/manual.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/apps/sdk/dify_retrieval.py b/api/apps/sdk/dify_retrieval.py index d1c61c6b5..a364e8145 100644 --- a/api/apps/sdk/dify_retrieval.py +++ b/api/apps/sdk/dify_retrieval.py @@ -62,7 +62,7 @@ def retrieval(tenant_id): for c in ranks["chunks"]: c.pop("vector", None) records.append({ - "content": c["content_ltks"], + "content": c["content_with_weight"], "score": c["similarity"], "title": c["docnm_kwd"], "metadata": {} diff --git a/rag/app/manual.py b/rag/app/manual.py index 8a3480907..9018cb2a5 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -70,7 +70,7 @@ class Pdf(PdfParser): for b in self.boxes: b["text"] = re.sub(r"([\t  ]|\u3000){2,}", " ", b["text"].strip()) - return [(b["text"], b.get("layout_no", ""), self.get_position(b, zoomin)) + return [(b["text"], b.get("layoutno", ""), self.get_position(b, zoomin)) for i, b in enumerate(self.boxes)], tbls