From 83d0949498daabe3423a5cd6d4a5ba859578345b Mon Sep 17 00:00:00 2001
From: Yongteng Lei
Date: Thu, 27 Feb 2025 18:33:55 +0800
Subject: [PATCH] Fix: fix special delimiter parsing issue (#5448)

### What problem does this PR solve?

Fix special delimiter parsing issue #5382

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 deepdoc/parser/txt_parser.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/deepdoc/parser/txt_parser.py b/deepdoc/parser/txt_parser.py
index 7f2179923..92f6f3299 100644
--- a/deepdoc/parser/txt_parser.py
+++ b/deepdoc/parser/txt_parser.py
@@ -51,11 +51,13 @@ class RAGFlowTxtParser:
             s = t
         if s < len(delimiter):
             dels.extend(list(delimiter[s:]))
-        dels = [re.escape(d) for d in delimiter if d]
+        dels = [re.escape(d) for d in dels if d]
         dels = [d for d in dels if d]
         dels = "|".join(dels)
         secs = re.split(r"(%s)" % dels, txt)
         for sec in secs:
+            if re.match(f"^{dels}$", sec):
+                continue
             add_chunk(sec)
 
         return [[c, ""] for c in cks]