From d589b0f56832ae58678198e44fe288cf2345d3e8 Mon Sep 17 00:00:00 2001 From: KevinHuSh Date: Sun, 28 Apr 2024 14:23:53 +0800 Subject: [PATCH] fix exception in pdf parser (#584) ### What problem does this PR solve? #451 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- deepdoc/parser/pdf_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 67b9d172c..4c3255c70 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -470,7 +470,8 @@ class RAGFlowPdfParser: continue if re.match(r"[0-9]{2,3}/[0-9]{3}$", up["text"]) \ - or re.match(r"[0-9]{2,3}/[0-9]{3}$", down["text"]): + or re.match(r"[0-9]{2,3}/[0-9]{3}$", down["text"]) \ + or not down["text"].strip(): i += 1 continue