From 843720f958038d7ceb9482b849b9142acb8c3478 Mon Sep 17 00:00:00 2001 From: KevinHuSh Date: Thu, 30 May 2024 11:47:36 +0800 Subject: [PATCH] fix bug in pdf parser (#986) ### What problem does this PR solve? #963 — `b_["text"].strip()[0]` raised `IndexError` when the stripped text was empty; the first-character check is now guarded by a non-empty test. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- deepdoc/parser/pdf_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 4e43df7a3..2971858c0 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -392,7 +392,7 @@ class RAGFlowPdfParser: b["text"].strip()[-1] in ",;:'\",、‘“;:-", len(b["text"].strip()) > 1 and b["text"].strip( )[-2] in ",;:'\",‘“、;:", - b_["text"].strip()[0] in "。;?!?”)),,、:", + b_["text"].strip() and b_["text"].strip()[0] in "。;?!?”)),,、:", ] # features for not concating feats = [