Fix bug in PDF parser (#986)

### What problem does this PR solve?

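Fixes #963. When deciding whether two adjacent text boxes should be concatenated, the PDF parser indexed the first character of the following box's stripped text (`b_["text"].strip()[0]`) without checking that the text was non-empty. A following box that is empty or whitespace-only raised `IndexError`. The check is now guarded so that an empty box simply does not count as a concatenation feature.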

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
KevinHuSh 2024-05-30 11:47:36 +08:00 committed by GitHub
parent f077b57f8b
commit 843720f958
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -392,7 +392,7 @@ class RAGFlowPdfParser:
b["text"].strip()[-1] in ",;:'\",、‘“;:-",
len(b["text"].strip()) > 1 and b["text"].strip(
)[-2] in ",;:'\",‘“、;:",
-b_["text"].strip()[0] in "。;?!?”)),,、:",
+b_["text"].strip() and b_["text"].strip()[0] in "。;?!?”)),,、:",
]
# features for not concating
feats = [