mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 20:29:02 +08:00
fix: Ignore some empty page_content when appending to split_documents (#2898)
This commit is contained in:
parent
4419d357c4
commit
696efe494e
@@ -45,9 +45,10 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
|
|||||||
# delete Spliter character
|
# delete Spliter character
|
||||||
page_content = document_node.page_content
|
page_content = document_node.page_content
|
||||||
if page_content.startswith(".") or page_content.startswith("。"):
|
if page_content.startswith(".") or page_content.startswith("。"):
|
||||||
page_content = page_content[1:]
|
page_content = page_content[1:].strip()
|
||||||
else:
|
else:
|
||||||
page_content = page_content
|
page_content = page_content
|
||||||
|
if len(page_content) > 0:
|
||||||
document_node.page_content = page_content
|
document_node.page_content = page_content
|
||||||
split_documents.append(document_node)
|
split_documents.append(document_node)
|
||||||
all_documents.extend(split_documents)
|
all_documents.extend(split_documents)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user