From 6454e1d644a45f01f9c8f5db16187bea81859575 Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Mon, 11 Mar 2024 15:36:56 +0800
Subject: [PATCH] chunk-overlap None check (#2781)

Co-authored-by: jyong
---
 api/core/indexing_runner.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py
index 0cd9f9f646..dd46aa27dc 100644
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -416,9 +416,14 @@ class IndexingRunner:
             if separator:
                 separator = separator.replace('\\n', '\n')
 
+            if 'chunk_overlap' in segmentation and segmentation['chunk_overlap']:
+                chunk_overlap = segmentation['chunk_overlap']
+            else:
+                chunk_overlap = 0
+
             character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder(
                 chunk_size=segmentation["max_tokens"],
-                chunk_overlap=segmentation.get('chunk_overlap', 0),
+                chunk_overlap=chunk_overlap,
                 fixed_separator=separator,
                 separators=["\n\n", "。", ".", " ", ""],
                 embedding_model_instance=embedding_model_instance