diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 0cd9f9f646..dd46aa27dc 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -416,9 +416,14 @@ class IndexingRunner: if separator: separator = separator.replace('\\n', '\n') + if 'chunk_overlap' in segmentation and segmentation['chunk_overlap']: + chunk_overlap = segmentation['chunk_overlap'] + else: + chunk_overlap = 0 + character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder( chunk_size=segmentation["max_tokens"], - chunk_overlap=segmentation.get('chunk_overlap', 0), + chunk_overlap=chunk_overlap, fixed_separator=separator, separators=["\n\n", "。", ".", " ", ""], embedding_model_instance=embedding_model_instance