diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index e11993ddc7..1550de0fd2 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -667,6 +667,11 @@ class IndexingConfig(BaseSettings): default=4000, ) + CHILD_CHUNKS_PREVIEW_NUMBER: PositiveInt = Field( + description="Maximum number of child chunks to preview", + default=50, + ) + class MultiModalTransferConfig(BaseSettings): MULTIMODAL_SEND_FORMAT: Literal["base64", "url"] = Field( diff --git a/api/core/rag/index_processor/processor/parent_child_index_processor.py b/api/core/rag/index_processor/processor/parent_child_index_processor.py index e8423e2b77..3140122081 100644 --- a/api/core/rag/index_processor/processor/parent_child_index_processor.py +++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py @@ -3,6 +3,7 @@ import uuid from typing import Optional +from configs import dify_config from core.model_manager import ModelInstance from core.rag.cleaner.clean_processor import CleanProcessor from core.rag.datasource.retrieval_service import RetrievalService @@ -80,6 +81,10 @@ class ParentChildIndexProcessor(BaseIndexProcessor): child_nodes = self._split_child_nodes( document, rules, process_rule.get("mode"), kwargs.get("embedding_model_instance") ) + if kwargs.get("preview"): + if len(child_nodes) > dify_config.CHILD_CHUNKS_PREVIEW_NUMBER: + child_nodes = child_nodes[: dify_config.CHILD_CHUNKS_PREVIEW_NUMBER] + document.children = child_nodes doc_id = str(uuid.uuid4()) hash = helper.generate_text_hash(document.page_content)