From 58c62f0a34b540c621ad42971c574692aaccb3b0 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Tue, 25 Mar 2025 16:26:14 +0800 Subject: [PATCH] fix full-doc mode document doesn't reindex after enable or un_archive (#16737) --- api/core/indexing_runner.py | 2 +- api/models/dataset.py | 17 +++++++++++++++++ api/tasks/add_document_to_index_task.py | 2 +- api/tasks/deal_dataset_vector_index_task.py | 2 +- api/tasks/enable_segment_to_index_task.py | 2 +- api/tasks/enable_segments_to_index_task.py | 2 +- 6 files changed, 22 insertions(+), 5 deletions(-) diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 8206a8d3ec..a75a4c22d1 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -187,7 +187,7 @@ class IndexingRunner: }, ) if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: - child_chunks = document_segment.child_chunks + child_chunks = document_segment.get_child_chunks() if child_chunks: child_documents = [] for child_chunk in child_chunks: diff --git a/api/models/dataset.py b/api/models/dataset.py index f104c32b53..47f96c669e 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -720,6 +720,23 @@ class DocumentSegment(db.Model): # type: ignore[name-defined] else: return [] + def get_child_chunks(self): + process_rule = self.document.dataset_process_rule + if process_rule.mode == "hierarchical": + rules = Rule(**process_rule.rules_dict) + if rules.parent_mode: + child_chunks = ( + db.session.query(ChildChunk) + .filter(ChildChunk.segment_id == self.id) + .order_by(ChildChunk.position.asc()) + .all() + ) + return child_chunks or [] + else: + return [] + else: + return [] + @property def sign_content(self): return self.get_sign_content() diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py index c5a5ddaadc..3b8b0466b1 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -59,7 +59,7 @@ def add_document_to_index_task(dataset_document_id: str): }, ) if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: - child_chunks = segment.child_chunks + child_chunks = segment.get_child_chunks() if child_chunks: child_documents = [] for child_chunk in child_chunks: diff --git a/api/tasks/deal_dataset_vector_index_task.py b/api/tasks/deal_dataset_vector_index_task.py index a9b5ab91a8..5fd8647da8 100644 --- a/api/tasks/deal_dataset_vector_index_task.py +++ b/api/tasks/deal_dataset_vector_index_task.py @@ -130,7 +130,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): }, ) if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: - child_chunks = segment.child_chunks + child_chunks = segment.get_child_chunks() if child_chunks: child_documents = [] for child_chunk in child_chunks: diff --git a/api/tasks/enable_segment_to_index_task.py b/api/tasks/enable_segment_to_index_task.py index 76522f4720..0601e594fe 100644 --- a/api/tasks/enable_segment_to_index_task.py +++ b/api/tasks/enable_segment_to_index_task.py @@ -63,7 +63,7 @@ def enable_segment_to_index_task(segment_id: str): index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor() if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: - child_chunks = segment.child_chunks + child_chunks = segment.get_child_chunks() if child_chunks: child_documents = [] for child_chunk in child_chunks: diff --git a/api/tasks/enable_segments_to_index_task.py b/api/tasks/enable_segments_to_index_task.py index 3942268afe..5129dbd24e 100644 --- a/api/tasks/enable_segments_to_index_task.py +++ b/api/tasks/enable_segments_to_index_task.py @@ -67,7 +67,7 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i ) if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: - child_chunks = segment.child_chunks + child_chunks = segment.get_child_chunks() if child_chunks: child_documents = [] for child_chunk in child_chunks: