From a199fa63881018a1d0802048d3883ac816711c52 Mon Sep 17 00:00:00 2001 From: takatost Date: Fri, 1 Sep 2023 10:52:39 +0800 Subject: [PATCH] feat: optimize high load sql query of document segment (#1078) --- .../keyword_table_index/keyword_table_index.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/api/core/index/keyword_table_index/keyword_table_index.py b/api/core/index/keyword_table_index/keyword_table_index.py index 34ee7c8ff7..3792120e45 100644 --- a/api/core/index/keyword_table_index/keyword_table_index.py +++ b/api/core/index/keyword_table_index/keyword_table_index.py @@ -25,7 +25,7 @@ class KeywordTableIndex(BaseIndex): keyword_table = {} for text in texts: keywords = keyword_table_handler.extract_keywords(text.page_content, self._config.max_keywords_per_chunk) - self._update_segment_keywords(text.metadata['doc_id'], list(keywords)) + self._update_segment_keywords(self.dataset.id, text.metadata['doc_id'], list(keywords)) keyword_table = self._add_text_to_keyword_table(keyword_table, text.metadata['doc_id'], list(keywords)) dataset_keyword_table = DatasetKeywordTable( @@ -52,7 +52,7 @@ class KeywordTableIndex(BaseIndex): keyword_table = self._get_dataset_keyword_table() for text in texts: keywords = keyword_table_handler.extract_keywords(text.page_content, self._config.max_keywords_per_chunk) - self._update_segment_keywords(text.metadata['doc_id'], list(keywords)) + self._update_segment_keywords(self.dataset.id, text.metadata['doc_id'], list(keywords)) keyword_table = self._add_text_to_keyword_table(keyword_table, text.metadata['doc_id'], list(keywords)) self._save_dataset_keyword_table(keyword_table) @@ -199,15 +199,18 @@ class KeywordTableIndex(BaseIndex): return sorted_chunk_indices[: k] - def _update_segment_keywords(self, node_id: str, keywords: List[str]): - document_segment = db.session.query(DocumentSegment).filter(DocumentSegment.index_node_id == node_id).first() + def _update_segment_keywords(self, dataset_id: str, node_id: str, keywords: List[str]): + document_segment = db.session.query(DocumentSegment).filter( + DocumentSegment.dataset_id == dataset_id, + DocumentSegment.index_node_id == node_id + ).first() if document_segment: document_segment.keywords = keywords db.session.commit() def create_segment_keywords(self, node_id: str, keywords: List[str]): keyword_table = self._get_dataset_keyword_table() - self._update_segment_keywords(node_id, keywords) + self._update_segment_keywords(self.dataset.id, node_id, keywords) keyword_table = self._add_text_to_keyword_table(keyword_table, node_id, keywords) self._save_dataset_keyword_table(keyword_table)