From c8145ce5816fed70418c3478293e226e1313a98d Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Mon, 7 Apr 2025 20:31:26 +0800
Subject: [PATCH] deal with db session in celery worker (#17549)
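
Celery tasks in api/tasks query the database through the shared
SQLAlchemy scoped session, but not all of them released it. A task
that raised NotFound/ValueError or returned early could leave its
checked-out connection attached to the worker, slowly exhausting the
connection pool. This patch makes every task close its session on all
exit paths: missing-record guards now log and return instead of
raising (there is no request context in a worker, so the exception
only marked the task as failed), and the normal/error paths close the
session in finally blocks.

A minimal sketch of the resulting task shape (the task name and body
are illustrative, not part of this diff; the imports are the ones
these tasks already use):

    import logging

    import click
    from celery import shared_task  # type: ignore

    from extensions.ext_database import db
    from models.dataset import Document


    @shared_task(queue="dataset")
    def example_indexing_task(document_id: str):
        document = db.session.query(Document).filter(Document.id == document_id).first()
        if not document:
            # Log and return instead of raising NotFound: in a worker the
            # exception has no HTTP response to map to, and the session
            # would stay checked out.
            logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
            db.session.close()
            return
        try:
            pass  # do the indexing work here
        finally:
            # Hand the connection back to the pool on every exit path.
            db.session.close()

Closing the scoped session is safe to repeat: close() releases the
connection and discards any uncommitted state, and the next use of
db.session starts a fresh session.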
---
 api/tasks/add_document_to_index_task.py               |  5 +++--
 api/tasks/annotation/add_annotation_to_index_task.py  |  3 +++
 api/tasks/annotation/batch_import_annotations_task.py |  2 ++
 api/tasks/annotation/delete_annotation_index_task.py  |  3 +++
 api/tasks/annotation/disable_annotation_reply_task.py | 10 +++++++---
 api/tasks/annotation/enable_annotation_reply_task.py  |  6 ++++--
 .../annotation/update_annotation_to_index_task.py     |  3 +++
 api/tasks/batch_clean_document_task.py                |  2 ++
 api/tasks/batch_create_segment_to_index_task.py       |  2 ++
 api/tasks/clean_dataset_task.py                       |  2 ++
 api/tasks/clean_document_task.py                      |  2 ++
 api/tasks/clean_notion_document_task.py               |  2 ++
 api/tasks/create_segment_to_index_task.py             |  6 ++++--
 api/tasks/deal_dataset_vector_index_task.py           |  2 ++
 api/tasks/delete_segment_from_index_task.py           |  2 ++
 api/tasks/disable_segment_from_index_task.py          | 10 +++++++---
 api/tasks/disable_segments_from_index_task.py         |  5 +++++
 api/tasks/document_indexing_sync_task.py              |  5 +++--
 api/tasks/document_indexing_task.py                   |  4 ++++
 api/tasks/document_indexing_update_task.py            |  7 +++++--
 api/tasks/duplicate_document_indexing_task.py         |  8 +++++++-
 api/tasks/enable_segment_to_index_task.py             | 10 +++++++---
 api/tasks/enable_segments_to_index_task.py            |  5 +++++
 api/tasks/recover_document_indexing_task.py           |  7 +++++--
 api/tasks/remove_document_from_index_task.py          |  8 ++++++--
 api/tasks/retry_document_indexing_task.py             |  8 +++++++-
 26 files changed, 104 insertions(+), 25 deletions(-)

diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py
index 3b8b0466b1..0b7d2ad31f 100644
--- a/api/tasks/add_document_to_index_task.py
+++ b/api/tasks/add_document_to_index_task.py
@@ -4,7 +4,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.rag.index_processor.constant.index_type import IndexType
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
@@ -28,7 +27,9 @@ def add_document_to_index_task(dataset_document_id: str):
     dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document_id).first()
 
     if not dataset_document:
-        raise NotFound("Document not found")
+        logging.info(click.style("Document not found: {}".format(dataset_document_id), fg="red"))
+        db.session.close()
+        return
 
     if dataset_document.indexing_status != "completed":
         return
diff --git a/api/tasks/annotation/add_annotation_to_index_task.py b/api/tasks/annotation/add_annotation_to_index_task.py
index aab21a4410..2a93c21abd 100644
--- a/api/tasks/annotation/add_annotation_to_index_task.py
+++ b/api/tasks/annotation/add_annotation_to_index_task.py
@@ -6,6 +6,7 @@ from celery import shared_task  # type: ignore
 
 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.models.document import Document
+from extensions.ext_database import db
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService
 
@@ -55,3 +56,5 @@ def add_annotation_to_index_task(
         )
     except Exception:
         logging.exception("Build index for annotation failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/annotation/batch_import_annotations_task.py b/api/tasks/annotation/batch_import_annotations_task.py
index 06162b02d6..6144a4fe3e 100644
--- a/api/tasks/annotation/batch_import_annotations_task.py
+++ b/api/tasks/annotation/batch_import_annotations_task.py
@@ -88,3 +88,5 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id:
         indexing_error_msg_key = "app_annotation_batch_import_error_msg_{}".format(str(job_id))
         redis_client.setex(indexing_error_msg_key, 600, str(e))
         logging.exception("Build index for batch import annotations failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/annotation/delete_annotation_index_task.py b/api/tasks/annotation/delete_annotation_index_task.py
index a6a598ce4b..a6657e813a 100644
--- a/api/tasks/annotation/delete_annotation_index_task.py
+++ b/api/tasks/annotation/delete_annotation_index_task.py
@@ -5,6 +5,7 @@ import click
 from celery import shared_task  # type: ignore
 
 from core.rag.datasource.vdb.vector_factory import Vector
+from extensions.ext_database import db
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService
 
@@ -39,3 +40,5 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str
         )
     except Exception as e:
         logging.exception("Annotation deleted index failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/annotation/disable_annotation_reply_task.py b/api/tasks/annotation/disable_annotation_reply_task.py
index 504fe5921b..747fce5784 100644
--- a/api/tasks/annotation/disable_annotation_reply_task.py
+++ b/api/tasks/annotation/disable_annotation_reply_task.py
@@ -3,7 +3,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.rag.datasource.vdb.vector_factory import Vector
 from extensions.ext_database import db
@@ -23,14 +22,18 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str):
     app = db.session.query(App).filter(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
     annotations_count = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id).count()
     if not app:
-        raise NotFound("App not found")
+        logging.info(click.style("App not found: {}".format(app_id), fg="red"))
+        db.session.close()
+        return
 
     app_annotation_setting = (
         db.session.query(AppAnnotationSetting).filter(AppAnnotationSetting.app_id == app_id).first()
     )
 
     if not app_annotation_setting:
-        raise NotFound("App annotation setting not found")
+        logging.info(click.style("App annotation setting not found: {}".format(app_id), fg="red"))
+        db.session.close()
+        return
 
     disable_app_annotation_key = "disable_app_annotation_{}".format(str(app_id))
     disable_app_annotation_job_key = "disable_app_annotation_job_{}".format(str(job_id))
@@ -66,3 +69,4 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str):
         redis_client.setex(disable_app_annotation_error_key, 600, str(e))
     finally:
         redis_client.delete(disable_app_annotation_key)
+        db.session.close()
diff --git a/api/tasks/annotation/enable_annotation_reply_task.py b/api/tasks/annotation/enable_annotation_reply_task.py
index 3dc10534df..c04f1be845 100644
--- a/api/tasks/annotation/enable_annotation_reply_task.py
+++ b/api/tasks/annotation/enable_annotation_reply_task.py
@@ -4,7 +4,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.models.document import Document
@@ -34,7 +33,9 @@ def enable_annotation_reply_task(
     app = db.session.query(App).filter(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
 
     if not app:
-        raise NotFound("App not found")
+        logging.info(click.style("App not found: {}".format(app_id), fg="red"))
+        db.session.close()
+        return
 
     annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id).all()
     enable_app_annotation_key = "enable_app_annotation_{}".format(str(app_id))
@@ -121,3 +122,4 @@ def enable_annotation_reply_task(
         db.session.rollback()
     finally:
         redis_client.delete(enable_app_annotation_key)
+        db.session.close()
diff --git a/api/tasks/annotation/update_annotation_to_index_task.py b/api/tasks/annotation/update_annotation_to_index_task.py
index 8c675feaa6..596ba829ad 100644
--- a/api/tasks/annotation/update_annotation_to_index_task.py
+++ b/api/tasks/annotation/update_annotation_to_index_task.py
@@ -6,6 +6,7 @@ from celery import shared_task  # type: ignore
 
 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.models.document import Document
+from extensions.ext_database import db
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService
 
@@ -56,3 +57,5 @@ def update_annotation_to_index_task(
         )
     except Exception:
         logging.exception("Build index for annotation failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py
index 8376ab1b03..97efc47b33 100644
--- a/api/tasks/batch_clean_document_task.py
+++ b/api/tasks/batch_clean_document_task.py
@@ -74,3 +74,5 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form
         )
     except Exception:
         logging.exception("Cleaned documents when documents deleted failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py
index 648f92b0f8..f32bc4f187 100644
--- a/api/tasks/batch_create_segment_to_index_task.py
+++ b/api/tasks/batch_create_segment_to_index_task.py
@@ -127,3 +127,5 @@ def batch_create_segment_to_index_task(
     except Exception:
         logging.exception("Segments batch created index failed")
         redis_client.setex(indexing_cache_key, 600, "error")
+    finally:
+        db.session.close()
diff --git a/api/tasks/clean_dataset_task.py b/api/tasks/clean_dataset_task.py
index f3386dd444..6bac718395 100644
--- a/api/tasks/clean_dataset_task.py
+++ b/api/tasks/clean_dataset_task.py
@@ -121,3 +121,5 @@ def clean_dataset_task(
         )
     except Exception:
         logging.exception("Cleaned dataset when dataset deleted failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/clean_document_task.py b/api/tasks/clean_document_task.py
index c5234a4ecc..5824121e8f 100644
--- a/api/tasks/clean_document_task.py
+++ b/api/tasks/clean_document_task.py
@@ -82,3 +82,5 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i
         )
     except Exception:
         logging.exception("Cleaned document when document deleted failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py
index 5a6eb00a62..1087a37761 100644
--- a/api/tasks/clean_notion_document_task.py
+++ b/api/tasks/clean_notion_document_task.py
@@ -53,3 +53,5 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str):
         )
     except Exception:
         logging.exception("Cleaned document when import form notion document deleted failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/create_segment_to_index_task.py b/api/tasks/create_segment_to_index_task.py
index dfa053a43c..4500b2a44b 100644
--- a/api/tasks/create_segment_to_index_task.py
+++ b/api/tasks/create_segment_to_index_task.py
@@ -5,7 +5,6 @@ from typing import Optional
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from core.rag.models.document import Document
@@ -27,7 +26,9 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]]
     segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
 
     if not segment:
-        raise NotFound("Segment not found")
+        logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
+        db.session.close()
+        return
 
     if segment.status != "waiting":
         return
@@ -93,3 +94,4 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]]
         db.session.commit()
     finally:
         redis_client.delete(indexing_cache_key)
+        db.session.close()
diff --git a/api/tasks/deal_dataset_vector_index_task.py b/api/tasks/deal_dataset_vector_index_task.py
index 5fd8647da8..075453e283 100644
--- a/api/tasks/deal_dataset_vector_index_task.py
+++ b/api/tasks/deal_dataset_vector_index_task.py
@@ -167,3 +167,5 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
         )
     except Exception:
         logging.exception("Deal dataset vector index failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/delete_segment_from_index_task.py b/api/tasks/delete_segment_from_index_task.py
index e4fbd5465e..a93babc310 100644
--- a/api/tasks/delete_segment_from_index_task.py
+++ b/api/tasks/delete_segment_from_index_task.py
@@ -41,3 +41,5 @@ def delete_segment_from_index_task(index_node_ids: list, dataset_id: str, docume
         logging.info(click.style("Segment deleted from index latency: {}".format(end_at - start_at), fg="green"))
     except Exception:
         logging.exception("delete segment from index failed")
+    finally:
+        db.session.close()
diff --git a/api/tasks/disable_segment_from_index_task.py b/api/tasks/disable_segment_from_index_task.py
index f30a1cc7ac..327eed4721 100644
--- a/api/tasks/disable_segment_from_index_task.py
+++ b/api/tasks/disable_segment_from_index_task.py
@@ -3,7 +3,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from extensions.ext_database import db
@@ -24,10 +23,14 @@ def disable_segment_from_index_task(segment_id: str):
     segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
 
     if not segment:
-        raise NotFound("Segment not found")
+        logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
+        db.session.close()
+        return
 
     if segment.status != "completed":
-        raise NotFound("Segment is not completed , disable action is not allowed.")
+        logging.info(click.style("Segment is not completed, disable is not allowed: {}".format(segment_id), fg="red"))
+        db.session.close()
+        return
 
     indexing_cache_key = "segment_{}_indexing".format(segment.id)
 
@@ -62,3 +65,4 @@ def disable_segment_from_index_task(segment_id: str):
         db.session.commit()
     finally:
         redis_client.delete(indexing_cache_key)
+        db.session.close()
diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py
index d43fb90ed3..8b77b290c8 100644
--- a/api/tasks/disable_segments_from_index_task.py
+++ b/api/tasks/disable_segments_from_index_task.py
@@ -26,15 +26,18 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen
     dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
     if not dataset:
         logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan"))
+        db.session.close()
         return
 
     dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first()
 
     if not dataset_document:
         logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan"))
+        db.session.close()
         return
     if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
         logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan"))
+        db.session.close()
         return
     # sync index processor
     index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor()
@@ -50,6 +53,7 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen
     )
 
     if not segments:
+        db.session.close()
         return
 
     try:
@@ -76,3 +80,4 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen
         for segment in segments:
             indexing_cache_key = "segment_{}_indexing".format(segment.id)
             redis_client.delete(indexing_cache_key)
+        db.session.close()
diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py
index d686698b9a..2e68dcb0fb 100644
--- a/api/tasks/document_indexing_sync_task.py
+++ b/api/tasks/document_indexing_sync_task.py
@@ -4,7 +4,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.indexing_runner import DocumentIsPausedError, IndexingRunner
 from core.rag.extractor.notion_extractor import NotionExtractor
@@ -29,7 +28,9 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
     document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()
 
     if not document:
-        raise NotFound("Document not found")
+        logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
+        db.session.close()
+        return
 
     data_source_info = document.data_source_info_dict
     if document.data_source_type == "notion_import":
diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py
index 50761a2f34..ee470d44e8 100644
--- a/api/tasks/document_indexing_task.py
+++ b/api/tasks/document_indexing_task.py
@@ -27,6 +27,7 @@ def document_indexing_task(dataset_id: str, document_ids: list):
     dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
     if not dataset:
         logging.info(click.style("Dataset is not found: {}".format(dataset_id), fg="yellow"))
+        db.session.close()
         return
     # check document limit
     features = FeatureService.get_features(dataset.tenant_id)
@@ -55,6 +56,7 @@ def document_indexing_task(dataset_id: str, document_ids: list):
             document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
             db.session.add(document)
         db.session.commit()
+        db.session.close()
         return
 
     for document_id in document_ids:
@@ -80,3 +82,5 @@ def document_indexing_task(dataset_id: str, document_ids: list):
         logging.info(click.style(str(ex), fg="yellow"))
     except Exception:
         pass
+    finally:
+        db.session.close()
diff --git a/api/tasks/document_indexing_update_task.py b/api/tasks/document_indexing_update_task.py
index d8f14830c9..b9ed11a8da 100644
--- a/api/tasks/document_indexing_update_task.py
+++ b/api/tasks/document_indexing_update_task.py
@@ -4,7 +4,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.indexing_runner import DocumentIsPausedError, IndexingRunner
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
@@ -27,7 +26,9 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
     document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()
 
     if not document:
-        raise NotFound("Document not found")
+        logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
+        db.session.close()
+        return
 
     document.indexing_status = "parsing"
     document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
@@ -73,3 +74,5 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
         logging.info(click.style(str(ex), fg="yellow"))
     except Exception:
         pass
+    finally:
+        db.session.close()
diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py
index fbb33df109..100fc257ce 100644
--- a/api/tasks/duplicate_document_indexing_task.py
+++ b/api/tasks/duplicate_document_indexing_task.py
@@ -27,7 +27,9 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list):
     dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
 
     if dataset is None:
-        raise ValueError("Dataset not found")
+        logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red"))
+        db.session.close()
+        return
 
     # check document limit
     features = FeatureService.get_features(dataset.tenant_id)
@@ -57,6 +59,8 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list):
             db.session.add(document)
         db.session.commit()
         return
+    finally:
+        db.session.close()
 
     for document_id in document_ids:
         logging.info(click.style("Start process document: {}".format(document_id), fg="green"))
@@ -96,3 +100,5 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list):
         logging.info(click.style(str(ex), fg="yellow"))
     except Exception:
         pass
+    finally:
+        db.session.close()
diff --git a/api/tasks/enable_segment_to_index_task.py b/api/tasks/enable_segment_to_index_task.py
index 0601e594fe..21f08f40a7 100644
--- a/api/tasks/enable_segment_to_index_task.py
+++ b/api/tasks/enable_segment_to_index_task.py
@@ -4,7 +4,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.rag.index_processor.constant.index_type import IndexType
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
@@ -27,10 +26,14 @@ def enable_segment_to_index_task(segment_id: str):
     segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
 
     if not segment:
-        raise NotFound("Segment not found")
+        logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
+        db.session.close()
+        return
 
     if segment.status != "completed":
-        raise NotFound("Segment is not completed, enable action is not allowed.")
+        logging.info(click.style("Segment is not completed, enable is not allowed: {}".format(segment_id), fg="red"))
+        db.session.close()
+        return
 
     indexing_cache_key = "segment_{}_indexing".format(segment.id)
 
@@ -94,3 +97,4 @@ def enable_segment_to_index_task(segment_id: str):
         db.session.commit()
     finally:
         redis_client.delete(indexing_cache_key)
+        db.session.close()
diff --git a/api/tasks/enable_segments_to_index_task.py b/api/tasks/enable_segments_to_index_task.py
index 5129dbd24e..625a3b582e 100644
--- a/api/tasks/enable_segments_to_index_task.py
+++ b/api/tasks/enable_segments_to_index_task.py
@@ -34,9 +34,11 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i
 
     if not dataset_document:
         logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan"))
+        db.session.close()
         return
     if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
         logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan"))
+        db.session.close()
         return
     # sync index processor
     index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor()
@@ -51,6 +53,8 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i
         .all()
     )
     if not segments:
+        logging.info(click.style("Segments not found: {}".format(segment_ids), fg="cyan"))
+        db.session.close()
         return
 
     try:
@@ -108,3 +112,4 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i
         for segment in segments:
             indexing_cache_key = "segment_{}_indexing".format(segment.id)
             redis_client.delete(indexing_cache_key)
+        db.session.close()
diff --git a/api/tasks/recover_document_indexing_task.py b/api/tasks/recover_document_indexing_task.py
index b603d689ba..eada2ff9db 100644
--- a/api/tasks/recover_document_indexing_task.py
+++ b/api/tasks/recover_document_indexing_task.py
@@ -3,7 +3,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.indexing_runner import DocumentIsPausedError, IndexingRunner
 from extensions.ext_database import db
@@ -25,7 +24,9 @@ def recover_document_indexing_task(dataset_id: str, document_id: str):
     document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()
 
     if not document:
-        raise NotFound("Document not found")
+        logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
+        db.session.close()
+        return
 
     try:
         indexing_runner = IndexingRunner()
@@ -43,3 +44,5 @@ def recover_document_indexing_task(dataset_id: str, document_id: str):
         logging.info(click.style(str(ex), fg="yellow"))
     except Exception:
         pass
+    finally:
+        db.session.close()
diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py
index d0c4382f58..0e2960788d 100644
--- a/api/tasks/remove_document_from_index_task.py
+++ b/api/tasks/remove_document_from_index_task.py
@@ -4,7 +4,6 @@ import time
 
 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound
 
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from extensions.ext_database import db
@@ -25,9 +24,13 @@ def remove_document_from_index_task(document_id: str):
     document = db.session.query(Document).filter(Document.id == document_id).first()
 
     if not document:
-        raise NotFound("Document not found")
+        logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
+        db.session.close()
+        return
 
     if document.indexing_status != "completed":
+        logging.info(click.style("Document is not completed, remove is not allowed: {}".format(document_id), fg="red"))
+        db.session.close()
         return
 
     indexing_cache_key = "document_{}_indexing".format(document.id)
@@ -71,3 +74,4 @@ def remove_document_from_index_task(document_id: str):
         db.session.commit()
     finally:
         redis_client.delete(indexing_cache_key)
+        db.session.close()
diff --git a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py
index 83ddbcfcc5..7e50eb9f8d 100644
--- a/api/tasks/retry_document_indexing_task.py
+++ b/api/tasks/retry_document_indexing_task.py
@@ -27,7 +27,9 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):
     dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
 
     if not dataset:
-        raise ValueError("Dataset not found")
+        logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red"))
+        db.session.close()
+        return
 
     for document_id in document_ids:
         retry_indexing_cache_key = "document_{}_is_retried".format(document_id)
@@ -52,6 +54,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):
                 db.session.add(document)
                 db.session.commit()
             redis_client.delete(retry_indexing_cache_key)
+            db.session.close()
             return
 
         logging.info(click.style("Start retry document: {}".format(document_id), fg="green"))
@@ -60,6 +63,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):
         )
         if not document:
             logging.info(click.style("Document not found: {}".format(document_id), fg="yellow"))
+            db.session.close()
             return
         try:
             # clean old data
@@ -92,5 +96,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):
             logging.info(click.style(str(ex), fg="yellow"))
             redis_client.delete(retry_indexing_cache_key)
             pass
+        finally:
+            db.session.close()
     end_at = time.perf_counter()
     logging.info(click.style("Retry dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green"))