From 27ffc0ed74f41c40634038b1ec5340bb1a99d941 Mon Sep 17 00:00:00 2001 From: Stephen Hu Date: Wed, 7 May 2025 10:55:08 +0800 Subject: [PATCH] Feat: Improve 'user_canvas_version' delete and 'document' delete performance (#6553) ### What problem does this PR solve? 1. Add delete_by_ids method 2. Add get_doc_ids_by_doc_names 3. Improve user_canvas_version's logic (avoid O(n) db IO) 4. Improve document delete logic (avoid O(n) db IO) ### Type of change - [x] Performance Improvement --- api/apps/api_app.py | 11 ++++++++--- api/db/services/common_service.py | 12 ++++++++++++ api/db/services/document_service.py | 9 +++++++++ api/db/services/user_canvas_version.py | 5 ++++- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/api/apps/api_app.py b/api/apps/api_app.py index 36c8ccbcf..5376ef0ed 100644 --- a/api/apps/api_app.py +++ b/api/apps/api_app.py @@ -628,7 +628,7 @@ def document_rm(): tenant_id = objs[0].tenant_id req = request.json try: - doc_ids = [DocumentService.get_doc_id_by_doc_name(doc_name) for doc_name in req.get("doc_names", [])] + doc_ids = DocumentService.get_doc_ids_by_doc_names(req.get("doc_names", [])) for doc_id in req.get("doc_ids", []): if doc_id not in doc_ids: doc_ids.append(doc_id) @@ -646,11 +646,16 @@ def document_rm(): FileService.init_knowledgebase_docs(pf_id, tenant_id) errors = "" + docs = DocumentService.get_by_ids(doc_ids) + doc_dic = {} + for doc in docs: + doc_dic[doc.id] = doc + for doc_id in doc_ids: try: - e, doc = DocumentService.get_by_id(doc_id) - if not e: + if doc_id not in doc_dic: return get_data_error_result(message="Document not found!") + doc = doc_dic[doc_id] tenant_id = DocumentService.get_tenant_id(doc_id) if not tenant_id: return get_data_error_result(message="Tenant not found!") diff --git a/api/db/services/common_service.py b/api/db/services/common_service.py index 3af19ce4b..8f76bec8d 100644 --- a/api/db/services/common_service.py +++ b/api/db/services/common_service.py @@ -263,6 +263,18 @@ class 
CommonService: # Returns: # Number of records deleted return cls.model.delete().where(cls.model.id == pid).execute() + + @classmethod + @DB.connection_context() + def delete_by_ids(cls, pids): + # Delete multiple records by their IDs + # Args: + # pids: List of record IDs + # Returns: + # Number of records deleted + with DB.atomic(): + res = cls.model.delete().where(cls.model.id.in_(pids)).execute() + return res @classmethod @DB.connection_context() diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 74bc19bf1..2367c6bc5 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -360,6 +360,15 @@ class DocumentService(CommonService): if not doc_id: return return doc_id[0]["id"] + + @classmethod + @DB.connection_context() + def get_doc_ids_by_doc_names(cls, doc_names): + if not doc_names: + return [] + + query = cls.model.select(cls.model.id).where(cls.model.name.in_(doc_names)) + return list(query.scalars().iterator()) @classmethod @DB.connection_context() diff --git a/api/db/services/user_canvas_version.py b/api/db/services/user_canvas_version.py index 414a1a8e1..9fe12e32e 100644 --- a/api/db/services/user_canvas_version.py +++ b/api/db/services/user_canvas_version.py @@ -31,8 +31,11 @@ class UserCanvasVersionService(CommonService): try: user_canvas_version = cls.model.select().where(cls.model.user_canvas_id == user_canvas_id).order_by(cls.model.create_time.desc()) if user_canvas_version.count() > 20: + delete_ids = [] for i in range(20, user_canvas_version.count()): - cls.delete(user_canvas_version[i].id) + delete_ids.append(user_canvas_version[i].id) + + cls.delete_by_ids(delete_ids) return True except DoesNotExist: return None