Feat: Improve 'user_canvas_version' delete and 'document' delete performance (#6553)

### What problem does this PR solve?

1. Add a `delete_by_ids` method to `CommonService`.
2. Add `DocumentService.get_doc_ids_by_doc_names`.
3. Improve the user_canvas_version cleanup logic (avoid O(n) DB round trips).
4. Improve the document delete logic (avoid O(n) DB round trips).

### Type of change

- [x] Performance Improvement
This commit is contained in:
Stephen Hu 2025-05-07 10:55:08 +08:00 committed by GitHub
parent 539876af11
commit 27ffc0ed74
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 33 additions and 4 deletions

View File

@ -628,7 +628,7 @@ def document_rm():
tenant_id = objs[0].tenant_id tenant_id = objs[0].tenant_id
req = request.json req = request.json
try: try:
doc_ids = [DocumentService.get_doc_id_by_doc_name(doc_name) for doc_name in req.get("doc_names", [])] doc_ids = DocumentService.get_doc_ids_by_doc_names(req.get("doc_names", []))
for doc_id in req.get("doc_ids", []): for doc_id in req.get("doc_ids", []):
if doc_id not in doc_ids: if doc_id not in doc_ids:
doc_ids.append(doc_id) doc_ids.append(doc_id)
@ -646,11 +646,16 @@ def document_rm():
FileService.init_knowledgebase_docs(pf_id, tenant_id) FileService.init_knowledgebase_docs(pf_id, tenant_id)
errors = "" errors = ""
docs = DocumentService.get_by_ids(doc_ids)
doc_dic = {}
for doc in docs:
doc_dic[doc.id] = doc
for doc_id in doc_ids: for doc_id in doc_ids:
try: try:
e, doc = DocumentService.get_by_id(doc_id) if doc_id not in doc_dic:
if not e:
return get_data_error_result(message="Document not found!") return get_data_error_result(message="Document not found!")
doc = doc_dic[doc_id]
tenant_id = DocumentService.get_tenant_id(doc_id) tenant_id = DocumentService.get_tenant_id(doc_id)
if not tenant_id: if not tenant_id:
return get_data_error_result(message="Tenant not found!") return get_data_error_result(message="Tenant not found!")

View File

@ -264,6 +264,18 @@ class CommonService:
# Number of records deleted # Number of records deleted
return cls.model.delete().where(cls.model.id == pid).execute() return cls.model.delete().where(cls.model.id == pid).execute()
@classmethod
@DB.connection_context()
def delete_by_ids(cls, pids):
    # Delete multiple records by their IDs with a single DELETE statement.
    # Args:
    #     pids: List of record IDs
    # Returns:
    #     Number of records deleted (0 when pids is empty)
    if not pids:
        # Nothing to delete: skip opening a transaction and issuing a query,
        # mirroring the empty-input guard in get_doc_ids_by_doc_names.
        return 0
    with DB.atomic():
        return cls.model.delete().where(cls.model.id.in_(pids)).execute()
@classmethod @classmethod
@DB.connection_context() @DB.connection_context()
def filter_delete(cls, filters): def filter_delete(cls, filters):

View File

@ -361,6 +361,15 @@ class DocumentService(CommonService):
return return
return doc_id[0]["id"] return doc_id[0]["id"]
@classmethod
@DB.connection_context()
def get_doc_ids_by_doc_names(cls, doc_names):
    # Resolve document names to document IDs with a single query.
    # Args:
    #     doc_names: List of document names to look up
    # Returns:
    #     List of IDs of the documents whose name is in doc_names; names with
    #     no matching row contribute nothing. Empty list for empty input.
    if not doc_names:
        return []

    query = cls.model.select(cls.model.id).where(cls.model.name.in_(doc_names))
    # Iterate the rows as 1-tuples and unpack the single selected column.
    # Chaining .iterator() onto query.scalars() is avoided because scalars()
    # yields from a plain generator, which has no .iterator() method.
    return [doc_id for (doc_id,) in query.tuples().iterator()]
@classmethod @classmethod
@DB.connection_context() @DB.connection_context()
def get_thumbnails(cls, docids): def get_thumbnails(cls, docids):

View File

@ -31,8 +31,11 @@ class UserCanvasVersionService(CommonService):
try: try:
user_canvas_version = cls.model.select().where(cls.model.user_canvas_id == user_canvas_id).order_by(cls.model.create_time.desc()) user_canvas_version = cls.model.select().where(cls.model.user_canvas_id == user_canvas_id).order_by(cls.model.create_time.desc())
if user_canvas_version.count() > 20: if user_canvas_version.count() > 20:
delete_ids = []
for i in range(20, user_canvas_version.count()): for i in range(20, user_canvas_version.count()):
cls.delete(user_canvas_version[i].id) delete_ids.append(user_canvas_version[i].id)
cls.delete_by_ids(delete_ids)
return True return True
except DoesNotExist: except DoesNotExist:
return None return None