mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-10 21:29:03 +08:00
Feat: Improve 'user_canvas_version' delete and 'document' delete performance (#6553)
### What problem does this PR solve? 1. Add a `delete_by_ids` method. 2. Add a `get_doc_ids_by_doc_names` method. 3. Improve the `user_canvas_version` cleanup logic (avoid O(n) database round trips). 4. Improve the document delete logic (avoid O(n) database round trips). ### Type of change - [x] Performance Improvement
This commit is contained in:
parent
539876af11
commit
27ffc0ed74
@ -628,7 +628,7 @@ def document_rm():
|
||||
tenant_id = objs[0].tenant_id
|
||||
req = request.json
|
||||
try:
|
||||
doc_ids = [DocumentService.get_doc_id_by_doc_name(doc_name) for doc_name in req.get("doc_names", [])]
|
||||
doc_ids = DocumentService.get_doc_ids_by_doc_names(req.get("doc_names", []))
|
||||
for doc_id in req.get("doc_ids", []):
|
||||
if doc_id not in doc_ids:
|
||||
doc_ids.append(doc_id)
|
||||
@ -646,11 +646,16 @@ def document_rm():
|
||||
FileService.init_knowledgebase_docs(pf_id, tenant_id)
|
||||
|
||||
errors = ""
|
||||
docs = DocumentService.get_by_ids(doc_ids)
|
||||
doc_dic = {}
|
||||
for doc in docs:
|
||||
doc_dic[doc.id] = doc
|
||||
|
||||
for doc_id in doc_ids:
|
||||
try:
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if not e:
|
||||
if doc_id not in doc_dic:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
doc = doc_dic[doc_id]
|
||||
tenant_id = DocumentService.get_tenant_id(doc_id)
|
||||
if not tenant_id:
|
||||
return get_data_error_result(message="Tenant not found!")
|
||||
|
@ -264,6 +264,18 @@ class CommonService:
|
||||
# Number of records deleted
|
||||
return cls.model.delete().where(cls.model.id == pid).execute()
|
||||
|
||||
@classmethod
@DB.connection_context()
def delete_by_ids(cls, pids):
    """Delete multiple records by their IDs with a single DELETE statement.

    Args:
        pids: List of record IDs to delete.

    Returns:
        Number of records deleted; 0 when ``pids`` is empty or None.
    """
    if not pids:
        # Nothing to delete: skip opening a transaction and the round trip.
        return 0
    with DB.atomic():
        return cls.model.delete().where(cls.model.id.in_(pids)).execute()
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def filter_delete(cls, filters):
|
||||
|
@ -361,6 +361,15 @@ class DocumentService(CommonService):
|
||||
return
|
||||
return doc_id[0]["id"]
|
||||
|
||||
@classmethod
@DB.connection_context()
def get_doc_ids_by_doc_names(cls, doc_names):
    """Return the IDs of all documents whose name is in ``doc_names``.

    The lookup is a single ``SELECT id ... WHERE name IN (...)`` query
    instead of one query per name.

    Args:
        doc_names: List of document names to look up.

    Returns:
        List of matching document IDs; an empty list when ``doc_names``
        is empty. Names without a matching row contribute nothing.
    """
    if not doc_names:
        return []

    query = cls.model.select(cls.model.id).where(cls.model.name.in_(doc_names))
    # NOTE(review): the previous `query.scalars().iterator()` appears to call
    # `.iterator()` on a plain generator, which has no such method - confirm
    # against the pinned peewee version. Unpacking one-column tuples is safe
    # either way.
    return [doc_id for (doc_id,) in query.tuples()]
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_thumbnails(cls, docids):
|
||||
|
@ -31,8 +31,11 @@ class UserCanvasVersionService(CommonService):
|
||||
try:
|
||||
user_canvas_version = cls.model.select().where(cls.model.user_canvas_id == user_canvas_id).order_by(cls.model.create_time.desc())
|
||||
if user_canvas_version.count() > 20:
|
||||
delete_ids = []
|
||||
for i in range(20, user_canvas_version.count()):
|
||||
cls.delete(user_canvas_version[i].id)
|
||||
delete_ids.append(user_canvas_version[i].id)
|
||||
|
||||
cls.delete_by_ids(delete_ids)
|
||||
return True
|
||||
except DoesNotExist:
|
||||
return None
|
||||
|
Loading…
x
Reference in New Issue
Block a user