From f29a5de9f5cdb854b5e4b5e7ecf9ec5dd6451234 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Tue, 6 May 2025 09:44:38 +0800 Subject: [PATCH] Fix: field_map was incorrectly persisted (#7443) ### What problem does this PR solve? Fix `field_map` being incorrectly persisted. #7412 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/document_app.py | 23 +++++++++++++++++++++++ api/db/services/document_service.py | 20 ++++++++++++++++++++ api/db/services/knowledgebase_service.py | 10 ++++++++++ 3 files changed, 53 insertions(+) diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 0d0b934c7..c37c9d1ee 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -333,6 +333,7 @@ def rm(): pf_id = root_folder["id"] FileService.init_knowledgebase_docs(pf_id, current_user.id) errors = "" + kb_table_num_map = {} for doc_id in doc_ids: try: e, doc = DocumentService.get_by_id(doc_id) @@ -356,6 +357,16 @@ def rm(): File2DocumentService.delete_by_document_id(doc_id) if deleted_file_count > 0: STORAGE_IMPL.rm(b, n) + + doc_parser = doc.parser_id + if doc_parser == ParserType.TABLE: + kb_id = doc.kb_id + if kb_id not in kb_table_num_map: + counts = DocumentService.count_by_kb_id(kb_id=kb_id, keywords="", run_status=[TaskStatus.DONE], types=[]) + kb_table_num_map[kb_id] = counts + kb_table_num_map[kb_id] -= 1 + if kb_table_num_map[kb_id] <= 0: + KnowledgebaseService.delete_field_map(kb_id) except Exception as e: errors += str(e) @@ -378,6 +389,7 @@ def run(): code=settings.RetCode.AUTHENTICATION_ERROR ) try: + kb_table_num_map = {} for id in req["doc_ids"]: info = {"run": str(req["run"]), "progress": 0} if str(req["run"]) == TaskStatus.RUNNING.value and req.get("delete", False): @@ -400,6 +412,17 @@ def run(): e, doc = DocumentService.get_by_id(id) doc = doc.to_dict() doc["tenant_id"] = tenant_id + + doc_parser = doc.get("parser_id", ParserType.NAIVE) + if doc_parser == ParserType.TABLE: + kb_id = doc.get("kb_id") +
if not kb_id: + continue + if kb_id not in kb_table_num_map: + count = DocumentService.count_by_kb_id(kb_id=kb_id, keywords="", run_status=[TaskStatus.DONE], types=[]) + kb_table_num_map[kb_id] = count + if kb_table_num_map[kb_id] <=0: + KnowledgebaseService.delete_field_map(kb_id) bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"]) queue_tasks(doc, bucket, name, 0) diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 7a653a6b4..74bc19bf1 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -97,6 +97,26 @@ class DocumentService(CommonService): return list(docs.dicts()), count + @classmethod + @DB.connection_context() + def count_by_kb_id(cls, kb_id, keywords, run_status, types): + if keywords: + docs = cls.model.select().where( + (cls.model.kb_id == kb_id), + (fn.LOWER(cls.model.name).contains(keywords.lower())) + ) + else: + docs = cls.model.select().where(cls.model.kb_id == kb_id) + + if run_status: + docs = docs.where(cls.model.run.in_(run_status)) + if types: + docs = docs.where(cls.model.type.in_(types)) + + count = docs.count() + + return count + @classmethod @DB.connection_context() def insert(cls, doc): diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index 06222994d..454bdbdc7 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -269,6 +269,16 @@ class KnowledgebaseService(CommonService): dfs_update(m.parser_config, config) cls.update_by_id(id, {"parser_config": m.parser_config}) + @classmethod + @DB.connection_context() + def delete_field_map(cls, id): + e, m = cls.get_by_id(id) + if not e: + raise LookupError(f"knowledgebase({id}) not found.") + + m.parser_config.pop("field_map", None) + cls.update_by_id(id, {"parser_config": m.parser_config}) + @classmethod @DB.connection_context() def get_field_map(cls, ids):