diff --git a/api/apps/api_app.py b/api/apps/api_app.py index 1de94bef6..36c8ccbcf 100644 --- a/api/apps/api_app.py +++ b/api/apps/api_app.py @@ -21,7 +21,7 @@ from flask import request, Response from api.db.services.llm_service import TenantLLMService from flask_login import login_required, current_user -from api.db import FileType, LLMType, ParserType, FileSource +from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, LLMType, ParserType, FileSource from api.db.db_models import APIToken, Task, File from api.db.services import duplicate_name from api.db.services.api_service import APITokenService, API4ConversationService @@ -577,10 +577,23 @@ def list_kb_docs(): orderby = req.get("orderby", "create_time") desc = req.get("desc", True) keywords = req.get("keywords", "") - + status = req.get("status", []) + if status: + invalid_status = {s for s in status if s not in VALID_TASK_STATUS} + if invalid_status: + return get_data_error_result( + message=f"Invalid filter status conditions: {', '.join(invalid_status)}" + ) + types = req.get("types", []) + if types: + invalid_types = {t for t in types if t not in VALID_FILE_TYPES} + if invalid_types: + return get_data_error_result( + message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}" + ) try: docs, tol = DocumentService.get_by_kb_id( - kb_id, page_number, items_per_page, orderby, desc, keywords) + kb_id, page_number, items_per_page, orderby, desc, keywords, status, types) docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs] return get_json_result(data={"total": tol, "docs": docs}) diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 497dbde6d..0d0b934c7 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -25,7 +25,7 @@ from flask_login import login_required, current_user from deepdoc.parser.html_parser import RAGFlowHtmlParser from rag.nlp import search -from api.db import FileType, TaskStatus, ParserType, FileSource +from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, TaskStatus, ParserType, FileSource from api.db.db_models import File, Task from api.db.services.file2document_service import File2DocumentService from api.db.services.file_service import FileService @@ -183,7 +183,7 @@ def create(): return server_error_response(e) -@manager.route('/list', methods=['GET']) # noqa: F821 +@manager.route('/list', methods=['POST']) # noqa: F821 @login_required def list_docs(): kb_id = request.args.get("kb_id") @@ -201,13 +201,32 @@ def list_docs(): code=settings.RetCode.OPERATING_ERROR) keywords = request.args.get("keywords", "") - page_number = int(request.args.get("page", 1)) - items_per_page = int(request.args.get("page_size", 15)) + page_number = int(request.args.get("page", 0)) + items_per_page = int(request.args.get("page_size", 0)) orderby = request.args.get("orderby", "create_time") desc = request.args.get("desc", True) + + req = request.get_json() + + run_status = req.get("run_status", []) + if run_status: + invalid_status = {s for s in run_status if s not in VALID_TASK_STATUS} + if invalid_status: + return get_data_error_result( + message=f"Invalid filter run status conditions: {', '.join(invalid_status)}" + ) + + types = req.get("types", []) + if types: + invalid_types = {t for t in types if t not in VALID_FILE_TYPES} + if invalid_types: + return get_data_error_result( + message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}" + ) + try: docs, tol = DocumentService.get_by_kb_id( - kb_id, page_number, items_per_page, orderby, desc, keywords) + kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types) for doc_item in docs: if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX): @@ -331,7 +350,9 @@ def rm(): message="Database error (Document removal)!") f2d = File2DocumentService.get_by_document_id(doc_id) - deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) + deleted_file_count = 0 + if f2d: + deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) File2DocumentService.delete_by_document_id(doc_id) if deleted_file_count > 0: STORAGE_IMPL.rm(b, n) diff --git a/api/db/__init__.py b/api/db/__init__.py index 8c8a6535b..a8c85ef4c 100644 --- a/api/db/__init__.py +++ b/api/db/__init__.py @@ -49,6 +49,7 @@ class FileType(StrEnum): FOLDER = 'folder' OTHER = "other" +VALID_FILE_TYPES = {FileType.PDF, FileType.DOC, FileType.VISUAL, FileType.AURAL, FileType.VIRTUAL, FileType.FOLDER, FileType.OTHER} class LLMType(StrEnum): CHAT = 'chat' @@ -73,6 +74,7 @@ class TaskStatus(StrEnum): DONE = "3" FAIL = "4" +VALID_TASK_STATUS = {TaskStatus.UNSTART, TaskStatus.RUNNING, TaskStatus.CANCEL, TaskStatus.DONE, TaskStatus.FAIL} class ParserType(StrEnum): PRESENTATION = "presentation" diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 292f8c5bf..a30016b3b 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -70,7 +70,7 @@ class DocumentService(CommonService): @classmethod @DB.connection_context() def get_by_kb_id(cls, kb_id, page_number, items_per_page, - orderby, desc, keywords): + orderby, desc, keywords, run_status, types): if keywords: docs = cls.model.select().where( (cls.model.kb_id == kb_id), @@ -78,13 +78,21 @@ class DocumentService(CommonService): ) else: docs = cls.model.select().where(cls.model.kb_id == kb_id) + + if run_status: + docs = docs.where(cls.model.run.in_(run_status)) + if types: + docs = docs.where(cls.model.type.in_(types)) + count = docs.count() if desc: docs = docs.order_by(cls.model.getter_by(orderby).desc()) else: docs = docs.order_by(cls.model.getter_by(orderby).asc()) - docs = docs.paginate(page_number, items_per_page) + + if page_number and items_per_page: + docs = docs.paginate(page_number, items_per_page) return list(docs.dicts()), count diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index 9f5875884..06222994d 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -97,7 +97,7 @@ class KnowledgebaseService(CommonService): kb = kbs[0] # Get all documents in the knowledge base - docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "") + docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "", [], []) # Check parsing status of each document for doc in docs: @@ -226,7 +226,10 @@ class KnowledgebaseService(CommonService): cls.model.chunk_num, cls.model.parser_id, cls.model.parser_config, - cls.model.pagerank] + cls.model.pagerank, + cls.model.create_time, + cls.model.update_time + ] kbs = cls.model.select(*fields).join(Tenant, on=( (Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where( (cls.model.id == kb_id), diff --git a/sdk/python/test/test_frontend_api/common.py b/sdk/python/test/test_frontend_api/common.py index a245093f4..63aea2c8f 100644 --- a/sdk/python/test/test_frontend_api/common.py +++ b/sdk/python/test/test_frontend_api/common.py @@ -70,7 +70,8 @@ def upload_file(auth, dataset_id, path): def list_document(auth, dataset_id): authorization = {"Authorization": auth} url = f"{HOST_ADDRESS}/v1/document/list?kb_id={dataset_id}" - res = requests.get(url=url, headers=authorization) + json = {} + res = requests.post(url=url, headers=authorization, json=json) return res.json()