diff --git a/api/apps/dataset_api.py b/api/apps/dataset_api.py
index 67201c46e..8ac9a623c 100644
--- a/api/apps/dataset_api.py
+++ b/api/apps/dataset_api.py
@@ -750,7 +750,33 @@ def get_message_during_parsing_document(id, message):
 # ----------------------------stop parsing-----------------------------------------------------
 
 # ----------------------------show the status of the file-----------------------------------------------------
+@manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"])
+@login_required
+def show_parsing_status(dataset_id, document_id):
+    """Report the parsing progress and status of one document.
+
+    Responds with DATA_ERROR when the dataset or the document cannot be
+    found; any unexpected exception is turned into an error response.
+    """
+    try:
+        # the dataset must exist
+        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
+        if not exist:
+            return construct_json_result(code=RetCode.DATA_ERROR,
+                                         message=f"This dataset: '{dataset_id}' cannot be found!")
+        # the document must exist; keep the object from this lookup rather than querying twice
+        exist, doc = DocumentService.get_by_id(document_id)
+        if not exist:
+            return construct_json_result(code=RetCode.DATA_ERROR,
+                                         message=f"This document: '{document_id}' is not a valid document.")
+        # NOTE(review): the document is not checked to belong to dataset_id - confirm this is intended
+        doc_attributes = doc.to_dict()
+
+        return construct_json_result(data={"progress": doc_attributes["progress"], "status": doc_attributes["status"]},
+                                     code=RetCode.SUCCESS)
+    except Exception as e:
+        return construct_error_response(e)
 
 # ----------------------------list the chunks of the file-----------------------------------------------------
 
 # -- --------------------------delete the chunk-----------------------------------------------------
diff --git a/sdk/python/ragflow/ragflow.py b/sdk/python/ragflow/ragflow.py
index 9eccc4cb3..3e8f94984 100644
--- a/sdk/python/ragflow/ragflow.py
+++ b/sdk/python/ragflow/ragflow.py
@@ -159,7 +159,12 @@ class RAGFlow:
     # ----------------------------stop parsing-----------------------------------------------------
 
     # ----------------------------show the status of the file-----------------------------------------------------
+    def show_parsing_status(self, dataset_id, document_id):
+        """Fetch the parsing status of a document and return the decoded JSON response."""
+        endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}/status"
+        res = requests.get(endpoint, headers=self.authorization_header)
+        return res.json()
 
     # ----------------------------list the chunks of the file-----------------------------------------------------
 
     # ----------------------------delete the chunk-----------------------------------------------------
diff --git a/sdk/python/test/test_document.py b/sdk/python/test/test_document.py
index 38839d73a..a23c71e24 100644
--- a/sdk/python/test/test_document.py
+++ b/sdk/python/test/test_document.py
@@ -953,7 +953,56 @@ class TestFile(TestSdk):
     # ----------------------------stop parsing-----------------------------------------------------
 
     # ----------------------------show the status of the file-----------------------------------------------------
+    def test_show_status_with_success(self):
+        """
+        Test showing the status of a document after its parsing has been started.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_show_status_with_success")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/lol.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # parse file
+        res = ragflow.start_parsing_document(created_res_id, doc_id)
+        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
+        # show status
+        status_res = ragflow.show_parsing_status(created_res_id, doc_id)
+        assert status_res["code"] == RetCode.SUCCESS and status_res["data"]["status"] == "1"
+
+    def test_show_status_nonexistent_document(self):
+        """
+        Test showing the status of a document which does not exist.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_show_status_nonexistent_document")
+        created_res_id = created_res["data"]["dataset_id"]
+        res = ragflow.show_parsing_status(created_res_id, "imagination")
+        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This document: 'imagination' is not a valid document."
+
+    def test_show_status_document_in_nonexistent_dataset(self):
+        """
+        Test showing the status of a document whose dataset is nonexistent.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_show_status_document_in_nonexistent_dataset")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # show status using a dataset id that does not exist
+        res = ragflow.show_parsing_status("imagination", doc_id)
+        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset: 'imagination' cannot be found!"
 
     # ----------------------------list the chunks of the file-----------------------------------------------------
 
     # ----------------------------delete the chunk-----------------------------------------------------