diff --git a/api/apps/dataset_api.py b/api/apps/dataset_api.py
index 9772a2ed9..884207e28 100644
--- a/api/apps/dataset_api.py
+++ b/api/apps/dataset_api.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import os
+import pathlib
 import re
 import warnings
 
@@ -42,12 +43,12 @@ MAXIMUM_OF_UPLOADING_FILES = 256
 
 
 # ------------------------------ create a dataset ---------------------------------------
-@manager.route('/', methods=['POST'])
+@manager.route("/", methods=["POST"])
 @login_required  # use login
 @validate_request("name")  # check name key
 def create_dataset():
     # Check if Authorization header is present
-    authorization_token = request.headers.get('Authorization')
+    authorization_token = request.headers.get("Authorization")
     if not authorization_token:
         return construct_json_result(code=RetCode.AUTHENTICATION_ERROR, message="Authorization header is missing.")
 
@@ -79,14 +80,15 @@ def create_dataset():
     # In case that the length of the name exceeds the limit
     dataset_name_length = len(dataset_name)
     if dataset_name_length > NAME_LENGTH_LIMIT:
-        return construct_json_result(code=RetCode.DATA_ERROR,
-                                     message=f"Dataset name: {dataset_name} with length {dataset_name_length} exceeds {NAME_LENGTH_LIMIT}!")
+        return construct_json_result(
+            code=RetCode.DATA_ERROR,
+            message=f"Dataset name: {dataset_name} with length {dataset_name_length} exceeds {NAME_LENGTH_LIMIT}!")
 
     # In case that there are other fields in the data-binary
     if len(request_body.keys()) > 1:
         name_list = []
         for key_name in request_body.keys():
-            if key_name != 'name':
+            if key_name != "name":
                 name_list.append(key_name)
         return construct_json_result(code=RetCode.DATA_ERROR,
                                      message=f"fields: {name_list}, are not allowed in request body.")
 
@@ -115,7 +117,7 @@ def create_dataset():
 
 
 # -----------------------------list datasets-------------------------------------------------------
-@manager.route('/', methods=['GET'])
+@manager.route("/", methods=["GET"])
 @login_required
 def list_datasets():
     offset = request.args.get("offset", 0)
@@ -134,7 +136,7 @@ def list_datasets():
 
 
 # ---------------------------------delete a dataset ----------------------------
-@manager.route('/<dataset_id>', methods=['DELETE'])
+@manager.route("/<dataset_id>", methods=["DELETE"])
 @login_required
 def remove_dataset(dataset_id):
     try:
@@ -142,7 +144,7 @@ def remove_dataset(dataset_id):
         datasets = KnowledgebaseService.query(created_by=current_user.id, id=dataset_id)
 
         # according to the id, searching for the dataset
         if not datasets:
-            return construct_json_result(message=f'The dataset cannot be found for your current account.',
+            return construct_json_result(message="The dataset cannot be found for your current account.",
                                          code=RetCode.OPERATING_ERROR)
 
         # Iterating the documents inside the dataset
@@ -168,7 +170,7 @@ def remove_dataset(dataset_id):
 
 
 # ------------------------------ get details of a dataset ----------------------------------------
-@manager.route('/<dataset_id>', methods=['GET'])
+@manager.route("/<dataset_id>", methods=["GET"])
 @login_required
 def get_dataset(dataset_id):
     try:
@@ -181,7 +183,7 @@ def get_dataset(dataset_id):
 
 
 # ------------------------------ update a dataset --------------------------------------------
-@manager.route('/<dataset_id>', methods=['PUT'])
+@manager.route("/<dataset_id>", methods=["PUT"])
 @login_required
 def update_dataset(dataset_id):
     req = request.json
@@ -192,7 +194,7 @@ def update_dataset(dataset_id):
                                                                           "you want to update!")
         # check whether the dataset can be found
         if not KnowledgebaseService.query(created_by=current_user.id, id=dataset_id):
-            return construct_json_result(message=f'Only the owner of knowledgebase is authorized for this operation!',
+            return construct_json_result(message="Only the owner of knowledgebase is authorized for this operation!",
                                          code=RetCode.OPERATING_ERROR)
 
         exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
@@ -200,7 +202,7 @@ def update_dataset(dataset_id):
         if not exist:
             return construct_json_result(code=RetCode.DATA_ERROR, message="This dataset cannot be found!")
 
-        if 'name' in req:
+        if "name" in req:
             name = req["name"].strip()
             # check whether there is duplicate name
             if name.lower() != dataset.name.lower() \
@@ -215,9 +217,9 @@ def update_dataset(dataset_id):
 
         # 2 parameters: embedding id and chunk method
         # only if chunk_num is 0, the user can update the embedding id
-        if req.get('embedding_model_id'):
+        if req.get("embedding_model_id"):
             if chunk_num == 0:
-                dataset_updating_data['embd_id'] = req['embedding_model_id']
+                dataset_updating_data["embd_id"] = req["embedding_model_id"]
             else:
                 construct_json_result(code=RetCode.DATA_ERROR, message="You have already parsed the document in this "
                                                                        "dataset, so you cannot change the embedding "
@@ -232,18 +234,18 @@ def update_dataset(dataset_id):
                                                                        "change the chunk method.")
         # convert the photo parameter to avatar
         if req.get("photo"):
-            dataset_updating_data['avatar'] = req["photo"]
+            dataset_updating_data["avatar"] = req["photo"]
 
         # layout_recognize
-        if 'layout_recognize' in req:
-            if 'parser_config' not in dataset_updating_data:
+        if "layout_recognize" in req:
+            if "parser_config" not in dataset_updating_data:
                 dataset_updating_data['parser_config'] = {}
             dataset_updating_data['parser_config']['layout_recognize'] = req['layout_recognize']
 
         # TODO: updating use_raptor needs to construct a class
 
         # 6 parameters
-        for key in ['name', 'language', 'description', 'permission', 'id', 'token_num']:
+        for key in ["name", "language", "description", "permission", "id", "token_num"]:
             if key in req:
                 dataset_updating_data[key] = req.get(key)
@@ -265,16 +267,16 @@ def update_dataset(dataset_id):
 
 # --------------------------------content management ----------------------------------------------
 
 # ----------------------------upload files-----------------------------------------------------
-@manager.route('/<dataset_id>/documents/', methods=['POST'])
+@manager.route("/<dataset_id>/documents/", methods=["POST"])
 @login_required
 def upload_documents(dataset_id):
     # no files
     if not request.files:
         return construct_json_result(
-            message='There is no file!', code=RetCode.ARGUMENT_ERROR)
+            message="There is no file!", code=RetCode.ARGUMENT_ERROR)
 
     # the number of uploading files exceeds the limit
-    file_objs = request.files.getlist('file')
+    file_objs = request.files.getlist("file")
     num_file_objs = len(file_objs)
 
     if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
@@ -288,7 +290,7 @@ def upload_documents(dataset_id):
         # no name
         if not file_name:
             return construct_json_result(
-                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)
+                message="There is a file without name!", code=RetCode.ARGUMENT_ERROR)
 
         # TODO: support the remote files
         if 'http' in file_name:
@@ -316,7 +318,7 @@ def upload_documents(dataset_id):
 
     # grab all the errs
     err = []
-    MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
+    MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
     uploaded_docs_json = []
     for file in file_objs:
         try:
@@ -373,7 +375,7 @@ def upload_documents(dataset_id):
 
 
 # ----------------------------delete a file-----------------------------------------------------
-@manager.route('/<dataset_id>/documents/<document_id>', methods=['DELETE'])
+@manager.route("/<dataset_id>/documents/<document_id>", methods=["DELETE"])
 @login_required
 def delete_document(document_id, dataset_id):  # string
     # get the root folder
@@ -433,7 +435,7 @@ def delete_document(document_id, dataset_id):  # string
 def list_documents(dataset_id):
     if not dataset_id:
         return construct_json_result(
-            data=False, message='Lack of "dataset_id"', code=RetCode.ARGUMENT_ERROR)
+            data=False, message="Lack of 'dataset_id'", code=RetCode.ARGUMENT_ERROR)
 
     # searching keywords
     keywords = request.args.get("keywords", "")
@@ -450,9 +452,109 @@ def list_documents(dataset_id):
     except Exception as e:
         return construct_error_response(e)
 
-# ----------------------------download a file-----------------------------------------------------
+# ----------------------------update: enable rename-----------------------------------------------------
+@manager.route("/<dataset_id>/documents/<document_id>", methods=["PUT"])
+@login_required
+def update_document(dataset_id, document_id):
+    req = request.json
+    try:
+        legal_parameters = set()
+        legal_parameters.add("name")
+        legal_parameters.add("enable")
+        legal_parameters.add("template_type")
 
-# ----------------------------enable rename-----------------------------------------------------
+        for key in req.keys():
+            if key not in legal_parameters:
+                return construct_json_result(code=RetCode.ARGUMENT_ERROR, message=f"{key} is an illegal parameter.")
+
+        # The request body cannot be empty
+        if not req:
+            return construct_json_result(
+                code=RetCode.DATA_ERROR,
+                message="Please input at least one parameter that you want to update!")
+
+        # Check whether there is this dataset
+        exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
+        if not exist:
+            return construct_json_result(code=RetCode.DATA_ERROR, message=f"This dataset {dataset_id} cannot be found!")
+
+        # The document does not exist
+        exist, document = DocumentService.get_by_id(document_id)
+        if not exist:
+            return construct_json_result(message=f"This document {document_id} cannot be found!",
+                                         code=RetCode.ARGUMENT_ERROR)
+
+        # Deal with the different keys
+        updating_data = {}
+        if "name" in req:
+            new_name = req["name"]
+            # Check whether the new_name is suitable
+            # 1. no name value
+            if not new_name:
+                return construct_json_result(code=RetCode.DATA_ERROR, message="There is no new name.")
+
+            # 2. Strip any leading or trailing whitespace before storing the name
+            new_name = new_name.strip()
+            updating_data["name"] = new_name
+
+            # 3. Check that new_name keeps the same file extension as before
+            if pathlib.Path(new_name.lower()).suffix != pathlib.Path(
+                    document.name.lower()).suffix:
+                return construct_json_result(
+                    data=False,
+                    message="The file extension cannot be changed",
+                    code=RetCode.ARGUMENT_ERROR)
+
+            # 4. Check whether the new name has already been occupied by another file
+            for d in DocumentService.query(name=new_name, kb_id=document.kb_id):
+                if d.name == new_name:
+                    return construct_json_result(
+                        message="Duplicated document name in the same dataset.",
+                        code=RetCode.ARGUMENT_ERROR)
+
+        if "enable" in req:
+            enable_value = req["enable"]
+            if is_illegal_value_for_enum(enable_value, StatusEnum):
+                return construct_json_result(message=f"Illegal value {enable_value} for 'enable' field.",
+                                             code=RetCode.DATA_ERROR)
+            updating_data["status"] = enable_value
+
+        # TODO: Chunk-method - update parameters inside the json object parser_config
+        if "template_type" in req:
+            type_value = req["template_type"]
+            if is_illegal_value_for_enum(type_value, ParserType):
+                return construct_json_result(message=f"Illegal value {type_value} for 'template_type' field.",
+                                             code=RetCode.DATA_ERROR)
+            updating_data["parser_id"] = req["template_type"]
+
+        # The process of updating
+        if not DocumentService.update_by_id(document_id, updating_data):
+            return construct_json_result(
+                code=RetCode.OPERATING_ERROR,
+                message="Failed to update document in the database! "
+                        "Please check the status of RAGFlow server and try again!")
+
+        # name part: file service
+        if "name" in req:
+            # Get file by document id
+            file_information = File2DocumentService.get_by_document_id(document_id)
+            if file_information:
+                exist, file = FileService.get_by_id(file_information[0].file_id)
+                FileService.update_by_id(file.id, {"name": req["name"]})
+
+        exist, document = DocumentService.get_by_id(document_id)
+
+        # Success
+        return construct_json_result(data=document.to_json(), message="Success", code=RetCode.SUCCESS)
+    except Exception as e:
+        return construct_error_response(e)
+
+
+# Helper method to judge whether it's an illegal value
+def is_illegal_value_for_enum(value, enum_class):
+    return value not in enum_class.__members__.values()
+
+# ----------------------------download a file-----------------------------------------------------
 
 # ----------------------------start parsing-----------------------------------------------------
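A note on the rename guard in `update_document` above: the extension comparison uses `pathlib.Path(...).suffix`, which looks only at the final dot-separated component. A quick self-contained illustration:

```python
# Illustrates the suffix comparison used by update_document's rename check.
import pathlib

print(pathlib.Path("report.pdf".lower()).suffix)      # '.pdf'
print(pathlib.Path("report.PDF".lower()).suffix)      # '.pdf' (lower() makes the check case-insensitive)
print(pathlib.Path("archive.tar.gz".lower()).suffix)  # '.gz'  (only the final extension is compared)
print(pathlib.Path("notes".lower()).suffix)           # ''     (empty when there is no extension)
```

Because only the final suffix is compared, a rename from `a.tar.gz` to `a.zip.gz` would pass this check; that may be acceptable, but it is worth noting in review.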
in res.json()["data"]: + if dataset["name"] == dataset_name: + return dataset["id"] return None def list_dataset(self, offset=0, count=-1, orderby="create_time", desc=True): @@ -78,7 +78,7 @@ class RAGFlow: response = requests.put(endpoint, json=params, headers=self.authorization_header) return response.json() - # -------------------- content management ----------------------------------------------------- +# ------------------------------- CONTENT MANAGEMENT ----------------------------------------------------- # ----------------------------upload local files----------------------------------------------------- def upload_local_file(self, dataset_id, file_paths): @@ -86,15 +86,15 @@ class RAGFlow: for file_path in file_paths: if not isinstance(file_path, str): - return {'code': RetCode.ARGUMENT_ERROR, 'message': f"{file_path} is not string."} - if 'http' in file_path: - return {'code': RetCode.ARGUMENT_ERROR, 'message': "Remote files have not unsupported."} + return {"code": RetCode.ARGUMENT_ERROR, "message": f"{file_path} is not string."} + if "http" in file_path: + return {"code": RetCode.ARGUMENT_ERROR, "message": "Remote files have not unsupported."} if os.path.isfile(file_path): - files.append(('file', open(file_path, 'rb'))) + files.append(("file", open(file_path, "rb"))) else: - return {'code': RetCode.DATA_ERROR, 'message': f"The file {file_path} does not exist"} + return {"code": RetCode.DATA_ERROR, "message": f"The file {file_path} does not exist"} - res = requests.request('POST', url=f"{self.dataset_url}/{dataset_id}/documents", files=files, + res = requests.request("POST", url=f"{self.dataset_url}/{dataset_id}/documents", files=files, headers=self.authorization_header) result_dict = json.loads(res.text) @@ -119,9 +119,13 @@ class RAGFlow: res = requests.get(endpoint, params=params, headers=self.authorization_header) return res.json() - # ----------------------------download a file----------------------------------------------------- + # ----------------------------update files: enable, rename, template_type------------------------------------------- + def update_file(self, dataset_id, document_id, **params): + endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}" + response = requests.put(endpoint, json=params, headers=self.authorization_header) + return response.json() - # ----------------------------enable rename----------------------------------------------------- + # ----------------------------download a file----------------------------------------------------- # ----------------------------start parsing----------------------------------------------------- @@ -137,8 +141,6 @@ class RAGFlow: # ----------------------------insert a new chunk----------------------------------------------------- - # ----------------------------upload a file----------------------------------------------------- - # ----------------------------get a specific chunk----------------------------------------------------- # ----------------------------retrieval test----------------------------------------------------- diff --git a/sdk/python/test/test_document.py b/sdk/python/test/test_document.py index 342763537..81b84692f 100644 --- a/sdk/python/test/test_document.py +++ b/sdk/python/test/test_document.py @@ -34,10 +34,10 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_two_files") - dataset_id = created_res['data']['dataset_id'] + dataset_id = created_res["data"]["dataset_id"] file_paths = 
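A usage sketch for the new `update_file` method, mirroring the style of the tests below. The key, host, and ids are placeholders, and the import path assumes this repository's SDK layout:

```python
# Sketch only: rename a document, disable it, and switch its chunk template.
# API_KEY, HOST_ADDRESS, and both ids are placeholders.
from api.settings import RetCode
from ragflow import RAGFlow

ragflow = RAGFlow("<API_KEY>", "<HOST_ADDRESS>")
update_res = ragflow.update_file("<dataset_id>", "<document_id>",
                                 name="renamed.txt",    # must keep the original extension
                                 enable="0",            # must be a valid StatusEnum value
                                 template_type="laws")  # must be a valid ParserType value
assert update_res["code"] == RetCode.SUCCESS
```

Any subset of the three keyword arguments can be sent on its own; the server rejects anything outside `name`, `enable`, and `template_type`.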
["test_data/test.txt", "test_data/test1.txt"] res = ragflow.upload_local_file(dataset_id, file_paths) - assert res['code'] == RetCode.SUCCESS and res['message'] == 'success' + assert res["code"] == RetCode.SUCCESS and res["message"] == "success" def test_upload_one_file(self): """ @@ -45,10 +45,10 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_one_file") - dataset_id = created_res['data']['dataset_id'] + dataset_id = created_res["data"]["dataset_id"] file_paths = ["test_data/test.txt"] res = ragflow.upload_local_file(dataset_id, file_paths) - assert res['code'] == RetCode.SUCCESS and res['message'] == 'success' + assert res["code"] == RetCode.SUCCESS and res["message"] == "success" def test_upload_nonexistent_files(self): """ @@ -56,10 +56,10 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_nonexistent_files") - dataset_id = created_res['data']['dataset_id'] + dataset_id = created_res["data"]["dataset_id"] file_paths = ["test_data/imagination.txt"] res = ragflow.upload_local_file(dataset_id, file_paths) - assert res['code'] == RetCode.DATA_ERROR and "does not exist" in res['message'] + assert res["code"] == RetCode.DATA_ERROR and "does not exist" in res["message"] def test_upload_file_if_dataset_does_not_exist(self): """ @@ -68,7 +68,7 @@ class TestFile(TestSdk): ragflow = RAGFlow(API_KEY, HOST_ADDRESS) file_paths = ["test_data/test.txt"] res = ragflow.upload_local_file("111", file_paths) - assert res['code'] == RetCode.DATA_ERROR and res['message'] == "Can't find this dataset" + assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Can't find this dataset" def test_upload_file_without_name(self): """ @@ -76,10 +76,10 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_file_without_name") - dataset_id = created_res['data']['dataset_id'] + dataset_id = created_res["data"]["dataset_id"] file_paths = ["test_data/.txt"] res = ragflow.upload_local_file(dataset_id, file_paths) - assert res['code'] == RetCode.SUCCESS + assert res["code"] == RetCode.SUCCESS def test_upload_file_without_name1(self): """ @@ -87,10 +87,10 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_file_without_name") - dataset_id = created_res['data']['dataset_id'] + dataset_id = created_res["data"]["dataset_id"] file_paths = ["test_data/.txt", "test_data/empty.txt"] res = ragflow.upload_local_file(dataset_id, file_paths) - assert res['code'] == RetCode.SUCCESS + assert res["code"] == RetCode.SUCCESS def test_upload_files_exceeding_the_number_limit(self): """ @@ -98,12 +98,12 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_files_exceeding_the_number_limit") - dataset_id = created_res['data']['dataset_id'] + dataset_id = created_res["data"]["dataset_id"] file_paths = ["test_data/test.txt", "test_data/test1.txt"] * 256 res = ragflow.upload_local_file(dataset_id, file_paths) - assert (res['message'] == - 'You try to upload 512 files, which exceeds the maximum number of uploading files: 256' - and res['code'] == RetCode.DATA_ERROR) + assert (res["message"] == + "You try to upload 512 files, which exceeds the maximum number of uploading files: 256" + and res["code"] == RetCode.DATA_ERROR) def test_upload_files_without_files(self): """ @@ -111,10 +111,10 @@ 
class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_files_without_files") - dataset_id = created_res['data']['dataset_id'] + dataset_id = created_res["data"]["dataset_id"] file_paths = [None] res = ragflow.upload_local_file(dataset_id, file_paths) - assert (res['message'] == 'None is not string.' and res['code'] == RetCode.ARGUMENT_ERROR) + assert (res["message"] == "None is not string." and res["code"] == RetCode.ARGUMENT_ERROR) def test_upload_files_with_two_files_with_same_name(self): """ @@ -122,10 +122,10 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_files_with_two_files_with_same_name") - dataset_id = created_res['data']['dataset_id'] - file_paths = ['test_data/test.txt'] * 2 + dataset_id = created_res["data"]["dataset_id"] + file_paths = ["test_data/test.txt"] * 2 res = ragflow.upload_local_file(dataset_id, file_paths) - assert (res['message'] == 'success' and res['code'] == RetCode.SUCCESS) + assert (res["message"] == "success" and res["code"] == RetCode.SUCCESS) def test_upload_files_with_file_paths(self): """ @@ -133,10 +133,10 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_files_with_file_paths") - dataset_id = created_res['data']['dataset_id'] - file_paths = ['test_data/'] + dataset_id = created_res["data"]["dataset_id"] + file_paths = ["test_data/"] res = ragflow.upload_local_file(dataset_id, file_paths) - assert (res['message'] == 'The file test_data/ does not exist' and res['code'] == RetCode.DATA_ERROR) + assert (res["message"] == "The file test_data/ does not exist" and res["code"] == RetCode.DATA_ERROR) def test_upload_files_with_remote_file_path(self): """ @@ -144,10 +144,10 @@ class TestFile(TestSdk): """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_upload_files_with_remote_file_path") - dataset_id = created_res['data']['dataset_id'] - file_paths = ['https://github.com/genostack/ragflow'] + dataset_id = created_res["data"]["dataset_id"] + file_paths = ["https://github.com/genostack/ragflow"] res = ragflow.upload_local_file(dataset_id, file_paths) - assert res['code'] == RetCode.ARGUMENT_ERROR and res['message'] == 'Remote files have not unsupported.' + assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "Remote files have not unsupported." 
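Each test in this file repeats the same create-dataset / upload / fetch-doc-id preamble. A shared helper along these lines (hypothetical, not part of this PR) would trim several lines from every test:

```python
# Hypothetical helper, not in this PR: factors out the repeated test preamble.
def create_dataset_with_one_doc(ragflow, dataset_name, file_paths=("test_data/test.txt",)):
    created_res = ragflow.create_dataset(dataset_name)
    dataset_id = created_res["data"]["dataset_id"]
    res = ragflow.upload_local_file(dataset_id, list(file_paths))
    doc_id = res["data"][0]["id"]
    return dataset_id, doc_id
```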
 
 # ----------------------------delete a file-----------------------------------------------------
     def test_delete_one_file(self):
         """
@@ -156,16 +156,16 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_delete_one_file")
-        dataset_id = created_res['data']['dataset_id']
+        dataset_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt"]
         res = ragflow.upload_local_file(dataset_id, file_paths)
         # get the doc_id
-        data = res['data'][0]
-        doc_id = data['id']
+        data = res["data"][0]
+        doc_id = data["id"]
         # delete the files
         deleted_res = ragflow.delete_files(doc_id, dataset_id)
         # assert value
-        assert deleted_res['code'] == RetCode.SUCCESS and deleted_res['data'] is True
+        assert deleted_res["code"] == RetCode.SUCCESS and deleted_res["data"] is True
 
     def test_delete_document_with_not_existing_document(self):
         """
@@ -173,9 +173,9 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_delete_document_with_not_existing_document")
-        dataset_id = created_res['data']['dataset_id']
+        dataset_id = created_res["data"]["dataset_id"]
         res = ragflow.delete_files("111", dataset_id)
-        assert res['code'] == RetCode.DATA_ERROR and res['message'] == 'Document 111 not found!'
+        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Document 111 not found!"
 
     def test_delete_document_with_creating_100_documents_and_deleting_100_documents(self):
         """
@@ -184,18 +184,18 @@ class TestFile(TestSdk):
         # upload 100 docs
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_delete_one_file")
-        dataset_id = created_res['data']['dataset_id']
+        dataset_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt"] * 100
         res = ragflow.upload_local_file(dataset_id, file_paths)
 
         # get the doc_id
-        data = res['data']
+        data = res["data"]
         for d in data:
-            doc_id = d['id']
+            doc_id = d["id"]
             # delete the files
             deleted_res = ragflow.delete_files(doc_id, dataset_id)
         # assert value
-        assert deleted_res['code'] == RetCode.SUCCESS and deleted_res['data'] is True
+        assert deleted_res["code"] == RetCode.SUCCESS and deleted_res["data"] is True
 
     def test_delete_document_from_nonexistent_dataset(self):
         """
@@ -203,17 +203,17 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_delete_one_file")
-        dataset_id = created_res['data']['dataset_id']
+        dataset_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt"]
         res = ragflow.upload_local_file(dataset_id, file_paths)
         # get the doc_id
-        data = res['data'][0]
-        doc_id = data['id']
+        data = res["data"][0]
+        doc_id = data["id"]
         # delete the files
         deleted_res = ragflow.delete_files(doc_id, "000")
         # assert value
-        assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
-                f'The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.')
+        assert (deleted_res["code"] == RetCode.ARGUMENT_ERROR and deleted_res["message"] ==
+                f"The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.")
 
     def test_delete_document_which_is_located_in_other_dataset(self):
         """
@@ -222,20 +222,20 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         # upload a document
         created_res = ragflow.create_dataset("test_delete_document_which_is_located_in_other_dataset")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt"]
         res = ragflow.upload_local_file(created_res_id, file_paths)
         # other dataset
         other_res = ragflow.create_dataset("other_dataset")
-        other_dataset_id = other_res['data']['dataset_id']
+        other_dataset_id = other_res["data"]["dataset_id"]
         # get the doc_id
-        data = res['data'][0]
-        doc_id = data['id']
+        data = res["data"][0]
+        doc_id = data["id"]
         # delete the files from the other dataset
         deleted_res = ragflow.delete_files(doc_id, other_dataset_id)
         # assert value
-        assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
-                f'The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.')
+        assert (deleted_res["code"] == RetCode.ARGUMENT_ERROR and deleted_res["message"] ==
+                f"The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.")
 
 # ----------------------------list files-----------------------------------------------------
     def test_list_documents_with_success(self):
         """
@@ -245,12 +245,12 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         # upload a document
         created_res = ragflow.create_dataset("test_list_documents_with_success")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt"]
         ragflow.upload_local_file(created_res_id, file_paths)
         # Call the list_document method
         response = ragflow.list_files(created_res_id)
-        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 1
+        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 1
 
     def test_list_documents_with_checking_size(self):
         """
@@ -259,12 +259,12 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         # upload 10 documents
         created_res = ragflow.create_dataset("test_list_documents_with_checking_size")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt"] * 10
         ragflow.upload_local_file(created_res_id, file_paths)
         # Call the list_document method
         response = ragflow.list_files(created_res_id)
-        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 10
+        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 10
 
     def test_list_documents_with_getting_empty_result(self):
         """
@@ -273,10 +273,10 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         # upload 0 documents
         created_res = ragflow.create_dataset("test_list_documents_with_getting_empty_result")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         # Call the list_document method
         response = ragflow.list_files(created_res_id)
-        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 0
+        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 0
 
     def test_list_documents_with_creating_100_documents(self):
         """
@@ -285,12 +285,12 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         # upload 100 documents
         created_res = ragflow.create_dataset("test_list_documents_with_creating_100_documents")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt"] * 100
         ragflow.upload_local_file(created_res_id, file_paths)
         # Call the list_document method
         response = ragflow.list_files(created_res_id)
-        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 100
+        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 100
 
     def test_list_document_with_failure(self):
         """
@@ -298,9 +298,9 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_list_document_with_failure")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         response = ragflow.list_files(created_res_id, offset=-1, count=-1)
-        assert "IndexError" in response['message'] and response['code'] == RetCode.EXCEPTION_ERROR
+        assert "IndexError" in response["message"] and response["code"] == RetCode.EXCEPTION_ERROR
 
     def test_list_document_with_verifying_offset_and_count(self):
         """
@@ -308,13 +308,13 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_list_document_with_verifying_offset_and_count")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt", "test_data/empty.txt"] * 10
         ragflow.upload_local_file(created_res_id, file_paths)
 
         # Call the list_document method
         response = ragflow.list_files(created_res_id, offset=2, count=10)
-        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 10
+        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 10
 
     def test_list_document_with_verifying_keywords(self):
         """
@@ -322,13 +322,13 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_list_document_with_verifying_keywords")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt", "test_data/empty.txt"]
         ragflow.upload_local_file(created_res_id, file_paths)
 
         # Call the list_document method
         response = ragflow.list_files(created_res_id, keywords="empty")
-        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 1
+        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 1
 
     def test_list_document_with_verifying_order_by_and_descend(self):
         """
@@ -336,17 +336,17 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_descend")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt", "test_data/empty.txt"]
         ragflow.upload_local_file(created_res_id, file_paths)
 
         # Call the list_document method
         response = ragflow.list_files(created_res_id)
-        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 2
-        docs = response['data']['docs']
+        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 2
+        docs = response["data"]["docs"]
         # reverse
         i = 1
         for doc in docs:
-            assert doc['name'] in file_paths[i]
+            assert doc["name"] in file_paths[i]
             i -= 1
 
     def test_list_document_with_verifying_order_by_and_ascend(self):
@@ -355,24 +355,277 @@ class TestFile(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_ascend")
-        created_res_id = created_res['data']['dataset_id']
+        created_res_id = created_res["data"]["dataset_id"]
         file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
         ragflow.upload_local_file(created_res_id, file_paths)
         # Call the list_document method
         response = ragflow.list_files(created_res_id, descend=False)
-        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 3
+        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 3
 
-        docs = response['data']['docs']
+        docs = response["data"]["docs"]
 
         i = 0
         for doc in docs:
-            assert doc['name'] in file_paths[i]
+            assert doc["name"] in file_paths[i]
             i += 1
 
-    # TODO: have to set the limitation of the number of documents
 
-# ----------------------------download a file-----------------------------------------------------
+# ----------------------------update files: enable, rename, template_type-------------------------------------------
 
-# ----------------------------enable rename-----------------------------------------------------
+    def test_update_nonexistent_document(self):
+        """
+        Test updating a document which does not exist.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_nonexistent_document")
+        created_res_id = created_res["data"]["dataset_id"]
+        params = {
+            "name": "new_name"
+        }
+        res = ragflow.update_file(created_res_id, "weird_doc_id", **params)
+        assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "This document weird_doc_id cannot be found!"
+
+    def test_update_document_without_parameters(self):
+        """
+        Test updating a document without giving parameters.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_without_parameters")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {}
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.DATA_ERROR and
+                update_res["message"] == "Please input at least one parameter that you want to update!")
+
+    def test_update_document_in_nonexistent_dataset(self):
+        """
+        Test updating a document in a nonexistent dataset.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_in_nonexistent_dataset")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "name": "new_name"
+        }
+        update_res = ragflow.update_file("fake_dataset_id", doc_id, **params)
+        assert (update_res["code"] == RetCode.DATA_ERROR and
+                update_res["message"] == "This dataset fake_dataset_id cannot be found!")
+
+    def test_update_document_with_different_extension_name(self):
+        """
+        Test updating a document with an extension that differs from the original.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_different_extension_name")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "name": "new_name.doc"
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
+                update_res["message"] == "The file extension cannot be changed")
+
+    def test_update_document_with_duplicate_name(self):
+        """
+        Test updating a document with a duplicate name.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_duplicate_name")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "name": "test.txt"
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
+                update_res["message"] == "Duplicated document name in the same dataset.")
+
+    def test_update_document_with_updating_its_name_with_success(self):
+        """
+        Test updating a document's name with success.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "name": "new_name.txt"
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.SUCCESS and
+                update_res["message"] == "Success" and update_res["data"]["name"] == "new_name.txt")
+
+    def test_update_document_with_updating_its_template_type_with_success(self):
+        """
+        Test updating a document's template type with success.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_updating_its_template_type_with_success")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "template_type": "laws"
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.SUCCESS and
+                update_res["message"] == "Success" and update_res["data"]["parser_id"] == "laws")
+
+    def test_update_document_with_updating_its_enable_value_with_success(self):
+        """
+        Test updating a document's enable value with success.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_updating_its_enable_value_with_success")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "enable": "0"
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.SUCCESS and
+                update_res["message"] == "Success" and update_res["data"]["status"] == "0")
+
+    def test_update_document_with_updating_illegal_parameter(self):
+        """
+        Test updating a document with an illegal parameter.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_updating_illegal_parameter")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "illegal_parameter": "0"
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+
+        assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
+                update_res["message"] == "illegal_parameter is an illegal parameter.")
+
+    def test_update_document_with_giving_its_name_value(self):
+        """
+        Test updating a document without giving a new name value.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_giving_its_name_value")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "name": ""
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.DATA_ERROR and
+                update_res["message"] == "There is no new name.")
+
+    def test_update_document_with_giving_illegal_value_for_enable(self):
+        """
+        Test updating a document with an illegal value for the 'enable' field.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_giving_illegal_value_for_enable")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "enable": "?"
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.DATA_ERROR and
+                update_res["message"] == "Illegal value ? for 'enable' field.")
+
+    def test_update_document_with_giving_illegal_value_for_type(self):
+        """
+        Test updating a document with an illegal value for the 'template_type' field.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_update_document_with_giving_illegal_value_for_type")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # update file
+        params = {
+            "template_type": "?"
+        }
+        update_res = ragflow.update_file(created_res_id, doc_id, **params)
+        assert (update_res["code"] == RetCode.DATA_ERROR and
+                update_res["message"] == "Illegal value ? for 'template_type' field.")
+
+# ----------------------------download a file-----------------------------------------------------
 
 # ----------------------------start parsing-----------------------------------------------------