delete_dataset method and tests created (#1186)

### What problem does this PR solve?

This PR implements both the HTTP API and the Python SDK for
`delete_dataset`. In addition, it adds tests for it.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
cecilia-uu 2024-06-17 15:10:05 +08:00 committed by GitHub
parent 1eb4caf02a
commit 6be3626372
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 77 additions and 7 deletions

View File

@ -135,9 +135,52 @@ def list_datasets():
# DELETE /<dataset_id>: remove a dataset (knowledgebase) created by the current
# user, along with its documents and the file records linked to them.
@manager.route('/<dataset_id>', methods=['DELETE'])
@login_required
@validate_request("dataset_id")
def remove_dataset(dataset_id):
# NOTE(review): this unconditional return comes before the rest of the body, so
# as written nothing below it can run. It looks like the pre-change stub line
# kept by the diff rendering -- confirm and drop it.
return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to remove dataset: {dataset_id}")
# NOTE(review): the id used below is read from the JSON body, not from the
# `dataset_id` path parameter -- confirm that callers send a body carrying it.
req = request.json
try:
# Only datasets created by the current user match this query.
kbs = KnowledgebaseService.query(
created_by=current_user.id, id=req["dataset_id"])
if not kbs:
return construct_json_result(
data=False, message=f'Only owner of knowledgebase authorized for this operation.',
code=RetCode.OPERATING_ERROR)
# Remove each document of the dataset, then its linked file record and mapping.
for doc in DocumentService.query(kb_id=req["dataset_id"]):
if not DocumentService.remove_document(doc, kbs[0].tenant_id):
# No explicit code is passed here; the helper's default applies -- TODO confirm
# that default signals an error to the client.
return construct_json_result(
message="Database error (Document removal)!")
f2d = File2DocumentService.get_by_document_id(doc.id)
# NOTE(review): f2d[0] raises IndexError when no mapping exists for the document;
# the outer `except` would turn that into an error response.
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
File2DocumentService.delete_by_document_id(doc.id)
if not KnowledgebaseService.delete_by_id(req["dataset_id"]):
return construct_json_result(
message="Database error (Knowledgebase removal)!")
# NOTE(review): the success path reuses the stub's DATA_ERROR code and
# "attempt to remove" message -- presumably a success code was intended; confirm.
return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to remove dataset: {dataset_id}")
except Exception as e:
return construct_error_response(e)
# ------------------------------ get details of a dataset ----------------------------------------
@manager.route('/<dataset_id>', methods=['GET'])
@login_required
@validate_request("dataset_id")
def get_dataset(dataset_id):
    """Look up the dataset identified by the ``<dataset_id>`` path segment.

    Args:
        dataset_id: Value of the ``<dataset_id>`` URL variable. The route
            declares this variable, so the view must accept it as a
            parameter.

    Returns:
        The response built by ``construct_json_result``: a "Can't find this
        knowledgebase!" message when ``KnowledgebaseService.get_detail``
        returns nothing, otherwise the placeholder "attempt to get detail"
        response. Any exception is converted by ``construct_error_response``,
        matching ``remove_dataset``.
    """
    try:
        dataset = KnowledgebaseService.get_detail(dataset_id)
        if not dataset:
            return construct_json_result(
                message="Can't find this knowledgebase!")
        # NOTE(review): the fetched details are not returned yet; the found
        # case still answers with the placeholder DATA_ERROR response.
        return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to get detail of dataset: {dataset_id}")
    except Exception as e:
        # Use the dedicated error helper rather than passing the exception
        # object to construct_json_result as a positional argument.
        return construct_error_response(e)
# ------------------------------ update a dataset --------------------------------------------
# Placeholder GET handler: performs no lookup and always answers with
# RetCode.DATA_ERROR and an "attempt to get detail" message.
# NOTE(review): a `get_dataset` view for this same route is already defined
# earlier in this file; this copy looks like diff residue -- confirm which
# definition should remain.
@manager.route('/<dataset_id>', methods=['GET'])
@login_required
def get_dataset(dataset_id):
return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to get detail of dataset: {dataset_id}")
@manager.route('/<dataset_id>', methods=['PUT'])
@login_required
@ -146,10 +189,7 @@ def update_dataset(dataset_id):
return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to update dataset: {dataset_id}")
# Placeholder GET handler: performs no lookup and always answers with
# RetCode.DATA_ERROR and an "attempt to get detail" message.
# NOTE(review): `get_dataset` is also defined earlier in this file for the same
# route; this copy looks like lines removed by the diff -- confirm before keeping.
@manager.route('/<dataset_id>', methods=['GET'])
@login_required
def get_dataset(dataset_id):
return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to get detail of dataset: {dataset_id}")

View File

@ -39,8 +39,23 @@ class RAGFlow:
result_dict = json.loads(res.text)
return result_dict
# Stub: ignores `dataset_id` and returns `dataset_name` unchanged.
# NOTE(review): `delete_dataset` is defined again immediately after this stub
# with a (self, dataset_name) signature; this looks like the pre-change version
# kept by the diff rendering -- confirm and remove.
def delete_dataset(self, dataset_name=None, dataset_id=None):
return dataset_name
def delete_dataset(self, dataset_name):
    """Delete the dataset named ``dataset_name``.

    The name is resolved to an id through ``find_dataset_id_by_name`` and a
    DELETE request is sent to ``{dataset_url}/{id}``.

    Returns:
        A dict with a boolean ``success`` and a human-readable ``message``.
        Success is decided by the HTTP status code alone (200); the response
        body is not inspected.
    """
    target_id = self.find_dataset_id_by_name(dataset_name)
    if not target_id:
        return {"success": False, "message": "Dataset not found."}

    response = requests.delete(f"{self.dataset_url}/{target_id}", headers=self.authorization_header)
    deleted = response.status_code == 200
    if deleted:
        message = "Dataset deleted successfully!"
    else:
        message = f"Other status code: {response.status_code}"
    return {"success": deleted, "message": message}
def find_dataset_id_by_name(self, dataset_name):
    """Return the id of the first listed dataset named ``dataset_name``.

    Issues a GET to ``self.dataset_url`` and scans the ``data`` entries of the
    JSON reply in order. Returns ``None`` when no entry has that name.
    """
    listing = requests.get(self.dataset_url, headers=self.authorization_header)
    matching_ids = (
        entry['id']
        for entry in listing.json()['data']
        if entry['name'] == dataset_name
    )
    return next(matching_ids, None)
def list_dataset(self, offset=0, count=-1, orderby="create_time", desc=True):
params = {

View File

@ -101,6 +101,21 @@ class TestDataset(TestSdk):
_, res = response
assert "IndexError" in res['message']
def test_delete_one_dataset_with_success(self):
    """Create a dataset, then delete it using the name from the create reply."""
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    # Use the name reported in the create response, not the requested one.
    created = client.create_dataset("kb0")
    real_dataset_name = created['data']['dataset_name']
    print("name", real_dataset_name)
    # Delete by that name and expect a success flag.
    outcome = client.delete_dataset(real_dataset_name)
    print(outcome)
    assert outcome["success"] is True
def test_delete_dataset_with_not_existing_dataset(self):
    """Deleting the name "weird_dataset" is expected to report failure."""
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    outcome = client.delete_dataset("weird_dataset")
    assert outcome["success"] is False