diff --git a/docs/references/ragflow_api.md b/docs/references/ragflow_api.md new file mode 100644 index 000000000..7474d62ee --- /dev/null +++ b/docs/references/ragflow_api.md @@ -0,0 +1,148 @@ +--- +sidebar_position: 1 +slug: /api +--- + +# API reference + +RAGFlow offers RESTful APIs for you to integrate its capabilities into third-party applications. + +## Base URL +``` +https://demo.ragflow.io/api/v1/ +``` + +## Authorization + +All of RAGFlow's RESTFul APIs use API key for authorization, so keep it safe and do not expose it to the front end. +Put your API key in the request header. + +```buildoutcfg +Authorization: Bearer {API_KEY} +``` + +To get your API key: + +1. In RAGFlow, click **Chat** tab in the middle top of the page. +2. Hover over the corresponding dialogue **>** **Chat Bot API** to show the chatbot API configuration page. +3. Click **Api Key** **>** **Create new key** to create your API key. +4. Copy and keep your API key safe. + +## Create dataset + +This method creates (news) a dataset for a specific user. + +### Request + +#### Request URI + +| Method | Request URI | +|--------|-------------| +| POST | `/dataset` | + +:::note +You are *required* to save the `data.id` value returned in the response data, which is the session ID for all upcoming conversations. +::: + +#### Request parameter + +| Name | Type | Required | Description | +|----------------|--------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `dataset_name` | string | Yes | The unique identifier assigned to each newly created dataset. `dataset_name` must be less than 2 ** 10 characters and cannot be empty. The following character sets are supported:
- 26 lowercase English letters (a-z)
- 26 uppercase English letters (A-Z)
- 10 digits (0-9)
- "_", "-", "." | + +### Response + +```json +{ + "code": 0, + "data": { + "dataset_name": "kb1" + }, + "message": "success" +} +``` + +## Get dataset list + +This method lists the created datasets for a specific user. + +### Request + +#### Request URI + +| Method | Request URI | +|----------|-------------| +| GET | `/dataset` | + +### Response + +#### Response parameter + +```python +(200, +{ + "code": 102, + "data": [ + { + "avatar": None, + "chunk_num": 0, + "create_date": "Mon, 17 Jun 2024 16:00:05 GMT", + "create_time": 1718611205876, + "created_by": "b48110a0286411ef994a3043d7ee537e", + "description": None, + "doc_num": 0, + "embd_id": "BAAI/bge-large-zh-v1.5", + "id": "9bd6424a2c7f11ef81b83043d7ee537e", + "language": "Chinese", + "name": "dataset3(23)", + "parser_config": { + "pages": [ + [ + 1, + 1000000 + ] + ] + }, + "parser_id": "naive", + "permission": "me", + "similarity_threshold": 0.2, + "status": "1", + "tenant_id": "b48110a0286411ef994a3043d7ee537e", + "token_num": 0, + "update_date": "Mon, 17 Jun 2024 16:00:05 GMT", + "update_time": 1718611205876, + "vector_similarity_weight": 0.3 + }, + # ... additional datasets ... + ], + "message": "attempt to list datasets" +} +) +``` + +## Delete dataset + +This method deletes a dataset for a specific user. + +### Request + +#### Request URI + +| Method | Request URI | +|--------|-------------------------| +| DELETE | `/dataset/{dataset_id}` | + +#### Request parameter + +| Name | Type | Required | Description | +|--------------|--------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `dataset_id` | string | Yes | The ID of the dataset. Call ['GET' /dataset](#get-dataset-list) to retrieve the ID. | + +### Response + +```json +{ + "success": true, + "message": "Dataset deleted successfully!" 
+} +``` diff --git a/sdk/python/test/test_dataset.py b/sdk/python/test/test_dataset.py index 315bf7ac6..026154cbd 100644 --- a/sdk/python/test/test_dataset.py +++ b/sdk/python/test/test_dataset.py @@ -2,27 +2,134 @@ from test_sdkbase import TestSdk from ragflow import RAGFlow import pytest from common import API_KEY, HOST_ADDRESS - +from api.contants import NAME_LENGTH_LIMIT class TestDataset(TestSdk): - - def test_create_dataset(self): - ''' + """ + This class contains a suite of tests for the dataset management functionality within the RAGFlow system. + It ensures that the following functionalities as expected: 1. create a kb 2. list the kb 3. get the detail info according to the kb id 4. update the kb 5. delete the kb - ''' - + """ + # -----------------------create_dataset--------------------------------- + def test_create_dataset_with_success(self): + """ + Test the creation of a new dataset with success. + """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) # create a kb res = ragflow.create_dataset("kb1") assert res['code'] == 0 and res['message'] == 'success' - dataset_name = res['data']['dataset_name'] + def test_create_dataset_with_empty_name(self): + """ + Test the creation of a new dataset with an empty name. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset("") + assert res['message'] == 'Empty dataset name' and res['code'] == 102 + + def test_create_dataset_with_name_exceeding_limit(self): + """ + Test the creation of a new dataset with the length of name exceeding the limit. + """ + name = "k" * NAME_LENGTH_LIMIT + "b" + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset(name) + assert (res['message'] == f"Dataset name: {name} with length {len(name)} exceeds {NAME_LENGTH_LIMIT}!" + and res['code'] == 102) + + def test_create_dataset_name_with_space_in_the_middle(self): + """ + Test the creation of a new dataset whose name has space in the middle. 
+ """ + name = "k b" + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset(name) + assert (res['code'] == 0 and res['message'] == 'success') + + def test_create_dataset_name_with_space_in_the_head(self): + """ + Test the creation of a new dataset whose name has space in the head. + """ + name = " kb" + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset(name) + assert (res['code'] == 0 and res['message'] == 'success') + + def test_create_dataset_name_with_space_in_the_tail(self): + """ + Test the creation of a new dataset whose name has space in the tail. + """ + name = "kb " + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset(name) + assert (res['code'] == 0 and res['message'] == 'success') + + def test_create_dataset_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self): + """ + Test the creation of a new dataset whose name has space in the head and tail, + and the length of the name exceeds the limit. + """ + name = " " + "k" * NAME_LENGTH_LIMIT + " " + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset(name) + assert (res['code'] == 0 and res['message'] == 'success') + + def test_create_dataset_with_two_same_name(self): + """ + Test the creation of two new datasets with the same name. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset("kb") + assert (res['code'] == 0 and res['message'] == 'success') + res = ragflow.create_dataset("kb") + assert (res['code'] == 0 and res['message'] == 'success') + + def test_create_dataset_with_only_space_in_the_name(self): + """ + Test the creation of a dataset whose name only has space. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset(" ") + assert (res['code'] == 0 and res['message'] == 'success') + + def test_create_dataset_with_space_number_exceeding_limit(self): + """ + Test the creation of a dataset with a name that only has space exceeds the allowed limit. 
+ """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + name = " " * NAME_LENGTH_LIMIT + res = ragflow.create_dataset(name) + assert (res['code'] == 0 and res['message'] == 'success') + + def test_create_dataset_with_name_having_return(self): + """ + Test the creation of a dataset with a name that has return symbol. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + name = "kb\n" + res = ragflow.create_dataset(name) + assert (res['code'] == 0 and res['message'] == 'success') + + def test_create_dataset_with_name_having_the_null_character(self): + """ + Test the creation of a dataset with a name that has the null character. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + name = "kb\0" + res = ragflow.create_dataset(name) + assert (res['code'] == 0 and res['message'] == 'success') + + # -----------------------list_dataset--------------------------------- def test_list_dataset_success(self): + """ + Test listing datasets with a successful outcome. + """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) # Call the list_datasets method response = ragflow.list_dataset() @@ -32,6 +139,9 @@ class TestDataset(TestSdk): assert code == 200 def test_list_dataset_with_checking_size_and_name(self): + """ + Test listing datasets and verify the size and names of the datasets. + """ datasets_to_create = ["dataset1", "dataset2", "dataset3"] ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_response = [ragflow.create_dataset(name) for name in datasets_to_create] @@ -51,6 +161,9 @@ class TestDataset(TestSdk): assert len(listed_data) == len(datasets_to_create) def test_list_dataset_with_getting_empty_result(self): + """ + Test listing datasets that should be empty. 
+ """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) datasets_to_create = [] created_response = [ragflow.create_dataset(name) for name in datasets_to_create] @@ -70,6 +183,9 @@ class TestDataset(TestSdk): assert len(listed_data) == 0 def test_list_dataset_with_creating_100_knowledge_bases(self): + """ + Test listing 100 datasets and verify the size and names of these datasets. + """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) datasets_to_create = ["dataset1"] * 100 created_response = [ragflow.create_dataset(name) for name in datasets_to_create] @@ -89,6 +205,9 @@ class TestDataset(TestSdk): assert len(listed_data) == 100 def test_list_dataset_with_showing_one_dataset(self): + """ + Test listing one dataset and verify the size of the dataset. + """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) response = ragflow.list_dataset(0, 1) code, response = response @@ -96,26 +215,145 @@ class TestDataset(TestSdk): assert len(datasets) == 1 def test_list_dataset_failure(self): + """ + Test listing datasets with IndexError. + """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) response = ragflow.list_dataset(-1, -1) _, res = response assert "IndexError" in res['message'] + def test_list_dataset_for_empty_datasets(self): + """ + Test listing datasets when the datasets are empty. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + response = ragflow.list_dataset() + code, response = response + datasets = response['data'] + assert len(datasets) == 0 + + # TODO: have to set the limitation of the number of datasets + + # -----------------------delete_dataset--------------------------------- def test_delete_one_dataset_with_success(self): + """ + Test deleting a dataset with success. 
+ """ # get the real name of the created dataset ragflow = RAGFlow(API_KEY, HOST_ADDRESS) res = ragflow.create_dataset("kb0") real_dataset_name = res['data']['dataset_name'] - print("name", real_dataset_name) # delete this dataset result = ragflow.delete_dataset(real_dataset_name) - print(result) assert result["success"] is True def test_delete_dataset_with_not_existing_dataset(self): + """ + Test deleting a dataset that does not exist with failure. + """ ragflow = RAGFlow(API_KEY, HOST_ADDRESS) res = ragflow.delete_dataset("weird_dataset") assert res["success"] is False + def test_delete_dataset_with_creating_100_datasets_and_deleting_100_datasets(self): + """ + Test deleting a dataset when creating 100 datasets and deleting 100 datasets. + """ + # create 100 datasets + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + datasets_to_create = ["dataset1"] * 100 + created_response = [ragflow.create_dataset(name) for name in datasets_to_create] + real_name_to_create = set() + for response in created_response: + assert 'data' in response, "Response is missing 'data' key" + dataset_name = response['data']['dataset_name'] + real_name_to_create.add(dataset_name) + + for name in real_name_to_create: + res = ragflow.delete_dataset(name) + assert res["success"] is True + + def test_delete_dataset_with_space_in_the_middle_of_the_name(self): + """ + Test deleting a dataset when its name has space in the middle. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.delete_dataset("k b") + print(res) + assert res["success"] is True + + def test_delete_dataset_with_space_in_the_head_of_the_name(self): + """ + Test deleting a dataset when its name has space in the head. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.delete_dataset(" kb") + assert res["success"] is False + + def test_delete_dataset_with_space_in_the_tail_of_the_name(self): + """ + Test deleting a dataset when its name has space in the tail. 
+ """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.delete_dataset("kb ") + assert res["success"] is False + + def test_delete_dataset_with_only_space_in_the_name(self): + """ + Test deleting a dataset when its name only has space. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.delete_dataset(" ") + assert res["success"] is False + + def test_delete_dataset_with_only_exceeding_limit_space_in_the_name(self): + """ + Test deleting a dataset when its name only has space and the number of it exceeds the limit. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + name = " " * (NAME_LENGTH_LIMIT + 1) + res = ragflow.delete_dataset(name) + assert res["success"] is False + + def test_delete_dataset_with_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self): + """ + Test deleting a dataset whose name has space in the head and tail, + and the length of the name exceeds the limit. + """ + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + name = " " + "k" * NAME_LENGTH_LIMIT + " " + res = ragflow.delete_dataset(name) + assert res["success"] is False + + # ---------------------------------mix the different methods-------------------- + def test_create_and_delete_dataset_together(self): + """ + Test creating 1 dataset, and then deleting 1 dataset. + Test creating 10 datasets, and then deleting 10 datasets. 
+ """ + # create 1 dataset + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + res = ragflow.create_dataset("ddd") + assert res['code'] == 0 and res['message'] == 'success' + + # delete 1 dataset + res = ragflow.delete_dataset("ddd") + assert res["success"] is True + + # create 10 datasets + datasets_to_create = ["dataset1"] * 10 + created_response = [ragflow.create_dataset(name) for name in datasets_to_create] + + real_name_to_create = set() + for response in created_response: + assert 'data' in response, "Response is missing 'data' key" + dataset_name = response['data']['dataset_name'] + real_name_to_create.add(dataset_name) + + # delete 10 datasets + for name in real_name_to_create: + res = ragflow.delete_dataset(name) + assert res["success"] is True