diff --git a/docs/references/ragflow_api.md b/docs/references/ragflow_api.md
new file mode 100644
index 000000000..7474d62ee
--- /dev/null
+++ b/docs/references/ragflow_api.md
@@ -0,0 +1,148 @@
+---
+sidebar_position: 1
+slug: /api
+---
+
+# API reference
+
+RAGFlow offers RESTful APIs for you to integrate its capabilities into third-party applications.
+
+## Base URL
+```
+https://demo.ragflow.io/api/v1/
+```
+
+## Authorization
+
+All of RAGFlow's RESTful APIs use an API key for authorization, so keep it safe and do not expose it to the front end.
+Put your API key in the request header.
+
+```buildoutcfg
+Authorization: Bearer {API_KEY}
+```
+
+To get your API key:
+
+1. In RAGFlow, click the **Chat** tab at the top middle of the page.
+2. Hover over the corresponding dialogue **>** **Chat Bot API** to show the chatbot API configuration page.
+3. Click **Api Key** **>** **Create new key** to create your API key.
+4. Copy and keep your API key safe.
+
+## Create dataset
+
+This method creates a new dataset for a specific user.
+
+### Request
+
+#### Request URI
+
+| Method | Request URI |
+|--------|-------------|
+| POST | `/dataset` |
+
+:::note
+You are *required* to save the `data.dataset_name` value returned in the response data, which is the name actually assigned to the dataset and is needed to refer to it in later requests.
+:::
+
+#### Request parameter
+
+| Name | Type | Required | Description |
+|----------------|--------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `dataset_name` | string | Yes | The unique identifier assigned to each newly created dataset. `dataset_name` must be less than 2 ** 10 characters and cannot be empty. The following character sets are supported: <br />- 26 lowercase English letters (a-z)<br />- 26 uppercase English letters (A-Z)<br />- 10 digits (0-9)<br />- "_", "-", "." |
+
+### Response
+
+```json
+{
+ "code": 0,
+ "data": {
+ "dataset_name": "kb1"
+ },
+ "message": "success"
+}
+```
+
+## Get dataset list
+
+This method lists the created datasets for a specific user.
+
+### Request
+
+#### Request URI
+
+| Method | Request URI |
+|----------|-------------|
+| GET | `/dataset` |
+
+### Response
+
+#### Response parameter
+
+```python
+(200,
+{
+ "code": 102,
+ "data": [
+ {
+ "avatar": None,
+ "chunk_num": 0,
+ "create_date": "Mon, 17 Jun 2024 16:00:05 GMT",
+ "create_time": 1718611205876,
+ "created_by": "b48110a0286411ef994a3043d7ee537e",
+ "description": None,
+ "doc_num": 0,
+ "embd_id": "BAAI/bge-large-zh-v1.5",
+ "id": "9bd6424a2c7f11ef81b83043d7ee537e",
+ "language": "Chinese",
+ "name": "dataset3(23)",
+ "parser_config": {
+ "pages": [
+ [
+ 1,
+ 1000000
+ ]
+ ]
+ },
+ "parser_id": "naive",
+ "permission": "me",
+ "similarity_threshold": 0.2,
+ "status": "1",
+ "tenant_id": "b48110a0286411ef994a3043d7ee537e",
+ "token_num": 0,
+ "update_date": "Mon, 17 Jun 2024 16:00:05 GMT",
+ "update_time": 1718611205876,
+ "vector_similarity_weight": 0.3
+ },
+ # ... additional datasets ...
+ ],
+ "message": "attempt to list datasets"
+}
+)
+```
+
+## Delete dataset
+
+This method deletes a dataset for a specific user.
+
+### Request
+
+#### Request URI
+
+| Method | Request URI |
+|--------|-------------------------|
+| DELETE | `/dataset/{dataset_id}` |
+
+#### Request parameter
+
+| Name | Type | Required | Description |
+|--------------|--------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `dataset_id` | string | Yes | The ID of the dataset. Call [`GET /dataset`](#get-dataset-list) to retrieve the ID. |
+
+### Response
+
+```json
+{
+ "success": true,
+ "message": "Dataset deleted successfully!"
+}
+```
diff --git a/sdk/python/test/test_dataset.py b/sdk/python/test/test_dataset.py
index 315bf7ac6..026154cbd 100644
--- a/sdk/python/test/test_dataset.py
+++ b/sdk/python/test/test_dataset.py
@@ -2,27 +2,134 @@ from test_sdkbase import TestSdk
from ragflow import RAGFlow
import pytest
from common import API_KEY, HOST_ADDRESS
-
+from api.contants import NAME_LENGTH_LIMIT
class TestDataset(TestSdk):
-
- def test_create_dataset(self):
- '''
+ """
+ This class contains a suite of tests for the dataset management functionality within the RAGFlow system.
+ It ensures that the following functionalities work as expected:
1. create a kb
2. list the kb
3. get the detail info according to the kb id
4. update the kb
5. delete the kb
- '''
-
+ """
+ # -----------------------create_dataset---------------------------------
+ def test_create_dataset_with_success(self):
+ """
+ Test the creation of a new dataset with success.
+ """
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
# create a kb
res = ragflow.create_dataset("kb1")
assert res['code'] == 0 and res['message'] == 'success'
- dataset_name = res['data']['dataset_name']
+ def test_create_dataset_with_empty_name(self):
+ """
+ Test the creation of a new dataset with an empty name.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset("")
+ assert res['message'] == 'Empty dataset name' and res['code'] == 102
+
+ def test_create_dataset_with_name_exceeding_limit(self):
+ """
+ Test the creation of a new dataset with the length of name exceeding the limit.
+ """
+ name = "k" * NAME_LENGTH_LIMIT + "b"
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset(name)
+ assert (res['message'] == f"Dataset name: {name} with length {len(name)} exceeds {NAME_LENGTH_LIMIT}!"
+ and res['code'] == 102)
+
+ def test_create_dataset_name_with_space_in_the_middle(self):
+ """
+ Test the creation of a new dataset whose name has space in the middle.
+ """
+ name = "k b"
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset(name)
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ def test_create_dataset_name_with_space_in_the_head(self):
+ """
+ Test the creation of a new dataset whose name has space in the head.
+ """
+ name = " kb"
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset(name)
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ def test_create_dataset_name_with_space_in_the_tail(self):
+ """
+ Test the creation of a new dataset whose name has space in the tail.
+ """
+ name = "kb "
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset(name)
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ def test_create_dataset_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
+ """
+ Test the creation of a new dataset whose name has space in the head and tail,
+ and the length of the name exceeds the limit.
+ """
+ name = " " + "k" * NAME_LENGTH_LIMIT + " "
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset(name)
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ def test_create_dataset_with_two_same_name(self):
+ """
+ Test the creation of two new datasets with the same name.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset("kb")
+ assert (res['code'] == 0 and res['message'] == 'success')
+ res = ragflow.create_dataset("kb")
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ def test_create_dataset_with_only_space_in_the_name(self):
+ """
+ Test the creation of a dataset whose name only has space.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset(" ")
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ def test_create_dataset_with_space_number_exceeding_limit(self):
+ """
+ Test the creation of a dataset whose name consists only of spaces and whose length reaches the allowed limit.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ name = " " * NAME_LENGTH_LIMIT
+ res = ragflow.create_dataset(name)
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ def test_create_dataset_with_name_having_return(self):
+ """
+ Test the creation of a dataset with a name that has return symbol.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ name = "kb\n"
+ res = ragflow.create_dataset(name)
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ def test_create_dataset_with_name_having_the_null_character(self):
+ """
+ Test the creation of a dataset with a name that has the null character.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ name = "kb\0"
+ res = ragflow.create_dataset(name)
+ assert (res['code'] == 0 and res['message'] == 'success')
+
+ # -----------------------list_dataset---------------------------------
def test_list_dataset_success(self):
+ """
+ Test listing datasets with a successful outcome.
+ """
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
# Call the list_datasets method
response = ragflow.list_dataset()
@@ -32,6 +139,9 @@ class TestDataset(TestSdk):
assert code == 200
def test_list_dataset_with_checking_size_and_name(self):
+ """
+ Test listing datasets and verify the size and names of the datasets.
+ """
datasets_to_create = ["dataset1", "dataset2", "dataset3"]
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
@@ -51,6 +161,9 @@ class TestDataset(TestSdk):
assert len(listed_data) == len(datasets_to_create)
def test_list_dataset_with_getting_empty_result(self):
+ """
+ Test listing datasets that should be empty.
+ """
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
datasets_to_create = []
created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
@@ -70,6 +183,9 @@ class TestDataset(TestSdk):
assert len(listed_data) == 0
def test_list_dataset_with_creating_100_knowledge_bases(self):
+ """
+ Test listing 100 datasets and verify the size and names of these datasets.
+ """
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
datasets_to_create = ["dataset1"] * 100
created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
@@ -89,6 +205,9 @@ class TestDataset(TestSdk):
assert len(listed_data) == 100
def test_list_dataset_with_showing_one_dataset(self):
+ """
+ Test listing one dataset and verify the size of the dataset.
+ """
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
response = ragflow.list_dataset(0, 1)
code, response = response
@@ -96,26 +215,145 @@ class TestDataset(TestSdk):
assert len(datasets) == 1
def test_list_dataset_failure(self):
+ """
+ Test listing datasets with IndexError.
+ """
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
response = ragflow.list_dataset(-1, -1)
_, res = response
assert "IndexError" in res['message']
+ def test_list_dataset_for_empty_datasets(self):
+ """
+ Test listing datasets when the datasets are empty.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ response = ragflow.list_dataset()
+ code, response = response
+ datasets = response['data']
+ assert len(datasets) == 0
+
+ # TODO: have to set the limitation of the number of datasets
+
+ # -----------------------delete_dataset---------------------------------
def test_delete_one_dataset_with_success(self):
+ """
+ Test deleting a dataset with success.
+ """
# get the real name of the created dataset
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
res = ragflow.create_dataset("kb0")
real_dataset_name = res['data']['dataset_name']
- print("name", real_dataset_name)
# delete this dataset
result = ragflow.delete_dataset(real_dataset_name)
- print(result)
assert result["success"] is True
def test_delete_dataset_with_not_existing_dataset(self):
+ """
+ Test deleting a dataset that does not exist with failure.
+ """
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
res = ragflow.delete_dataset("weird_dataset")
assert res["success"] is False
+ def test_delete_dataset_with_creating_100_datasets_and_deleting_100_datasets(self):
+ """
+ Test deleting a dataset when creating 100 datasets and deleting 100 datasets.
+ """
+ # create 100 datasets
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ datasets_to_create = ["dataset1"] * 100
+ created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
+ real_name_to_create = set()
+ for response in created_response:
+ assert 'data' in response, "Response is missing 'data' key"
+ dataset_name = response['data']['dataset_name']
+ real_name_to_create.add(dataset_name)
+
+ for name in real_name_to_create:
+ res = ragflow.delete_dataset(name)
+ assert res["success"] is True
+
+ def test_delete_dataset_with_space_in_the_middle_of_the_name(self):
+ """
+ Test deleting a dataset when its name has space in the middle.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.delete_dataset("k b")
+ print(res)
+ assert res["success"] is True
+
+ def test_delete_dataset_with_space_in_the_head_of_the_name(self):
+ """
+ Test deleting a dataset when its name has space in the head.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.delete_dataset(" kb")
+ assert res["success"] is False
+
+ def test_delete_dataset_with_space_in_the_tail_of_the_name(self):
+ """
+ Test deleting a dataset when its name has space in the tail.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.delete_dataset("kb ")
+ assert res["success"] is False
+
+ def test_delete_dataset_with_only_space_in_the_name(self):
+ """
+ Test deleting a dataset when its name only has space.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.delete_dataset(" ")
+ assert res["success"] is False
+
+ def test_delete_dataset_with_only_exceeding_limit_space_in_the_name(self):
+ """
+ Test deleting a dataset when its name only has space and the number of it exceeds the limit.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ name = " " * (NAME_LENGTH_LIMIT + 1)
+ res = ragflow.delete_dataset(name)
+ assert res["success"] is False
+
+ def test_delete_dataset_with_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
+ """
+ Test deleting a dataset whose name has space in the head and tail,
+ and the length of the name exceeds the limit.
+ """
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ name = " " + "k" * NAME_LENGTH_LIMIT + " "
+ res = ragflow.delete_dataset(name)
+ assert res["success"] is False
+
+ # ---------------------------------mix the different methods--------------------
+ def test_create_and_delete_dataset_together(self):
+ """
+ Test creating 1 dataset, and then deleting 1 dataset.
+ Test creating 10 datasets, and then deleting 10 datasets.
+ """
+ # create 1 dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ res = ragflow.create_dataset("ddd")
+ assert res['code'] == 0 and res['message'] == 'success'
+
+ # delete 1 dataset
+ res = ragflow.delete_dataset("ddd")
+ assert res["success"] is True
+
+ # create 10 datasets
+ datasets_to_create = ["dataset1"] * 10
+ created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
+
+ real_name_to_create = set()
+ for response in created_response:
+ assert 'data' in response, "Response is missing 'data' key"
+ dataset_name = response['data']['dataset_name']
+ real_name_to_create.add(dataset_name)
+
+ # delete 10 datasets
+ for name in real_name_to_create:
+ res = ragflow.delete_dataset(name)
+ assert res["success"] is True