API: created list_doc (#1327)

### What problem does this PR solve?

Adds the API for listing the documents of a dataset.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-08-12 19:39:00 +08:00 · 2024-07-01 18:15:00 +08:00 · 2024-07-01 18:15:00 +08:00 · b5389f487c
commit b5389f487c
parent 8b1c145e56
7 changed files with 410 additions and 244 deletions
--- a/api/apps/dataset_api.py
+++ b/api/apps/dataset_api.py
@ -13,13 +13,17 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

+import os
+import re
+import warnings

 from flask import request
 from flask_login import login_required, current_user
 from httpx import HTTPError

 from api.contants import NAME_LENGTH_LIMIT
-from api.db import FileSource, StatusEnum
+from api.db import FileType, ParserType, FileSource
+from api.db import StatusEnum
 from api.db.db_models import File
 from api.db.services import duplicate_name
 from api.db.services.document_service import DocumentService
@ -29,8 +33,12 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.user_service import TenantService
 from api.settings import RetCode
 from api.utils import get_uuid
-from api.utils.api_utils import construct_json_result, construct_result, construct_error_response, validate_request
+from api.utils.api_utils import construct_json_result, construct_error_response
+from api.utils.api_utils import construct_result, validate_request
+from api.utils.file_utils import filename_type, thumbnail
+from rag.utils.minio_conn import MINIO

+MAXIMUM_OF_UPLOADING_FILES = 256

 # ------------------------------ create a dataset ---------------------------------------

@ -253,3 +261,216 @@ def update_dataset(dataset_id):
        return construct_json_result(data=dataset.to_json(), code=RetCode.SUCCESS)
    except Exception as e:
        return construct_error_response(e)
+
+# --------------------------------content management ----------------------------------------------
+
+# ----------------------------upload files-----------------------------------------------------
+@manager.route('/<dataset_id>/documents/', methods=['POST'])
+@login_required
+def upload_documents(dataset_id):
+    """
+    Upload the local files sent in the multipart field 'file' to a dataset.
+
+    The request is validated first (at least one file, no more than
+    MAXIMUM_OF_UPLOADING_FILES files, every file named, no 'http' in a name).
+    Each file is then written with MINIO.put, inserted with DocumentService
+    and linked to the dataset folder with FileService.add_file_from_kb.
+
+    :param dataset_id: id of the target dataset, taken from the URL.
+    :return: a JSON result whose data is the list of created document dicts,
+             or a JSON error result for the first failed validation or for
+             the exceptions collected while storing the files.
+    """
+    # no files
+    if not request.files:
+        return construct_json_result(
+            message='There is no file!', code=RetCode.ARGUMENT_ERROR)
+
+    # the number of uploading files exceeds the limit
+    file_objs = request.files.getlist('file')
+    num_file_objs = len(file_objs)
+
+    if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
+        return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
+                                                                      f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")
+
+    for file_obj in file_objs:
+        file_name = file_obj.filename
+        # no name
+        if not file_name:
+            return construct_json_result(
+                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)
+
+        # TODO: support the remote files
+        # NOTE(review): this rejects every name containing 'http', not only URLs,
+        # and the message wording is a double negative - confirm before changing,
+        # callers or tests may rely on the exact text.
+        if 'http' in file_name:
+            return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")
+
+        # the content is empty, raising a warning
+        # Only one byte is probed, and the stream is rewound afterwards: without
+        # the rewind the upload loop below would read from the end of the stream
+        # and store every file as empty.
+        is_empty = file_obj.read(1) == b''
+        file_obj.seek(0)
+        if is_empty:
+            warnings.warn(f"[WARNING]: The file {file_name} is empty.")
+
+    # no dataset
+    exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
+    if not exist:
+        return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)
+
+    # get the root_folder
+    root_folder = FileService.get_root_folder(current_user.id)
+    # get the id of the root_folder
+    parent_file_id = root_folder["id"]  # document id
+    # this is for the new user, create '.knowledgebase' file
+    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
+    # go inside this folder, get the kb_root_folder
+    kb_root_folder = FileService.get_kb_folder(current_user.id)
+    # link the file management to the kb_folder
+    kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])
+
+    # grab all the errs
+    err = []
+    MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
+    uploaded_docs_json = []
+    for file in file_objs:
+        try:
+            # TODO: get this value from the database as some tenants have this limit while others don't
+            if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
+                return construct_json_result(code=RetCode.DATA_ERROR,
+                                             message="Exceed the maximum file number of a free user!")
+            # deal with the duplicate name
+            filename = duplicate_name(
+                DocumentService.query,
+                name=file.filename,
+                kb_id=dataset.id)
+
+            # deal with the unsupported type
+            filetype = filename_type(filename)
+            if filetype == FileType.OTHER.value:
+                return construct_json_result(code=RetCode.DATA_ERROR,
+                                             message="This type of file has not been supported yet!")
+
+            # upload to the minio; append '_' until the object name is free
+            location = filename
+            while MINIO.obj_exist(dataset_id, location):
+                location += "_"
+            blob = file.read()
+            MINIO.put(dataset_id, location, blob)
+            doc = {
+                "id": get_uuid(),
+                "kb_id": dataset.id,
+                "parser_id": dataset.parser_id,
+                "parser_config": dataset.parser_config,
+                "created_by": current_user.id,
+                "type": filetype,
+                "name": filename,
+                "location": location,
+                "size": len(blob),
+                "thumbnail": thumbnail(filename, blob)
+            }
+            # compare against .value, like the FileType.OTHER check above
+            if doc["type"] == FileType.VISUAL.value:
+                doc["parser_id"] = ParserType.PICTURE.value
+            if re.search(r"\.(ppt|pptx|pages)$", filename):
+                doc["parser_id"] = ParserType.PRESENTATION.value
+            DocumentService.insert(doc)
+
+            FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
+            uploaded_docs_json.append(doc)
+        except Exception as e:
+            # keep going: the remaining files are still attempted
+            err.append(file.filename + ": " + str(e))
+
+    if err:
+        # return all the errors
+        return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
+    # success
+    return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
+
+
+# ----------------------------delete a file-----------------------------------------------------
+@manager.route('/<dataset_id>/documents/<document_id>', methods=['DELETE'])
+@login_required
+def delete_document(document_id, dataset_id):  # string
+    """
+    Delete one document from a dataset.
+
+    Checks that the document exists, that a tenant id can be resolved for it
+    and that it is stored under the dataset named in the URL. It then removes
+    the document, its File and File2Document records and its MinIO object.
+
+    :param document_id: id of the document to delete, taken from the URL.
+    :param dataset_id: id of the dataset the document must belong to.
+    :return: a JSON result with data=True on success, otherwise a JSON error
+             result describing the failed check or the raised exception.
+    """
+    # get the root folder
+    root_folder = FileService.get_root_folder(current_user.id)
+    # parent file's id
+    parent_file_id = root_folder["id"]
+    # consider the new user
+    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
+    try:
+        # whether there is this document
+        exist, doc = DocumentService.get_by_id(document_id)
+        if not exist:
+            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
+        # whether this doc is authorized by this tenant
+        # NOTE(review): this only checks that a tenant id exists for the document;
+        # it does not compare it with current_user - confirm this is intended.
+        tenant_id = DocumentService.get_tenant_id(document_id)
+        if not tenant_id:
+            return construct_json_result(
+                message=f"You cannot delete this document {document_id} due to the authorization"
+                        f" reason!", code=RetCode.AUTHENTICATION_ERROR)
+
+        # get the doc's id and location
+        real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)
+
+        if real_dataset_id != dataset_id:
+            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
+                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
+
+        # there is an issue when removing
+        if not DocumentService.remove_document(doc, tenant_id):
+            return construct_json_result(
+                message="There was an error during the document removal process. Please check the status of the "
+                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
+
+        # fetch the File2Document record associated with the provided document ID.
+        file_to_doc = File2DocumentService.get_by_document_id(document_id)
+        # delete the associated File record.
+        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
+        # delete the File2Document record itself using the document ID. This removes the
+        # association between the document and the file after the File record has been deleted.
+        File2DocumentService.delete_by_document_id(document_id)
+
+        # delete it from minio
+        MINIO.rm(dataset_id, location)
+    except Exception as e:
+        # Report every exception as a failure. str(e) is empty for exceptions
+        # raised without a message, so fall back to repr(e); otherwise such a
+        # failure would be indistinguishable from success.
+        return construct_json_result(data=False, message=str(e) or repr(e), code=RetCode.SERVER_ERROR)
+
+    return construct_json_result(data=True, code=RetCode.SUCCESS)
+
+
+# ----------------------------list files-----------------------------------------------------
+@manager.route('/<dataset_id>/documents/', methods=['GET'])
+@login_required
+def list_documents(dataset_id):
+    """
+    List the documents of a dataset.
+
+    Query parameters (all optional):
+      keywords - keep only documents whose name contains this text; default "".
+      offset   - index of the first document to return; default 0.
+      count    - number of documents to return, -1 for all; default -1.
+      order_by - name of the field to sort on; default "create_time".
+      descend  - "True" / "False" (any letter case); default "True".
+
+    :param dataset_id: id of the dataset, taken from the URL.
+    :return: a JSON result whose data is {"total": ..., "docs": [...]}, or
+             the error response built from the raised exception.
+    """
+    if not dataset_id:
+        return construct_json_result(
+            data=False, message='Lack of "dataset_id"', code=RetCode.ARGUMENT_ERROR)
+
+    # searching keywords
+    keywords = request.args.get("keywords", "")
+
+    offset = request.args.get("offset", 0)
+    count = request.args.get("count", -1)
+    order_by = request.args.get("order_by", "create_time")
+    # Query-string values are text. Pass the canonical strings "True" / "False"
+    # on, and default to "True" so that a request without 'descend' is sorted
+    # too (a bool default is not recognised as a sort direction downstream).
+    descend = str(request.args.get("descend", "True")).strip().capitalize()
+    try:
+        # int() stays inside the try block so that a malformed offset/count is
+        # reported through construct_error_response like any other failure
+        docs, total = DocumentService.list_documents_in_dataset(dataset_id, int(offset), int(count), order_by,
+                                                                descend, keywords)
+
+        # RetCode.SUCCESS is the result code, not the message
+        return construct_json_result(data={"total": total, "docs": docs}, code=RetCode.SUCCESS)
+    except Exception as e:
+        return construct_error_response(e)
+
+# ----------------------------download a file-----------------------------------------------------
+
+# ----------------------------enable rename-----------------------------------------------------
+
+# ----------------------------start parsing-----------------------------------------------------
+
+# ----------------------------stop parsing-----------------------------------------------------
+
+# ----------------------------show the status of the file-----------------------------------------------------
+
+# ----------------------------list the chunks of the file-----------------------------------------------------
+
+# ----------------------------delete the chunk-----------------------------------------------------
+
+# ----------------------------edit the status of the chunk-----------------------------------------------------
+
+# ----------------------------insert a new chunk-----------------------------------------------------
+
+# ----------------------------upload a file-----------------------------------------------------
+
+# ----------------------------get a specific chunk-----------------------------------------------------
+
+# ----------------------------retrieval test-----------------------------------------------------
+
--- a/api/apps/documents_api.py
+++ b/api/apps/documents_api.py
@ -1,228 +0,0 @@
-#
-#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License
-#
-
-import os
-import re
-import warnings
-
-from flask import request
-from flask_login import login_required, current_user
-
-from api.db import FileType, ParserType
-from api.db.services import duplicate_name
-from api.db.services.document_service import DocumentService
-from api.db.services.file2document_service import File2DocumentService
-from api.db.services.file_service import FileService
-from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.settings import RetCode
-from api.utils import get_uuid
-from api.utils.api_utils import construct_json_result
-from api.utils.file_utils import filename_type, thumbnail
-from rag.utils.minio_conn import MINIO
-from api.db.db_models import Task, File
-from api.db import FileType, TaskStatus, ParserType, FileSource
-
-
-MAXIMUM_OF_UPLOADING_FILES = 256
-
-
-# ----------------------------upload local files-----------------------------------------------------
-@manager.route('/<dataset_id>', methods=['POST'])
-@login_required
-def upload(dataset_id):
-    # no files
-    if not request.files:
-        return construct_json_result(
-            message='There is no file!', code=RetCode.ARGUMENT_ERROR)
-
-    # the number of uploading files exceeds the limit
-    file_objs = request.files.getlist('file')
-    num_file_objs = len(file_objs)
-
-    if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
-        return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
-                                                                      f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")
-
-    for file_obj in file_objs:
-        # the content of the file
-        file_content = file_obj.read()
-        file_name = file_obj.filename
-        # no name
-        if not file_name:
-            return construct_json_result(
-                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)
-
-        # TODO: support the remote files
-        if 'http' in file_name:
-            return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")
-
-        # the content is empty, raising a warning
-        if file_content == b'':
-            warnings.warn(f"[WARNING]: The file {file_name} is empty.")
-
-    # no dataset
-    exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
-    if not exist:
-        return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)
-
-    # get the root_folder
-    root_folder = FileService.get_root_folder(current_user.id)
-    # get the id of the root_folder
-    parent_file_id = root_folder["id"]  # document id
-    # this is for the new user, create '.knowledgebase' file
-    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
-    # go inside this folder, get the kb_root_folder
-    kb_root_folder = FileService.get_kb_folder(current_user.id)
-    # link the file management to the kb_folder
-    kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])
-
-    # grab all the errs
-    err = []
-    MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
-    uploaded_docs_json = []
-    for file in file_objs:
-        try:
-            # TODO: get this value from the database as some tenants have this limit while others don't
-            if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
-                return construct_json_result(code=RetCode.DATA_ERROR,
-                                             message="Exceed the maximum file number of a free user!")
-            # deal with the duplicate name
-            filename = duplicate_name(
-                DocumentService.query,
-                name=file.filename,
-                kb_id=dataset.id)
-
-            # deal with the unsupported type
-            filetype = filename_type(filename)
-            if filetype == FileType.OTHER.value:
-                return construct_json_result(code=RetCode.DATA_ERROR,
-                                             message="This type of file has not been supported yet!")
-
-            # upload to the minio
-            location = filename
-            while MINIO.obj_exist(dataset_id, location):
-                location += "_"
-            blob = file.read()
-            MINIO.put(dataset_id, location, blob)
-            doc = {
-                "id": get_uuid(),
-                "kb_id": dataset.id,
-                "parser_id": dataset.parser_id,
-                "parser_config": dataset.parser_config,
-                "created_by": current_user.id,
-                "type": filetype,
-                "name": filename,
-                "location": location,
-                "size": len(blob),
-                "thumbnail": thumbnail(filename, blob)
-            }
-            if doc["type"] == FileType.VISUAL:
-                doc["parser_id"] = ParserType.PICTURE.value
-            if re.search(r"\.(ppt|pptx|pages)$", filename):
-                doc["parser_id"] = ParserType.PRESENTATION.value
-            DocumentService.insert(doc)
-
-            FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
-            uploaded_docs_json.append(doc)
-        except Exception as e:
-            err.append(file.filename + ": " + str(e))
-
-    if err:
-        # return all the errors
-        return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
-    # success
-    return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
-
-# ----------------------------delete a file-----------------------------------------------------
-@manager.route('/<dataset_id>/<document_id>', methods=['DELETE'])
-@login_required
-def delete(document_id, dataset_id):  # string
-    # get the root folder
-    root_folder = FileService.get_root_folder(current_user.id)
-    # parent file's id
-    parent_file_id = root_folder["id"]
-    # consider the new user
-    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
-    # store all the errors that may have
-    errors = ""
-    try:
-        # whether there is this document
-        exist, doc = DocumentService.get_by_id(document_id)
-        if not exist:
-            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
-        # whether this doc is authorized by this tenant
-        tenant_id = DocumentService.get_tenant_id(document_id)
-        if not tenant_id:
-            return construct_json_result(message=f"You cannot delete this document {document_id} due to the authorization"
-                                                 f" reason!", code=RetCode.AUTHENTICATION_ERROR)
-
-        # get the doc's id and location
-        real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)
-
-        if real_dataset_id != dataset_id:
-            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
-                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
-
-        # there is an issue when removing
-        if not DocumentService.remove_document(doc, tenant_id):
-            return construct_json_result(
-                message="There was an error during the document removal process. Please check the status of the "
-                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
-
-        # fetch the File2Document record associated with the provided document ID.
-        file_to_doc = File2DocumentService.get_by_document_id(document_id)
-        # delete the associated File record.
-        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
-        # delete the File2Document record itself using the document ID. This removes the
-        # association between the document and the file after the File record has been deleted.
-        File2DocumentService.delete_by_document_id(document_id)
-
-        # delete it from minio
-        MINIO.rm(dataset_id, location)
-    except Exception as e:
-        errors += str(e)
-    if errors:
-        return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
-
-    return construct_json_result(data=True, code=RetCode.SUCCESS)
-
-# ----------------------------upload online files------------------------------------------------
-
-# ----------------------------download a file-----------------------------------------------------
-
-# ----------------------------enable rename-----------------------------------------------------
-
-# ----------------------------list files-----------------------------------------------------
-
-# ----------------------------start parsing-----------------------------------------------------
-
-# ----------------------------stop parsing-----------------------------------------------------
-
-# ----------------------------show the status of the file-----------------------------------------------------
-
-# ----------------------------list the chunks of the file-----------------------------------------------------
-
-# ----------------------------delete the chunk-----------------------------------------------------
-
-# ----------------------------edit the status of the chunk-----------------------------------------------------
-
-# ----------------------------insert a new chunk-----------------------------------------------------
-
-# ----------------------------upload a file-----------------------------------------------------
-
-# ----------------------------get a specific chunk-----------------------------------------------------
-
-# ----------------------------retrieval test-----------------------------------------------------
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@ -59,6 +59,35 @@ class DocumentService(CommonService):

        return list(docs.dicts()), count

+    @classmethod
+    @DB.connection_context()
+    def list_documents_in_dataset(cls, dataset_id, offset, count, order_by, descend, keywords):
+        """
+        Return a slice of the documents of one dataset plus the total count.
+
+        :param dataset_id: value matched against the model's kb_id.
+        :param offset: index of the first document to return.
+        :param count: number of documents to return; -1 returns all from offset.
+        :param order_by: name of the field to sort on, resolved with getter_by.
+        :param descend: sort direction; a bool, or a string where 'false' / '0'
+                        (any letter case) means ascending and anything else
+                        descending.
+        :param keywords: if non-empty, keep only documents whose lower-cased
+                         name contains the lower-cased keywords.
+        :return: (list of document dicts, total number of matching documents).
+        :raises IndexError: if offset is negative or beyond the result length.
+        """
+        if keywords:
+            docs = cls.model.select().where(
+                (cls.model.kb_id == dataset_id),
+                (fn.LOWER(cls.model.name).contains(keywords.lower()))
+            )
+        else:
+            docs = cls.model.select().where(cls.model.kb_id == dataset_id)
+
+        total = docs.count()
+
+        # Accept a real bool as well as its textual forms; previously anything
+        # but the exact strings 'True' / 'False' left the result unordered.
+        if isinstance(descend, str):
+            descend = descend.strip().lower() not in ('false', '0')
+        order_field = cls.model.getter_by(order_by)
+        docs = docs.order_by(order_field.desc() if descend else order_field.asc())
+
+        docs = list(docs.dicts())
+
+        if offset < 0 or offset > len(docs):
+            raise IndexError("Offset is out of the valid range.")
+
+        # -1 means "no limit"
+        if count == -1:
+            return docs[offset:], total
+
+        return docs[offset:offset + count], total
+
    @classmethod
    @DB.connection_context()
    def insert(cls, doc):
--- a/api/db/services/knowledgebase_service.py
+++ b/api/db/services/knowledgebase_service.py
@ -60,6 +60,9 @@ class KnowledgebaseService(CommonService):
        if offset < 0 or offset > kbs_length:
            raise IndexError("Offset is out of the valid range.")

+        if count == -1:
+            return kbs[offset:]
+
        return kbs[offset:offset+count]

    @classmethod
--- a/docs/references/ragflow_api.md
+++ b/docs/references/ragflow_api.md
@ -275,3 +275,5 @@ You are required to input at least one parameter.
    "message": "Please input at least one parameter that you want to update!"
 }
 ```
+
+
--- a/sdk/python/ragflow/ragflow.py
+++ b/sdk/python/ragflow/ragflow.py
@ -26,12 +26,11 @@ class RAGFlow:
        '''
        api_url: http://<host_address>/api/v1
        dataset_url: http://<host_address>/api/v1/dataset
-        document_url: http://<host_address>/api/v1/documents
+        document_url: http://<host_address>/api/v1/dataset/{dataset_id}/documents
        '''
        self.user_key = user_key
        self.api_url = f"{base_url}/api/{version}"
        self.dataset_url = f"{self.api_url}/dataset"
-        self.document_url = f"{self.api_url}/documents"
        self.authorization_header = {"Authorization": "{}".format(self.user_key)}

    def create_dataset(self, dataset_name):
@ -95,7 +94,7 @@ class RAGFlow:
            else:
                return {'code': RetCode.DATA_ERROR, 'message': f"The file {file_path} does not exist"}

-        res = requests.request('POST', url=f"{self.document_url}/{dataset_id}", files=files,
+        res = requests.request('POST', url=f"{self.dataset_url}/{dataset_id}/documents", files=files,
                               headers=self.authorization_header)

        result_dict = json.loads(res.text)
@ -103,16 +102,27 @@ class RAGFlow:

    # ----------------------------delete a file-----------------------------------------------------
    def delete_files(self, document_id, dataset_id):
-        endpoint = f"{self.document_url}/{dataset_id}/{document_id}"
+        """Send a DELETE request for one document of a dataset and return the decoded JSON reply."""
+        endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}"
        res = requests.delete(endpoint, headers=self.authorization_header)
        return res.json()

+    # ----------------------------list files-----------------------------------------------------
+    def list_files(self, dataset_id, offset=0, count=-1, order_by="create_time", descend=True, keywords=""):
+        """
+        Request the document listing of a dataset and return the decoded JSON reply.
+
+        offset, count, order_by, descend and keywords are sent unchanged as
+        query parameters of the GET request.
+        """
+        url = f"{self.dataset_url}/{dataset_id}/documents/"
+        query = dict(offset=offset, count=count, order_by=order_by, descend=descend, keywords=keywords)
+        reply = requests.get(url, params=query, headers=self.authorization_header)
+        return reply.json()
+
    # ----------------------------download a file-----------------------------------------------------

    # ----------------------------enable rename-----------------------------------------------------

-    # ----------------------------list files-----------------------------------------------------
-
    # ----------------------------start parsing-----------------------------------------------------

    # ----------------------------stop parsing-----------------------------------------------------
--- a/sdk/python/test/test_document.py
+++ b/sdk/python/test/test_document.py
@ -37,7 +37,7 @@ class TestFile(TestSdk):
        dataset_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res['code'] == RetCode.SUCCESS and res['data'] is True and res['message'] == 'success'
+        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'

    def test_upload_one_file(self):
        """
@ -48,7 +48,7 @@ class TestFile(TestSdk):
        dataset_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt"]
        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res['code'] == RetCode.SUCCESS and res['data'] is True and res['message'] == 'success'
+        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'

    def test_upload_nonexistent_files(self):
        """
@ -237,12 +237,143 @@ class TestFile(TestSdk):
        assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
                f'The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.')

+# ----------------------------list files-----------------------------------------------------
+    def test_list_documents_with_success(self):
+        """
+        Upload one file to a new dataset and expect the listing to hold exactly one document.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        # new dataset with a single document in it
+        dataset_id = client.create_dataset("test_list_documents_with_success")['data']['dataset_id']
+        client.upload_local_file(dataset_id, ["test_data/test.txt"])
+        # list with the default arguments
+        listing = client.list_files(dataset_id)
+        assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 1
+
+    def test_list_documents_with_checking_size(self):
+        """
+        Upload the same file ten times and expect the listing to hold ten documents.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        # new dataset with 10 documents in it
+        dataset_id = client.create_dataset("test_list_documents_with_checking_size")['data']['dataset_id']
+        client.upload_local_file(dataset_id, ["test_data/test.txt"] * 10)
+        # list with the default arguments
+        listing = client.list_files(dataset_id)
+        assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 10
+
+    def test_list_documents_with_getting_empty_result(self):
+        """
+        List a dataset that received no upload and expect an empty document list.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        # new dataset, nothing uploaded
+        dataset_id = client.create_dataset("test_list_documents_with_getting_empty_result")['data']['dataset_id']
+        # list with the default arguments
+        listing = client.list_files(dataset_id)
+        assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 0
+
+    def test_list_documents_with_creating_100_documents(self):
+        """
+        Upload the same file a hundred times and expect the listing to hold a hundred documents.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        # new dataset with 100 documents in it
+        dataset_id = client.create_dataset("test_list_documents_with_creating_100_documents")['data']['dataset_id']
+        client.upload_local_file(dataset_id, ["test_data/test.txt"] * 100)
+        # list with the default arguments
+        listing = client.list_files(dataset_id)
+        assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 100
+
+    def test_list_document_with_failure(self):
+        """
+        List with a negative offset and expect an IndexError to be reported.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        dataset_id = client.create_dataset("test_list_document_with_failure")['data']['dataset_id']
+        # offset=-1 is outside the valid range
+        listing = client.list_files(dataset_id, offset=-1, count=-1)
+        assert "IndexError" in listing['message'] and listing['code'] == RetCode.EXCEPTION_ERROR
+
+    def test_list_document_with_verifying_offset_and_count(self):
+        """
+        Upload twenty files, list ten of them starting at offset 2 and expect ten documents back.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        dataset_id = client.create_dataset("test_list_document_with_verifying_offset_and_count")['data']['dataset_id']
+        uploads = ["test_data/test.txt", "test_data/empty.txt"] * 10
+        client.upload_local_file(dataset_id, uploads)
+        # ask for a window of the listing
+        listing = client.list_files(dataset_id, offset=2, count=10)
+
+        assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 10
+
+    def test_list_document_with_verifying_keywords(self):
+        """
+        Upload two files, list with keywords="empty" and expect exactly one document back.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        dataset_id = client.create_dataset("test_list_document_with_verifying_keywords")['data']['dataset_id']
+        uploads = ["test_data/test.txt", "test_data/empty.txt"]
+        client.upload_local_file(dataset_id, uploads)
+        # filter the listing by keyword
+        listing = client.list_files(dataset_id, keywords="empty")
+
+        assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 1
+
+    def test_list_document_with_verifying_order_by_and_descend(self):
+        """
+        Upload two files, list with the default arguments and expect them in reverse upload order.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        dataset_id = client.create_dataset("test_list_document_with_verifying_order_by_and_descend")['data']['dataset_id']
+        file_paths = ["test_data/test.txt", "test_data/empty.txt"]
+        client.upload_local_file(dataset_id, file_paths)
+        # list with the default arguments
+        listing = client.list_files(dataset_id)
+        assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 2
+        # each listed name must be part of the matching path, last upload first
+        for doc, path in zip(listing['data']['docs'], reversed(file_paths)):
+            assert doc['name'] in path
+
+    def test_list_document_with_verifying_order_by_and_ascend(self):
+        """
+        Upload three files, list with descend=False and expect them in upload order.
+        """
+        client = RAGFlow(API_KEY, HOST_ADDRESS)
+        dataset_id = client.create_dataset("test_list_document_with_verifying_order_by_and_ascend")['data']['dataset_id']
+        file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
+        client.upload_local_file(dataset_id, file_paths)
+        # list in ascending order
+        listing = client.list_files(dataset_id, descend=False)
+        assert listing['code'] == RetCode.SUCCESS and len(listing['data']['docs']) == 3
+
+        # each listed name must be part of the matching path, first upload first
+        for doc, path in zip(listing['data']['docs'], file_paths):
+            assert doc['name'] in path
+
+    # TODO: have to set the limitation of the number of documents
 # ----------------------------download a file-----------------------------------------------------

 # ----------------------------enable rename-----------------------------------------------------

-# ----------------------------list files-----------------------------------------------------
-
 # ----------------------------start parsing-----------------------------------------------------

 # ----------------------------stop parsing-----------------------------------------------------
@ -257,8 +388,6 @@ class TestFile(TestSdk):

 # ----------------------------insert a new chunk-----------------------------------------------------

-# ----------------------------upload a file-----------------------------------------------------
-
 # ----------------------------get a specific chunk-----------------------------------------------------

 # ----------------------------retrieval test-----------------------------------------------------