Test: Added test cases for List Chunks HTTP API (#6514)

### What problem does this PR solve?

Cover the [list chunks](https://ragflow.io/docs/v0.17.2/http_api_reference#list-chunks) endpoint with HTTP API test cases.

### Type of change

- [x] update test cases
This commit is contained in:
liu an 2025-03-25 17:28:58 +08:00 committed by GitHub
parent 1d73baf3d8
commit 0a79dfd5cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 324 additions and 93 deletions

View File

@ -145,7 +145,7 @@ jobs:
echo "Waiting for service to be available..."
sleep 5
done
cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_http_api && pytest -s --tb=short -m "not slow"
cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_http_api && DOC_ENGINE=infinity pytest -s --tb=short -m "not slow"
- name: Stop ragflow:nightly
if: always() # always run this step even if previous steps failed

View File

@ -193,6 +193,6 @@ def list_chunks(auth, dataset_id, document_id, params=None):
def batch_add_chunks(auth, dataset_id, document_id, num):
chunk_ids = []
for i in range(num):
res = add_chunk(auth, dataset_id, document_id, {"content": f"ragflow test {i}"})
res = add_chunk(auth, dataset_id, document_id, {"content": f"chunk test {i}"})
chunk_ids.append(res["data"]["chunk"]["id"])
return chunk_ids

View File

@ -16,7 +16,7 @@
import pytest
from common import batch_create_datasets, bulk_upload_documents, delete_dataset, list_documnet, parse_documnet
from common import add_chunk, batch_create_datasets, bulk_upload_documents, delete_dataset, list_documnet, parse_documnet
from libs.utils import wait_for
@ -35,11 +35,31 @@ def chunk_management_tmp_dir(tmp_path_factory):
@pytest.fixture(scope="class")
def get_dataset_id_and_document_id(get_http_api_auth, chunk_management_tmp_dir):
dataset_ids = batch_create_datasets(get_http_api_auth, 1)
document_ids = bulk_upload_documents(get_http_api_auth, dataset_ids[0], 1, chunk_management_tmp_dir)
parse_documnet(get_http_api_auth, dataset_ids[0], {"document_ids": document_ids})
condition(get_http_api_auth, dataset_ids[0])
def get_dataset_id_and_document_id(get_http_api_auth, chunk_management_tmp_dir, request):
def cleanup():
delete_dataset(get_http_api_auth)
yield dataset_ids[0], document_ids[0]
delete_dataset(get_http_api_auth)
request.addfinalizer(cleanup)
dataset_ids = batch_create_datasets(get_http_api_auth, 1)
dataset_id = dataset_ids[0]
document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, chunk_management_tmp_dir)
parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
condition(get_http_api_auth, dataset_id)
return dataset_id, document_ids[0]
@pytest.fixture(scope="class")
def add_chunks(get_http_api_auth, get_dataset_id_and_document_id):
    """Seed the class-scoped document with four chunks ("chunk test 0".."3").

    Yields (dataset_id, document_id, chunk_ids) for the list-chunks tests.
    """
    dataset_id, document_id = get_dataset_id_and_document_id
    chunk_ids = [
        add_chunk(get_http_api_auth, dataset_id, document_id, {"content": f"chunk test {i}"})["data"]["chunk"]["id"]
        for i in range(4)
    ]
    # issues/6487: brief pause so the doc engine can make the new chunks visible
    from time import sleep

    sleep(1)
    yield dataset_id, document_id, chunk_ids

View File

@ -60,7 +60,7 @@ class TestAddChunk:
{"content": 1},
100,
"""TypeError("unsupported operand type(s) for +: \'int\' and \'str\'")""",
marks=pytest.mark.xfail,
marks=pytest.mark.skip,
),
({"content": "a"}, 0, ""),
({"content": " "}, 102, "`content` is required"),
@ -83,16 +83,16 @@ class TestAddChunk:
@pytest.mark.parametrize(
"payload, expected_code, expected_message",
[
({"content": "a", "important_keywords": ["a", "b", "c"]}, 0, ""),
({"content": "a", "important_keywords": [""]}, 0, ""),
({"content": "chunk test", "important_keywords": ["a", "b", "c"]}, 0, ""),
({"content": "chunk test", "important_keywords": [""]}, 0, ""),
(
{"content": "a", "important_keywords": [1]},
{"content": "chunk test", "important_keywords": [1]},
100,
"TypeError('sequence item 0: expected str instance, int found')",
),
({"content": "a", "important_keywords": ["a", "a"]}, 0, ""),
({"content": "a", "important_keywords": "abc"}, 102, "`important_keywords` is required to be a list"),
({"content": "a", "important_keywords": 123}, 102, "`important_keywords` is required to be a list"),
({"content": "chunk test", "important_keywords": ["a", "a"]}, 0, ""),
({"content": "chunk test", "important_keywords": "abc"}, 102, "`important_keywords` is required to be a list"),
({"content": "chunk test", "important_keywords": 123}, 102, "`important_keywords` is required to be a list"),
],
)
def test_important_keywords(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message):
@ -111,17 +111,17 @@ class TestAddChunk:
@pytest.mark.parametrize(
"payload, expected_code, expected_message",
[
({"content": "a", "questions": ["a", "b", "c"]}, 0, ""),
({"content": "chunk test", "questions": ["a", "b", "c"]}, 0, ""),
pytest.param(
{"content": "a", "questions": [""]},
{"content": "chunk test", "questions": [""]},
0,
"",
marks=pytest.mark.xfail(reason="issues/6404"),
marks=pytest.mark.skip(reason="issues/6404"),
),
({"content": "a", "questions": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"),
({"content": "a", "questions": ["a", "a"]}, 0, ""),
({"content": "a", "questions": "abc"}, 102, "`questions` is required to be a list"),
({"content": "a", "questions": 123}, 102, "`questions` is required to be a list"),
({"content": "chunk test", "questions": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"),
({"content": "chunk test", "questions": ["a", "a"]}, 0, ""),
({"content": "chunk test", "questions": "abc"}, 102, "`questions` is required to be a list"),
({"content": "chunk test", "questions": 123}, 102, "`questions` is required to be a list"),
],
)
def test_questions(self, get_http_api_auth, get_dataset_id_and_document_id, payload, expected_code, expected_message):
@ -174,12 +174,12 @@ class TestAddChunk:
)
def test_invalid_document_id(self, get_http_api_auth, get_dataset_id_and_document_id, document_id, expected_code, expected_message):
dataset_id, _ = get_dataset_id_and_document_id
res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "a"})
res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"})
assert res["code"] == expected_code
assert res["message"] == expected_message
def test_repeated_add_chunk(self, get_http_api_auth, get_dataset_id_and_document_id):
payload = {"content": "a"}
payload = {"content": "chunk test"}
dataset_id, document_id = get_dataset_id_and_document_id
res = list_chunks(get_http_api_auth, dataset_id, document_id)
chunks_count = res["data"]["doc"]["chunk_count"]
@ -198,7 +198,7 @@ class TestAddChunk:
def test_add_chunk_to_deleted_document(self, get_http_api_auth, get_dataset_id_and_document_id):
dataset_id, document_id = get_dataset_id_and_document_id
delete_documnet(get_http_api_auth, dataset_id, {"ids": [document_id]})
res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "a"})
res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"})
assert res["code"] == 102
assert res["message"] == f"You don't own the document {document_id}."
@ -216,7 +216,7 @@ class TestAddChunk:
get_http_api_auth,
dataset_id,
document_id,
{"content": "a"},
{"content": f"chunk test {i}"},
)
for i in range(chunk_num)
]

View File

@ -0,0 +1,203 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from concurrent.futures import ThreadPoolExecutor
import pytest
from common import (
INVALID_API_TOKEN,
batch_add_chunks,
list_chunks,
)
from libs.auth import RAGFlowHttpApiAuth
class TestAuthorization:
    """Authentication failures against the list-chunks endpoint."""

    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (RAGFlowHttpApiAuth(INVALID_API_TOKEN), 109, "Authentication error: API key is invalid!"),
        ],
    )
    def test_invalid_auth(self, get_dataset_id_and_document_id, auth, expected_code, expected_message):
        """A missing or invalid API token must be rejected with the exact error."""
        ds_id, doc_id = get_dataset_id_and_document_id
        response = list_chunks(auth, ds_id, doc_id)
        assert response["code"] == expected_code
        assert response["message"] == expected_message
class TestChunksList:
    """Functional tests for the list-chunks HTTP endpoint.

    Covers pagination (`page`, `page_size`), filtering (`keywords`, `id`),
    unknown parameters, concurrency, defaults, and invalid dataset/document ids.

    NOTE(review): listings below expect 5 chunks while the `add_chunks` fixture
    adds only 4 — presumably the parsed document contributes one more chunk;
    confirm against the fixture/parse pipeline.
    """

    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page": None, "page_size": 2}, 0, 2, ""),
            # page=0, negative, and non-numeric pages currently error out — skipped until fixed
            pytest.param({"page": 0, "page_size": 2}, 100, 0, "ValueError('Search does not support negative slicing.')", marks=pytest.mark.skip),
            ({"page": 2, "page_size": 2}, 0, 2, ""),
            ({"page": 3, "page_size": 2}, 0, 1, ""),
            ({"page": "3", "page_size": 2}, 0, 1, ""),
            pytest.param({"page": -1, "page_size": 2}, 100, 0, "ValueError('Search does not support negative slicing.')", marks=pytest.mark.skip),
            pytest.param({"page": "a", "page_size": 2}, 100, 0, """ValueError("invalid literal for int() with base 10: \'a\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_page(self, get_http_api_auth, add_chunks, params, expected_code, expected_page_size, expected_message):
        """`page` selects which slice of the chunk list is returned."""
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page_size": None}, 0, 5, ""),
            # page_size=0 differs per doc engine: one param runs on elasticsearch
            # (returns everything), the other on infinity (rejects with 3013)
            pytest.param({"page_size": 0}, 0, 5, "", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="Infinity does not support page_size=0")),
            pytest.param({"page_size": 0}, 100, 0, "3013", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "elasticsearch"], reason="Infinity does not support page_size=0")),
            ({"page_size": 1}, 0, 1, ""),
            ({"page_size": 6}, 0, 5, ""),
            ({"page_size": "1"}, 0, 1, ""),
            pytest.param({"page_size": -1}, 0, 5, "", marks=pytest.mark.skip),
            pytest.param({"page_size": "a"}, 100, 0, """ValueError("invalid literal for int() with base 10: \'a\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_page_size(self, get_http_api_auth, add_chunks, params, expected_code, expected_page_size, expected_message):
        """`page_size` caps how many chunks a single page returns."""
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "params, expected_page_size",
        [
            ({"keywords": None}, 5),
            ({"keywords": ""}, 5),
            ({"keywords": "1"}, 1),
            pytest.param({"keywords": "chunk"}, 4, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
            ({"keywords": "ragflow"}, 1),
            ({"keywords": "unknown"}, 0),
        ],
    )
    def test_keywords(self, get_http_api_auth, add_chunks, params, expected_page_size):
        """`keywords` filters the listing by chunk content."""
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == expected_page_size

    @pytest.mark.parametrize(
        "chunk_id, expected_code, expected_page_size, expected_message",
        [
            (None, 0, 5, ""),
            ("", 0, 5, ""),
            # a callable param is resolved against the fixture's chunk_ids in the body
            pytest.param(lambda r: r[0], 0, 1, "", marks=pytest.mark.skip(reason="issues/6499")),
            pytest.param("unknown", 102, 0, "You don't own the document unknown.txt.", marks=pytest.mark.skip(reason="issues/6500")),
        ],
    )
    def test_id(
        self,
        get_http_api_auth,
        add_chunks,
        chunk_id,
        expected_code,
        expected_page_size,
        expected_message,
    ):
        """`id` narrows the listing to one chunk; None/"" means no filter."""
        dataset_id, document_id, chunk_ids = add_chunks
        if callable(chunk_id):
            # bind the lambda to the real chunk ids created by the fixture
            params = {"id": chunk_id(chunk_ids)}
        else:
            params = {"id": chunk_id}
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if params["id"] in [None, ""]:
                assert len(res["data"]["chunks"]) == expected_page_size
            else:
                assert res["data"]["chunks"][0]["id"] == params["id"]
        else:
            assert res["message"] == expected_message

    def test_invalid_params(self, get_http_api_auth, add_chunks):
        """Unknown query parameters are silently ignored, not rejected."""
        dataset_id, document_id, _ = add_chunks
        params = {"a": "b"}
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == 5

    def test_concurrent_list(self, get_http_api_auth, add_chunks):
        """100 parallel listings must all succeed with identical results."""
        dataset_id, document_id, _ = add_chunks
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(list_chunks, get_http_api_auth, dataset_id, document_id) for i in range(100)]
            responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)
        assert all(len(r["data"]["chunks"]) == 5 for r in responses)

    def test_default(self, get_http_api_auth, get_dataset_id_and_document_id):
        """Without params the listing is capped at 30 chunks (presumably the
        default page size — confirm), while doc.chunk_count shows the true total."""
        dataset_id, document_id = get_dataset_id_and_document_id
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        chunks_count = res["data"]["doc"]["chunk_count"]
        batch_add_chunks(get_http_api_auth, dataset_id, document_id, 31)
        # issues/6487: wait for the doc engine to index the 31 new chunks
        from time import sleep

        sleep(3)
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == 30
        assert res["data"]["doc"]["chunk_count"] == chunks_count + 31

    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<NotFound '404: Not Found'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id.",
            ),
        ],
    )
    def test_invalid_dataset_id(self, get_http_api_auth, add_chunks, dataset_id, expected_code, expected_message):
        """Empty or foreign dataset ids must fail with the exact error."""
        _, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "document_id, expected_code, expected_message",
        [
            ("", 102, "The dataset not own the document chunks."),
            (
                "invalid_document_id",
                102,
                "You don't own the document invalid_document_id.",
            ),
        ],
    )
    def test_invalid_document_id(self, get_http_api_auth, add_chunks, document_id, expected_code, expected_message):
        """Empty or foreign document ids must fail with the exact error."""
        dataset_id, _, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message

View File

@ -20,7 +20,10 @@ from common import batch_create_datasets, delete_dataset
@pytest.fixture(scope="class")
def get_dataset_ids(get_http_api_auth):
ids = batch_create_datasets(get_http_api_auth, 5)
yield ids
delete_dataset(get_http_api_auth)
def get_dataset_ids(get_http_api_auth, request):
    """Create five datasets for the class; delete them all at teardown.

    The finalizer runs even if dataset creation raises partway through.
    """
    request.addfinalizer(lambda: delete_dataset(get_http_api_auth))
    return batch_create_datasets(get_http_api_auth, 5)

View File

@ -65,14 +65,14 @@ class TestDatasetList:
100,
0,
"1064",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
pytest.param(
{"page": "a", "page_size": 2},
100,
0,
"""ValueError("invalid literal for int() with base 10: \'a\'")""",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
],
)
@ -97,14 +97,14 @@ class TestDatasetList:
100,
0,
"1064",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
pytest.param(
{"page_size": "a"},
100,
0,
"""ValueError("invalid literal for int() with base 10: \'a\'")""",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
],
)
@ -149,14 +149,14 @@ class TestDatasetList:
0,
lambda r: (is_sorted(r["data"]["docs"], "name", False)),
"",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
pytest.param(
{"orderby": "unknown"},
102,
0,
"orderby should be create_time or update_time",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
],
)
@ -232,7 +232,7 @@ class TestDatasetList:
102,
0,
"desc should be true or false",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
],
)

View File

@ -67,7 +67,7 @@ class TestDatasetUpdate:
100,
"""AttributeError("\'NoneType\' object has no attribute \'strip\'")""",
),
pytest.param("", 102, "", marks=pytest.mark.xfail(reason="issue#5915")),
pytest.param("", 102, "", marks=pytest.mark.skip(reason="issue/5915")),
("dataset_1", 102, "Duplicated dataset name in updating dataset."),
("DATASET_1", 102, "Duplicated dataset name in updating dataset."),
],

View File

@ -25,8 +25,13 @@ def file_management_tmp_dir(tmp_path_factory):
@pytest.fixture(scope="class")
def get_dataset_id_and_document_ids(get_http_api_auth, file_management_tmp_dir):
def get_dataset_id_and_document_ids(get_http_api_auth, file_management_tmp_dir, request):
def cleanup():
delete_dataset(get_http_api_auth)
request.addfinalizer(cleanup)
dataset_ids = batch_create_datasets(get_http_api_auth, 1)
document_ids = bulk_upload_documents(get_http_api_auth, dataset_ids[0], 5, file_management_tmp_dir)
yield dataset_ids[0], document_ids
delete_dataset(get_http_api_auth)
dataset_id = dataset_ids[0]
document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 5, file_management_tmp_dir)
return dataset_id, document_ids

View File

@ -84,14 +84,14 @@ class TestDocumentList:
100,
0,
"1064",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
pytest.param(
{"page": "a", "page_size": 2},
100,
0,
"""ValueError("invalid literal for int() with base 10: \'a\'")""",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
],
)
@ -126,14 +126,14 @@ class TestDocumentList:
100,
0,
"1064",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
pytest.param(
{"page_size": "a"},
100,
0,
"""ValueError("invalid literal for int() with base 10: \'a\'")""",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
],
)
@ -180,14 +180,14 @@ class TestDocumentList:
0,
lambda r: (is_sorted(r["data"]["docs"], "name", False)),
"",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
pytest.param(
{"orderby": "unknown"},
102,
0,
"orderby should be create_time or update_time",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
],
)
@ -241,7 +241,7 @@ class TestDocumentList:
0,
lambda r: (is_sorted(r["data"]["docs"], "create_time", False)),
"",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
(
{"desc": "False"},
@ -266,7 +266,7 @@ class TestDocumentList:
102,
0,
"desc should be true or false",
marks=pytest.mark.xfail(reason="issues/5851"),
marks=pytest.mark.skip(reason="issues/5851"),
),
],
)

View File

@ -183,99 +183,99 @@ class TestUpdatedDocument:
{"create_date": "Fri, 14 Mar 2025 16:53:42 GMT"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"create_time": 1},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"created_by": "ragflow_test"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"dataset_id": "ragflow_test"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"id": "ragflow_test"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"location": "ragflow_test.txt"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"process_begin_at": 1},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"process_duation": 1.0},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param({"progress": 1.0}, 102, "Can't change `progress`."),
pytest.param(
{"progress_msg": "ragflow_test"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"run": "ragflow_test"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"size": 1},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"source_type": "ragflow_test"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"thumbnail": "ragflow_test"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
({"token_count": 1}, 102, "Can't change `token_count`."),
pytest.param(
{"type": "ragflow_test"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"update_date": "Fri, 14 Mar 2025 16:33:17 GMT"},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
pytest.param(
{"update_time": 1},
102,
"The input parameters are invalid.",
marks=pytest.mark.xfail(reason="issues/6104"),
marks=pytest.mark.skip(reason="issues/6104"),
),
],
)
@ -316,35 +316,35 @@ class TestUpdatedDocument:
{"chunk_token_num": -1},
100,
"AssertionError('chunk_token_num should be in range from 1 to 100000000')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"chunk_token_num": 0},
100,
"AssertionError('chunk_token_num should be in range from 1 to 100000000')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"chunk_token_num": 100000000},
100,
"AssertionError('chunk_token_num should be in range from 1 to 100000000')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"chunk_token_num": 3.14},
102,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"chunk_token_num": "1024"},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
(
"naive",
@ -365,7 +365,7 @@ class TestUpdatedDocument:
{"html4excel": 1},
100,
"AssertionError('html4excel should be True or False')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
("naive", {"delimiter": ""}, 0, ""),
("naive", {"delimiter": "`##`"}, 0, ""),
@ -374,42 +374,42 @@ class TestUpdatedDocument:
{"delimiter": 1},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"task_page_size": -1},
100,
"AssertionError('task_page_size should be in range from 1 to 100000000')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"task_page_size": 0},
100,
"AssertionError('task_page_size should be in range from 1 to 100000000')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"task_page_size": 100000000},
100,
"AssertionError('task_page_size should be in range from 1 to 100000000')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"task_page_size": 3.14},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"task_page_size": "1024"},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
("naive", {"raptor": {"use_raptor": True}}, 0, ""),
("naive", {"raptor": {"use_raptor": False}}, 0, ""),
@ -418,91 +418,91 @@ class TestUpdatedDocument:
{"invalid_key": "invalid_value"},
100,
"""AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"auto_keywords": -1},
100,
"AssertionError('auto_keywords should be in range from 0 to 32')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"auto_keywords": 32},
100,
"AssertionError('auto_keywords should be in range from 0 to 32')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"auto_questions": 3.14},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"auto_keywords": "1024"},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"auto_questions": -1},
100,
"AssertionError('auto_questions should be in range from 0 to 10')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"auto_questions": 10},
100,
"AssertionError('auto_questions should be in range from 0 to 10')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"auto_questions": 3.14},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"auto_questions": "1024"},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"topn_tags": -1},
100,
"AssertionError('topn_tags should be in range from 0 to 10')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"topn_tags": 10},
100,
"AssertionError('topn_tags should be in range from 0 to 10')",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"topn_tags": 3.14},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
pytest.param(
"naive",
{"topn_tags": "1024"},
100,
"",
marks=pytest.mark.xfail(reason="issues/6098"),
marks=pytest.mark.skip(reason="issues/6098"),
),
],
)