From 501c017a26ad5ead9984c3bac0d04d64a42f4331 Mon Sep 17 00:00:00 2001 From: liu an Date: Mon, 17 Mar 2025 15:36:57 +0800 Subject: [PATCH] Test: Added test cases for List Documents HTTP API (#6158) ### What problem does this PR solve? cover [list documents](https://ragflow.io/docs/dev/http_api_reference#list-documents) endpoints ### Type of change - [x] add test cases --- .../test_list_document.py | 450 ++++++++++++++++++ 1 file changed, 450 insertions(+) create mode 100644 sdk/python/test/test_http_api/test_file_management_within_dataset/test_list_document.py diff --git a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_list_document.py b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_list_document.py new file mode 100644 index 000000000..400bdc09c --- /dev/null +++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_list_document.py @@ -0,0 +1,450 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from concurrent.futures import ThreadPoolExecutor + +import pytest +from common import ( + INVALID_API_TOKEN, + batch_upload_documents, + create_datasets, + list_documnet, +) +from libs.auth import RAGFlowHttpApiAuth + + +def is_sorted(data, field, descending=True): + timestamps = [ds[field] for ds in data] + return ( + all(a >= b for a, b in zip(timestamps, timestamps[1:])) + if descending + else all(a <= b for a, b in zip(timestamps, timestamps[1:])) + ) + + +class TestAuthorization: + @pytest.mark.parametrize( + "auth, expected_code, expected_message", + [ + (None, 0, "`Authorization` can't be empty"), + ( + RAGFlowHttpApiAuth(INVALID_API_TOKEN), + 109, + "Authentication error: API key is invalid!", + ), + ], + ) + def test_invalid_auth( + self, get_http_api_auth, auth, expected_code, expected_message + ): + ids = create_datasets(get_http_api_auth, 1) + res = list_documnet(auth, ids[0]) + assert res["code"] == expected_code + assert res["message"] == expected_message + + +class TestDocumentList: + def test_default(self, get_http_api_auth, tmp_path): + ids = create_datasets(get_http_api_auth, 1) + batch_upload_documents(get_http_api_auth, ids[0], 31, tmp_path) + res = list_documnet(get_http_api_auth, ids[0]) + assert res["code"] == 0 + assert len(res["data"]["docs"]) == 30 + assert res["data"]["total"] == 31 + + @pytest.mark.parametrize( + "dataset_id, expected_code, expected_message", + [ + ("", 100, ""), + ( + "invalid_dataset_id", + 102, + "You don't own the dataset invalid_dataset_id. ", + ), + ], + ) + def test_invalid_dataset_id( + self, get_http_api_auth, dataset_id, expected_code, expected_message + ): + create_datasets(get_http_api_auth, 1) + res = list_documnet(get_http_api_auth, dataset_id) + assert res["code"] == expected_code + assert res["message"] == expected_message + + @pytest.mark.parametrize( + "params, expected_code, expected_page_size, expected_message", + [ + ({"page": None, "page_size": 2}, 0, 2, ""), + ({"page": 0, "page_size": 2}, 0, 2, ""), + ({"page": 2, "page_size": 2}, 0, 2, ""), + ({"page": 3, "page_size": 2}, 0, 1, ""), + ({"page": "3", "page_size": 2}, 0, 1, ""), + pytest.param( + {"page": -1, "page_size": 2}, + 100, + 0, + "1064", + marks=pytest.mark.xfail(reason="issues/5851"), + ), + pytest.param( + {"page": "a", "page_size": 2}, + 100, + 0, + """ValueError("invalid literal for int() with base 10: \'a\'")""", + marks=pytest.mark.xfail(reason="issues/5851"), + ), + ], + ) + def test_page( + self, + get_http_api_auth, + tmp_path, + params, + expected_code, + expected_page_size, + expected_message, + ): + ids = create_datasets(get_http_api_auth, 1) + batch_upload_documents(get_http_api_auth, ids[0], 5, tmp_path) + res = list_documnet(get_http_api_auth, ids[0], params=params) + assert res["code"] == expected_code + if expected_code == 0: + assert len(res["data"]["docs"]) == expected_page_size + assert res["data"]["total"] == 5 + else: + assert res["message"] == expected_message + + @pytest.mark.parametrize( + "params, expected_code, expected_page_size, expected_message", + [ + ({"page_size": None}, 0, 30, ""), + ({"page_size": 0}, 0, 0, ""), + ({"page_size": 1}, 0, 1, ""), + ({"page_size": 32}, 0, 31, ""), + ({"page_size": "1"}, 0, 1, ""), + pytest.param( + {"page_size": -1}, + 100, + 0, + "1064", + marks=pytest.mark.xfail(reason="issues/5851"), + ), + pytest.param( + {"page_size": "a"}, + 100, + 0, + """ValueError("invalid literal for int() with base 10: \'a\'")""", + marks=pytest.mark.xfail(reason="issues/5851"), + ), + ], + ) + def test_page_size( + self, + get_http_api_auth, + tmp_path, + params, + expected_code, + expected_page_size, + expected_message, + ): + ids = create_datasets(get_http_api_auth, 1) + batch_upload_documents(get_http_api_auth, ids[0], 31, tmp_path) + res = list_documnet(get_http_api_auth, ids[0], params=params) + assert res["code"] == expected_code + if expected_code == 0: + assert len(res["data"]["docs"]) == expected_page_size + else: + assert res["message"] == expected_message + + @pytest.mark.parametrize( + "params, expected_code, assertions, expected_message", + [ + ( + {"orderby": None}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), + "", + ), + ( + {"orderby": "create_time"}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), + "", + ), + ( + {"orderby": "update_time"}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "update_time", True)), + "", + ), + pytest.param( + {"orderby": "name", "desc": "False"}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "name", False)), + "", + marks=pytest.mark.xfail(reason="issues/5851"), + ), + pytest.param( + {"orderby": "unknown"}, + 102, + 0, + "orderby should be create_time or update_time", + marks=pytest.mark.xfail(reason="issues/5851"), + ), + ], + ) + def test_orderby( + self, + get_http_api_auth, + tmp_path, + params, + expected_code, + assertions, + expected_message, + ): + ids = create_datasets(get_http_api_auth, 1) + batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) + res = list_documnet(get_http_api_auth, ids[0], params=params) + assert res["code"] == expected_code + if expected_code == 0: + if callable(assertions): + assert assertions(res) + else: + assert res["message"] == expected_message + + @pytest.mark.parametrize( + "params, expected_code, assertions, expected_message", + [ + ( + {"desc": None}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), + "", + ), + ( + {"desc": "true"}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), + "", + ), + ( + {"desc": "True"}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), + "", + ), + ( + {"desc": True}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), + "", + ), + pytest.param( + {"desc": "false"}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", False)), + "", + marks=pytest.mark.xfail(reason="issues/5851"), + ), + ( + {"desc": "False"}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", False)), + "", + ), + ( + {"desc": False}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "create_time", False)), + "", + ), + ( + {"desc": "False", "orderby": "update_time"}, + 0, + lambda r: (is_sorted(r["data"]["docs"], "update_time", False)), + "", + ), + pytest.param( + {"desc": "unknown"}, + 102, + 0, + "desc should be true or false", + marks=pytest.mark.xfail(reason="issues/5851"), + ), + ], + ) + def test_desc( + self, + get_http_api_auth, + tmp_path, + params, + expected_code, + assertions, + expected_message, + ): + ids = create_datasets(get_http_api_auth, 1) + batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) + res = list_documnet(get_http_api_auth, ids[0], params=params) + assert res["code"] == expected_code + if expected_code == 0: + if callable(assertions): + assert assertions(res) + else: + assert res["message"] == expected_message + + @pytest.mark.parametrize( + "params, expected_num", + [ + ({"keywords": None}, 3), + ({"keywords": ""}, 3), + ({"keywords": "0"}, 1), + ({"keywords": "ragflow_test_upload"}, 3), + ({"keywords": "unknown"}, 0), + ], + ) + def test_keywords(self, get_http_api_auth, tmp_path, params, expected_num): + ids = create_datasets(get_http_api_auth, 1) + batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) + res = list_documnet(get_http_api_auth, ids[0], params=params) + assert res["code"] == 0 + assert len(res["data"]["docs"]) == expected_num + assert res["data"]["total"] == expected_num + + @pytest.mark.parametrize( + "params, expected_code, expected_num, expected_message", + [ + ({"name": None}, 0, 3, ""), + ({"name": ""}, 0, 3, ""), + ({"name": "ragflow_test_upload_0.txt"}, 0, 1, ""), + ( + {"name": "unknown.txt"}, + 102, + 0, + "You don't own the document unknown.txt.", + ), + ], + ) + def test_name( + self, + get_http_api_auth, + tmp_path, + params, + expected_code, + expected_num, + expected_message, + ): + ids = create_datasets(get_http_api_auth, 1) + batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) + res = list_documnet(get_http_api_auth, ids[0], params=params) + assert res["code"] == expected_code + if expected_code == 0: + if params["name"] in [None, ""]: + assert len(res["data"]["docs"]) == expected_num + else: + assert res["data"]["docs"][0]["name"] == params["name"] + else: + assert res["message"] == expected_message + + @pytest.mark.parametrize( + "document_id, expected_code, expected_num, expected_message", + [ + (None, 0, 3, ""), + ("", 0, 3, ""), + (lambda r: r[0], 0, 1, ""), + ("unknown.txt", 102, 0, "You don't own the document unknown.txt."), + ], + ) + def test_id( + self, + get_http_api_auth, + tmp_path, + document_id, + expected_code, + expected_num, + expected_message, + ): + ids = create_datasets(get_http_api_auth, 1) + document_ids = batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) + if callable(document_id): + params = {"id": document_id(document_ids)} + else: + params = {"id": document_id} + res = list_documnet(get_http_api_auth, ids[0], params=params) + + assert res["code"] == expected_code + if expected_code == 0: + if params["id"] in [None, ""]: + assert len(res["data"]["docs"]) == expected_num + else: + assert res["data"]["docs"][0]["id"] == params["id"] + else: + assert res["message"] == expected_message + + @pytest.mark.parametrize( + "document_id, name, expected_code, expected_num, expected_message", + [ + (lambda r: r[0], "ragflow_test_upload_0.txt", 0, 1, ""), + (lambda r: r[0], "ragflow_test_upload_1.txt", 0, 0, ""), + (lambda r: r[0], "unknown", 102, 0, "You don't own the document unknown."), + ( + "id", + "ragflow_test_upload_0.txt", + 102, + 0, + "You don't own the document id.", + ), + ], + ) + def test_name_and_id( + self, + get_http_api_auth, + tmp_path, + document_id, + name, + expected_code, + expected_num, + expected_message, + ): + ids = create_datasets(get_http_api_auth, 1) + document_ids = batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) + if callable(document_id): + params = {"id": document_id(document_ids), "name": name} + else: + params = {"id": document_id, "name": name} + + res = list_documnet(get_http_api_auth, ids[0], params=params) + if expected_code == 0: + assert len(res["data"]["docs"]) == expected_num + else: + assert res["message"] == expected_message + + def test_concurrent_list(self, get_http_api_auth, tmp_path): + ids = create_datasets(get_http_api_auth, 1) + batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path) + + with ThreadPoolExecutor(max_workers=5) as executor: + futures = [ + executor.submit(list_documnet, get_http_api_auth, ids[0]) + for i in range(100) + ] + responses = [f.result() for f in futures] + assert all(r["code"] == 0 for r in responses) + + def test_invalid_params(self, get_http_api_auth): + ids = create_datasets(get_http_api_auth, 1) + params = {"a": "b"} + res = list_documnet(get_http_api_auth, ids[0], params=params) + assert res["code"] == 0 + assert len(res["data"]["docs"]) == 0