diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8a3455674..6b437c2bd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -96,8 +96,17 @@ jobs: echo "Waiting for service to be available..." sleep 5 done - cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_frontend_api && pytest -s --tb=short get_email.py test_dataset.py + cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_frontend_api && pytest -s --tb=short get_email.py test_dataset.py + - name: Run http api tests against Elasticsearch + run: | + export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" + export HOST_ADDRESS=http://host.docker.internal:9380 + until sudo docker exec ragflow-server curl -s --connect-timeout 5 ${HOST_ADDRESS} > /dev/null; do + echo "Waiting for service to be available..." + sleep 5 + done + cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_http_api && pytest -s --tb=short -m "not slow" - name: Stop ragflow:nightly if: always() # always run this step even if previous steps failed @@ -128,6 +137,16 @@ jobs: done cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_frontend_api && pytest -s --tb=short get_email.py test_dataset.py + - name: Run http api tests against Infinity + run: | + export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" + export HOST_ADDRESS=http://host.docker.internal:9380 + until sudo docker exec ragflow-server curl -s --connect-timeout 5 ${HOST_ADDRESS} > /dev/null; do + echo "Waiting for service to be available..." + sleep 5 + done + cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . 
&& source .venv/bin/activate && cd test/test_http_api && pytest -s --tb=short -m "not slow" + - name: Stop ragflow:nightly if: always() # always run this step even if previous steps failed run: | diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml index 445b8b49d..d8a5cb84c 100644 --- a/sdk/python/pyproject.toml +++ b/sdk/python/pyproject.toml @@ -18,3 +18,8 @@ dependencies = [ test = [ "pytest>=8.0.0,<9.0.0" ] + +[tool.pytest.ini_options] +markers = [ + "slow: marks tests as slow (deselect with '-m \"not slow\"')", +] \ No newline at end of file diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index 423ee3d31..c32a096ce 100644 --- a/sdk/python/test/conftest.py +++ b/sdk/python/test/conftest.py @@ -18,6 +18,8 @@ import os import pytest import requests +from libs.auth import RAGFlowHttpApiAuth + HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380') @@ -85,3 +87,8 @@ def get_auth(): @pytest.fixture(scope="session") def get_email(): return EMAIL + + +@pytest.fixture(scope="session") +def get_http_api_auth(get_api_key_fixture): + return RAGFlowHttpApiAuth(get_api_key_fixture) diff --git a/sdk/python/test/data/logo.svg b/sdk/python/test/data/logo.svg new file mode 100644 index 000000000..54167d2fb --- /dev/null +++ b/sdk/python/test/data/logo.svg @@ -0,0 +1,29 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/sdk/python/test/libs/auth.py b/sdk/python/test/libs/auth.py new file mode 100644 index 000000000..a27bc737e --- /dev/null +++ b/sdk/python/test/libs/auth.py @@ -0,0 +1,25 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from requests.auth import AuthBase + + +class RAGFlowHttpApiAuth(AuthBase): + def __init__(self, token): + self._token = token + + def __call__(self, r): + r.headers["Authorization"] = f'Bearer {self._token}' + return r diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/common.py b/sdk/python/test/test_http_api/test_dataset_mangement/common.py new file mode 100644 index 000000000..f0062262b --- /dev/null +++ b/sdk/python/test/test_http_api/test_dataset_mangement/common.py @@ -0,0 +1,47 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import requests + +HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380') +API_URL = f'{HOST_ADDRESS}/api/v1/datasets' +HEADERS = {"Content-Type": "application/json"} + + +INVALID_API_TOKEN = "invalid_key_123" +DATASET_NAME_LIMIT = 128 + + +def create_dataset(auth, payload): + res = requests.post(url=API_URL, headers=HEADERS, auth=auth, json=payload) + return res.json() + + +def list_dataset(auth, params): + res = requests.get(url=API_URL, headers=HEADERS, auth=auth, params=params) + return res.json() + + +def update_dataset(auth, dataset_id, payload): + res = requests.put(url=f"{API_URL}/{dataset_id}", + headers=HEADERS, auth=auth, json=payload) + return res.json() + + +def delete_dataset(auth, payload=None): + res = requests.delete(url=API_URL, headers=HEADERS, auth=auth, json=payload) + return res.json() diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py b/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py new file mode 100644 index 000000000..fec1aee99 --- /dev/null +++ b/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py @@ -0,0 +1,25 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pytest + +from common import delete_dataset + + +@pytest.fixture(scope="function", autouse=True) +def clear_datasets(get_http_api_auth): + yield + delete_dataset(get_http_api_auth) diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py new file mode 100644 index 000000000..1c37d1bb3 --- /dev/null +++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py @@ -0,0 +1,256 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import pytest +import base64 + +from pathlib import Path + +from common import create_dataset, INVALID_API_TOKEN, DATASET_NAME_LIMIT +from libs.auth import RAGFlowHttpApiAuth + + +class TestAuthorization: + def test_invalid_auth(self): + INVALID_API_KEY = RAGFlowHttpApiAuth(INVALID_API_TOKEN) + res = create_dataset(INVALID_API_KEY, {"name": "auth_test"}) + + assert res["code"] == 109 + assert res["message"] == 'Authentication error: API key is invalid!' 
+ + +class TestDatasetCreation: + @pytest.mark.parametrize("payload, expected_code", [ + ({"name": "valid_name"}, 0), + ({"name": "a"*(DATASET_NAME_LIMIT+1)}, 102), + ({"name": 0}, 100), + ({"name": ""}, 102), + ({"name": "duplicated_name"}, 102), + ({"name": "case_insensitive"}, 102), + ]) + def test_basic_scenarios(self, get_http_api_auth, payload, expected_code): + if payload["name"] == "duplicated_name": + create_dataset(get_http_api_auth, payload) + elif payload["name"] == "case_insensitive": + create_dataset(get_http_api_auth, { + "name": payload["name"].upper()}) + + res = create_dataset(get_http_api_auth, payload) + + assert res["code"] == expected_code + if expected_code == 0: + assert res["data"]["name"] == payload["name"] + + if payload["name"] in ["duplicated_name", "case_insensitive"]: + assert res["message"] == "Duplicated dataset name in creating dataset." + + @pytest.mark.slow + def test_dataset_10k(self, get_http_api_auth): + for i in range(10000): + payload = {"name": f"dataset_{i}"} + res = create_dataset(get_http_api_auth, payload) + assert res["code"] == 0, f"Failed to create dataset {i}" + + +class TestAdvancedConfigurations: + def test_avatar(self, get_http_api_auth, request): + def encode_avatar(image_path): + with Path.open(image_path, "rb") as file: + binary_data = file.read() + base64_encoded = base64.b64encode(binary_data).decode("utf-8") + return base64_encoded + + payload = { + "name": "avatar_test", + "avatar": encode_avatar(Path(request.config.rootdir) / 'test/data/logo.svg') + } + res = create_dataset(get_http_api_auth, payload) + assert res["code"] == 0 + + def test_description(self, get_http_api_auth): + payload = { + "name": "description_test", + "description": "a" * 65536 + } + res = create_dataset(get_http_api_auth, payload) + assert res["code"] == 0 + + @pytest.mark.parametrize("name, permission, expected_code", [ + ("me", "me", 0), + ("team", "team", 0), + pytest.param("empty_permission", "", 0, + 
+                     marks=pytest.mark.xfail(reason='issue#5709')),
+        # FIX: test ids were misspelled ("upercase"); the id doubles as the
+        # dataset name, so keep it meaningful.
+        ("me_uppercase", "ME", 102),
+        ("team_uppercase", "TEAM", 102),
+        ("other_permission", "other_permission", 102)
+    ])
+    def test_permission(self, get_http_api_auth, name, permission, expected_code):
+        """Only "me"/"team" (lowercase) are valid permissions; "" falls back to "me"."""
+        payload = {
+            "name": name,
+            "permission": permission
+        }
+        res = create_dataset(get_http_api_auth, payload)
+        assert res["code"] == expected_code
+        if expected_code == 0 and permission != "":
+            assert res["data"]["permission"] == permission
+        if permission == "":
+            # empty permission is accepted (xfail pending issue#5709) and
+            # presumably defaults server-side to "me" — confirm against API docs
+            assert res["data"]["permission"] == "me"
+
+    @pytest.mark.parametrize("name, chunk_method, expected_code", [
+        ("naive", "naive", 0),
+        ("manual", "manual", 0),
+        ("qa", "qa", 0),
+        ("table", "table", 0),
+        ("paper", "paper", 0),
+        ("book", "book", 0),
+        ("laws", "laws", 0),
+        ("presentation", "presentation", 0),
+        ("picture", "picture", 0),
+        ("one", "one", 0),
+        # FIX: test id was garbled ("picknowledge_graphture")
+        ("knowledge_graph", "knowledge_graph", 0),
+        ("email", "email", 0),
+        ("tag", "tag", 0),
+        pytest.param("empty_chunk_method", "", 0,
+                     marks=pytest.mark.xfail(reason='issue#5709')),
+        ("other_chunk_method", "other_chunk_method", 102)
+    ])
+    def test_chunk_method(self, get_http_api_auth, name, chunk_method, expected_code):
+        """Every documented chunk_method is accepted; "" falls back to "naive"."""
+        payload = {
+            "name": name,
+            "chunk_method": chunk_method
+        }
+        res = create_dataset(get_http_api_auth, payload)
+        assert res["code"] == expected_code
+        if expected_code == 0 and chunk_method != "":
+            assert res["data"]["chunk_method"] == chunk_method
+        if chunk_method == "":
+            assert res["data"]["chunk_method"] == "naive"
+
+    @pytest.mark.parametrize("name, embedding_model, expected_code", [
+        ("BAAI/bge-large-zh-v1.5",
+         "BAAI/bge-large-zh-v1.5", 0),
+        ("BAAI/bge-base-en-v1.5",
+         "BAAI/bge-base-en-v1.5", 0),
+        ("BAAI/bge-large-en-v1.5",
+         "BAAI/bge-large-en-v1.5", 0),
+        ("BAAI/bge-small-en-v1.5",
+         "BAAI/bge-small-en-v1.5", 0),
+        ("BAAI/bge-small-zh-v1.5",
+         "BAAI/bge-small-zh-v1.5", 0),
+        ("jinaai/jina-embeddings-v2-base-en",
+         "jinaai/jina-embeddings-v2-base-en", 0),
+        ("jinaai/jina-embeddings-v2-small-en",
+         "jinaai/jina-embeddings-v2-small-en", 0),
+        ("nomic-ai/nomic-embed-text-v1.5",
+         "nomic-ai/nomic-embed-text-v1.5", 0),
+        ("sentence-transformers/all-MiniLM-L6-v2",
+         "sentence-transformers/all-MiniLM-L6-v2", 0),
+        ("text-embedding-v2",
+         "text-embedding-v2", 0),
+        ("text-embedding-v3",
+         "text-embedding-v3", 0),
+        ("maidalun1020/bce-embedding-base_v1",
+         "maidalun1020/bce-embedding-base_v1", 0),
+        ("other_embedding_model",
+         "other_embedding_model", 102)
+    ])
+    def test_embedding_model(self, get_http_api_auth, name, embedding_model, expected_code):
+        """Known embedding models are accepted and echoed back; unknown ones rejected."""
+        payload = {
+            "name": name,
+            "embedding_model": embedding_model
+        }
+        res = create_dataset(get_http_api_auth, payload)
+        assert res["code"] == expected_code
+        if expected_code == 0:
+            assert res["data"]["embedding_model"] == embedding_model
+
+    @pytest.mark.parametrize("name, chunk_method, parser_config, expected_code", [
+        ("naive_default", "naive",
+         {"chunk_token_count": 128,
+          "layout_recognize": "DeepDOC",
+          "html4excel": False,
+          "delimiter": "\n!?。;!?",
+          "task_page_size": 12,
+          "raptor": {"use_raptor": False}
+          },
+         0),
+        ("naive_empty", "naive", {}, 0),
+        pytest.param("naive_chunk_token_count_negative", "naive",
+                     {"chunk_token_count": -1},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_chunk_token_count_zero", "naive",
+                     {"chunk_token_count": 0},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_chunk_token_count_float", "naive",
+                     {"chunk_token_count": 3.14},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_chunk_token_count_max", "naive",
+                     {"chunk_token_count": 1024*1024*1024},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_chunk_token_count_str", "naive",
+                     {"chunk_token_count": '1024'},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        ("naive_layout_recognize_DeepDOC", "naive",
+         {"layout_recognize": "DeepDOC"}, 0),
+        ("naive_layout_recognize_Naive", "naive",
+         {"layout_recognize": "Naive"}, 0),
+        ("naive_html4excel_true", "naive", {"html4excel": True}, 0),
+        ("naive_html4excel_false", "naive", {"html4excel": False}, 0),
+        pytest.param("naive_html4excel_not_bool", "naive",
+                     {"html4excel": 1},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        ("naive_delimiter_empty", "naive", {"delimiter": ""}, 0),
+        ("naive_delimiter_backticks", "naive", {"delimiter": "`##`"}, 0),
+        # BUG FIX: the payload key was misspelled "delimiterl", so this case
+        # exercised an unknown key instead of a non-string "delimiter" value,
+        # contrary to what the test id and xfail reason claim.
+        pytest.param("naive_delimiter_not_str", "naive",
+                     {"delimiter": 1},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_task_page_size_negative", "naive",
+                     {"task_page_size": -1},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_task_page_size_zero", "naive",
+                     {"task_page_size": 0},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_task_page_size_float", "naive",
+                     {"task_page_size": 3.14},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_task_page_size_max", "naive",
+                     {"task_page_size": 1024*1024*1024},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        pytest.param("naive_task_page_size_str", "naive",
+                     {"task_page_size": '1024'},
+                     102, marks=pytest.mark.xfail(reason='issue#5719')),
+        ("naive_raptor_true", "naive", {"raptor": {"use_raptor": True}}, 0),
+        ("naive_raptor_false", "naive", {"raptor": {"use_raptor": False}}, 0),
+        ("knowledge_graph_entity_types_default", "knowledge_graph", {
+            "entity_types": ["organization", "person", "location", "event", "time"]}, 0),
+        pytest.param("knowledge_graph_entity_types_not_list", "knowledge_graph", {
+            "entity_types": "organization,person,location,event,time"}, 102,
+            marks=pytest.mark.xfail(reason='issue#5719'))
+    ])
+    def test_parser_configs(self, get_http_api_auth, name, chunk_method,
+                            parser_config, expected_code):
+        """Create a dataset with a given parser_config and verify the stored config.
+
+        Each supplied key must be echoed back verbatim; an empty parser_config
+        must be replaced by the server-side defaults asserted below.
+        """
+        payload = {
+            "name": name,
+            "chunk_method": chunk_method,
+            "parser_config": parser_config
+        }
+        res = create_dataset(get_http_api_auth, payload)
+        assert res["code"] == expected_code
+        if expected_code == 0 and parser_config != {}:
+            for k, v in parser_config.items():
+                assert res["data"]["parser_config"][k] == v
+        if parser_config == {}:
+            # NOTE(review): default delimiter here is the two-character
+            # sequence backslash+n ("\\n..."), unlike the literal newline used
+            # in "naive_default" above — confirm which form the API stores.
+            assert res["data"]["parser_config"] == {"chunk_token_num": 128,
+                                                    "delimiter": "\\n!?;。;!?",
+                                                    "html4excel": False,
+                                                    "layout_recognize": "DeepDOC",
+                                                    "raptor": {"use_raptor": False}}