mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-04-22 22:20:07 +08:00
TEST: Added test cases for Delete Dataset HTTP API (#5770)
### What problem does this PR solve? 1. Cover the dataset deletion endpoints with test cases. 2. Format the code with ruff. ### Type of change - [x] Add test cases - [ ] Style
This commit is contained in:
parent
2ad852d8df
commit
8a84d1048c
@ -22,4 +22,5 @@ test = [
|
|||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
markers = [
|
markers = [
|
||||||
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
||||||
|
"wip: marks tests as work in progress (deselect with '-m \"not wip\"')"
|
||||||
]
|
]
|
@ -15,10 +15,11 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
|
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
|
||||||
API_URL = f'{HOST_ADDRESS}/api/v1/datasets'
|
API_URL = f"{HOST_ADDRESS}/api/v1/datasets"
|
||||||
HEADERS = {"Content-Type": "application/json"}
|
HEADERS = {"Content-Type": "application/json"}
|
||||||
|
|
||||||
|
|
||||||
@ -31,17 +32,26 @@ def create_dataset(auth, payload):
|
|||||||
return res.json()
|
return res.json()
|
||||||
|
|
||||||
|
|
||||||
def list_dataset(auth, params):
|
def list_dataset(auth, params=None):
|
||||||
res = requests.get(url=API_URL, headers=HEADERS, auth=auth, params=params)
|
res = requests.get(url=API_URL, headers=HEADERS, auth=auth, params=params)
|
||||||
return res.json()
|
return res.json()
|
||||||
|
|
||||||
|
|
||||||
def update_dataset(auth, dataset_id, payload):
|
def update_dataset(auth, dataset_id, payload):
|
||||||
res = requests.put(url=f"{API_URL}/{dataset_id}",
|
res = requests.put(
|
||||||
headers=HEADERS, auth=auth, json=payload)
|
url=f"{API_URL}/{dataset_id}", headers=HEADERS, auth=auth, json=payload
|
||||||
|
)
|
||||||
return res.json()
|
return res.json()
|
||||||
|
|
||||||
|
|
||||||
def delete_dataset(auth, payload=None):
|
def delete_dataset(auth, payload=None):
|
||||||
res = requests.delete(url=API_URL, headers=HEADERS, auth=auth, json=payload)
|
res = requests.delete(url=API_URL, headers=HEADERS, auth=auth, json=payload)
|
||||||
return res.json()
|
return res.json()
|
||||||
|
|
||||||
|
|
||||||
|
def create_datasets(auth, num):
    """Create ``num`` datasets and return their ids in creation order.

    The datasets are named ``dataset_0`` through ``dataset_{num - 1}`` and are
    created one at a time via ``create_dataset``. The id is read from the
    ``data`` field of each response.
    """
    return [
        create_dataset(auth, {"name": f"dataset_{index}"})["data"]["id"]
        for index in range(num)
    ]
|
||||||
|
@ -15,7 +15,6 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from common import delete_dataset
|
from common import delete_dataset
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,39 +13,49 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
import pytest
|
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from common import create_dataset, INVALID_API_TOKEN, DATASET_NAME_LIMIT
|
import pytest
|
||||||
|
from common import DATASET_NAME_LIMIT, INVALID_API_TOKEN, create_dataset
|
||||||
from libs.auth import RAGFlowHttpApiAuth
|
from libs.auth import RAGFlowHttpApiAuth
|
||||||
|
|
||||||
|
|
||||||
class TestAuthorization:
|
class TestAuthorization:
|
||||||
def test_invalid_auth(self):
|
@pytest.mark.parametrize(
|
||||||
INVALID_API_KEY = RAGFlowHttpApiAuth(INVALID_API_TOKEN)
|
"auth, expected_code, expected_message",
|
||||||
res = create_dataset(INVALID_API_KEY, {"name": "auth_test"})
|
[
|
||||||
|
(None, 0, "`Authorization` can't be empty"),
|
||||||
assert res["code"] == 109
|
(
|
||||||
assert res["message"] == 'Authentication error: API key is invalid!'
|
RAGFlowHttpApiAuth(INVALID_API_TOKEN),
|
||||||
|
109,
|
||||||
|
"Authentication error: API key is invalid!",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_invalid_auth(self, auth, expected_code, expected_message):
|
||||||
|
res = create_dataset(auth, {"name": "auth_test"})
|
||||||
|
assert res["code"] == expected_code
|
||||||
|
assert res["message"] == expected_message
|
||||||
|
|
||||||
|
|
||||||
class TestDatasetCreation:
|
class TestDatasetCreation:
|
||||||
@pytest.mark.parametrize("payload, expected_code", [
|
@pytest.mark.parametrize(
|
||||||
({"name": "valid_name"}, 0),
|
"payload, expected_code",
|
||||||
({"name": "a"*(DATASET_NAME_LIMIT+1)}, 102),
|
[
|
||||||
({"name": 0}, 100),
|
({"name": "valid_name"}, 0),
|
||||||
({"name": ""}, 102),
|
({"name": "a" * (DATASET_NAME_LIMIT + 1)}, 102),
|
||||||
({"name": "duplicated_name"}, 102),
|
({"name": 0}, 100),
|
||||||
({"name": "case_insensitive"}, 102),
|
({"name": ""}, 102),
|
||||||
])
|
({"name": "duplicated_name"}, 102),
|
||||||
|
({"name": "case_insensitive"}, 102),
|
||||||
|
],
|
||||||
|
)
|
||||||
def test_basic_scenarios(self, get_http_api_auth, payload, expected_code):
|
def test_basic_scenarios(self, get_http_api_auth, payload, expected_code):
|
||||||
if payload["name"] == "duplicated_name":
|
if payload["name"] == "duplicated_name":
|
||||||
create_dataset(get_http_api_auth, payload)
|
create_dataset(get_http_api_auth, payload)
|
||||||
elif payload["name"] == "case_insensitive":
|
elif payload["name"] == "case_insensitive":
|
||||||
create_dataset(get_http_api_auth, {
|
create_dataset(get_http_api_auth, {"name": payload["name"].upper()})
|
||||||
"name": payload["name"].upper()})
|
|
||||||
|
|
||||||
res = create_dataset(get_http_api_auth, payload)
|
res = create_dataset(get_http_api_auth, payload)
|
||||||
|
|
||||||
@ -58,7 +68,7 @@ class TestDatasetCreation:
|
|||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_dataset_10k(self, get_http_api_auth):
|
def test_dataset_10k(self, get_http_api_auth):
|
||||||
for i in range(10000):
|
for i in range(10_000):
|
||||||
payload = {"name": f"dataset_{i}"}
|
payload = {"name": f"dataset_{i}"}
|
||||||
res = create_dataset(get_http_api_auth, payload)
|
res = create_dataset(get_http_api_auth, payload)
|
||||||
assert res["code"] == 0, f"Failed to create dataset {i}"
|
assert res["code"] == 0, f"Failed to create dataset {i}"
|
||||||
@ -74,33 +84,33 @@ class TestAdvancedConfigurations:
|
|||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"name": "avatar_test",
|
"name": "avatar_test",
|
||||||
"avatar": encode_avatar(Path(request.config.rootdir) / 'test/data/logo.svg')
|
"avatar": encode_avatar(
|
||||||
|
Path(request.config.rootdir) / "test/data/logo.svg"
|
||||||
|
),
|
||||||
}
|
}
|
||||||
res = create_dataset(get_http_api_auth, payload)
|
res = create_dataset(get_http_api_auth, payload)
|
||||||
assert res["code"] == 0
|
assert res["code"] == 0
|
||||||
|
|
||||||
def test_description(self, get_http_api_auth):
|
def test_description(self, get_http_api_auth):
|
||||||
payload = {
|
payload = {"name": "description_test", "description": "a" * 65536}
|
||||||
"name": "description_test",
|
|
||||||
"description": "a" * 65536
|
|
||||||
}
|
|
||||||
res = create_dataset(get_http_api_auth, payload)
|
res = create_dataset(get_http_api_auth, payload)
|
||||||
assert res["code"] == 0
|
assert res["code"] == 0
|
||||||
|
|
||||||
@pytest.mark.parametrize("name, permission, expected_code", [
|
@pytest.mark.parametrize(
|
||||||
("me", "me", 0),
|
"name, permission, expected_code",
|
||||||
("team", "team", 0),
|
[
|
||||||
pytest.param("empty_permission", "", 0,
|
("me", "me", 0),
|
||||||
marks=pytest.mark.xfail(reason='issue#5709')),
|
("team", "team", 0),
|
||||||
("me_upercase", "ME", 102),
|
pytest.param(
|
||||||
("team_upercase", "TEAM", 102),
|
"empty_permission", "", 0, marks=pytest.mark.xfail(reason="issue#5709")
|
||||||
("other_permission", "other_permission", 102)
|
),
|
||||||
])
|
("me_upercase", "ME", 102),
|
||||||
|
("team_upercase", "TEAM", 102),
|
||||||
|
("other_permission", "other_permission", 102),
|
||||||
|
],
|
||||||
|
)
|
||||||
def test_permission(self, get_http_api_auth, name, permission, expected_code):
|
def test_permission(self, get_http_api_auth, name, permission, expected_code):
|
||||||
payload = {
|
payload = {"name": name, "permission": permission}
|
||||||
"name": name,
|
|
||||||
"permission": permission
|
|
||||||
}
|
|
||||||
res = create_dataset(get_http_api_auth, payload)
|
res = create_dataset(get_http_api_auth, payload)
|
||||||
assert res["code"] == expected_code
|
assert res["code"] == expected_code
|
||||||
if expected_code == 0 and permission != "":
|
if expected_code == 0 and permission != "":
|
||||||
@ -108,29 +118,33 @@ class TestAdvancedConfigurations:
|
|||||||
if permission == "":
|
if permission == "":
|
||||||
assert res["data"]["permission"] == "me"
|
assert res["data"]["permission"] == "me"
|
||||||
|
|
||||||
@pytest.mark.parametrize("name, chunk_method, expected_code", [
|
@pytest.mark.parametrize(
|
||||||
("naive", "naive", 0),
|
"name, chunk_method, expected_code",
|
||||||
("manual", "manual", 0),
|
[
|
||||||
("qa", "qa", 0),
|
("naive", "naive", 0),
|
||||||
("table", "table", 0),
|
("manual", "manual", 0),
|
||||||
("paper", "paper", 0),
|
("qa", "qa", 0),
|
||||||
("book", "book", 0),
|
("table", "table", 0),
|
||||||
("laws", "laws", 0),
|
("paper", "paper", 0),
|
||||||
("presentation", "presentation", 0),
|
("book", "book", 0),
|
||||||
("picture", "picture", 0),
|
("laws", "laws", 0),
|
||||||
("one", "one", 0),
|
("presentation", "presentation", 0),
|
||||||
("picknowledge_graphture", "knowledge_graph", 0),
|
("picture", "picture", 0),
|
||||||
("email", "email", 0),
|
("one", "one", 0),
|
||||||
("tag", "tag", 0),
|
("picknowledge_graphture", "knowledge_graph", 0),
|
||||||
pytest.param("empty_chunk_method", "", 0,
|
("email", "email", 0),
|
||||||
marks=pytest.mark.xfail(reason='issue#5709')),
|
("tag", "tag", 0),
|
||||||
("other_chunk_method", "other_chunk_method", 102)
|
pytest.param(
|
||||||
])
|
"empty_chunk_method",
|
||||||
|
"",
|
||||||
|
0,
|
||||||
|
marks=pytest.mark.xfail(reason="issue#5709"),
|
||||||
|
),
|
||||||
|
("other_chunk_method", "other_chunk_method", 102),
|
||||||
|
],
|
||||||
|
)
|
||||||
def test_chunk_method(self, get_http_api_auth, name, chunk_method, expected_code):
|
def test_chunk_method(self, get_http_api_auth, name, chunk_method, expected_code):
|
||||||
payload = {
|
payload = {"name": name, "chunk_method": chunk_method}
|
||||||
"name": name,
|
|
||||||
"chunk_method": chunk_method
|
|
||||||
}
|
|
||||||
res = create_dataset(get_http_api_auth, payload)
|
res = create_dataset(get_http_api_auth, payload)
|
||||||
assert res["code"] == expected_code
|
assert res["code"] == expected_code
|
||||||
if expected_code == 0 and chunk_method != "":
|
if expected_code == 0 and chunk_method != "":
|
||||||
@ -138,105 +152,172 @@ class TestAdvancedConfigurations:
|
|||||||
if chunk_method == "":
|
if chunk_method == "":
|
||||||
assert res["data"]["chunk_method"] == "naive"
|
assert res["data"]["chunk_method"] == "naive"
|
||||||
|
|
||||||
@pytest.mark.parametrize("name, embedding_model, expected_code", [
|
@pytest.mark.parametrize(
|
||||||
("BAAI/bge-large-zh-v1.5",
|
"name, embedding_model, expected_code",
|
||||||
"BAAI/bge-large-zh-v1.5", 0),
|
[
|
||||||
("BAAI/bge-base-en-v1.5",
|
("BAAI/bge-large-zh-v1.5", "BAAI/bge-large-zh-v1.5", 0),
|
||||||
"BAAI/bge-base-en-v1.5", 0),
|
("BAAI/bge-base-en-v1.5", "BAAI/bge-base-en-v1.5", 0),
|
||||||
("BAAI/bge-large-en-v1.5",
|
("BAAI/bge-large-en-v1.5", "BAAI/bge-large-en-v1.5", 0),
|
||||||
"BAAI/bge-large-en-v1.5", 0),
|
("BAAI/bge-small-en-v1.5", "BAAI/bge-small-en-v1.5", 0),
|
||||||
("BAAI/bge-small-en-v1.5",
|
("BAAI/bge-small-zh-v1.5", "BAAI/bge-small-zh-v1.5", 0),
|
||||||
"BAAI/bge-small-en-v1.5", 0),
|
(
|
||||||
("BAAI/bge-small-zh-v1.5",
|
"jinaai/jina-embeddings-v2-base-en",
|
||||||
"BAAI/bge-small-zh-v1.5", 0),
|
"jinaai/jina-embeddings-v2-base-en",
|
||||||
("jinaai/jina-embeddings-v2-base-en",
|
0,
|
||||||
"jinaai/jina-embeddings-v2-base-en", 0),
|
),
|
||||||
("jinaai/jina-embeddings-v2-small-en",
|
(
|
||||||
"jinaai/jina-embeddings-v2-small-en", 0),
|
"jinaai/jina-embeddings-v2-small-en",
|
||||||
("nomic-ai/nomic-embed-text-v1.5",
|
"jinaai/jina-embeddings-v2-small-en",
|
||||||
"nomic-ai/nomic-embed-text-v1.5", 0),
|
0,
|
||||||
("sentence-transformers/all-MiniLM-L6-v2",
|
),
|
||||||
"sentence-transformers/all-MiniLM-L6-v2", 0),
|
("nomic-ai/nomic-embed-text-v1.5", "nomic-ai/nomic-embed-text-v1.5", 0),
|
||||||
("text-embedding-v2",
|
(
|
||||||
"text-embedding-v2", 0),
|
"sentence-transformers/all-MiniLM-L6-v2",
|
||||||
("text-embedding-v3",
|
"sentence-transformers/all-MiniLM-L6-v2",
|
||||||
"text-embedding-v3", 0),
|
0,
|
||||||
("maidalun1020/bce-embedding-base_v1",
|
),
|
||||||
"maidalun1020/bce-embedding-base_v1", 0),
|
("text-embedding-v2", "text-embedding-v2", 0),
|
||||||
("other_embedding_model",
|
("text-embedding-v3", "text-embedding-v3", 0),
|
||||||
"other_embedding_model", 102)
|
(
|
||||||
])
|
"maidalun1020/bce-embedding-base_v1",
|
||||||
def test_embedding_model(self, get_http_api_auth, name, embedding_model, expected_code):
|
"maidalun1020/bce-embedding-base_v1",
|
||||||
payload = {
|
0,
|
||||||
"name": name,
|
),
|
||||||
"embedding_model": embedding_model
|
("other_embedding_model", "other_embedding_model", 102),
|
||||||
}
|
],
|
||||||
|
)
|
||||||
|
def test_embedding_model(
|
||||||
|
self, get_http_api_auth, name, embedding_model, expected_code
|
||||||
|
):
|
||||||
|
payload = {"name": name, "embedding_model": embedding_model}
|
||||||
res = create_dataset(get_http_api_auth, payload)
|
res = create_dataset(get_http_api_auth, payload)
|
||||||
assert res["code"] == expected_code
|
assert res["code"] == expected_code
|
||||||
if expected_code == 0:
|
if expected_code == 0:
|
||||||
assert res["data"]["embedding_model"] == embedding_model
|
assert res["data"]["embedding_model"] == embedding_model
|
||||||
|
|
||||||
@pytest.mark.parametrize("name, chunk_method, parser_config, expected_code", [
|
@pytest.mark.parametrize(
|
||||||
("naive_default", "naive",
|
"name, chunk_method, parser_config, expected_code",
|
||||||
{"chunk_token_num": 128,
|
[
|
||||||
"layout_recognize": "DeepDOC",
|
(
|
||||||
"html4excel": False,
|
"naive_default",
|
||||||
"delimiter": "\n!?。;!?",
|
"naive",
|
||||||
"task_page_size": 12,
|
{
|
||||||
"raptor": {"use_raptor": False}
|
"chunk_token_num": 128,
|
||||||
},
|
"layout_recognize": "DeepDOC",
|
||||||
0),
|
"html4excel": False,
|
||||||
("naive_empty", "naive", {}, 0),
|
"delimiter": "\n!?。;!?",
|
||||||
pytest.param("naive_chunk_token_num_negative", "naive",
|
"task_page_size": 12,
|
||||||
{"chunk_token_num": -1},
|
"raptor": {"use_raptor": False},
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
},
|
||||||
pytest.param("naive_chunk_token_num_zero", "naive",
|
0,
|
||||||
{"chunk_token_num": 0},
|
),
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
("naive_empty", "naive", {}, 0),
|
||||||
pytest.param("naive_chunk_token_num_float", "naive",
|
pytest.param(
|
||||||
{"chunk_token_num": 3.14},
|
"naive_chunk_token_num_negative",
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
"naive",
|
||||||
pytest.param("naive_chunk_token_num_max", "naive",
|
{"chunk_token_num": -1},
|
||||||
{"chunk_token_num": 1024*1024*1024},
|
102,
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
pytest.param("naive_chunk_token_num_str", "naive",
|
),
|
||||||
{"chunk_token_num": '1024'},
|
pytest.param(
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
"naive_chunk_token_num_zero",
|
||||||
("naive_layout_recognize_DeepDOC", "naive",
|
"naive",
|
||||||
{"layout_recognize": "DeepDOC"}, 0),
|
{"chunk_token_num": 0},
|
||||||
("naive_layout_recognize_Naive", "naive",
|
102,
|
||||||
{"layout_recognize": "Naive"}, 0),
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
("naive_html4excel_true", "naive", {"html4excel": True}, 0),
|
),
|
||||||
("naive_html4excel_false", "naive", {"html4excel": False}, 0),
|
pytest.param(
|
||||||
pytest.param("naive_html4excel_not_bool", "naive", {
|
"naive_chunk_token_num_float",
|
||||||
"html4excel": 1}, 102, marks=pytest.mark.xfail(reason='issue#5719')),
|
"naive",
|
||||||
("naive_delimiter_empty", "naive", {"delimiter": ""}, 0),
|
{"chunk_token_num": 3.14},
|
||||||
("naive_delimiter_backticks", "naive", {"delimiter": "`##`"}, 0),
|
102,
|
||||||
pytest.param("naive_delimiterl_not_str", "naive", {
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
"delimiterl": 1}, 102, marks=pytest.mark.xfail(reason='issue#5719')),
|
),
|
||||||
pytest.param("naive_task_page_size_negative", "naive",
|
pytest.param(
|
||||||
{"task_page_size": -1},
|
"naive_chunk_token_num_max",
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
"naive",
|
||||||
pytest.param("naive_task_page_size_zero", "naive",
|
{"chunk_token_num": 1024 * 1024 * 1024},
|
||||||
{"task_page_size": 0},
|
102,
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
pytest.param("naive_task_page_size_float", "naive",
|
),
|
||||||
{"task_page_size": 3.14},
|
pytest.param(
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
"naive_chunk_token_num_str",
|
||||||
pytest.param("naive_task_page_size_max", "naive",
|
"naive",
|
||||||
{"task_page_size": 1024*1024*1024},
|
{"chunk_token_num": "1024"},
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
102,
|
||||||
pytest.param("naive_task_page_size_str", "naive",
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
{"task_page_size": '1024'},
|
),
|
||||||
102, marks=pytest.mark.xfail(reason='issue#5719')),
|
(
|
||||||
("naive_raptor_true", "naive", {"raptor": {"use_raptor": True}}, 0),
|
"naive_layout_recognize_DeepDOC",
|
||||||
("naive_raptor_false", "naive", {"raptor": {"use_raptor": False}}, 0),
|
"naive",
|
||||||
])
|
{"layout_recognize": "DeepDOC"},
|
||||||
def test_parser_configs(self, get_http_api_auth, name, chunk_method, parser_config, expected_code):
|
0,
|
||||||
|
),
|
||||||
|
("naive_layout_recognize_Naive", "naive", {"layout_recognize": "Naive"}, 0),
|
||||||
|
("naive_html4excel_true", "naive", {"html4excel": True}, 0),
|
||||||
|
("naive_html4excel_false", "naive", {"html4excel": False}, 0),
|
||||||
|
pytest.param(
|
||||||
|
"naive_html4excel_not_bool",
|
||||||
|
"naive",
|
||||||
|
{"html4excel": 1},
|
||||||
|
102,
|
||||||
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
|
),
|
||||||
|
("naive_delimiter_empty", "naive", {"delimiter": ""}, 0),
|
||||||
|
("naive_delimiter_backticks", "naive", {"delimiter": "`##`"}, 0),
|
||||||
|
pytest.param(
|
||||||
|
"naive_delimiterl_not_str",
|
||||||
|
"naive",
|
||||||
|
{"delimiterl": 1},
|
||||||
|
102,
|
||||||
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"naive_task_page_size_negative",
|
||||||
|
"naive",
|
||||||
|
{"task_page_size": -1},
|
||||||
|
102,
|
||||||
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"naive_task_page_size_zero",
|
||||||
|
"naive",
|
||||||
|
{"task_page_size": 0},
|
||||||
|
102,
|
||||||
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"naive_task_page_size_float",
|
||||||
|
"naive",
|
||||||
|
{"task_page_size": 3.14},
|
||||||
|
102,
|
||||||
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"naive_task_page_size_max",
|
||||||
|
"naive",
|
||||||
|
{"task_page_size": 1024 * 1024 * 1024},
|
||||||
|
102,
|
||||||
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"naive_task_page_size_str",
|
||||||
|
"naive",
|
||||||
|
{"task_page_size": "1024"},
|
||||||
|
102,
|
||||||
|
marks=pytest.mark.xfail(reason="issue#5719"),
|
||||||
|
),
|
||||||
|
("naive_raptor_true", "naive", {"raptor": {"use_raptor": True}}, 0),
|
||||||
|
("naive_raptor_false", "naive", {"raptor": {"use_raptor": False}}, 0),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_parser_configs(
|
||||||
|
self, get_http_api_auth, name, chunk_method, parser_config, expected_code
|
||||||
|
):
|
||||||
payload = {
|
payload = {
|
||||||
"name": name,
|
"name": name,
|
||||||
"chunk_method": chunk_method,
|
"chunk_method": chunk_method,
|
||||||
"parser_config": parser_config
|
"parser_config": parser_config,
|
||||||
}
|
}
|
||||||
res = create_dataset(get_http_api_auth, payload)
|
res = create_dataset(get_http_api_auth, payload)
|
||||||
# print(res)
|
# print(res)
|
||||||
@ -245,8 +326,10 @@ class TestAdvancedConfigurations:
|
|||||||
for k, v in parser_config.items():
|
for k, v in parser_config.items():
|
||||||
assert res["data"]["parser_config"][k] == v
|
assert res["data"]["parser_config"][k] == v
|
||||||
if parser_config == {}:
|
if parser_config == {}:
|
||||||
assert res["data"]["parser_config"] == {"chunk_token_num": 128,
|
assert res["data"]["parser_config"] == {
|
||||||
"delimiter": "\\n!?;。;!?",
|
"chunk_token_num": 128,
|
||||||
"html4excel": False,
|
"delimiter": "\\n!?;。;!?",
|
||||||
"layout_recognize": "DeepDOC",
|
"html4excel": False,
|
||||||
"raptor": {"use_raptor": False}}
|
"layout_recognize": "DeepDOC",
|
||||||
|
"raptor": {"use_raptor": False},
|
||||||
|
}
|
||||||
|
@ -0,0 +1,167 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from common import (
|
||||||
|
INVALID_API_TOKEN,
|
||||||
|
create_datasets,
|
||||||
|
delete_dataset,
|
||||||
|
list_dataset,
|
||||||
|
)
|
||||||
|
from libs.auth import RAGFlowHttpApiAuth
|
||||||
|
|
||||||
|
|
||||||
|
class TestAuthorization:
    """Delete requests sent with missing or invalid credentials."""

    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            # No credentials at all: code 0 is expected together with a message.
            (None, 0, "`Authorization` can't be empty"),
            # A syntactically present but invalid API token.
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(
        self, get_http_api_auth, auth, expected_code, expected_message
    ):
        """Check the error reply and that the dataset is still listed afterwards."""
        # The dataset is created with the valid fixture credentials; only the
        # delete call uses the credentials under test.
        dataset_ids = create_datasets(get_http_api_auth, 1)

        rejection = delete_dataset(auth, {"ids": dataset_ids})
        assert rejection["code"] == expected_code
        assert rejection["message"] == expected_message

        listing = list_dataset(get_http_api_auth)
        assert len(listing["data"]) == 1
|
||||||
|
|
||||||
|
|
||||||
|
class TestDatasetDeletion:
    """Scenarios for deleting datasets through the HTTP API."""

    @staticmethod
    def _listed_count(auth):
        """Return how many datasets a default ``list_dataset`` call reports."""
        return len(list_dataset(auth)["data"])

    @pytest.mark.parametrize(
        "payload, expected_code, expected_message, remaining",
        [
            # No payload / empty id list: no dataset is expected to remain.
            (None, 0, "", 0),
            ({"ids": []}, 0, "", 0),
            # Unknown ids are rejected and all three datasets are expected to remain.
            ({"ids": ["invalid_id"]}, 102, "You don't own the dataset invalid_id", 3),
            (
                {"ids": ["\n!?。;!?\"'"]},
                102,
                "You don't own the dataset \n!?。;!?\"'",
                3,
            ),
            # A JSON string instead of a JSON object.
            (
                "not json",
                100,
                "AttributeError(\"'str' object has no attribute 'get'\")",
                3,
            ),
        ],
    )
    def test_basic_scenarios(
        self, get_http_api_auth, payload, expected_code, expected_message, remaining
    ):
        """Delete with a given payload and check the reply and what is left."""
        create_datasets(get_http_api_auth, 3)

        outcome = delete_dataset(get_http_api_auth, payload)
        assert outcome["code"] == expected_code
        # The message is only compared for error replies.
        if outcome["code"] != 0:
            assert outcome["message"] == expected_message

        assert self._listed_count(get_http_api_auth) == remaining

    def test_delete_one(self, get_http_api_auth):
        """Deleting a single id removes exactly one dataset."""
        count = 3
        dataset_ids = create_datasets(get_http_api_auth, count)

        outcome = delete_dataset(get_http_api_auth, {"ids": [dataset_ids[0]]})
        assert outcome["code"] == 0

        assert self._listed_count(get_http_api_auth) == count - 1

    def test_delete_multi(self, get_http_api_auth):
        """Deleting every created id leaves nothing listed."""
        dataset_ids = create_datasets(get_http_api_auth, 3)

        outcome = delete_dataset(get_http_api_auth, {"ids": dataset_ids})
        assert outcome["code"] == 0

        assert self._listed_count(get_http_api_auth) == 0

    @pytest.mark.xfail(reason="issue#5760")
    def test_delete_partial_invalid_id_at_beginning(self, get_http_api_auth):
        """An invalid id placed first: error expected, all datasets kept."""
        count = 3
        dataset_ids = create_datasets(get_http_api_auth, count)

        outcome = delete_dataset(
            get_http_api_auth, {"ids": ["invalid_id", *dataset_ids]}
        )
        assert outcome["code"] == 102
        assert outcome["message"] == "You don't own the dataset invalid_id"

        assert self._listed_count(get_http_api_auth) == count

    @pytest.mark.xfail(reason="issue#5760")
    def test_delete_partial_invalid_id_in_middle(self, get_http_api_auth):
        """An invalid id placed after the first valid id: error expected, all kept."""
        count = 3
        dataset_ids = create_datasets(get_http_api_auth, count)

        mixed_ids = [dataset_ids[0], "invalid_id", *dataset_ids[1:]]
        outcome = delete_dataset(get_http_api_auth, {"ids": mixed_ids})
        assert outcome["code"] == 102
        assert outcome["message"] == "You don't own the dataset invalid_id"

        assert self._listed_count(get_http_api_auth) == count

    @pytest.mark.xfail(reason="issue#5760")
    def test_delete_partial_invalid_id_at_end(self, get_http_api_auth):
        """An invalid id placed last: error expected, all datasets kept."""
        count = 3
        dataset_ids = create_datasets(get_http_api_auth, count)

        outcome = delete_dataset(
            get_http_api_auth, {"ids": [*dataset_ids, "invalid_id"]}
        )
        assert outcome["code"] == 102
        assert outcome["message"] == "You don't own the dataset invalid_id"

        assert self._listed_count(get_http_api_auth) == count

    def test_repeated_deletion(self, get_http_api_auth):
        """Deleting the same id twice succeeds once and then reports code 102."""
        dataset_ids = create_datasets(get_http_api_auth, 1)
        request_body = {"ids": dataset_ids}

        first_attempt = delete_dataset(get_http_api_auth, request_body)
        assert first_attempt["code"] == 0

        second_attempt = delete_dataset(get_http_api_auth, request_body)
        assert second_attempt["code"] == 102
        assert second_attempt["message"] == f"You don't own the dataset {dataset_ids[0]}"

    def test_concurrent_deletion(self, get_http_api_auth):
        """Delete 100 datasets, one id per request, from a pool of 5 threads."""
        dataset_ids = create_datasets(get_http_api_auth, 100)

        with ThreadPoolExecutor(max_workers=5) as pool:
            pending = [
                pool.submit(delete_dataset, get_http_api_auth, {"ids": [dataset_id]})
                for dataset_id in dataset_ids
            ]
        # Leaving the ``with`` block waits for every submitted request.
        replies = [job.result() for job in pending]
        assert all(reply["code"] == 0 for reply in replies)

    @pytest.mark.slow
    def test_delete_10k(self, get_http_api_auth):
        """Create 10,000 datasets and delete them all in a single request."""
        dataset_ids = create_datasets(get_http_api_auth, 10_000)

        outcome = delete_dataset(get_http_api_auth, {"ids": dataset_ids})
        assert outcome["code"] == 0

        assert self._listed_count(get_http_api_auth) == 0
|
Loading…
x
Reference in New Issue
Block a user