diff --git a/api/apps/sdk/dataset.py b/api/apps/sdk/dataset.py
index 574683aa2..c96a8975d 100644
--- a/api/apps/sdk/dataset.py
+++ b/api/apps/sdk/dataset.py
@@ -20,7 +20,6 @@ import logging
 from flask import request
 from peewee import OperationalError
 
-from api import settings
 from api.db import FileSource, StatusEnum
 from api.db.db_models import File
 from api.db.services.document_service import DocumentService
@@ -30,7 +29,6 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.user_service import TenantService
 from api.utils import get_uuid
 from api.utils.api_utils import (
-    check_duplicate_ids,
     deep_merge,
     get_error_argument_result,
     get_error_data_result,
@@ -39,7 +37,7 @@ from api.utils.api_utils import (
     token_required,
     verify_embedding_availability,
 )
-from api.utils.validation_utils import CreateDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
+from api.utils.validation_utils import CreateDatasetReq, DeleteDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
 
 
 @manager.route("/datasets", methods=["POST"])  # noqa: F821
@@ -190,72 +188,85 @@ def delete(tenant_id):
         required: true
         schema:
           type: object
+          required:
+            - ids
           properties:
             ids:
-              type: array
+              type: array or null
               items:
                 type: string
-              description: List of dataset IDs to delete.
+              description: |
+                Specifies the datasets to delete:
+                - If `null`, all datasets will be deleted.
+                - If an array of IDs, only the specified datasets will be deleted.
+                - If an empty array, no datasets will be deleted.
     responses:
         200:
             description: Successful operation.
             schema:
               type: object
    """
+    req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
+    if err is not None:
+        return get_error_argument_result(err)
+
+    kb_id_instance_pairs = []
+    if req["ids"] is None:
+        try:
+            kbs = KnowledgebaseService.query(tenant_id=tenant_id)
+            for kb in kbs:
+                kb_id_instance_pairs.append((kb.id, kb))
+        except OperationalError as e:
+            logging.exception(e)
+            return get_error_data_result(message="Database operation failed")
+    else:
+        error_kb_ids = []
+        for kb_id in req["ids"]:
+            try:
+                kb = KnowledgebaseService.get_or_none(id=kb_id, tenant_id=tenant_id)
+                if kb is None:
+                    error_kb_ids.append(kb_id)
+                    continue
+                kb_id_instance_pairs.append((kb_id, kb))
+            except OperationalError as e:
+                logging.exception(e)
+                return get_error_data_result(message="Database operation failed")
+        if len(error_kb_ids) > 0:
+            return get_error_data_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
+
 
     errors = []
     success_count = 0
-    req = request.json
-    if not req:
-        ids = None
-    else:
-        ids = req.get("ids")
-    if not ids:
-        id_list = []
-        kbs = KnowledgebaseService.query(tenant_id=tenant_id)
-        for kb in kbs:
-            id_list.append(kb.id)
-    else:
-        id_list = ids
-        unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "dataset")
-        id_list = unique_id_list
-
-    for id in id_list:
-        kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
-        if not kbs:
-            errors.append(f"You don't own the dataset {id}")
-            continue
-        for doc in DocumentService.query(kb_id=id):
-            if not DocumentService.remove_document(doc, tenant_id):
-                errors.append(f"Remove document error for dataset {id}")
+    for kb_id, kb in kb_id_instance_pairs:
+        try:
+            for doc in DocumentService.query(kb_id=kb_id):
+                if not DocumentService.remove_document(doc, tenant_id):
+                    errors.append(f"Remove document '{doc.id}' error for dataset '{kb_id}'")
+                    continue
+                f2d = File2DocumentService.get_by_document_id(doc.id)
+                FileService.filter_delete(
+                    [
+                        File.source_type == FileSource.KNOWLEDGEBASE,
+                        File.id == f2d[0].file_id,
+                    ]
+                )
+                File2DocumentService.delete_by_document_id(doc.id)
+            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
+            if not KnowledgebaseService.delete_by_id(kb_id):
+                errors.append(f"Delete dataset error for {kb_id}")
                 continue
-            f2d = File2DocumentService.get_by_document_id(doc.id)
-            FileService.filter_delete(
-                [
-                    File.source_type == FileSource.KNOWLEDGEBASE,
-                    File.id == f2d[0].file_id,
-                ]
-            )
-            File2DocumentService.delete_by_document_id(doc.id)
-        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
-        if not KnowledgebaseService.delete_by_id(id):
-            errors.append(f"Delete dataset error for {id}")
-            continue
-        success_count += 1
-    if errors:
-        if success_count > 0:
-            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} datasets with {len(errors)} errors")
-        else:
-            return get_error_data_result(message="; ".join(errors))
-    if duplicate_messages:
-        if success_count > 0:
-            return get_result(
-                message=f"Partially deleted {success_count} datasets with {len(duplicate_messages)} errors",
-                data={"success_count": success_count, "errors": duplicate_messages},
-            )
-        else:
-            return get_error_data_result(message=";".join(duplicate_messages))
-    return get_result(code=settings.RetCode.SUCCESS)
+            success_count += 1
+        except OperationalError as e:
+            logging.exception(e)
+            return get_error_data_result(message="Database operation failed")
+
+    if not errors:
+        return get_result()
+
+    error_message = f"Successfully deleted {success_count} datasets, {len(errors)} failed. Details: {'; '.join(errors)[:128]}..."
+    if success_count == 0:
+        return get_error_data_result(message=error_message)
+
+    return get_result(data={"success_count": success_count, "errors": errors[:5]}, message=error_message)
 
 
 @manager.route("/datasets/<dataset_id>", methods=["PUT"])  # noqa: F821
@@ -373,7 +384,7 @@ def update(tenant_id, dataset_id):
         logging.exception(e)
         return get_error_data_result(message="Database operation failed")
 
-    return get_result(code=settings.RetCode.SUCCESS)
+    return get_result()
 
 
 @manager.route("/datasets", methods=["GET"])  # noqa: F821
diff --git a/api/utils/validation_utils.py b/api/utils/validation_utils.py
index 8e6c2e01f..eb5c44b9a 100644
--- a/api/utils/validation_utils.py
+++ b/api/utils/validation_utils.py
@@ -14,11 +14,13 @@
 # limitations under the License.
 #
 import uuid
+from collections import Counter
 from enum import auto
 from typing import Annotated, Any
 
 from flask import Request
 from pydantic import UUID1, BaseModel, Field, StringConstraints, ValidationError, field_serializer, field_validator
+from pydantic_core import PydanticCustomError
 from strenum import StrEnum
 from werkzeug.exceptions import BadRequest, UnsupportedMediaType
 
@@ -238,7 +240,7 @@ class CreateDatasetReq(Base):
             str: Validated Base64 string
 
         Raises:
-            ValueError: For structural errors in these cases:
+            PydanticCustomError: For structural errors in these cases:
             - Missing MIME prefix header
             - Invalid MIME prefix format
             - Unsupported image MIME type
@@ -259,16 +261,16 @@ class CreateDatasetReq(Base):
         if "," in v:
             prefix, _ = v.split(",", 1)
             if not prefix.startswith("data:"):
-                raise ValueError("Invalid MIME prefix format. Must start with 'data:'")
Must start with 'data:'") mime_type = prefix[5:].split(";")[0] supported_mime_types = ["image/jpeg", "image/png"] if mime_type not in supported_mime_types: - raise ValueError(f"Unsupported MIME type. Allowed: {supported_mime_types}") + raise PydanticCustomError("format_invalid", "Unsupported MIME type. Allowed: {supported_mime_types}", {"supported_mime_types": supported_mime_types}) return v else: - raise ValueError("Missing MIME prefix. Expected format: data:;base64,") + raise PydanticCustomError("format_invalid", "Missing MIME prefix. Expected format: data:;base64,") @field_validator("embedding_model", mode="after") @classmethod @@ -288,7 +290,7 @@ class CreateDatasetReq(Base): str: Validated @ format Raises: - ValueError: For these violations: + PydanticCustomError: For these violations: - Missing @ separator - Empty model_name/provider - Invalid component structure @@ -300,15 +302,15 @@ class CreateDatasetReq(Base): Invalid: "text-embedding-3-large@" (empty provider) """ if "@" not in v: - raise ValueError("Embedding model identifier must follow @ format") + raise PydanticCustomError("format_invalid", "Embedding model identifier must follow @ format") components = v.split("@", 1) if len(components) != 2 or not all(components): - raise ValueError("Both model_name and provider must be non-empty strings") + raise PydanticCustomError("format_invalid", "Both model_name and provider must be non-empty strings") model_name, provider = components if not model_name.strip() or not provider.strip(): - raise ValueError("Model name and provider cannot be whitespace-only strings") + raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings") return v @field_validator("permission", mode="before") @@ -374,13 +376,13 @@ class CreateDatasetReq(Base): ParserConfig | None: Validated configuration object Raises: - ValueError: When serialized JSON exceeds 65,535 characters + PydanticCustomError: When serialized JSON exceeds 65,535 characters """ if v is None: return None if (json_str := v.model_dump_json()) and len(json_str) > 65535: - raise ValueError(f"Parser config exceeds size limit (max 65,535 characters). Current size: {len(json_str):,}") + raise PydanticCustomError("string_too_long", "Parser config exceeds size limit (max 65,535 characters). Current size: {actual}", {"actual": len(json_str)}) return v @@ -390,4 +392,88 @@ class UpdateDatasetReq(CreateDatasetReq): @field_serializer("dataset_id") def serialize_uuid_to_hex(self, v: uuid.UUID) -> str: + """ + Serializes a UUID version 1 object to its hexadecimal string representation. + + This field serializer specifically handles UUID version 1 objects, converting them + to their canonical 32-character hexadecimal format without hyphens. The conversion + is designed for consistent serialization in API responses and database storage. + + Args: + v (uuid.UUID1): The UUID version 1 object to serialize. Must be a valid + UUID1 instance generated by Python's uuid module. 
+
+        Returns:
+            str: 32-character lowercase hexadecimal string representation
+            Example: "550e8400e29b41d4a716446655440000"
+
+        Raises:
+            AttributeError: If input is not a proper UUID object (missing hex attribute)
+            TypeError: If input is not a UUID1 instance (when type checking is enabled)
+
+        Notes:
+            - Version 1 UUIDs contain timestamp and MAC address information
+            - The .hex property automatically converts to lowercase hexadecimal
+            - For cross-version compatibility, consider typing as uuid.UUID instead
+        """
         return v.hex
+
+
+class DeleteReq(Base):
+    ids: list[UUID1] | None = Field(...)
+
+    @field_validator("ids", mode="after")
+    def check_duplicate_ids(cls, v: list[UUID1] | None) -> list[str] | None:
+        """
+        Validates and converts a list of UUID1 objects to hexadecimal strings while checking for duplicates.
+
+        This validator implements a three-stage processing pipeline:
+        1. Null Handling - returns None for empty/null input
+        2. UUID Conversion - transforms UUID objects to hex strings
+        3. Duplicate Validation - ensures all IDs are unique
+
+        Behavior Specifications:
+        - Input: None → Returns None (indicates no operation)
+        - Input: [] → Returns [] (empty list for explicit no-op)
+        - Input: [UUID1,...] → Returns validated hex strings
+        - Duplicates: Raises formatted PydanticCustomError
+
+        Args:
+            v (list[UUID1] | None):
+                - None: Indicates no datasets should be processed
+                - Empty list: Explicit empty operation
+                - Populated list: Dataset UUIDs to validate/convert
+
+        Returns:
+            list[str] | None:
+                - None when input is None
+                - List of 32-character hex strings (lowercase, no hyphens)
+                Example: ["550e8400e29b41d4a716446655440000"]
+
+        Raises:
+            PydanticCustomError: When duplicates detected, containing:
+                - Error type: "duplicate_uuids"
+                - Template message: "Duplicate ids: '{duplicate_ids}'"
+                - Context: {"duplicate_ids": "id1, id2, ..."}
+
+        Example:
+            >>> validate([UUID("..."), UUID("...")])
+            ["2cdf0456e9a711ee8000000000000000", ...]
+
+            >>> validate([UUID("..."), UUID("...")])  # Duplicates
+            PydanticCustomError: Duplicate ids: '2cdf0456e9a711ee8000000000000000'
+        """
+        if not v:
+            return v
+
+        uuid_hex_list = [ids.hex for ids in v]
+        duplicates = [item for item, count in Counter(uuid_hex_list).items() if count > 1]
+
+        if duplicates:
+            duplicates_str = ", ".join(duplicates)
+            raise PydanticCustomError("duplicate_uuids", "Duplicate ids: '{duplicate_ids}'", {"duplicate_ids": duplicates_str})
+
+        return uuid_hex_list
+
+
+class DeleteDatasetReq(DeleteReq): ...
diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md
index 6b1e427ec..00e15eb71 100644
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -507,7 +507,7 @@ Deletes datasets by ID.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer <YOUR_API_KEY>'`
 - Body:
-  - `"ids"`: `list[string]`
+  - `"ids"`: `list[string]` or `null`
 
 ##### Request example
 
@@ -517,14 +517,17 @@ curl --request DELETE \
      --header 'Content-Type: application/json' \
      --header 'Authorization: Bearer <YOUR_API_KEY>' \
      --data '{
-     "ids": ["test_1", "test_2"]
+     "ids": ["d94a8dc02c9711f0930f7fbc369eab6d", "e94a8dc02c9711f0930f7fbc369eab6e"]
      }'
 ```
 
 ##### Request parameters
 
-- `"ids"`: (*Body parameter*), `list[string]`
-  The IDs of the datasets to delete. If it is not specified, all datasets will be deleted.
+- `"ids"`: (*Body parameter*), `list[string]` or `null`, *Required*
+  Specifies the datasets to delete:
+  - If `null`, all datasets will be deleted.
+  - If an array of IDs, only the specified datasets will be deleted.
+  - If an empty array, no datasets will be deleted.
 
 #### Response
 
diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md
index 46631bbb3..20b257a28 100644
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -200,16 +200,19 @@ dataset = rag_object.create_dataset(name="kb_1")
 ### Delete datasets
 
 ```python
-RAGFlow.delete_datasets(ids: list[str] = None)
+RAGFlow.delete_datasets(ids: list[str] | None = None)
 ```
 
 Deletes datasets by ID.
 
 #### Parameters
 
-##### ids: `list[str]`, *Required*
+##### ids: `list[str]` or `None`, *Required*
 
-The IDs of the datasets to delete. Defaults to `None`. If it is not specified, all datasets will be deleted.
+The IDs of the datasets to delete. Defaults to `None`.
+  - If `None`, all datasets will be deleted.
+  - If an array of IDs, only the specified datasets will be deleted.
+  - If an empty array, no datasets will be deleted.
 
 #### Returns
 
@@ -219,7 +222,7 @@ The IDs of the datasets to delete. Defaults to `None`. If it is not specified, a
 #### Examples
 
 ```python
-rag_object.delete_datasets(ids=["id_1","id_2"])
+rag_object.delete_datasets(ids=["d94a8dc02c9711f0930f7fbc369eab6d","e94a8dc02c9711f0930f7fbc369eab6e"])
 ```
 
 ---
 
diff --git a/sdk/python/test/test_http_api/conftest.py b/sdk/python/test/test_http_api/conftest.py
index 8dfcfbc79..1fbfe9527 100644
--- a/sdk/python/test/test_http_api/conftest.py
+++ b/sdk/python/test/test_http_api/conftest.py
@@ -76,7 +76,7 @@ def condition(_auth, _dataset_id):
 @pytest.fixture(scope="function")
 def clear_datasets(request, get_http_api_auth):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
@@ -132,7 +132,7 @@ def ragflow_tmp_dir(request, tmp_path_factory):
 @pytest.fixture(scope="class")
 def add_dataset(request, get_http_api_auth):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
@@ -143,12 +143,11 @@ def add_dataset(request, get_http_api_auth):
 @pytest.fixture(scope="function")
 def add_dataset_func(request, get_http_api_auth):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
-    dataset_ids = batch_create_datasets(get_http_api_auth, 1)
-    return dataset_ids[0]
+    return batch_create_datasets(get_http_api_auth, 1)[0]
 
 
 @pytest.fixture(scope="class")
diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py b/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py
index 8694ccead..a6490df67 100644
--- a/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/conftest.py
@@ -22,7 +22,7 @@ from common import batch_create_datasets, delete_datasets
 @pytest.fixture(scope="class")
 def add_datasets(get_http_api_auth, request):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
@@ -32,18 +32,8 @@ def add_datasets(get_http_api_auth, request):
 @pytest.fixture(scope="function")
 def add_datasets_func(get_http_api_auth, request):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
     return batch_create_datasets(get_http_api_auth, 3)
-
-
-@pytest.fixture(scope="function")
-def add_dataset_func(get_http_api_auth, request):
-    def cleanup():
-        delete_datasets(get_http_api_auth)
-
-    request.addfinalizer(cleanup)
-
-    return batch_create_datasets(get_http_api_auth, 1)[0]
diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_datasets.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_datasets.py
index 3c1ae7b54..78b4efb58 100644
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_datasets.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_delete_datasets.py
@@ -25,8 +25,8 @@ from common import (
 from libs.auth import RAGFlowHttpApiAuth
 
 
-@pytest.mark.p1
 class TestAuthorization:
+    @pytest.mark.p1
     @pytest.mark.parametrize(
         "auth, expected_code, expected_message",
         [
@@ -38,104 +38,173 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(self, auth, expected_code, expected_message):
+    def test_auth_invalid(self, auth, expected_code, expected_message):
         res = delete_datasets(auth)
         assert res["code"] == expected_code
         assert res["message"] == expected_message
 
 
-class TestDatasetsDeletion:
-    @pytest.mark.p1
+class TestRquest:
+    @pytest.mark.p3
+    def test_content_type_bad(self, get_http_api_auth):
+        BAD_CONTENT_TYPE = "text/xml"
+        res = delete_datasets(get_http_api_auth, headers={"Content-Type": BAD_CONTENT_TYPE})
+        assert res["code"] == 101, res
+        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res
+
+    @pytest.mark.p3
     @pytest.mark.parametrize(
-        "payload, expected_code, expected_message, remaining",
+        "payload, expected_message",
         [
-            (None, 0, "", 0),
-            ({"ids": []}, 0, "", 0),
-            ({"ids": ["invalid_id"]}, 102, "You don't own the dataset invalid_id", 3),
-            (
-                {"ids": ["\n!?。;!?\"'"]},
-                102,
-                "You don't own the dataset \n!?。;!?\"'",
-                3,
-            ),
-            (
-                "not json",
-                100,
-                "AttributeError(\"'str' object has no attribute 'get'\")",
-                3,
-            ),
-            (lambda r: {"ids": r[:1]}, 0, "", 2),
-            (lambda r: {"ids": r}, 0, "", 0),
+            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
+            ('"a"', "Invalid request payload: expected object, got str"),
         ],
+        ids=["malformed_json_syntax", "invalid_request_payload_type"],
     )
-    def test_basic_scenarios(self, get_http_api_auth, add_datasets_func, payload, expected_code, expected_message, remaining):
-        dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
-        res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == expected_code
-        if res["code"] != 0:
-            assert res["message"] == expected_message
+    def test_payload_bad(self, get_http_api_auth, payload, expected_message):
+        res = delete_datasets(get_http_api_auth, data=payload)
+        assert res["code"] == 101, res
+        assert res["message"] == expected_message, res
+
+    @pytest.mark.p3
+    def test_payload_unset(self, get_http_api_auth):
+        res = delete_datasets(get_http_api_auth, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+
+
+class TestCapability:
+    @pytest.mark.p3
+    def test_delete_dataset_1k(self, get_http_api_auth):
+        ids = batch_create_datasets(get_http_api_auth, 1_000)
+        res = delete_datasets(get_http_api_auth, {"ids": ids})
+        assert res["code"] == 0, res
 
         res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == remaining
-
-    @pytest.mark.p2
-    @pytest.mark.parametrize(
-        "payload",
-        [
-            lambda r: {"ids": ["invalid_id"] + r},
-            lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]},
-            lambda r: {"ids": r + ["invalid_id"]},
-        ],
-    )
-    def test_delete_partial_invalid_id(self, get_http_api_auth, add_datasets_func, payload):
-        dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
-        res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == 0
-        assert res["data"]["errors"][0] == "You don't own the dataset invalid_id"
-        assert res["data"]["success_count"] == 3
-
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
-
-    @pytest.mark.p2
-    def test_repeated_deletion(self, get_http_api_auth, add_datasets_func):
-        dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 0
-
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 102
-        assert "You don't own the dataset" in res["message"]
-
-    @pytest.mark.p2
-    def test_duplicate_deletion(self, get_http_api_auth, add_datasets_func):
-        dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids + dataset_ids})
-        assert res["code"] == 0
-        assert "Duplicate dataset ids" in res["data"]["errors"][0]
-        assert res["data"]["success_count"] == 3
-
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        assert len(res["data"]) == 0, res
 
     @pytest.mark.p3
     def test_concurrent_deletion(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 100)
+        dataset_num = 1_000
+        ids = batch_create_datasets(get_http_api_auth, dataset_num)
 
         with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
+            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(dataset_num)]
         responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
+        assert all(r["code"] == 0 for r in responses), responses
 
-    @pytest.mark.p3
-    def test_delete_10k(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 10_000)
-        res = delete_datasets(get_http_api_auth, {"ids": ids})
-        assert res["code"] == 0
+
+class TestDatasetsDelete:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "func, expected_code, expected_message, remaining",
+        [
+            (lambda r: {"ids": r[:1]}, 0, "", 2),
+            (lambda r: {"ids": r}, 0, "", 0),
+        ],
+        ids=["single_dataset", "multiple_datasets"],
+    )
+    def test_ids(self, get_http_api_auth, add_datasets_func, func, expected_code, expected_message, remaining):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == expected_code, res
 
         res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        assert len(res["data"]) == remaining, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_ids_empty(self, get_http_api_auth):
+        payload = {"ids": []}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_datasets_func")
+    def test_ids_none(self, get_http_api_auth):
+        payload = {"ids": None}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 0, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid(self, get_http_api_auth):
+        payload = {"ids": ["not_uuid"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid UUID" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_wrong_uuid(self, get_http_api_auth):
+        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "func",
+        [
+            lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r},
+            lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]},
+            lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
+        ],
+    )
+    def test_ids_partial_invalid(self, get_http_api_auth, add_datasets_func, func):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 3, res
+
+    @pytest.mark.p2
+    def test_ids_duplicate(self, get_http_api_auth, add_datasets_func):
+        dataset_ids = add_datasets_func
+        payload = {"ids": dataset_ids + dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Duplicate ids:" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 3, res
+
+    @pytest.mark.p2
+    def test_repeated_delete(self, get_http_api_auth, add_datasets_func):
+        dataset_ids = add_datasets_func
+        payload = {"ids": dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_field_unsupported(self, get_http_api_auth):
+        payload = {"unknown_field": "unknown_field"}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
index 1ff6a7595..40d2dc01a 100644
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
@@ -77,6 +77,13 @@ class TestRquest:
         assert res["code"] == 101, res
         assert res["message"] == "No properties were modified", res
 
+    @pytest.mark.p3
+    def test_payload_unset(self, get_http_api_auth, add_dataset_func):
+        dataset_id = add_dataset_func
+        res = update_dataset(get_http_api_auth, dataset_id, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+
 
 class TestCapability:
     @pytest.mark.p3