Refa: HTTP API delete dataset / test cases / docs (#7657)

### What problem does this PR solve?

This PR introduces Pydantic-based validation for the delete dataset HTTP
API, improving code clarity and robustness. Key changes include the following (a short sketch of the new validation flow follows the list):

1. Pydantic validation: the request body is parsed with a dedicated `DeleteDatasetReq` model (`ids` must be present and be either `null` or a list of UUIDs; duplicate IDs are rejected).
2. Error handling: permission failures and database errors return structured error results, and partial failures report a success count plus error details.
3. Test updates: the HTTP API delete-dataset test cases are reorganized (request validation, capability, and deletion scenarios) and extended.
4. Documentation updates: the HTTP API and Python SDK references document the new `ids` semantics (`null`, a list of IDs, or an empty list).
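
The sketch below is illustrative only (it is not the handler code and assumes Pydantic v2); it shows the `ids` semantics that the new `DeleteDatasetReq` model enforces. The real model, shown in the `validation_utils.py` diff further down, additionally converts UUIDs to hex strings and rejects duplicates.

```python
# Illustrative sketch only (assumes Pydantic v2), mirroring the documented `ids` semantics:
# null -> delete all datasets, [] -> delete nothing, [uuid, ...] -> delete the listed datasets.
from uuid import uuid1

from pydantic import UUID1, BaseModel, Field, ValidationError


class DeleteDatasetReqSketch(BaseModel):
    ids: list[UUID1] | None = Field(...)  # the key must be present, but may be null


print(DeleteDatasetReqSketch.model_validate({"ids": None}).ids)           # None -> delete all
print(DeleteDatasetReqSketch.model_validate({"ids": []}).ids)             # []   -> no-op
print(DeleteDatasetReqSketch.model_validate({"ids": [uuid1().hex]}).ids)  # list of UUIDs

try:
    DeleteDatasetReqSketch.model_validate({})  # omitting "ids" entirely is rejected
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "missing"
```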

### Type of change

- [x] Documentation Update
- [x] Refactoring
liu an 2025-05-16 10:16:43 +08:00 committed by GitHub
parent 0e9ff8c1f7
commit ae8b628f0a
8 changed files with 341 additions and 173 deletions


```diff
@@ -20,7 +20,6 @@ import logging
 from flask import request
 from peewee import OperationalError
 
-from api import settings
 from api.db import FileSource, StatusEnum
 from api.db.db_models import File
 from api.db.services.document_service import DocumentService
@@ -30,7 +29,6 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.user_service import TenantService
 from api.utils import get_uuid
 from api.utils.api_utils import (
-    check_duplicate_ids,
    deep_merge,
     get_error_argument_result,
     get_error_data_result,
@@ -39,7 +37,7 @@ from api.utils.api_utils import (
     token_required,
     verify_embedding_availability,
 )
-from api.utils.validation_utils import CreateDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
+from api.utils.validation_utils import CreateDatasetReq, DeleteDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
 
 
 @manager.route("/datasets", methods=["POST"])  # noqa: F821
@@ -190,72 +188,85 @@ def delete(tenant_id):
           required: true
           schema:
             type: object
+            required:
+              - ids
             properties:
               ids:
-                type: array
+                type: array or null
                 items:
                   type: string
-                description: List of dataset IDs to delete.
+                description: |
+                  Specifies the datasets to delete:
+                  - If `null`, all datasets will be deleted.
+                  - If an array of IDs, only the specified datasets will be deleted.
+                  - If an empty array, no datasets will be deleted.
     responses:
         200:
             description: Successful operation.
             schema:
              type: object
    """
+    req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
+    if err is not None:
+        return get_error_argument_result(err)
+
+    kb_id_instance_pairs = []
+    if req["ids"] is None:
+        try:
+            kbs = KnowledgebaseService.query(tenant_id=tenant_id)
+            for kb in kbs:
+                kb_id_instance_pairs.append((kb.id, kb))
+        except OperationalError as e:
+            logging.exception(e)
+            return get_error_data_result(message="Database operation failed")
+    else:
+        error_kb_ids = []
+        for kb_id in req["ids"]:
+            try:
+                kb = KnowledgebaseService.get_or_none(id=kb_id, tenant_id=tenant_id)
+                if kb is None:
+                    error_kb_ids.append(kb_id)
+                    continue
+                kb_id_instance_pairs.append((kb_id, kb))
+            except OperationalError as e:
+                logging.exception(e)
+                return get_error_data_result(message="Database operation failed")
+        if len(error_kb_ids) > 0:
+            return get_error_data_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
+
     errors = []
     success_count = 0
-    req = request.json
-    if not req:
-        ids = None
-    else:
-        ids = req.get("ids")
-    if not ids:
-        id_list = []
-        kbs = KnowledgebaseService.query(tenant_id=tenant_id)
-        for kb in kbs:
-            id_list.append(kb.id)
-    else:
-        id_list = ids
-    unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "dataset")
-    id_list = unique_id_list
-
-    for id in id_list:
-        kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
-        if not kbs:
-            errors.append(f"You don't own the dataset {id}")
-            continue
-        for doc in DocumentService.query(kb_id=id):
-            if not DocumentService.remove_document(doc, tenant_id):
-                errors.append(f"Remove document error for dataset {id}")
-                continue
-            f2d = File2DocumentService.get_by_document_id(doc.id)
-            FileService.filter_delete(
-                [
-                    File.source_type == FileSource.KNOWLEDGEBASE,
-                    File.id == f2d[0].file_id,
-                ]
-            )
-            File2DocumentService.delete_by_document_id(doc.id)
-        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
-        if not KnowledgebaseService.delete_by_id(id):
-            errors.append(f"Delete dataset error for {id}")
-            continue
-        success_count += 1
-    if errors:
-        if success_count > 0:
-            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} datasets with {len(errors)} errors")
-        else:
-            return get_error_data_result(message="; ".join(errors))
-    if duplicate_messages:
-        if success_count > 0:
-            return get_result(
-                message=f"Partially deleted {success_count} datasets with {len(duplicate_messages)} errors",
-                data={"success_count": success_count, "errors": duplicate_messages},
-            )
-        else:
-            return get_error_data_result(message=";".join(duplicate_messages))
-    return get_result(code=settings.RetCode.SUCCESS)
+    for kb_id, kb in kb_id_instance_pairs:
+        try:
+            for doc in DocumentService.query(kb_id=kb_id):
+                if not DocumentService.remove_document(doc, tenant_id):
+                    errors.append(f"Remove document '{doc.id}' error for dataset '{kb_id}'")
+                    continue
+                f2d = File2DocumentService.get_by_document_id(doc.id)
+                FileService.filter_delete(
+                    [
+                        File.source_type == FileSource.KNOWLEDGEBASE,
+                        File.id == f2d[0].file_id,
+                    ]
+                )
+                File2DocumentService.delete_by_document_id(doc.id)
+            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
+            if not KnowledgebaseService.delete_by_id(kb_id):
+                errors.append(f"Delete dataset error for {kb_id}")
+                continue
+            success_count += 1
+        except OperationalError as e:
+            logging.exception(e)
+            return get_error_data_result(message="Database operation failed")
+
+    if not errors:
+        return get_result()
+
+    error_message = f"Successfully deleted {success_count} datasets, {len(errors)} failed. Details: {'; '.join(errors)[:128]}..."
+    if success_count == 0:
+        return get_error_data_result(message=error_message)
+
+    return get_result(data={"success_count": success_count, "errors": errors[:5]}, message=error_message)
 
 
 @manager.route("/datasets/<dataset_id>", methods=["PUT"])  # noqa: F821
@@ -373,7 +384,7 @@ def update(tenant_id, dataset_id):
         logging.exception(e)
         return get_error_data_result(message="Database operation failed")
 
-    return get_result(code=settings.RetCode.SUCCESS)
+    return get_result()
 
 
 @manager.route("/datasets", methods=["GET"])  # noqa: F821
```


```diff
@@ -14,11 +14,13 @@
 # limitations under the License.
 #
 import uuid
+from collections import Counter
 from enum import auto
 from typing import Annotated, Any
 
 from flask import Request
 from pydantic import UUID1, BaseModel, Field, StringConstraints, ValidationError, field_serializer, field_validator
+from pydantic_core import PydanticCustomError
 from strenum import StrEnum
 from werkzeug.exceptions import BadRequest, UnsupportedMediaType
@@ -238,7 +240,7 @@ class CreateDatasetReq(Base):
             str: Validated Base64 string
 
         Raises:
-            ValueError: For structural errors in these cases:
+            PydanticCustomError: For structural errors in these cases:
                 - Missing MIME prefix header
                 - Invalid MIME prefix format
                 - Unsupported image MIME type
@@ -259,16 +261,16 @@ class CreateDatasetReq(Base):
        if "," in v:
             prefix, _ = v.split(",", 1)
             if not prefix.startswith("data:"):
-                raise ValueError("Invalid MIME prefix format. Must start with 'data:'")
+                raise PydanticCustomError("format_invalid", "Invalid MIME prefix format. Must start with 'data:'")
 
             mime_type = prefix[5:].split(";")[0]
             supported_mime_types = ["image/jpeg", "image/png"]
             if mime_type not in supported_mime_types:
-                raise ValueError(f"Unsupported MIME type. Allowed: {supported_mime_types}")
+                raise PydanticCustomError("format_invalid", "Unsupported MIME type. Allowed: {supported_mime_types}", {"supported_mime_types": supported_mime_types})
 
             return v
         else:
-            raise ValueError("Missing MIME prefix. Expected format: data:<mime>;base64,<data>")
+            raise PydanticCustomError("format_invalid", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>")
 
     @field_validator("embedding_model", mode="after")
     @classmethod
@@ -288,7 +290,7 @@ class CreateDatasetReq(Base):
             str: Validated <model_name>@<provider> format
 
         Raises:
-            ValueError: For these violations:
+            PydanticCustomError: For these violations:
                 - Missing @ separator
                 - Empty model_name/provider
                 - Invalid component structure
@@ -300,15 +302,15 @@ class CreateDatasetReq(Base):
                 Invalid: "text-embedding-3-large@" (empty provider)
         """
         if "@" not in v:
-            raise ValueError("Embedding model identifier must follow <model_name>@<provider> format")
+            raise PydanticCustomError("format_invalid", "Embedding model identifier must follow <model_name>@<provider> format")
 
         components = v.split("@", 1)
         if len(components) != 2 or not all(components):
-            raise ValueError("Both model_name and provider must be non-empty strings")
+            raise PydanticCustomError("format_invalid", "Both model_name and provider must be non-empty strings")
 
         model_name, provider = components
         if not model_name.strip() or not provider.strip():
-            raise ValueError("Model name and provider cannot be whitespace-only strings")
+            raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings")
 
         return v
 
     @field_validator("permission", mode="before")
@@ -374,13 +376,13 @@ class CreateDatasetReq(Base):
             ParserConfig | None: Validated configuration object
 
         Raises:
-            ValueError: When serialized JSON exceeds 65,535 characters
+            PydanticCustomError: When serialized JSON exceeds 65,535 characters
         """
         if v is None:
             return None
 
         if (json_str := v.model_dump_json()) and len(json_str) > 65535:
-            raise ValueError(f"Parser config exceeds size limit (max 65,535 characters). Current size: {len(json_str):,}")
+            raise PydanticCustomError("string_too_long", "Parser config exceeds size limit (max 65,535 characters). Current size: {actual}", {"actual": len(json_str)})
 
         return v
@@ -390,4 +392,88 @@ class UpdateDatasetReq(CreateDatasetReq):
 
     @field_serializer("dataset_id")
     def serialize_uuid_to_hex(self, v: uuid.UUID) -> str:
+        """
+        Serializes a UUID version 1 object to its hexadecimal string representation.
+
+        This field serializer specifically handles UUID version 1 objects, converting them
+        to their canonical 32-character hexadecimal format without hyphens. The conversion
+        is designed for consistent serialization in API responses and database storage.
+
+        Args:
+            v (uuid.UUID1): The UUID version 1 object to serialize. Must be a valid
+                UUID1 instance generated by Python's uuid module.
+
+        Returns:
+            str: 32-character lowercase hexadecimal string representation
+                Example: "550e8400e29b41d4a716446655440000"
+
+        Raises:
+            AttributeError: If input is not a proper UUID object (missing hex attribute)
+            TypeError: If input is not a UUID1 instance (when type checking is enabled)
+
+        Notes:
+            - Version 1 UUIDs contain timestamp and MAC address information
+            - The .hex property automatically converts to lowercase hexadecimal
+            - For cross-version compatibility, consider typing as uuid.UUID instead
+        """
         return v.hex
+
+
+class DeleteReq(Base):
+    ids: list[UUID1] | None = Field(...)
+
+    @field_validator("ids", mode="after")
+    def check_duplicate_ids(cls, v: list[UUID1] | None) -> list[str] | None:
+        """
+        Validates and converts a list of UUID1 objects to hexadecimal strings while checking for duplicates.
+
+        This validator implements a three-stage processing pipeline:
+        1. Null Handling - returns None for empty/null input
+        2. UUID Conversion - transforms UUID objects to hex strings
+        3. Duplicate Validation - ensures all IDs are unique
+
+        Behavior Specifications:
+        - Input: None → Returns None (indicates no operation)
+        - Input: [] → Returns [] (empty list for explicit no-op)
+        - Input: [UUID1, ...] → Returns validated hex strings
+        - Duplicates: Raises formatted PydanticCustomError
+
+        Args:
+            v (list[UUID1] | None):
+                - None: Indicates no datasets should be processed
+                - Empty list: Explicit empty operation
+                - Populated list: Dataset UUIDs to validate/convert
+
+        Returns:
+            list[str] | None:
+                - None when input is None
+                - List of 32-character hex strings (lowercase, no hyphens)
+                  Example: ["550e8400e29b41d4a716446655440000"]
+
+        Raises:
+            PydanticCustomError: When duplicates detected, containing:
+                - Error type: "duplicate_uuids"
+                - Template message: "Duplicate ids: '{duplicate_ids}'"
+                - Context: {"duplicate_ids": "id1, id2, ..."}
+
+        Example:
+            >>> validate([UUID("..."), UUID("...")])
+            ["2cdf0456e9a711ee8000000000000000", ...]
+
+            >>> validate([UUID("..."), UUID("...")])  # Duplicates
+            PydanticCustomError: Duplicate ids: '2cdf0456e9a711ee8000000000000000'
+        """
+        if not v:
+            return v
+
+        uuid_hex_list = [ids.hex for ids in v]
+        duplicates = [item for item, count in Counter(uuid_hex_list).items() if count > 1]
+
+        if duplicates:
+            duplicates_str = ", ".join(duplicates)
+            raise PydanticCustomError("duplicate_uuids", "Duplicate ids: '{duplicate_ids}'", {"duplicate_ids": duplicates_str})
+
+        return uuid_hex_list
+
+
+class DeleteDatasetReq(DeleteReq): ...
```
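
A quick usage illustration of the duplicate check above (a sketch, not part of the diff; it assumes the `DeleteReq` class is importable from `api.utils.validation_utils` as added in this PR):

```python
# Sketch: exercising DeleteReq as defined in the diff above.
from uuid import uuid1

from pydantic import ValidationError

from api.utils.validation_utils import DeleteReq  # added by this PR

a, b = uuid1(), uuid1()

print(DeleteReq(ids=[a, b]).ids)  # two distinct IDs -> list of 32-char hex strings
print(DeleteReq(ids=None).ids)    # None -> interpreted by the endpoint as "delete all"

try:
    DeleteReq(ids=[a, a])         # the same ID twice
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "duplicate_uuids"
```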


````diff
@@ -507,7 +507,7 @@ Deletes datasets by ID.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer <YOUR_API_KEY>'`
 - Body:
-  - `"ids"`: `list[string]`
+  - `"ids"`: `list[string]` or `null`
 
 ##### Request example
 
@@ -517,14 +517,17 @@ curl --request DELETE \
      --header 'Content-Type: application/json' \
      --header 'Authorization: Bearer <YOUR_API_KEY>' \
      --data '{
-     "ids": ["test_1", "test_2"]
+     "ids": ["d94a8dc02c9711f0930f7fbc369eab6d", "e94a8dc02c9711f0930f7fbc369eab6e"]
      }'
 ```
 
 ##### Request parameters
 
-- `"ids"`: (*Body parameter*), `list[string]`
-  The IDs of the datasets to delete. If it is not specified, all datasets will be deleted.
+- `"ids"`: (*Body parameter*), `list[string]` or `null`, *Required*
+  Specifies the datasets to delete:
+  - If `null`, all datasets will be deleted.
+  - If an array of IDs, only the specified datasets will be deleted.
+  - If an empty array, no datasets will be deleted.
 
 #### Response
````


````diff
@@ -200,16 +200,19 @@ dataset = rag_object.create_dataset(name="kb_1")
 ### Delete datasets
 
 ```python
-RAGFlow.delete_datasets(ids: list[str] = None)
+RAGFlow.delete_datasets(ids: list[str] | None = None)
 ```
 
 Deletes datasets by ID.
 
 #### Parameters
 
-##### ids: `list[str]`, *Required*
+##### ids: `list[str]` or `None`, *Required*
 
-The IDs of the datasets to delete. Defaults to `None`. If it is not specified, all datasets will be deleted.
+The IDs of the datasets to delete. Defaults to `None`.
+- If `None`, all datasets will be deleted.
+- If an array of IDs, only the specified datasets will be deleted.
+- If an empty array, no datasets will be deleted.
 
 #### Returns
 
@@ -219,7 +222,7 @@ The IDs of the datasets to delete. Defaults to `None`. If it is not specified, a
 #### Examples
 
 ```python
-rag_object.delete_datasets(ids=["id_1","id_2"])
+rag_object.delete_datasets(ids=["d94a8dc02c9711f0930f7fbc369eab6d","e94a8dc02c9711f0930f7fbc369eab6e"])
 ```
 
 ---
````
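
A brief usage sketch of the semantics documented above (not part of the diff; it assumes an initialized `RAGFlow` client named `rag_object`):

```python
# Usage sketch based on the parameter description above.
rag_object.delete_datasets(ids=None)  # delete every dataset owned by the caller
rag_object.delete_datasets(ids=[])    # explicit no-op: nothing is deleted
```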


```diff
@@ -76,7 +76,7 @@ def condition(_auth, _dataset_id):
 @pytest.fixture(scope="function")
 def clear_datasets(request, get_http_api_auth):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
@@ -132,7 +132,7 @@ def ragflow_tmp_dir(request, tmp_path_factory):
 @pytest.fixture(scope="class")
 def add_dataset(request, get_http_api_auth):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
@@ -143,12 +143,11 @@ def add_dataset(request, get_http_api_auth):
 @pytest.fixture(scope="function")
 def add_dataset_func(request, get_http_api_auth):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
-    dataset_ids = batch_create_datasets(get_http_api_auth, 1)
-    return dataset_ids[0]
+    return batch_create_datasets(get_http_api_auth, 1)[0]
 
 
 @pytest.fixture(scope="class")
```


```diff
@@ -22,7 +22,7 @@ from common import batch_create_datasets, delete_datasets
 @pytest.fixture(scope="class")
 def add_datasets(get_http_api_auth, request):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
@@ -32,18 +32,8 @@ def add_datasets(get_http_api_auth, request):
 @pytest.fixture(scope="function")
 def add_datasets_func(get_http_api_auth, request):
     def cleanup():
-        delete_datasets(get_http_api_auth)
+        delete_datasets(get_http_api_auth, {"ids": None})
 
     request.addfinalizer(cleanup)
 
     return batch_create_datasets(get_http_api_auth, 3)
-
-
-@pytest.fixture(scope="function")
-def add_dataset_func(get_http_api_auth, request):
-    def cleanup():
-        delete_datasets(get_http_api_auth)
-
-    request.addfinalizer(cleanup)
-
-    return batch_create_datasets(get_http_api_auth, 1)[0]
```


```diff
@@ -25,8 +25,8 @@ from common import (
 from libs.auth import RAGFlowHttpApiAuth
 
 
+@pytest.mark.p1
 class TestAuthorization:
-    @pytest.mark.p1
     @pytest.mark.parametrize(
         "auth, expected_code, expected_message",
         [
@@ -38,104 +38,173 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(self, auth, expected_code, expected_message):
+    def test_auth_invalid(self, auth, expected_code, expected_message):
         res = delete_datasets(auth)
         assert res["code"] == expected_code
         assert res["message"] == expected_message
 
 
-class TestDatasetsDeletion:
-    @pytest.mark.p1
+class TestRquest:
+    @pytest.mark.p3
+    def test_content_type_bad(self, get_http_api_auth):
+        BAD_CONTENT_TYPE = "text/xml"
+        res = delete_datasets(get_http_api_auth, headers={"Content-Type": BAD_CONTENT_TYPE})
+        assert res["code"] == 101, res
+        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res
+
+    @pytest.mark.p3
     @pytest.mark.parametrize(
-        "payload, expected_code, expected_message, remaining",
+        "payload, expected_message",
         [
-            (None, 0, "", 0),
-            ({"ids": []}, 0, "", 0),
-            ({"ids": ["invalid_id"]}, 102, "You don't own the dataset invalid_id", 3),
-            (
-                {"ids": ["\n!?。;!?\"'"]},
-                102,
-                "You don't own the dataset \n!?。;!?\"'",
-                3,
-            ),
-            (
-                "not json",
-                100,
-                "AttributeError(\"'str' object has no attribute 'get'\")",
-                3,
-            ),
-            (lambda r: {"ids": r[:1]}, 0, "", 2),
-            (lambda r: {"ids": r}, 0, "", 0),
+            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
+            ('"a"', "Invalid request payload: expected object, got str"),
         ],
+        ids=["malformed_json_syntax", "invalid_request_payload_type"],
     )
-    def test_basic_scenarios(self, get_http_api_auth, add_datasets_func, payload, expected_code, expected_message, remaining):
-        dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
-        res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == expected_code
-        if res["code"] != 0:
-            assert res["message"] == expected_message
+    def test_payload_bad(self, get_http_api_auth, payload, expected_message):
+        res = delete_datasets(get_http_api_auth, data=payload)
+        assert res["code"] == 101, res
+        assert res["message"] == expected_message, res
 
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == remaining
+    @pytest.mark.p3
+    def test_payload_unset(self, get_http_api_auth):
+        res = delete_datasets(get_http_api_auth, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
 
-    @pytest.mark.p2
-    @pytest.mark.parametrize(
-        "payload",
-        [
-            lambda r: {"ids": ["invalid_id"] + r},
-            lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]},
-            lambda r: {"ids": r + ["invalid_id"]},
-        ],
-    )
-    def test_delete_partial_invalid_id(self, get_http_api_auth, add_datasets_func, payload):
-        dataset_ids = add_datasets_func
-        if callable(payload):
-            payload = payload(dataset_ids)
-        res = delete_datasets(get_http_api_auth, payload)
-        assert res["code"] == 0
-        assert res["data"]["errors"][0] == "You don't own the dataset invalid_id"
-        assert res["data"]["success_count"] == 3
 
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+class TestCapability:
+    @pytest.mark.p3
+    def test_delete_dataset_1k(self, get_http_api_auth):
+        ids = batch_create_datasets(get_http_api_auth, 1_000)
+        res = delete_datasets(get_http_api_auth, {"ids": ids})
+        assert res["code"] == 0, res
 
-    @pytest.mark.p2
-    def test_repeated_deletion(self, get_http_api_auth, add_datasets_func):
-        dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 0
-
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
-        assert res["code"] == 102
-        assert "You don't own the dataset" in res["message"]
-
-    @pytest.mark.p2
-    def test_duplicate_deletion(self, get_http_api_auth, add_datasets_func):
-        dataset_ids = add_datasets_func
-        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids + dataset_ids})
-        assert res["code"] == 0
-        assert "Duplicate dataset ids" in res["data"]["errors"][0]
-        assert res["data"]["success_count"] == 3
-
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 0, res
 
     @pytest.mark.p3
     def test_concurrent_deletion(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 100)
+        dataset_num = 1_000
+        ids = batch_create_datasets(get_http_api_auth, dataset_num)
 
         with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
+            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(dataset_num)]
         responses = [f.result() for f in futures]
-        assert all(r["code"] == 0 for r in responses)
+        assert all(r["code"] == 0 for r in responses), responses
 
-    @pytest.mark.p3
-    def test_delete_10k(self, get_http_api_auth):
-        ids = batch_create_datasets(get_http_api_auth, 10_000)
-        res = delete_datasets(get_http_api_auth, {"ids": ids})
-        assert res["code"] == 0
 
-        res = list_datasets(get_http_api_auth)
-        assert len(res["data"]) == 0
+class TestDatasetsDelete:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "func, expected_code, expected_message, remaining",
+        [
+            (lambda r: {"ids": r[:1]}, 0, "", 2),
+            (lambda r: {"ids": r}, 0, "", 0),
+        ],
+        ids=["single_dataset", "multiple_datasets"],
+    )
+    def test_ids(self, get_http_api_auth, add_datasets_func, func, expected_code, expected_message, remaining):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == expected_code, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == remaining, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_ids_empty(self, get_http_api_auth):
+        payload = {"ids": []}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p1
+    @pytest.mark.usefixtures("add_datasets_func")
+    def test_ids_none(self, get_http_api_auth):
+        payload = {"ids": None}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 0, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_not_uuid(self, get_http_api_auth):
+        payload = {"ids": ["not_uuid"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Input should be a valid UUID" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_wrong_uuid(self, get_http_api_auth):
+        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "func",
+        [
+            lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r},
+            lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]},
+            lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
+        ],
+    )
+    def test_ids_partial_invalid(self, get_http_api_auth, add_datasets_func, func):
+        dataset_ids = add_datasets_func
+        if callable(func):
+            payload = func(dataset_ids)
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 3, res
+
+    @pytest.mark.p2
+    def test_ids_duplicate(self, get_http_api_auth, add_datasets_func):
+        dataset_ids = add_datasets_func
+        payload = {"ids": dataset_ids + dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Duplicate ids:" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 3, res
+
+    @pytest.mark.p2
+    def test_repeated_delete(self, get_http_api_auth, add_datasets_func):
+        dataset_ids = add_datasets_func
+        payload = {"ids": dataset_ids}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 0, res
+
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission for dataset" in res["message"], res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_field_unsupported(self, get_http_api_auth):
+        payload = {"unknown_field": "unknown_field"}
+        res = delete_datasets(get_http_api_auth, payload)
+        assert res["code"] == 101, res
+        assert "Extra inputs are not permitted" in res["message"], res
+
+        res = list_datasets(get_http_api_auth)
+        assert len(res["data"]) == 1, res
```


```diff
@@ -77,6 +77,13 @@ class TestRquest:
         assert res["code"] == 101, res
         assert res["message"] == "No properties were modified", res
 
+    @pytest.mark.p3
+    def test_payload_unset(self, get_http_api_auth, add_dataset_func):
+        dataset_id = add_dataset_func
+        res = update_dataset(get_http_api_auth, dataset_id, None)
+        assert res["code"] == 101, res
+        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res
+
 
 class TestCapability:
     @pytest.mark.p3
```