Refa: HTTP API delete dataset / test cases / docs (#7657)
### What problem does this PR solve?

This PR introduces Pydantic-based validation for the delete dataset HTTP API, improving code clarity and robustness. Key changes include:

1. Pydantic Validation
2. Error Handling
3. Test Updates
4. Documentation Updates

### Type of change

- [x] Documentation Update
- [x] Refactoring
commit ae8b628f0a (parent 0e9ff8c1f7)
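The diff below routes the endpoint through `validate_and_parse_json_request` with a new `DeleteDatasetReq` model, so malformed payloads are rejected before any database work. A quick client-side way to observe the new behaviour (illustrative only: the base URL, port, and API key are placeholders, and the expected output is taken from the updated tests):

```python
import requests

BASE_URL = "http://localhost:9380/api/v1"  # assumption: adjust to your RAGFlow deployment
API_KEY = "<YOUR_API_KEY>"                 # placeholder

# A non-UUID id is now rejected by Pydantic validation instead of reaching the database.
resp = requests.delete(
    f"{BASE_URL}/datasets",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"ids": ["not_uuid"]},
)
print(resp.json())  # per the updated tests: code 101, message containing "Input should be a valid UUID"
```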
@@ -20,7 +20,6 @@ import logging
from flask import request
from peewee import OperationalError

from api import settings
from api.db import FileSource, StatusEnum
from api.db.db_models import File
from api.db.services.document_service import DocumentService
@@ -30,7 +29,6 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import TenantService
from api.utils import get_uuid
from api.utils.api_utils import (
    check_duplicate_ids,
    deep_merge,
    get_error_argument_result,
    get_error_data_result,
@@ -39,7 +37,7 @@ from api.utils.api_utils import (
    token_required,
    verify_embedding_availability,
)
from api.utils.validation_utils import CreateDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
from api.utils.validation_utils import CreateDatasetReq, DeleteDatasetReq, UpdateDatasetReq, validate_and_parse_json_request


@manager.route("/datasets", methods=["POST"])  # noqa: F821
@@ -190,72 +188,85 @@ def delete(tenant_id):
        required: true
        schema:
          type: object
          required:
            - ids
          properties:
            ids:
              type: array
              type: array or null
              items:
                type: string
              description: List of dataset IDs to delete.
              description: |
                Specifies the datasets to delete:
                - If `null`, all datasets will be deleted.
                - If an array of IDs, only the specified datasets will be deleted.
                - If an empty array, no datasets will be deleted.
    responses:
        200:
            description: Successful operation.
            schema:
              type: object
    """
    req, err = validate_and_parse_json_request(request, DeleteDatasetReq)
    if err is not None:
        return get_error_argument_result(err)

    kb_id_instance_pairs = []
    if req["ids"] is None:
        try:
            kbs = KnowledgebaseService.query(tenant_id=tenant_id)
            for kb in kbs:
                kb_id_instance_pairs.append((kb.id, kb))
        except OperationalError as e:
            logging.exception(e)
            return get_error_data_result(message="Database operation failed")
    else:
        error_kb_ids = []
        for kb_id in req["ids"]:
            try:
                kb = KnowledgebaseService.get_or_none(id=kb_id, tenant_id=tenant_id)
                if kb is None:
                    error_kb_ids.append(kb_id)
                    continue
                kb_id_instance_pairs.append((kb_id, kb))
            except OperationalError as e:
                logging.exception(e)
                return get_error_data_result(message="Database operation failed")
        if len(error_kb_ids) > 0:
            return get_error_data_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")

    errors = []
    success_count = 0
    req = request.json
    if not req:
        ids = None
    else:
        ids = req.get("ids")
    if not ids:
        id_list = []
        kbs = KnowledgebaseService.query(tenant_id=tenant_id)
        for kb in kbs:
            id_list.append(kb.id)
    else:
        id_list = ids
    unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "dataset")
    id_list = unique_id_list

    for id in id_list:
        kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
        if not kbs:
            errors.append(f"You don't own the dataset {id}")
            continue
        for doc in DocumentService.query(kb_id=id):
            if not DocumentService.remove_document(doc, tenant_id):
                errors.append(f"Remove document error for dataset {id}")
    for kb_id, kb in kb_id_instance_pairs:
        try:
            for doc in DocumentService.query(kb_id=kb_id):
                if not DocumentService.remove_document(doc, tenant_id):
                    errors.append(f"Remove document '{doc.id}' error for dataset '{kb_id}'")
                    continue
                f2d = File2DocumentService.get_by_document_id(doc.id)
                FileService.filter_delete(
                    [
                        File.source_type == FileSource.KNOWLEDGEBASE,
                        File.id == f2d[0].file_id,
                    ]
                )
                File2DocumentService.delete_by_document_id(doc.id)
            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kb.name])
            if not KnowledgebaseService.delete_by_id(kb_id):
                errors.append(f"Delete dataset error for {kb_id}")
                continue
            f2d = File2DocumentService.get_by_document_id(doc.id)
            FileService.filter_delete(
                [
                    File.source_type == FileSource.KNOWLEDGEBASE,
                    File.id == f2d[0].file_id,
                ]
            )
            File2DocumentService.delete_by_document_id(doc.id)
        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
        if not KnowledgebaseService.delete_by_id(id):
            errors.append(f"Delete dataset error for {id}")
            continue
        success_count += 1
    if errors:
        if success_count > 0:
            return get_result(data={"success_count": success_count, "errors": errors}, message=f"Partially deleted {success_count} datasets with {len(errors)} errors")
        else:
            return get_error_data_result(message="; ".join(errors))
    if duplicate_messages:
        if success_count > 0:
            return get_result(
                message=f"Partially deleted {success_count} datasets with {len(duplicate_messages)} errors",
                data={"success_count": success_count, "errors": duplicate_messages},
            )
        else:
            return get_error_data_result(message=";".join(duplicate_messages))
    return get_result(code=settings.RetCode.SUCCESS)
            success_count += 1
        except OperationalError as e:
            logging.exception(e)
            return get_error_data_result(message="Database operation failed")

    if not errors:
        return get_result()

    error_message = f"Successfully deleted {success_count} datasets, {len(errors)} failed. Details: {'; '.join(errors)[:128]}..."
    if success_count == 0:
        return get_error_data_result(message=error_message)

    return get_result(data={"success_count": success_count, "errors": errors[:5]}, message=error_message)


@manager.route("/datasets/<dataset_id>", methods=["PUT"])  # noqa: F821
@@ -373,7 +384,7 @@ def update(tenant_id, dataset_id):
        logging.exception(e)
        return get_error_data_result(message="Database operation failed")

    return get_result(code=settings.RetCode.SUCCESS)
    return get_result()


@manager.route("/datasets", methods=["GET"])  # noqa: F821
@@ -14,11 +14,13 @@
# limitations under the License.
#
import uuid
from collections import Counter
from enum import auto
from typing import Annotated, Any

from flask import Request
from pydantic import UUID1, BaseModel, Field, StringConstraints, ValidationError, field_serializer, field_validator
from pydantic_core import PydanticCustomError
from strenum import StrEnum
from werkzeug.exceptions import BadRequest, UnsupportedMediaType

@@ -238,7 +240,7 @@ class CreateDatasetReq(Base):
            str: Validated Base64 string

        Raises:
            ValueError: For structural errors in these cases:
            PydanticCustomError: For structural errors in these cases:
              - Missing MIME prefix header
              - Invalid MIME prefix format
              - Unsupported image MIME type
@@ -259,16 +261,16 @@
        if "," in v:
            prefix, _ = v.split(",", 1)
            if not prefix.startswith("data:"):
                raise ValueError("Invalid MIME prefix format. Must start with 'data:'")
                raise PydanticCustomError("format_invalid", "Invalid MIME prefix format. Must start with 'data:'")

            mime_type = prefix[5:].split(";")[0]
            supported_mime_types = ["image/jpeg", "image/png"]
            if mime_type not in supported_mime_types:
                raise ValueError(f"Unsupported MIME type. Allowed: {supported_mime_types}")
                raise PydanticCustomError("format_invalid", "Unsupported MIME type. Allowed: {supported_mime_types}", {"supported_mime_types": supported_mime_types})

            return v
        else:
            raise ValueError("Missing MIME prefix. Expected format: data:<mime>;base64,<data>")
            raise PydanticCustomError("format_invalid", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>")

    @field_validator("embedding_model", mode="after")
    @classmethod
@@ -288,7 +290,7 @@ class CreateDatasetReq(Base):
            str: Validated <model_name>@<provider> format

        Raises:
            ValueError: For these violations:
            PydanticCustomError: For these violations:
              - Missing @ separator
              - Empty model_name/provider
              - Invalid component structure
@@ -300,15 +302,15 @@
            Invalid: "text-embedding-3-large@" (empty provider)
        """
        if "@" not in v:
            raise ValueError("Embedding model identifier must follow <model_name>@<provider> format")
            raise PydanticCustomError("format_invalid", "Embedding model identifier must follow <model_name>@<provider> format")

        components = v.split("@", 1)
        if len(components) != 2 or not all(components):
            raise ValueError("Both model_name and provider must be non-empty strings")
            raise PydanticCustomError("format_invalid", "Both model_name and provider must be non-empty strings")

        model_name, provider = components
        if not model_name.strip() or not provider.strip():
            raise ValueError("Model name and provider cannot be whitespace-only strings")
            raise PydanticCustomError("format_invalid", "Model name and provider cannot be whitespace-only strings")
        return v

    @field_validator("permission", mode="before")
@@ -374,13 +376,13 @@ class CreateDatasetReq(Base):
            ParserConfig | None: Validated configuration object

        Raises:
            ValueError: When serialized JSON exceeds 65,535 characters
            PydanticCustomError: When serialized JSON exceeds 65,535 characters
        """
        if v is None:
            return None

        if (json_str := v.model_dump_json()) and len(json_str) > 65535:
            raise ValueError(f"Parser config exceeds size limit (max 65,535 characters). Current size: {len(json_str):,}")
            raise PydanticCustomError("string_too_long", "Parser config exceeds size limit (max 65,535 characters). Current size: {actual}", {"actual": len(json_str)})
        return v


@@ -390,4 +392,88 @@ class UpdateDatasetReq(CreateDatasetReq):

    @field_serializer("dataset_id")
    def serialize_uuid_to_hex(self, v: uuid.UUID) -> str:
        """
        Serializes a UUID version 1 object to its hexadecimal string representation.

        This field serializer specifically handles UUID version 1 objects, converting them
        to their canonical 32-character hexadecimal format without hyphens. The conversion
        is designed for consistent serialization in API responses and database storage.

        Args:
            v (uuid.UUID1): The UUID version 1 object to serialize. Must be a valid
                UUID1 instance generated by Python's uuid module.

        Returns:
            str: 32-character lowercase hexadecimal string representation
                Example: "550e8400e29b41d4a716446655440000"

        Raises:
            AttributeError: If input is not a proper UUID object (missing hex attribute)
            TypeError: If input is not a UUID1 instance (when type checking is enabled)

        Notes:
            - Version 1 UUIDs contain timestamp and MAC address information
            - The .hex property automatically converts to lowercase hexadecimal
            - For cross-version compatibility, consider typing as uuid.UUID instead
        """
        return v.hex


class DeleteReq(Base):
    ids: list[UUID1] | None = Field(...)

    @field_validator("ids", mode="after")
    def check_duplicate_ids(cls, v: list[UUID1] | None) -> list[str] | None:
        """
        Validates and converts a list of UUID1 objects to hexadecimal strings while checking for duplicates.

        This validator implements a three-stage processing pipeline:
        1. Null Handling - returns None for empty/null input
        2. UUID Conversion - transforms UUID objects to hex strings
        3. Duplicate Validation - ensures all IDs are unique

        Behavior Specifications:
        - Input: None → Returns None (indicates no operation)
        - Input: [] → Returns [] (empty list for explicit no-op)
        - Input: [UUID1,...] → Returns validated hex strings
        - Duplicates: Raises formatted PydanticCustomError

        Args:
            v (list[UUID1] | None):
                - None: Indicates no datasets should be processed
                - Empty list: Explicit empty operation
                - Populated list: Dataset UUIDs to validate/convert

        Returns:
            list[str] | None:
                - None when input is None
                - List of 32-character hex strings (lowercase, no hyphens)
                  Example: ["550e8400e29b41d4a716446655440000"]

        Raises:
            PydanticCustomError: When duplicates detected, containing:
                - Error type: "duplicate_uuids"
                - Template message: "Duplicate ids: '{duplicate_ids}'"
                - Context: {"duplicate_ids": "id1, id2, ..."}

        Example:
            >>> validate([UUID("..."), UUID("...")])
            ["2cdf0456e9a711ee8000000000000000", ...]

            >>> validate([UUID("..."), UUID("...")])  # Duplicates
            PydanticCustomError: Duplicate ids: '2cdf0456e9a711ee8000000000000000'
        """
        if not v:
            return v

        uuid_hex_list = [ids.hex for ids in v]
        duplicates = [item for item, count in Counter(uuid_hex_list).items() if count > 1]

        if duplicates:
            duplicates_str = ", ".join(duplicates)
            raise PydanticCustomError("duplicate_uuids", "Duplicate ids: '{duplicate_ids}'", {"duplicate_ids": duplicates_str})

        return uuid_hex_list


class DeleteDatasetReq(DeleteReq): ...
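For illustration (not part of the diff), the new request model can be exercised directly; this assumes the import path added in this commit and uses arbitrary UUID1 values:

```python
# Sketch: behaviour of the new DeleteReq/DeleteDatasetReq validator.
from pydantic import ValidationError

from api.utils.validation_utils import DeleteDatasetReq

print(DeleteDatasetReq(ids=None).ids)  # None -> "delete all datasets"
print(DeleteDatasetReq(ids=["d94a8dc0-2c97-11f0-930f-7fbc369eab6d"]).ids)
# ['d94a8dc02c9711f0930f7fbc369eab6d'] -> valid UUID1 values come back as 32-char hex strings

try:
    DeleteDatasetReq(ids=["d94a8dc0-2c97-11f0-930f-7fbc369eab6d"] * 2)
except ValidationError as e:
    print(e)  # duplicate ids are rejected: "Duplicate ids: '...'"
```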
@@ -507,7 +507,7 @@ Deletes datasets by ID.
- `'content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
  - `"ids"`: `list[string]`
  - `"ids"`: `list[string]` or `null`

##### Request example

@@ -517,14 +517,17 @@ curl --request DELETE \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer <YOUR_API_KEY>' \
     --data '{
     "ids": ["test_1", "test_2"]
     "ids": ["d94a8dc02c9711f0930f7fbc369eab6d", "e94a8dc02c9711f0930f7fbc369eab6e"]
}'
```

##### Request parameters

- `"ids"`: (*Body parameter*), `list[string]`
  The IDs of the datasets to delete. If it is not specified, all datasets will be deleted.
- `"ids"`: (*Body parameter*), `list[string]` or `null`, *Required*
  Specifies the datasets to delete:
  - If `null`, all datasets will be deleted.
  - If an array of IDs, only the specified datasets will be deleted.
  - If an empty array, no datasets will be deleted.
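A supplementary example (not in the original reference) for the `null` form described above; the `--url` value is assumed to match the other examples in this document:

```bash
curl --request DELETE \
     --url http://{address}/api/v1/datasets \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer <YOUR_API_KEY>' \
     --data '{
     "ids": null
}'
```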

#### Response
@@ -200,16 +200,19 @@ dataset = rag_object.create_dataset(name="kb_1")
### Delete datasets

```python
RAGFlow.delete_datasets(ids: list[str] = None)
RAGFlow.delete_datasets(ids: list[str] | None = None)
```

Deletes datasets by ID.

#### Parameters

##### ids: `list[str]`, *Required*
##### ids: `list[str]` or `None`, *Required*

The IDs of the datasets to delete. Defaults to `None`. If it is not specified, all datasets will be deleted.
The IDs of the datasets to delete. Defaults to `None`.
- If `None`, all datasets will be deleted.
- If an array of IDs, only the specified datasets will be deleted.
- If an empty array, no datasets will be deleted.

#### Returns

@@ -219,7 +222,7 @@ The IDs of the datasets to delete. Defaults to `None`. If it is not specified, a
#### Examples

```python
rag_object.delete_datasets(ids=["id_1","id_2"])
rag_object.delete_datasets(ids=["d94a8dc02c9711f0930f7fbc369eab6d","e94a8dc02c9711f0930f7fbc369eab6e"])
```
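A companion illustration (not in the original reference) for the `None` case, using the same client object:

```python
rag_object.delete_datasets(ids=None)  # deletes every dataset owned by the caller
```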

---
@@ -76,7 +76,7 @@ def condition(_auth, _dataset_id):
@pytest.fixture(scope="function")
def clear_datasets(request, get_http_api_auth):
    def cleanup():
        delete_datasets(get_http_api_auth)
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

@@ -132,7 +132,7 @@ def ragflow_tmp_dir(request, tmp_path_factory):
@pytest.fixture(scope="class")
def add_dataset(request, get_http_api_auth):
    def cleanup():
        delete_datasets(get_http_api_auth)
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

@@ -143,12 +143,11 @@ def add_dataset(request, get_http_api_auth):
@pytest.fixture(scope="function")
def add_dataset_func(request, get_http_api_auth):
    def cleanup():
        delete_datasets(get_http_api_auth)
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

    dataset_ids = batch_create_datasets(get_http_api_auth, 1)
    return dataset_ids[0]
    return batch_create_datasets(get_http_api_auth, 1)[0]


@pytest.fixture(scope="class")
@@ -22,7 +22,7 @@ from common import batch_create_datasets, delete_datasets
@pytest.fixture(scope="class")
def add_datasets(get_http_api_auth, request):
    def cleanup():
        delete_datasets(get_http_api_auth)
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

@@ -32,18 +32,8 @@ def add_datasets(get_http_api_auth, request):
@pytest.fixture(scope="function")
def add_datasets_func(get_http_api_auth, request):
    def cleanup():
        delete_datasets(get_http_api_auth)
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

    return batch_create_datasets(get_http_api_auth, 3)


@pytest.fixture(scope="function")
def add_dataset_func(get_http_api_auth, request):
    def cleanup():
        delete_datasets(get_http_api_auth)

    request.addfinalizer(cleanup)

    return batch_create_datasets(get_http_api_auth, 1)[0]
@@ -25,8 +25,8 @@ from common import (
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
@@ -38,104 +38,173 @@ class TestAuthorization:
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
    def test_auth_invalid(self, auth, expected_code, expected_message):
        res = delete_datasets(auth)
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestDatasetsDeletion:
    @pytest.mark.p1
class TestRquest:
    @pytest.mark.p3
    def test_content_type_bad(self, get_http_api_auth):
        BAD_CONTENT_TYPE = "text/xml"
        res = delete_datasets(get_http_api_auth, headers={"Content-Type": BAD_CONTENT_TYPE})
        assert res["code"] == 101, res
        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message, remaining",
        "payload, expected_message",
        [
            (None, 0, "", 0),
            ({"ids": []}, 0, "", 0),
            ({"ids": ["invalid_id"]}, 102, "You don't own the dataset invalid_id", 3),
            (
                {"ids": ["\n!?。;!?\"'"]},
                102,
                "You don't own the dataset \n!?。;!?\"'",
                3,
            ),
            (
                "not json",
                100,
                "AttributeError(\"'str' object has no attribute 'get'\")",
                3,
            ),
            (lambda r: {"ids": r[:1]}, 0, "", 2),
            (lambda r: {"ids": r}, 0, "", 0),
            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
            ('"a"', "Invalid request payload: expected object, got str"),
        ],
        ids=["malformed_json_syntax", "invalid_request_payload_type"],
    )
    def test_basic_scenarios(self, get_http_api_auth, add_datasets_func, payload, expected_code, expected_message, remaining):
        dataset_ids = add_datasets_func
        if callable(payload):
            payload = payload(dataset_ids)
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if res["code"] != 0:
            assert res["message"] == expected_message
    def test_payload_bad(self, get_http_api_auth, payload, expected_message):
        res = delete_datasets(get_http_api_auth, data=payload)
        assert res["code"] == 101, res
        assert res["message"] == expected_message, res

    @pytest.mark.p3
    def test_payload_unset(self, get_http_api_auth):
        res = delete_datasets(get_http_api_auth, None)
        assert res["code"] == 101, res
        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res


class TestCapability:
    @pytest.mark.p3
    def test_delete_dataset_1k(self, get_http_api_auth):
        ids = batch_create_datasets(get_http_api_auth, 1_000)
        res = delete_datasets(get_http_api_auth, {"ids": ids})
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == remaining

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload",
        [
            lambda r: {"ids": ["invalid_id"] + r},
            lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]},
            lambda r: {"ids": r + ["invalid_id"]},
        ],
    )
    def test_delete_partial_invalid_id(self, get_http_api_auth, add_datasets_func, payload):
        dataset_ids = add_datasets_func
        if callable(payload):
            payload = payload(dataset_ids)
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 0
        assert res["data"]["errors"][0] == "You don't own the dataset invalid_id"
        assert res["data"]["success_count"] == 3

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 0

    @pytest.mark.p2
    def test_repeated_deletion(self, get_http_api_auth, add_datasets_func):
        dataset_ids = add_datasets_func
        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
        assert res["code"] == 0

        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids})
        assert res["code"] == 102
        assert "You don't own the dataset" in res["message"]

    @pytest.mark.p2
    def test_duplicate_deletion(self, get_http_api_auth, add_datasets_func):
        dataset_ids = add_datasets_func
        res = delete_datasets(get_http_api_auth, {"ids": dataset_ids + dataset_ids})
        assert res["code"] == 0
        assert "Duplicate dataset ids" in res["data"]["errors"][0]
        assert res["data"]["success_count"] == 3

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 0
        assert len(res["data"]) == 0, res

    @pytest.mark.p3
    def test_concurrent_deletion(self, get_http_api_auth):
        ids = batch_create_datasets(get_http_api_auth, 100)
        dataset_num = 1_000
        ids = batch_create_datasets(get_http_api_auth, dataset_num)

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(dataset_num)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)
        assert all(r["code"] == 0 for r in responses), responses

    @pytest.mark.p3
    def test_delete_10k(self, get_http_api_auth):
        ids = batch_create_datasets(get_http_api_auth, 10_000)
        res = delete_datasets(get_http_api_auth, {"ids": ids})
        assert res["code"] == 0


class TestDatasetsDelete:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "func, expected_code, expected_message, remaining",
        [
            (lambda r: {"ids": r[:1]}, 0, "", 2),
            (lambda r: {"ids": r}, 0, "", 0),
        ],
        ids=["single_dataset", "multiple_datasets"],
    )
    def test_ids(self, get_http_api_auth, add_datasets_func, func, expected_code, expected_message, remaining):
        dataset_ids = add_datasets_func
        if callable(func):
            payload = func(dataset_ids)
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == expected_code, res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 0
        assert len(res["data"]) == remaining, res

    @pytest.mark.p1
    @pytest.mark.usefixtures("add_dataset_func")
    def test_ids_empty(self, get_http_api_auth):
        payload = {"ids": []}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 1, res

    @pytest.mark.p1
    @pytest.mark.usefixtures("add_datasets_func")
    def test_ids_none(self, get_http_api_auth):
        payload = {"ids": None}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 0, res

    @pytest.mark.p2
    @pytest.mark.usefixtures("add_dataset_func")
    def test_id_not_uuid(self, get_http_api_auth):
        payload = {"ids": ["not_uuid"]}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Input should be a valid UUID" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 1, res

    @pytest.mark.p2
    @pytest.mark.usefixtures("add_dataset_func")
    def test_id_wrong_uuid(self, get_http_api_auth):
        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 102, res
        assert "lacks permission for dataset" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 1, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "func",
        [
            lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r},
            lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]},
            lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
        ],
    )
    def test_ids_partial_invalid(self, get_http_api_auth, add_datasets_func, func):
        dataset_ids = add_datasets_func
        if callable(func):
            payload = func(dataset_ids)
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 102, res
        assert "lacks permission for dataset" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 3, res

    @pytest.mark.p2
    def test_ids_duplicate(self, get_http_api_auth, add_datasets_func):
        dataset_ids = add_datasets_func
        payload = {"ids": dataset_ids + dataset_ids}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Duplicate ids:" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 3, res

    @pytest.mark.p2
    def test_repeated_delete(self, get_http_api_auth, add_datasets_func):
        dataset_ids = add_datasets_func
        payload = {"ids": dataset_ids}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 0, res

        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 102, res
        assert "lacks permission for dataset" in res["message"], res

    @pytest.mark.p2
    @pytest.mark.usefixtures("add_dataset_func")
    def test_field_unsupported(self, get_http_api_auth):
        payload = {"unknown_field": "unknown_field"}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Extra inputs are not permitted" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 1, res
@@ -77,6 +77,13 @@ class TestRquest:
        assert res["code"] == 101, res
        assert res["message"] == "No properties were modified", res

    @pytest.mark.p3
    def test_payload_unset(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        res = update_dataset(get_http_api_auth, dataset_id, None)
        assert res["code"] == 101, res
        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res


class TestCapability:
    @pytest.mark.p3