From 1f82889001a3cb8f2ded515bfdeb1b1faf66c106 Mon Sep 17 00:00:00 2001 From: liu an Date: Wed, 30 Apr 2025 14:50:23 +0800 Subject: [PATCH] Fix: create dataset remove unnecessary parameter constraints (#7432) ### What problem does this PR solve? Remove unnecessary parameter restrictions in dataset creation API ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/utils/validation_utils.py | 6 +++--- docs/references/http_api_reference.md | 1 - .../test_dataset_mangement/test_create_dataset.py | 14 ++------------ 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/api/utils/validation_utils.py b/api/utils/validation_utils.py index 9d131328f..c95381a5d 100644 --- a/api/utils/validation_utils.py +++ b/api/utils/validation_utils.py @@ -20,7 +20,7 @@ from pydantic import BaseModel, Field, StringConstraints, ValidationError, field from strenum import StrEnum -def format_validation_error_message(e: ValidationError): +def format_validation_error_message(e: ValidationError) -> str: error_messages = [] for error in e.errors(): @@ -81,7 +81,7 @@ class RaptorConfig(Base): max_token: int = Field(default=256, ge=1, le=2048) threshold: float = Field(default=0.1, ge=0.0, le=1.0) max_cluster: int = Field(default=64, ge=1, le=1024) - random_seed: int = Field(default=0, ge=0, le=10_000) + random_seed: int = Field(default=0, ge=0) class GraphragConfig(Base): @@ -104,7 +104,7 @@ class ParserConfig(Base): tag_kb_ids: List[str] = Field(default_factory=list) topn_tags: int = Field(default=1, ge=1, le=10) filename_embd_weight: Optional[float] = Field(default=None, ge=0.0, le=1.0) - task_page_size: Optional[int] = Field(default=None, ge=1, le=10_000) + task_page_size: Optional[int] = Field(default=None, ge=1) pages: Optional[List[List[int]]] = None diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index b21393bcf..d088ca2f2 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -429,7 +429,6 @@ curl --request POST \ - `"task_page_size"`: `int` For PDF only. - Defaults to `12` - Minimum: `1` - - Maximum: `10000` - `"raptor"`: `object` RAPTOR-specific settings. - Defaults to: `{"use_raptor": false}` - `"graphrag"`: `object` GRAPHRAG-specific settings. diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py index a2945a4ac..bb2c1c60a 100644 --- a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py +++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py @@ -311,8 +311,7 @@ class TestDatasetCreation: ("filename_embd_weight_mid", {"filename_embd_weight": 0.5}), ("filename_embd_weight_max", {"filename_embd_weight": 1.0}), ("task_page_size_min", {"task_page_size": 1}), - ("task_page_size_mid", {"task_page_size": 5_000}), - ("task_page_size_max", {"task_page_size": 10_000}), + ("task_page_size_None", {"task_page_size": None}), ("pages", {"pages": [[1, 100]]}), ("pages_none", None), ("graphrag_true", {"graphrag": {"use_graphrag": True}}), @@ -337,8 +336,6 @@ class TestDatasetCreation: ("raptor_max_cluster_mid", {"raptor": {"max_cluster": 512}}), ("raptor_max_cluster_max", {"raptor": {"max_cluster": 1024}}), ("raptor_random_seed_min", {"raptor": {"random_seed": 0}}), - ("raptor_random_seed_mid", {"raptor": {"random_seed": 5_000}}), - ("raptor_random_seed_max", {"raptor": {"random_seed": 10_000}}), ], ids=[ "default_none", @@ -366,8 +363,7 @@ class TestDatasetCreation: "filename_embd_weight_mid", "filename_embd_weight_max", "task_page_size_min", - "task_page_size_mid", - "task_page_size_max", + "task_page_size_None", "pages", "pages_none", "graphrag_true", @@ -392,8 +388,6 @@ class TestDatasetCreation: "raptor_max_cluster_mid", "raptor_max_cluster_max", "raptor_random_seed_min", - "raptor_random_seed_mid", - "raptor_random_seed_max", ], ) def test_valid_parser_config(self, get_http_api_auth, name, parser_config): @@ -462,7 +456,6 @@ class TestDatasetCreation: ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"), - ("task_page_size_max_limit", {"task_page_size": 10_001}, "Input should be less than or equal to 10000"), ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"), @@ -490,7 +483,6 @@ class TestDatasetCreation: ("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), - ("raptor_random_seed_max_limit", {"raptor": {"random_seed": 10_001}}, "Input should be less than or equal to 10000"), ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config have at most 65535 characters"), @@ -520,7 +512,6 @@ class TestDatasetCreation: "filename_embd_weight_max_limit", "filename_embd_weight_type_invalid", "task_page_size_min_limit", - "task_page_size_max_limit", "task_page_size_float_not_allowed", "task_page_size_type_invalid", "pages_not_list", @@ -548,7 +539,6 @@ class TestDatasetCreation: "raptor_max_cluster_float_not_allowed", "raptor_max_cluster_type_invalid", "raptor_random_seed_min_limit", - "raptor_random_seed_max_limit", "raptor_random_seed_float_not_allowed", "raptor_random_seed_type_invalid", "parser_config_type_invalid",