mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-04 11:24:00 +08:00
Refa: remove dataset language and validate dataset name length. (#5707)
### What problem does this PR solve?

#5686 #5702

### Type of change

- [x] Refactoring
This commit is contained in:
parent
df9b7b2fe9
commit
ff35c140dc
@ -66,10 +66,6 @@ def create(tenant_id):
|
|||||||
type: string
|
type: string
|
||||||
enum: ['me', 'team']
|
enum: ['me', 'team']
|
||||||
description: Dataset permission.
|
description: Dataset permission.
|
||||||
language:
|
|
||||||
type: string
|
|
||||||
enum: ['Chinese', 'English']
|
|
||||||
description: Language of the dataset.
|
|
||||||
chunk_method:
|
chunk_method:
|
||||||
type: string
|
type: string
|
||||||
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
|
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
|
||||||
@ -91,11 +87,9 @@ def create(tenant_id):
|
|||||||
req = request.json
|
req = request.json
|
||||||
e, t = TenantService.get_by_id(tenant_id)
|
e, t = TenantService.get_by_id(tenant_id)
|
||||||
permission = req.get("permission")
|
permission = req.get("permission")
|
||||||
language = req.get("language")
|
|
||||||
chunk_method = req.get("chunk_method")
|
chunk_method = req.get("chunk_method")
|
||||||
parser_config = req.get("parser_config")
|
parser_config = req.get("parser_config")
|
||||||
valid_permission = ["me", "team"]
|
valid_permission = ["me", "team"]
|
||||||
valid_language = ["Chinese", "English"]
|
|
||||||
valid_chunk_method = [
|
valid_chunk_method = [
|
||||||
"naive",
|
"naive",
|
||||||
"manual",
|
"manual",
|
||||||
@ -114,8 +108,6 @@ def create(tenant_id):
|
|||||||
check_validation = valid(
|
check_validation = valid(
|
||||||
permission,
|
permission,
|
||||||
valid_permission,
|
valid_permission,
|
||||||
language,
|
|
||||||
valid_language,
|
|
||||||
chunk_method,
|
chunk_method,
|
||||||
valid_chunk_method,
|
valid_chunk_method,
|
||||||
)
|
)
|
||||||
@ -134,6 +126,10 @@ def create(tenant_id):
|
|||||||
req["name"] = req["name"].strip()
|
req["name"] = req["name"].strip()
|
||||||
if req["name"] == "":
|
if req["name"] == "":
|
||||||
return get_error_data_result(message="`name` is not empty string!")
|
return get_error_data_result(message="`name` is not empty string!")
|
||||||
|
if len(req["name"]) >= 128:
|
||||||
|
return get_error_data_result(
|
||||||
|
message="Dataset name should not be longer than 128 characters."
|
||||||
|
)
|
||||||
if KnowledgebaseService.query(
|
if KnowledgebaseService.query(
|
||||||
name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value
|
name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value
|
||||||
):
|
):
|
||||||
@ -297,10 +293,6 @@ def update(tenant_id, dataset_id):
|
|||||||
type: string
|
type: string
|
||||||
enum: ['me', 'team']
|
enum: ['me', 'team']
|
||||||
description: Updated permission.
|
description: Updated permission.
|
||||||
language:
|
|
||||||
type: string
|
|
||||||
enum: ['Chinese', 'English']
|
|
||||||
description: Updated language.
|
|
||||||
chunk_method:
|
chunk_method:
|
||||||
type: string
|
type: string
|
||||||
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
|
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
|
||||||
@ -324,11 +316,9 @@ def update(tenant_id, dataset_id):
|
|||||||
if any(key in req for key in invalid_keys):
|
if any(key in req for key in invalid_keys):
|
||||||
return get_error_data_result(message="The input parameters are invalid.")
|
return get_error_data_result(message="The input parameters are invalid.")
|
||||||
permission = req.get("permission")
|
permission = req.get("permission")
|
||||||
language = req.get("language")
|
|
||||||
chunk_method = req.get("chunk_method")
|
chunk_method = req.get("chunk_method")
|
||||||
parser_config = req.get("parser_config")
|
parser_config = req.get("parser_config")
|
||||||
valid_permission = ["me", "team"]
|
valid_permission = ["me", "team"]
|
||||||
valid_language = ["Chinese", "English"]
|
|
||||||
valid_chunk_method = [
|
valid_chunk_method = [
|
||||||
"naive",
|
"naive",
|
||||||
"manual",
|
"manual",
|
||||||
@ -347,8 +337,6 @@ def update(tenant_id, dataset_id):
|
|||||||
check_validation = valid(
|
check_validation = valid(
|
||||||
permission,
|
permission,
|
||||||
valid_permission,
|
valid_permission,
|
||||||
language,
|
|
||||||
valid_language,
|
|
||||||
chunk_method,
|
chunk_method,
|
||||||
valid_chunk_method,
|
valid_chunk_method,
|
||||||
)
|
)
|
||||||
@ -416,6 +404,10 @@ def update(tenant_id, dataset_id):
|
|||||||
req["embd_id"] = req.pop("embedding_model")
|
req["embd_id"] = req.pop("embedding_model")
|
||||||
if "name" in req:
|
if "name" in req:
|
||||||
req["name"] = req["name"].strip()
|
req["name"] = req["name"].strip()
|
||||||
|
if len(req["name"]) >= 128:
|
||||||
|
return get_error_data_result(
|
||||||
|
message="Dataset name should not be longer than 128 characters."
|
||||||
|
)
|
||||||
if (
|
if (
|
||||||
req["name"].lower() != kb.name.lower()
|
req["name"].lower() != kb.name.lower()
|
||||||
and len(
|
and len(
|
||||||
|
@ -335,11 +335,9 @@ def generate_confirmation_token(tenent_id):
|
|||||||
return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
|
return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
|
||||||
|
|
||||||
|
|
||||||
def valid(permission, valid_permission, language, valid_language, chunk_method, valid_chunk_method):
|
def valid(permission, valid_permission, chunk_method, valid_chunk_method):
|
||||||
if valid_parameter(permission, valid_permission):
|
if valid_parameter(permission, valid_permission):
|
||||||
return valid_parameter(permission, valid_permission)
|
return valid_parameter(permission, valid_permission)
|
||||||
if valid_parameter(language, valid_language):
|
|
||||||
return valid_parameter(language, valid_language)
|
|
||||||
if valid_parameter(chunk_method, valid_chunk_method):
|
if valid_parameter(chunk_method, valid_chunk_method):
|
||||||
return valid_parameter(chunk_method, valid_chunk_method)
|
return valid_parameter(chunk_method, valid_chunk_method)
|
||||||
|
|
||||||
|
@ -178,7 +178,6 @@ Creates a dataset.
|
|||||||
- `"name"`: `string`
|
- `"name"`: `string`
|
||||||
- `"avatar"`: `string`
|
- `"avatar"`: `string`
|
||||||
- `"description"`: `string`
|
- `"description"`: `string`
|
||||||
- `"language"`: `string`
|
|
||||||
- `"embedding_model"`: `string`
|
- `"embedding_model"`: `string`
|
||||||
- `"permission"`: `string`
|
- `"permission"`: `string`
|
||||||
- `"chunk_method"`: `string`
|
- `"chunk_method"`: `string`
|
||||||
@ -214,11 +213,6 @@ curl --request POST \
|
|||||||
- `"description"`: (*Body parameter*), `string`
|
- `"description"`: (*Body parameter*), `string`
|
||||||
A brief description of the dataset to create.
|
A brief description of the dataset to create.
|
||||||
|
|
||||||
- `"language"`: (*Body parameter*), `string`
|
|
||||||
The language setting of the dataset to create. Available options:
|
|
||||||
- `"English"` (default)
|
|
||||||
- `"Chinese"`
|
|
||||||
|
|
||||||
- `"embedding_model"`: (*Body parameter*), `string`
|
- `"embedding_model"`: (*Body parameter*), `string`
|
||||||
The name of the embedding model to use. For example: `"BAAI/bge-zh-v1.5"`
|
The name of the embedding model to use. For example: `"BAAI/bge-zh-v1.5"`
|
||||||
|
|
||||||
|
@ -82,7 +82,6 @@ RAGFlow.create_dataset(
|
|||||||
avatar: str = "",
|
avatar: str = "",
|
||||||
description: str = "",
|
description: str = "",
|
||||||
embedding_model: str = "BAAI/bge-large-zh-v1.5",
|
embedding_model: str = "BAAI/bge-large-zh-v1.5",
|
||||||
language: str = "English",
|
|
||||||
permission: str = "me",
|
permission: str = "me",
|
||||||
chunk_method: str = "naive",
|
chunk_method: str = "naive",
|
||||||
parser_config: DataSet.ParserConfig = None
|
parser_config: DataSet.ParserConfig = None
|
||||||
@ -108,12 +107,6 @@ Base64 encoding of the avatar. Defaults to `""`
|
|||||||
|
|
||||||
A brief description of the dataset to create. Defaults to `""`.
|
A brief description of the dataset to create. Defaults to `""`.
|
||||||
|
|
||||||
##### language: `str`
|
|
||||||
|
|
||||||
The language setting of the dataset to create. Available options:
|
|
||||||
|
|
||||||
- `"English"` (default)
|
|
||||||
- `"Chinese"`
|
|
||||||
|
|
||||||
##### permission
|
##### permission
|
||||||
|
|
||||||
|
@ -30,7 +30,6 @@ class DataSet(Base):
|
|||||||
self.avatar = ""
|
self.avatar = ""
|
||||||
self.tenant_id = None
|
self.tenant_id = None
|
||||||
self.description = ""
|
self.description = ""
|
||||||
self.language = "English"
|
|
||||||
self.embedding_model = ""
|
self.embedding_model = ""
|
||||||
self.permission = "me"
|
self.permission = "me"
|
||||||
self.document_count = 0
|
self.document_count = 0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user