fix kb permission (#15199)

Signed-off-by: kenwoodjw <blackxin55@gmail.com>
Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
This commit is contained in:
kenwoodjw 2025-03-10 23:47:45 +08:00 committed by GitHub
parent 9b2a9260ef
commit adda049265
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 72 additions and 24 deletions

View File

@ -283,7 +283,11 @@ class DatasetApi(Resource):
data = request.get_json()
# check embedding model setting
if data.get("indexing_technique") == "high_quality":
if (
data.get("indexing_technique") == "high_quality"
and data.get("embedding_model_provider") is not None
and data.get("embedding_model") is not None
):
DatasetService.check_embedding_model_setting(
dataset.tenant_id, data.get("embedding_model_provider"), data.get("embedding_model")
)

View File

@ -245,7 +245,7 @@ class DatasetService:
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(f"The dataset in unavailable, due to: {ex.description}")
raise ValueError(ex.description)
@staticmethod
def update_dataset(dataset_id, data, user):
@ -327,31 +327,75 @@ class DatasetService:
raise ValueError(ex.description)
else:
# add default plugin id to both setting sets, to make sure the plugin model provider is consistent
plugin_model_provider = dataset.embedding_model_provider
plugin_model_provider = str(ModelProviderID(plugin_model_provider))
new_plugin_model_provider = data["embedding_model_provider"]
new_plugin_model_provider = str(ModelProviderID(new_plugin_model_provider))
# Skip embedding model checks if not provided in the update request
if (
new_plugin_model_provider != plugin_model_provider
or data["embedding_model"] != dataset.embedding_model
"embedding_model_provider" not in data
or "embedding_model" not in data
or not data.get("embedding_model_provider")
or not data.get("embedding_model")
):
action = "update"
# If the dataset already has embedding model settings, use those
if dataset.embedding_model_provider and dataset.embedding_model:
# Keep existing values
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
filtered_data["embedding_model"] = dataset.embedding_model
# If collection_binding_id exists, keep it too
if dataset.collection_binding_id:
filtered_data["collection_binding_id"] = dataset.collection_binding_id
# Otherwise, don't try to update embedding model settings at all
# Remove these fields from filtered_data if they exist but are None/empty
if "embedding_model_provider" in filtered_data and not filtered_data["embedding_model_provider"]:
del filtered_data["embedding_model_provider"]
if "embedding_model" in filtered_data and not filtered_data["embedding_model"]:
del filtered_data["embedding_model"]
else:
skip_embedding_update = False
try:
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
# Handle existing model provider
plugin_model_provider = dataset.embedding_model_provider
plugin_model_provider_str = None
if plugin_model_provider:
plugin_model_provider_str = str(ModelProviderID(plugin_model_provider))
# Handle new model provider from request
new_plugin_model_provider = data["embedding_model_provider"]
new_plugin_model_provider_str = None
if new_plugin_model_provider:
new_plugin_model_provider_str = str(ModelProviderID(new_plugin_model_provider))
# Only update embedding model if both values are provided and different from current
if (
plugin_model_provider_str != new_plugin_model_provider_str
or data["embedding_model"] != dataset.embedding_model
):
action = "update"
model_manager = ModelManager()
try:
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
except ProviderTokenNotInitError:
# If we can't get the embedding model, skip updating it
# and keep the existing settings if available
if dataset.embedding_model_provider and dataset.embedding_model:
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
filtered_data["embedding_model"] = dataset.embedding_model
if dataset.collection_binding_id:
filtered_data["collection_binding_id"] = dataset.collection_binding_id
# Skip the rest of the embedding model update
skip_embedding_update = True
if not skip_embedding_update:
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = (
DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
except LLMBadRequestError:
raise ValueError(
"No Embedding Model available. Please configure a valid provider "