fix kb permission (#15199)

Signed-off-by: kenwoodjw <blackxin55@gmail.com>
Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
This commit is contained in:
kenwoodjw 2025-03-10 23:47:45 +08:00 committed by GitHub
parent 9b2a9260ef
commit adda049265
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 72 additions and 24 deletions

View File

@ -283,7 +283,11 @@ class DatasetApi(Resource):
data = request.get_json() data = request.get_json()
# check embedding model setting # check embedding model setting
if data.get("indexing_technique") == "high_quality": if (
data.get("indexing_technique") == "high_quality"
and data.get("embedding_model_provider") is not None
and data.get("embedding_model") is not None
):
DatasetService.check_embedding_model_setting( DatasetService.check_embedding_model_setting(
dataset.tenant_id, data.get("embedding_model_provider"), data.get("embedding_model") dataset.tenant_id, data.get("embedding_model_provider"), data.get("embedding_model")
) )

View File

@ -245,7 +245,7 @@ class DatasetService:
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider." "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
) )
except ProviderTokenNotInitError as ex: except ProviderTokenNotInitError as ex:
raise ValueError(f"The dataset in unavailable, due to: {ex.description}") raise ValueError(ex.description)
@staticmethod @staticmethod
def update_dataset(dataset_id, data, user): def update_dataset(dataset_id, data, user):
@ -327,31 +327,75 @@ class DatasetService:
raise ValueError(ex.description) raise ValueError(ex.description)
else: else:
# add default plugin id to both setting sets, to make sure the plugin model provider is consistent # add default plugin id to both setting sets, to make sure the plugin model provider is consistent
plugin_model_provider = dataset.embedding_model_provider # Skip embedding model checks if not provided in the update request
plugin_model_provider = str(ModelProviderID(plugin_model_provider))
new_plugin_model_provider = data["embedding_model_provider"]
new_plugin_model_provider = str(ModelProviderID(new_plugin_model_provider))
if ( if (
new_plugin_model_provider != plugin_model_provider "embedding_model_provider" not in data
or data["embedding_model"] != dataset.embedding_model or "embedding_model" not in data
or not data.get("embedding_model_provider")
or not data.get("embedding_model")
): ):
action = "update" # If the dataset already has embedding model settings, use those
if dataset.embedding_model_provider and dataset.embedding_model:
# Keep existing values
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
filtered_data["embedding_model"] = dataset.embedding_model
# If collection_binding_id exists, keep it too
if dataset.collection_binding_id:
filtered_data["collection_binding_id"] = dataset.collection_binding_id
# Otherwise, don't try to update embedding model settings at all
# Remove these fields from filtered_data if they exist but are None/empty
if "embedding_model_provider" in filtered_data and not filtered_data["embedding_model_provider"]:
del filtered_data["embedding_model_provider"]
if "embedding_model" in filtered_data and not filtered_data["embedding_model"]:
del filtered_data["embedding_model"]
else:
skip_embedding_update = False
try: try:
model_manager = ModelManager() # Handle existing model provider
embedding_model = model_manager.get_model_instance( plugin_model_provider = dataset.embedding_model_provider
tenant_id=current_user.current_tenant_id, plugin_model_provider_str = None
provider=data["embedding_model_provider"], if plugin_model_provider:
model_type=ModelType.TEXT_EMBEDDING, plugin_model_provider_str = str(ModelProviderID(plugin_model_provider))
model=data["embedding_model"],
) # Handle new model provider from request
filtered_data["embedding_model"] = embedding_model.model new_plugin_model_provider = data["embedding_model_provider"]
filtered_data["embedding_model_provider"] = embedding_model.provider new_plugin_model_provider_str = None
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding( if new_plugin_model_provider:
embedding_model.provider, embedding_model.model new_plugin_model_provider_str = str(ModelProviderID(new_plugin_model_provider))
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id # Only update embedding model if both values are provided and different from current
if (
plugin_model_provider_str != new_plugin_model_provider_str
or data["embedding_model"] != dataset.embedding_model
):
action = "update"
model_manager = ModelManager()
try:
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
except ProviderTokenNotInitError:
# If we can't get the embedding model, skip updating it
# and keep the existing settings if available
if dataset.embedding_model_provider and dataset.embedding_model:
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
filtered_data["embedding_model"] = dataset.embedding_model
if dataset.collection_binding_id:
filtered_data["collection_binding_id"] = dataset.collection_binding_id
# Skip the rest of the embedding model update
skip_embedding_update = True
if not skip_embedding_update:
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = (
DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
except LLMBadRequestError: except LLMBadRequestError:
raise ValueError( raise ValueError(
"No Embedding Model available. Please configure a valid provider " "No Embedding Model available. Please configure a valid provider "