From 048bc4c06e5b9470e1ec079b23c6c44854aee715 Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Fri, 2 Aug 2024 20:30:22 +0800
Subject: [PATCH] fix update dataset failed when embedding model is not exist (#6920)

---
 api/controllers/console/datasets/datasets.py |  9 ++++++--
 api/services/dataset_service.py              | 22 ++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index c446f523b6..be0281f07a 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -189,8 +189,6 @@ class DatasetApi(Resource):
         dataset = DatasetService.get_dataset(dataset_id_str)
         if dataset is None:
             raise NotFound("Dataset not found.")
-        # check user's model setting
-        DatasetService.check_dataset_model_setting(dataset)
 
         parser = reqparse.RequestParser()
         parser.add_argument('name', nullable=False,
@@ -215,6 +213,13 @@ class DatasetApi(Resource):
         args = parser.parse_args()
         data = request.get_json()
 
+        # check embedding model setting
+        if data.get('indexing_technique') == 'high_quality':
+            DatasetService.check_embedding_model_setting(dataset.tenant_id,
+                                                         data.get('embedding_model_provider'),
+                                                         data.get('embedding_model')
+                                                         )
+
         # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
         DatasetPermissionService.check_permission(
             current_user, dataset, data.get('permission'), data.get('partial_member_list')
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index d5a54ba731..9052a0b785 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -197,6 +197,28 @@ class DatasetService:
                 f"{ex.description}"
             )
 
+    @staticmethod
+    def check_embedding_model_setting(tenant_id: str, embedding_model_provider: str, embedding_model: str) -> None:
+        """Check that a text-embedding model instance can be obtained for the tenant.
+
+        Raises:
+            ValueError: if the lookup raises LLMBadRequestError or ProviderTokenNotInitError.
+        """
+        try:
+            ModelManager().get_model_instance(
+                tenant_id=tenant_id,
+                provider=embedding_model_provider,
+                model_type=ModelType.TEXT_EMBEDDING,
+                model=embedding_model
+            )
+        except LLMBadRequestError as ex:
+            raise ValueError(
+                "No Embedding Model available. Please configure a valid provider "
+                "in the Settings -> Model Provider."
+            ) from ex
+        except ProviderTokenNotInitError as ex:
+            raise ValueError(f"The dataset is unavailable, due to: {ex.description}") from ex
+
     @staticmethod
     def update_dataset(dataset_id, data, user):
         data.pop('partial_member_list', None)