From 801d135390e2adc89b1e44a417f7a14a53ec95df Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Thu, 29 Feb 2024 12:47:10 +0800 Subject: [PATCH] generalize the generation of new collection name by dataset id (#2620) --- api/commands.py | 6 +++--- api/core/rag/datasource/vdb/vector_factory.py | 6 +++--- api/core/rag/datasource/vdb/weaviate/weaviate_vector.py | 2 +- api/models/dataset.py | 4 ++++ api/services/dataset_service.py | 2 +- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/api/commands.py b/api/commands.py index 9923ccb8b8..73e765c48b 100644 --- a/api/commands.py +++ b/api/commands.py @@ -150,7 +150,7 @@ def vdb_migrate(): continue if vector_type == "weaviate": dataset_id = dataset.id - collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' + collection_name = Dataset.gen_collection_name_by_id(dataset_id) index_struct_dict = { "type": 'weaviate', "vector_store": {"class_prefix": collection_name} @@ -167,7 +167,7 @@ def vdb_migrate(): raise ValueError('Dataset Collection Bindings is not exist!') else: dataset_id = dataset.id - collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' + collection_name = Dataset.gen_collection_name_by_id(dataset_id) index_struct_dict = { "type": 'qdrant', "vector_store": {"class_prefix": collection_name} @@ -176,7 +176,7 @@ def vdb_migrate(): elif vector_type == "milvus": dataset_id = dataset.id - collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' + collection_name = Dataset.gen_collection_name_by_id(dataset_id) index_struct_dict = { "type": 'milvus', "vector_store": {"class_prefix": collection_name} diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 1921de07ed..109d36583c 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -39,7 +39,7 @@ class Vector: collection_name = class_prefix else: dataset_id = self._dataset.id - collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' + collection_name = Dataset.gen_collection_name_by_id(dataset_id) index_struct_dict = { "type": 'weaviate', "vector_store": {"class_prefix": collection_name} @@ -70,7 +70,7 @@ class Vector: collection_name = class_prefix else: dataset_id = self._dataset.id - collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' + collection_name = Dataset.gen_collection_name_by_id(dataset_id) if not self._dataset.index_struct_dict: index_struct_dict = { @@ -96,7 +96,7 @@ class Vector: collection_name = class_prefix else: dataset_id = self._dataset.id - collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node' + collection_name = Dataset.gen_collection_name_by_id(dataset_id) index_struct_dict = { "type": 'milvus', "vector_store": {"class_prefix": collection_name} diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py index 008e54085d..6e317115b8 100644 --- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py +++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py @@ -70,7 +70,7 @@ class WeaviateVector(BaseVector): return class_prefix dataset_id = dataset.id - return "Vector_index_" + dataset_id.replace("-", "_") + '_Node' + return Dataset.gen_collection_name_by_id(dataset_id) def to_index_struct(self) -> dict: return { diff --git a/api/models/dataset.py b/api/models/dataset.py index 473a796be5..94664bf49a 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -116,6 +116,10 @@ class Dataset(db.Model): } return self.retrieval_model if self.retrieval_model else default_retrieval_model + @staticmethod + def gen_collection_name_by_id(dataset_id: str) -> str: + normalized_dataset_id = dataset_id.replace("-", "_") + return f'Vector_index_{normalized_dataset_id}_Node' class DatasetProcessRule(db.Model): __tablename__ = 'dataset_process_rules' diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index b151ebada8..ad06096678 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -1244,7 +1244,7 @@ class DatasetCollectionBindingService: dataset_collection_binding = DatasetCollectionBinding( provider_name=provider_name, model_name=model_name, - collection_name="Vector_index_" + str(uuid.uuid4()).replace("-", "_") + '_Node', + collection_name=Dataset.gen_collection_name_by_id(str(uuid.uuid4())), type=collection_type ) db.session.add(dataset_collection_binding)