Lindorm vdb bug-fix (#11790)

Co-authored-by: jiangzhijie <jiangzhijie.jzj@alibaba-inc.com>
This commit is contained in:
Jiang 2024-12-18 15:19:20 +08:00 committed by GitHub
parent 558ab25f51
commit ad17ff9a92
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -49,10 +49,10 @@ class LindormVectorStoreConfig(BaseModel):
class LindormVectorStore(BaseVector): class LindormVectorStore(BaseVector):
def __init__(self, collection_name: str, config: LindormVectorStoreConfig, **kwargs): def __init__(self, collection_name: str, config: LindormVectorStoreConfig, using_ugc: bool, **kwargs):
self._routing = None self._routing = None
self._routing_field = None self._routing_field = None
if config.using_ugc: if using_ugc:
routing_value: str = kwargs.get("routing_value") routing_value: str = kwargs.get("routing_value")
if routing_value is None: if routing_value is None:
raise ValueError("UGC index should init vector with valid 'routing_value' parameter value") raise ValueError("UGC index should init vector with valid 'routing_value' parameter value")
@ -64,7 +64,7 @@ class LindormVectorStore(BaseVector):
super().__init__(collection_name.lower()) super().__init__(collection_name.lower())
self._client_config = config self._client_config = config
self._client = OpenSearch(**config.to_opensearch_params()) self._client = OpenSearch(**config.to_opensearch_params())
self._using_ugc = config.using_ugc self._using_ugc = using_ugc
self.kwargs = kwargs self.kwargs = kwargs
def get_type(self) -> str: def get_type(self) -> str:
@ -467,12 +467,16 @@ class LindormVectorStoreFactory(AbstractVectorFactory):
using_ugc = dify_config.USING_UGC_INDEX using_ugc = dify_config.USING_UGC_INDEX
routing_value = None routing_value = None
if dataset.index_struct: if dataset.index_struct:
if using_ugc: # if an existing record's index_struct_dict doesn't contain the using_ugc field,
# it is actually stored in the normal index format
stored_in_ugc = dataset.index_struct_dict.get("using_ugc", False)
using_ugc = stored_in_ugc
if stored_in_ugc:
dimension = dataset.index_struct_dict["dimension"] dimension = dataset.index_struct_dict["dimension"]
index_type = dataset.index_struct_dict["index_type"] index_type = dataset.index_struct_dict["index_type"]
distance_type = dataset.index_struct_dict["distance_type"] distance_type = dataset.index_struct_dict["distance_type"]
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
routing_value = dataset.index_struct_dict["vector_store"]["class_prefix"] routing_value = dataset.index_struct_dict["vector_store"]["class_prefix"]
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
else: else:
index_name = dataset.index_struct_dict["vector_store"]["class_prefix"] index_name = dataset.index_struct_dict["vector_store"]["class_prefix"]
else: else:
@ -487,6 +491,7 @@ class LindormVectorStoreFactory(AbstractVectorFactory):
"index_type": index_type, "index_type": index_type,
"dimension": dimension, "dimension": dimension,
"distance_type": distance_type, "distance_type": distance_type,
"using_ugc": using_ugc,
} }
dataset.index_struct = json.dumps(index_struct_dict) dataset.index_struct = json.dumps(index_struct_dict)
if using_ugc: if using_ugc:
@ -494,4 +499,4 @@ class LindormVectorStoreFactory(AbstractVectorFactory):
routing_value = class_prefix routing_value = class_prefix
else: else:
index_name = class_prefix index_name = class_prefix
return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value) return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value, using_ugc=using_ugc)