From fc8c76521546f6a8de58ee180ee45fa80a0152d4 Mon Sep 17 00:00:00 2001 From: Jiang <65766008+AlwaysBluer@users.noreply.github.com> Date: Tue, 25 Mar 2025 09:19:06 +0800 Subject: [PATCH] Fix/vdb lindorm (#16660) Co-authored-by: jiangzhijie --- .../rag/datasource/vdb/lindorm/lindorm_vector.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py index d3f5283034..1ba37dfb3f 100644 --- a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py +++ b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py @@ -102,8 +102,6 @@ class LindormVectorStore(BaseVector): if response["errors"]: for item in response["items"]: print(f"{item['index']['status']}: {item['index']['error']['type']}") - else: - self.refresh() def get_ids_by_metadata_field(self, key: str, value: str): query: dict[str, Any] = { @@ -167,7 +165,7 @@ class LindormVectorStore(BaseVector): if not all(isinstance(x, float) for x in query_vector): raise ValueError("All elements in query_vector should be floats") - top_k = kwargs.get("top_k", 10) + top_k = kwargs.get("top_k", 3) document_ids_filter = kwargs.get("document_ids_filter") filters = [] if document_ids_filter: @@ -210,7 +208,7 @@ class LindormVectorStore(BaseVector): must_not = kwargs.get("must_not") should = kwargs.get("should") minimum_should_match = kwargs.get("minimum_should_match", 0) - top_k = kwargs.get("top_k", 10) + top_k = kwargs.get("top_k", 3) filters = kwargs.get("filter", []) document_ids_filter = kwargs.get("document_ids_filter") if document_ids_filter: @@ -295,7 +293,7 @@ class LindormVectorStore(BaseVector): def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dict: - excludes_from_source = kwargs.get("excludes_from_source") + excludes_from_source = kwargs.get("excludes_from_source", False) analyzer = kwargs.get("analyzer", "ik_max_word") text_field = kwargs.get("text_field", Field.CONTENT_KEY.value) engine = kwargs["engine"] @@ -356,12 +354,12 @@ def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dic if excludes_from_source: # e.g. {"excludes": ["vector_field"]} - mapping["mappings"]["_source"] = {"excludes": excludes_from_source} + mapping["mappings"]["_source"] = {"excludes": [vector_field]} if using_ugc and method_name == "ivfpq": mapping["settings"]["index"]["knn_routing"] = True mapping["settings"]["index"]["knn.offline.construction"] = True - elif using_ugc and method_name == "hnsw" or using_ugc and method_name == "flat": + elif (using_ugc and method_name == "hnsw") or (using_ugc and method_name == "flat"): mapping["settings"]["index"]["knn_routing"] = True return mapping @@ -458,7 +456,7 @@ def default_vector_search_query( "query": {"knn": {vector_field: {"vector": query_vector, "k": k}}}, } - if filters is not None: + if filters is not None and len(filters) > 0: # when using filter, transform filter from List[Dict] to Dict as valid format filter_dict = {"bool": {"must": filters}} if len(filters) > 1 else filters[0] search_query["query"]["knn"][vector_field]["filter"] = filter_dict # filter should be Dict