mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-06-04 11:14:10 +08:00
Fix/vdb lindorm (#16660)
Co-authored-by: jiangzhijie <jiangzhijie.jzj@alibaba-inc.com>
This commit is contained in:
parent
86a1859d02
commit
fc8c765215
@ -102,8 +102,6 @@ class LindormVectorStore(BaseVector):
|
|||||||
if response["errors"]:
|
if response["errors"]:
|
||||||
for item in response["items"]:
|
for item in response["items"]:
|
||||||
print(f"{item['index']['status']}: {item['index']['error']['type']}")
|
print(f"{item['index']['status']}: {item['index']['error']['type']}")
|
||||||
else:
|
|
||||||
self.refresh()
|
|
||||||
|
|
||||||
def get_ids_by_metadata_field(self, key: str, value: str):
|
def get_ids_by_metadata_field(self, key: str, value: str):
|
||||||
query: dict[str, Any] = {
|
query: dict[str, Any] = {
|
||||||
@ -167,7 +165,7 @@ class LindormVectorStore(BaseVector):
|
|||||||
if not all(isinstance(x, float) for x in query_vector):
|
if not all(isinstance(x, float) for x in query_vector):
|
||||||
raise ValueError("All elements in query_vector should be floats")
|
raise ValueError("All elements in query_vector should be floats")
|
||||||
|
|
||||||
top_k = kwargs.get("top_k", 10)
|
top_k = kwargs.get("top_k", 3)
|
||||||
document_ids_filter = kwargs.get("document_ids_filter")
|
document_ids_filter = kwargs.get("document_ids_filter")
|
||||||
filters = []
|
filters = []
|
||||||
if document_ids_filter:
|
if document_ids_filter:
|
||||||
@ -210,7 +208,7 @@ class LindormVectorStore(BaseVector):
|
|||||||
must_not = kwargs.get("must_not")
|
must_not = kwargs.get("must_not")
|
||||||
should = kwargs.get("should")
|
should = kwargs.get("should")
|
||||||
minimum_should_match = kwargs.get("minimum_should_match", 0)
|
minimum_should_match = kwargs.get("minimum_should_match", 0)
|
||||||
top_k = kwargs.get("top_k", 10)
|
top_k = kwargs.get("top_k", 3)
|
||||||
filters = kwargs.get("filter", [])
|
filters = kwargs.get("filter", [])
|
||||||
document_ids_filter = kwargs.get("document_ids_filter")
|
document_ids_filter = kwargs.get("document_ids_filter")
|
||||||
if document_ids_filter:
|
if document_ids_filter:
|
||||||
@ -295,7 +293,7 @@ class LindormVectorStore(BaseVector):
|
|||||||
|
|
||||||
|
|
||||||
def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dict:
|
def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dict:
|
||||||
excludes_from_source = kwargs.get("excludes_from_source")
|
excludes_from_source = kwargs.get("excludes_from_source", False)
|
||||||
analyzer = kwargs.get("analyzer", "ik_max_word")
|
analyzer = kwargs.get("analyzer", "ik_max_word")
|
||||||
text_field = kwargs.get("text_field", Field.CONTENT_KEY.value)
|
text_field = kwargs.get("text_field", Field.CONTENT_KEY.value)
|
||||||
engine = kwargs["engine"]
|
engine = kwargs["engine"]
|
||||||
@ -356,12 +354,12 @@ def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dic
|
|||||||
|
|
||||||
if excludes_from_source:
|
if excludes_from_source:
|
||||||
# e.g. {"excludes": ["vector_field"]}
|
# e.g. {"excludes": ["vector_field"]}
|
||||||
mapping["mappings"]["_source"] = {"excludes": excludes_from_source}
|
mapping["mappings"]["_source"] = {"excludes": [vector_field]}
|
||||||
|
|
||||||
if using_ugc and method_name == "ivfpq":
|
if using_ugc and method_name == "ivfpq":
|
||||||
mapping["settings"]["index"]["knn_routing"] = True
|
mapping["settings"]["index"]["knn_routing"] = True
|
||||||
mapping["settings"]["index"]["knn.offline.construction"] = True
|
mapping["settings"]["index"]["knn.offline.construction"] = True
|
||||||
elif using_ugc and method_name == "hnsw" or using_ugc and method_name == "flat":
|
elif (using_ugc and method_name == "hnsw") or (using_ugc and method_name == "flat"):
|
||||||
mapping["settings"]["index"]["knn_routing"] = True
|
mapping["settings"]["index"]["knn_routing"] = True
|
||||||
return mapping
|
return mapping
|
||||||
|
|
||||||
@ -458,7 +456,7 @@ def default_vector_search_query(
|
|||||||
"query": {"knn": {vector_field: {"vector": query_vector, "k": k}}},
|
"query": {"knn": {vector_field: {"vector": query_vector, "k": k}}},
|
||||||
}
|
}
|
||||||
|
|
||||||
if filters is not None:
|
if filters is not None and len(filters) > 0:
|
||||||
# when using filter, transform filter from List[Dict] to Dict as valid format
|
# when using filter, transform filter from List[Dict] to Dict as valid format
|
||||||
filter_dict = {"bool": {"must": filters}} if len(filters) > 1 else filters[0]
|
filter_dict = {"bool": {"must": filters}} if len(filters) > 1 else filters[0]
|
||||||
search_query["query"]["knn"][vector_field]["filter"] = filter_dict # filter should be Dict
|
search_query["query"]["knn"][vector_field]["filter"] = filter_dict # filter should be Dict
|
||||||
|
Loading…
x
Reference in New Issue
Block a user