From b930716745db9582f04d12152878b1430f993ff7 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Wed, 22 Nov 2023 16:41:20 +0800 Subject: [PATCH] fix weaviate hybrid search issue (#1600) Co-authored-by: jyong --- api/core/index/vector_index/weaviate_vector_index.py | 2 +- api/core/vector_store/vector/weaviate.py | 11 ++++++----- api/core/vector_store/weaviate_vector_store.py | 2 +- docker/docker-compose.middleware.yaml | 4 ++-- docker/docker-compose.yaml | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/api/core/index/vector_index/weaviate_vector_index.py b/api/core/index/vector_index/weaviate_vector_index.py index 3e8d9ae1bf..1305e576c3 100644 --- a/api/core/index/vector_index/weaviate_vector_index.py +++ b/api/core/index/vector_index/weaviate_vector_index.py @@ -111,7 +111,7 @@ class WeaviateVectorIndex(BaseVectorIndex): if self._vector_store: return self._vector_store - attributes = ['doc_id', 'dataset_id', 'document_id'] + attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash'] if self._is_origin(): attributes = ['doc_id'] diff --git a/api/core/vector_store/vector/weaviate.py b/api/core/vector_store/vector/weaviate.py index afbf68db68..7da85ed9a0 100644 --- a/api/core/vector_store/vector/weaviate.py +++ b/api/core/vector_store/vector/weaviate.py @@ -60,7 +60,7 @@ def _create_weaviate_client(**kwargs: Any) -> Any: def _default_score_normalizer(val: float) -> float: - return 1 - 1 / (1 + np.exp(val)) + return 1 - val def _json_serializable(value: Any) -> Any: @@ -243,7 +243,8 @@ class Weaviate(VectorStore): query_obj = query_obj.with_where(kwargs.get("where_filter")) if kwargs.get("additional"): query_obj = query_obj.with_additional(kwargs.get("additional")) - result = query_obj.with_bm25(query=content).with_limit(k).do() + properties = ['text', 'dataset_id', 'doc_hash', 'doc_id', 'document_id'] + result = query_obj.with_bm25(query=query, properties=properties).with_limit(k).do() if "errors" in result: raise ValueError(f"Error during query: {result['errors']}") docs = [] @@ -380,14 +381,14 @@ class Weaviate(VectorStore): result = ( query_obj.with_near_vector(vector) .with_limit(k) - .with_additional("vector") + .with_additional(["vector", "distance"]) .do() ) else: result = ( query_obj.with_near_text(content) .with_limit(k) - .with_additional("vector") + .with_additional(["vector", "distance"]) .do() ) @@ -397,7 +398,7 @@ class Weaviate(VectorStore): docs_and_scores = [] for res in result["data"]["Get"][self._index_name]: text = res.pop(self._text_key) - score = np.dot(res["_additional"]["vector"], embedded_query) + score = res["_additional"]["distance"] docs_and_scores.append((Document(page_content=text, metadata=res), score)) return docs_and_scores diff --git a/api/core/vector_store/weaviate_vector_store.py b/api/core/vector_store/weaviate_vector_store.py index 6dae568827..b5b3d84a9a 100644 --- a/api/core/vector_store/weaviate_vector_store.py +++ b/api/core/vector_store/weaviate_vector_store.py @@ -1,4 +1,4 @@ -from langchain.vectorstores import Weaviate +from core.vector_store.vector.weaviate import Weaviate class WeaviateVectorStore(Weaviate): diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml index 559f5f6b5b..7213a38a0d 100644 --- a/docker/docker-compose.middleware.yaml +++ b/docker/docker-compose.middleware.yaml @@ -30,7 +30,7 @@ services: # The Weaviate vector store. weaviate: - image: semitechnologies/weaviate:1.18.4 + image: semitechnologies/weaviate:1.19.0 restart: always volumes: # Mount the Weaviate data directory to the container. @@ -63,4 +63,4 @@ services: # environment: # QDRANT__API_KEY: 'difyai123456' # ports: -# - "6333:6333" \ No newline at end of file +# - "6333:6333" diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 5cf2a97d5c..5c27bb3f8b 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -253,7 +253,7 @@ services: # The Weaviate vector store. weaviate: - image: semitechnologies/weaviate:1.18.4 + image: semitechnologies/weaviate:1.19.0 restart: always volumes: # Mount the Weaviate data directory to the container.