diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py index 203b7eff37..f62d603d8d 100644 --- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py +++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py @@ -144,6 +144,16 @@ class MilvusVector(BaseVector): utility.drop_collection(self._collection_name, None, using=alias) def text_exists(self, id: str) -> bool: + alias = uuid4().hex + if self._client_config.secure: + uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port) + else: + uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port) + connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password) + + from pymilvus import utility + if not utility.has_collection(self._collection_name, using=alias): + return False result = self._client.query(collection_name=self._collection_name, filter=f'metadata["doc_id"] == "{id}"', diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py index 6bd4b5c340..436e6b5f6a 100644 --- a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py +++ b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py @@ -275,6 +275,13 @@ class QdrantVector(BaseVector): ) def text_exists(self, id: str) -> bool: + all_collection_name = [] + collections_response = self._client.get_collections() + collection_list = collections_response.collections + for collection in collection_list: + all_collection_name.append(collection.name) + if self._collection_name not in all_collection_name: + return False response = self._client.retrieve( collection_name=self._collection_name, ids=[id] diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 27ae15a025..71fc07967c 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -128,8 +128,8 @@ class Vector: if kwargs.get('duplicate_check', False): documents = self._filter_duplicate_texts(documents) embeddings = self._embeddings.embed_documents([document.page_content for document in documents]) - self._vector_processor.add_texts( - documents=documents, + self._vector_processor.create( + texts=documents, embeddings=embeddings, **kwargs ) diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py index 6e317115b8..5d24ee9fd2 100644 --- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py +++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py @@ -134,6 +134,11 @@ class WeaviateVector(BaseVector): def text_exists(self, id: str) -> bool: collection_name = self._collection_name + schema = self._default_schema(self._collection_name) + + # check whether the index already exists + if not self._client.schema.contains(schema): + return False result = self._client.query.get(collection_name).with_additional(["id"]).with_where({ "path": ["doc_id"], "operator": "Equal",