Fix adding a segment when the dataset and document are empty (#3021)

Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
Jyong 2024-03-29 13:06:00 +08:00 committed by GitHub
parent 2c43393bf1
commit a6cd0f0e73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 24 additions and 2 deletions

View File

@ -144,6 +144,16 @@ class MilvusVector(BaseVector):
utility.drop_collection(self._collection_name, None, using=alias) utility.drop_collection(self._collection_name, None, using=alias)
def text_exists(self, id: str) -> bool: def text_exists(self, id: str) -> bool:
alias = uuid4().hex
if self._client_config.secure:
uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port)
else:
uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port)
connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)
from pymilvus import utility
if not utility.has_collection(self._collection_name, using=alias):
return False
result = self._client.query(collection_name=self._collection_name, result = self._client.query(collection_name=self._collection_name,
filter=f'metadata["doc_id"] == "{id}"', filter=f'metadata["doc_id"] == "{id}"',

View File

@ -275,6 +275,13 @@ class QdrantVector(BaseVector):
) )
def text_exists(self, id: str) -> bool: def text_exists(self, id: str) -> bool:
all_collection_name = []
collections_response = self._client.get_collections()
collection_list = collections_response.collections
for collection in collection_list:
all_collection_name.append(collection.name)
if self._collection_name not in all_collection_name:
return False
response = self._client.retrieve( response = self._client.retrieve(
collection_name=self._collection_name, collection_name=self._collection_name,
ids=[id] ids=[id]

View File

@ -128,8 +128,8 @@ class Vector:
if kwargs.get('duplicate_check', False): if kwargs.get('duplicate_check', False):
documents = self._filter_duplicate_texts(documents) documents = self._filter_duplicate_texts(documents)
embeddings = self._embeddings.embed_documents([document.page_content for document in documents]) embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
self._vector_processor.add_texts( self._vector_processor.create(
documents=documents, texts=documents,
embeddings=embeddings, embeddings=embeddings,
**kwargs **kwargs
) )

View File

@ -134,6 +134,11 @@ class WeaviateVector(BaseVector):
def text_exists(self, id: str) -> bool: def text_exists(self, id: str) -> bool:
collection_name = self._collection_name collection_name = self._collection_name
schema = self._default_schema(self._collection_name)
# check whether the index already exists
if not self._client.schema.contains(schema):
return False
result = self._client.query.get(collection_name).with_additional(["id"]).with_where({ result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
"path": ["doc_id"], "path": ["doc_id"],
"operator": "Equal", "operator": "Equal",