mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-11 19:09:02 +08:00
fix add segment when dataset and document is empty (#3021)
Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
parent
2c43393bf1
commit
a6cd0f0e73
@ -144,6 +144,16 @@ class MilvusVector(BaseVector):
|
|||||||
utility.drop_collection(self._collection_name, None, using=alias)
|
utility.drop_collection(self._collection_name, None, using=alias)
|
||||||
|
|
||||||
def text_exists(self, id: str) -> bool:
|
def text_exists(self, id: str) -> bool:
|
||||||
|
alias = uuid4().hex
|
||||||
|
if self._client_config.secure:
|
||||||
|
uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port)
|
||||||
|
else:
|
||||||
|
uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port)
|
||||||
|
connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)
|
||||||
|
|
||||||
|
from pymilvus import utility
|
||||||
|
if not utility.has_collection(self._collection_name, using=alias):
|
||||||
|
return False
|
||||||
|
|
||||||
result = self._client.query(collection_name=self._collection_name,
|
result = self._client.query(collection_name=self._collection_name,
|
||||||
filter=f'metadata["doc_id"] == "{id}"',
|
filter=f'metadata["doc_id"] == "{id}"',
|
||||||
|
@ -275,6 +275,13 @@ class QdrantVector(BaseVector):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def text_exists(self, id: str) -> bool:
|
def text_exists(self, id: str) -> bool:
|
||||||
|
all_collection_name = []
|
||||||
|
collections_response = self._client.get_collections()
|
||||||
|
collection_list = collections_response.collections
|
||||||
|
for collection in collection_list:
|
||||||
|
all_collection_name.append(collection.name)
|
||||||
|
if self._collection_name not in all_collection_name:
|
||||||
|
return False
|
||||||
response = self._client.retrieve(
|
response = self._client.retrieve(
|
||||||
collection_name=self._collection_name,
|
collection_name=self._collection_name,
|
||||||
ids=[id]
|
ids=[id]
|
||||||
|
@ -128,8 +128,8 @@ class Vector:
|
|||||||
if kwargs.get('duplicate_check', False):
|
if kwargs.get('duplicate_check', False):
|
||||||
documents = self._filter_duplicate_texts(documents)
|
documents = self._filter_duplicate_texts(documents)
|
||||||
embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
|
embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
|
||||||
self._vector_processor.add_texts(
|
self._vector_processor.create(
|
||||||
documents=documents,
|
texts=documents,
|
||||||
embeddings=embeddings,
|
embeddings=embeddings,
|
||||||
**kwargs
|
**kwargs
|
||||||
)
|
)
|
||||||
|
@ -134,6 +134,11 @@ class WeaviateVector(BaseVector):
|
|||||||
|
|
||||||
def text_exists(self, id: str) -> bool:
|
def text_exists(self, id: str) -> bool:
|
||||||
collection_name = self._collection_name
|
collection_name = self._collection_name
|
||||||
|
schema = self._default_schema(self._collection_name)
|
||||||
|
|
||||||
|
# check whether the index already exists
|
||||||
|
if not self._client.schema.contains(schema):
|
||||||
|
return False
|
||||||
result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
|
result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
|
||||||
"path": ["doc_id"],
|
"path": ["doc_id"],
|
||||||
"operator": "Equal",
|
"operator": "Equal",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user