Fix adding a segment when the dataset and document are empty (#3021)

Co-authored-by: jyong <jyong@dify.ai>
2025-08-11 19:09:02 +08:00 · 2024-03-29 13:06:00 +08:00 · 2024-03-29 13:06:00 +08:00 · a6cd0f0e73
commit a6cd0f0e73
parent 2c43393bf1
4 changed files with 24 additions and 2 deletions
--- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py
+++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py
@@ -144,6 +144,16 @@ class MilvusVector(BaseVector):
            utility.drop_collection(self._collection_name, None, using=alias)
    def text_exists(self, id: str) -> bool:
        alias = uuid4().hex
        if self._client_config.secure:
            uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port)
        else:
            uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port)
        connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)
        from pymilvus import utility
        if not utility.has_collection(self._collection_name, using=alias):
            return False
        result = self._client.query(collection_name=self._collection_name,
                                    filter=f'metadata["doc_id"] == "{id}"',
--- a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
+++ b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
@@ -275,6 +275,13 @@ class QdrantVector(BaseVector):
            )
    def text_exists(self, id: str) -> bool:
        all_collection_name = []
        collections_response = self._client.get_collections()
        collection_list = collections_response.collections
        for collection in collection_list:
            all_collection_name.append(collection.name)
        if self._collection_name not in all_collection_name:
            return False
        response = self._client.retrieve(
            collection_name=self._collection_name,
            ids=[id]
--- a/api/core/rag/datasource/vdb/vector_factory.py
+++ b/api/core/rag/datasource/vdb/vector_factory.py
@@ -128,8 +128,8 @@ class Vector:
        if kwargs.get('duplicate_check', False):
            documents = self._filter_duplicate_texts(documents)
        embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
-        self._vector_processor.add_texts(
+        self._vector_processor.create(
-            documents=documents,
+            texts=documents,
            embeddings=embeddings,
            **kwargs
        )
--- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py
+++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py
@@ -134,6 +134,11 @@ class WeaviateVector(BaseVector):
    def text_exists(self, id: str) -> bool:
        collection_name = self._collection_name
        schema = self._default_schema(self._collection_name)
        # check whether the index already exists
        if not self._client.schema.contains(schema):
            return False
        result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
            "path": ["doc_id"],
            "operator": "Equal",