From 70698024f59d2fa0e4f2d267d9bd206114e21cb5 Mon Sep 17 00:00:00 2001 From: huangzhuo1949 <167434202+huangzhuo1949@users.noreply.github.com> Date: Fri, 3 Jan 2025 20:46:39 +0800 Subject: [PATCH] fix: empty delete bug (#12339) Co-authored-by: huangzhuo --- api/core/rag/datasource/vdb/baidu/baidu_vector.py | 2 ++ api/core/rag/datasource/vdb/chroma/chroma_vector.py | 2 ++ .../rag/datasource/vdb/elasticsearch/elasticsearch_vector.py | 2 ++ api/core/rag/datasource/vdb/myscale/myscale_vector.py | 2 ++ api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py | 2 ++ api/core/rag/datasource/vdb/oracle/oraclevector.py | 2 ++ api/core/rag/datasource/vdb/pgvector/pgvector.py | 5 +++++ api/core/rag/datasource/vdb/tencent/tencent_vector.py | 2 ++ 8 files changed, 19 insertions(+) diff --git a/api/core/rag/datasource/vdb/baidu/baidu_vector.py b/api/core/rag/datasource/vdb/baidu/baidu_vector.py index 85596ad20e..a658495af7 100644 --- a/api/core/rag/datasource/vdb/baidu/baidu_vector.py +++ b/api/core/rag/datasource/vdb/baidu/baidu_vector.py @@ -113,6 +113,8 @@ class BaiduVector(BaseVector): return False def delete_by_ids(self, ids: list[str]) -> None: + if not ids: + return quoted_ids = [f"'{id}'" for id in ids] self._db.table(self._collection_name).delete(filter=f"id IN({', '.join(quoted_ids)})") diff --git a/api/core/rag/datasource/vdb/chroma/chroma_vector.py b/api/core/rag/datasource/vdb/chroma/chroma_vector.py index 0eab01b507..907c4d2285 100644 --- a/api/core/rag/datasource/vdb/chroma/chroma_vector.py +++ b/api/core/rag/datasource/vdb/chroma/chroma_vector.py @@ -83,6 +83,8 @@ class ChromaVector(BaseVector): self._client.delete_collection(self._collection_name) def delete_by_ids(self, ids: list[str]) -> None: + if not ids: + return collection = self._client.get_or_create_collection(self._collection_name) collection.delete(ids=ids) diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py 
index 8661828dc2..cca696baee 100644 --- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py +++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py @@ -98,6 +98,8 @@ class ElasticSearchVector(BaseVector): return bool(self._client.exists(index=self._collection_name, id=id)) def delete_by_ids(self, ids: list[str]) -> None: + if not ids: + return for id in ids: self._client.delete(index=self._collection_name, id=id) diff --git a/api/core/rag/datasource/vdb/myscale/myscale_vector.py b/api/core/rag/datasource/vdb/myscale/myscale_vector.py index e63e1f522b..556b952ec2 100644 --- a/api/core/rag/datasource/vdb/myscale/myscale_vector.py +++ b/api/core/rag/datasource/vdb/myscale/myscale_vector.py @@ -100,6 +100,8 @@ class MyScaleVector(BaseVector): return results.row_count > 0 def delete_by_ids(self, ids: list[str]) -> None: + if not ids: + return self._client.command( f"DELETE FROM {self._config.database}.{self._collection_name} WHERE id IN {str(tuple(ids))}" ) diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index 957c799a60..3c2d53ce78 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -134,6 +134,8 @@ class OceanBaseVector(BaseVector): return bool(cur.rowcount != 0) def delete_by_ids(self, ids: list[str]) -> None: + if not ids: + return self._client.delete(table_name=self._collection_name, ids=ids) def get_ids_by_metadata_field(self, key: str, value: str) -> list[str]: diff --git a/api/core/rag/datasource/vdb/oracle/oraclevector.py b/api/core/rag/datasource/vdb/oracle/oraclevector.py index dfff3563c3..a58df7eb9f 100644 --- a/api/core/rag/datasource/vdb/oracle/oraclevector.py +++ b/api/core/rag/datasource/vdb/oracle/oraclevector.py @@ -167,6 +167,8 @@ class OracleVector(BaseVector): return docs def delete_by_ids(self, ids: list[str]) -> None: + if not ids: + return with 
self._get_cursor() as cur: cur.execute(f"DELETE FROM {self.table_name} WHERE id IN %s" % (tuple(ids),)) diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index 271281ca7e..de443ba580 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -129,6 +129,11 @@ class PGVector(BaseVector): return docs def delete_by_ids(self, ids: list[str]) -> None: + # Avoid crashes caused by running a delete operation with an empty list of ids. + # Scenario 1: extracting a document fails, so the table is never created. + # Clicking the retry button then triggers a delete operation on an empty list. + if not ids: + return with self._get_cursor() as cur: cur.execute(f"DELETE FROM {self.table_name} WHERE id IN %s", (tuple(ids),)) diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py index c15f4b229f..1a4fa7b87e 100644 --- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py +++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py @@ -140,6 +140,8 @@ class TencentVector(BaseVector): return False def delete_by_ids(self, ids: list[str]) -> None: + if not ids: + return self._db.collection(self._collection_name).delete(document_ids=ids) def delete_by_metadata_field(self, key: str, value: str) -> None: