From b29299007541422a4090fdf4ad9c4ea6cc10751a Mon Sep 17 00:00:00 2001 From: k-kayashima Date: Thu, 15 May 2025 12:43:44 +0900 Subject: [PATCH] Fix: Ensure unique index names for pgvector knowledge tables (#19672) Co-authored-by: crazywoola <427733928@qq.com> --- api/core/rag/datasource/vdb/pgvector/pgvector.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index 366a21c381..04e9cf801e 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -1,3 +1,4 @@ +import hashlib import json import logging import uuid @@ -61,12 +62,12 @@ CREATE TABLE IF NOT EXISTS {table_name} ( """ SQL_CREATE_INDEX = """ -CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name} +CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx_{index_hash} ON {table_name} USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); """ SQL_CREATE_INDEX_PG_BIGM = """ -CREATE INDEX IF NOT EXISTS bigm_idx ON {table_name} +CREATE INDEX IF NOT EXISTS bigm_idx_{index_hash} ON {table_name} USING gin (text gin_bigm_ops); """ @@ -76,6 +77,7 @@ class PGVector(BaseVector): super().__init__(collection_name) self.pool = self._create_connection_pool(config) self.table_name = f"embedding_{collection_name}" + self.index_hash = hashlib.md5(self.table_name.encode()).hexdigest()[:8] self.pg_bigm = config.pg_bigm def get_type(self) -> str: @@ -256,10 +258,9 @@ class PGVector(BaseVector): # PG hnsw index only support 2000 dimension or less # ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing if dimension <= 2000: - cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name)) + cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name, index_hash=self.index_hash)) if self.pg_bigm: - cur.execute("CREATE EXTENSION IF NOT EXISTS pg_bigm") - cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name)) + cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name, index_hash=self.index_hash)) redis_client.set(collection_exist_cache_key, 1, ex=3600)