feat: add pg vector index (#12338)

Co-authored-by: huangzhuo <huangzhuo1@xiaomi.com>
This commit is contained in:
huangzhuo1949 2025-01-22 17:07:18 +08:00 committed by GitHub
parent 1e73f63ff8
commit 4c3076f2a4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -57,6 +57,11 @@ CREATE TABLE IF NOT EXISTS {table_name} (
) using heap; ) using heap;
""" """
SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
"""
class PGVector(BaseVector): class PGVector(BaseVector):
def __init__(self, collection_name: str, config: PGVectorConfig): def __init__(self, collection_name: str, config: PGVectorConfig):
@ -205,7 +210,10 @@ class PGVector(BaseVector):
with self._get_cursor() as cur: with self._get_cursor() as cur:
cur.execute("CREATE EXTENSION IF NOT EXISTS vector") cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension)) cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension))
# TODO: create index https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing # PG hnsw index only support 2000 dimension or less
# ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
if dimension <= 2000:
cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name))
redis_client.set(collection_exist_cache_key, 1, ex=3600) redis_client.set(collection_exist_cache_key, 1, ex=3600)