test: run vdb tests on TiDB Vector with docker in CI tests (#11645)

This commit is contained in:
Bowen Liang 2024-12-15 17:16:40 +08:00 committed by GitHub
parent 7e154a467b
commit 924b4fe742
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 21 additions and 32 deletions

View File

@ -51,7 +51,7 @@ jobs:
- name: Expose Service Ports
run: sh .github/workflows/expose_service_ports.sh
- name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
- name: Set up Vector Stores (TiDB, Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
uses: hoverkraft-tech/compose-action@v2.0.2
with:
compose-file: |
@ -67,6 +67,7 @@ jobs:
pgvector
chroma
elasticsearch
tidb
- name: Test Vector Stores
run: poetry run -C api bash dev/pytest/pytest_vdb.sh

View File

@ -37,8 +37,6 @@ class TiDBVectorConfig(BaseModel):
raise ValueError("config TIDB_VECTOR_PORT is required")
if not values["user"]:
raise ValueError("config TIDB_VECTOR_USER is required")
if not values["password"]:
raise ValueError("config TIDB_VECTOR_PASSWORD is required")
if not values["database"]:
raise ValueError("config TIDB_VECTOR_DATABASE is required")
if not values["program_name"]:

View File

@ -12,11 +12,11 @@ def tidb_vector():
return TiDBVector(
collection_name="test_collection",
config=TiDBVectorConfig(
host="xxx.eu-central-1.xxx.aws.tidbcloud.com",
port="4000",
user="xxx.root",
password="xxxxxx",
database="dify",
host="localhost",
port=4000,
user="root",
password="",
database="test",
program_name="langgenius/dify",
),
)
@ -27,35 +27,14 @@ class TiDBVectorTest(AbstractVectorTest):
super().__init__()
self.vector = vector
def text_exists(self):
exist = self.vector.text_exists(self.example_doc_id)
assert exist == False
def search_by_vector(self):
hits_by_vector: list[Document] = self.vector.search_by_vector(query_vector=self.example_embedding)
assert len(hits_by_vector) == 0
def search_by_full_text(self):
hits_by_full_text: list[Document] = self.vector.search_by_full_text(query=get_example_text())
assert len(hits_by_full_text) == 0
def get_ids_by_metadata_field(self):
ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
assert len(ids) == 0
ids = self.vector.get_ids_by_metadata_field(key="doc_id", value=self.example_doc_id)
assert len(ids) == 1
def test_tidb_vector(setup_mock_redis, setup_tidbvector_mock, tidb_vector, mock_session):
def test_tidb_vector(setup_mock_redis, tidb_vector):
TiDBVectorTest(vector=tidb_vector).run_all_tests()
@pytest.fixture
def mock_session():
with patch("core.rag.datasource.vdb.tidb_vector.tidb_vector.Session", new_callable=MagicMock) as mock_session:
yield mock_session
@pytest.fixture
def setup_tidbvector_mock(tidb_vector, mock_session):
with patch("core.rag.datasource.vdb.tidb_vector.tidb_vector.create_engine"):
with patch.object(tidb_vector._engine, "connect"):
yield tidb_vector

View File

@ -14,3 +14,4 @@ pytest api/tests/integration_tests/vdb/chroma \
api/tests/integration_tests/vdb/upstash \
api/tests/integration_tests/vdb/couchbase \
api/tests/integration_tests/vdb/oceanbase \
api/tests/integration_tests/vdb/tidb_vector \

View File

@ -491,6 +491,16 @@ services:
- '${EXPOSE_NGINX_PORT:-80}:${NGINX_PORT:-80}'
- '${EXPOSE_NGINX_SSL_PORT:-443}:${NGINX_SSL_PORT:-443}'
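# The TiDB vector store, run as a single container for testing (e.g. the CI vector-store tests).
# This setup is not intended for production; for production use, please refer to https://github.com/pingcap/tidb-docker-compose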
tidb:
image: pingcap/tidb:v8.4.0
ports:
- "4000:4000"
command:
- --store=unistore
restart: always
# The Weaviate vector store.
weaviate:
image: semitechnologies/weaviate:1.19.0