test: refactor vdb tests by visitor design pattern (#3838)

This commit is contained in:
Bowen Liang 2024-04-25 18:55:49 +08:00 committed by GitHub
parent 34bfb715e1
commit 86e7330fa2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 80 additions and 91 deletions

View File

@ -1,19 +1,16 @@
import uuid
from core.rag.datasource.vdb.milvus.milvus_vector import MilvusConfig, MilvusVector from core.rag.datasource.vdb.milvus.milvus_vector import MilvusConfig, MilvusVector
from models.dataset import Dataset
from tests.integration_tests.vdb.test_vector_store import ( from tests.integration_tests.vdb.test_vector_store import (
get_sample_document, AbstractTestVector,
get_sample_embedding, get_sample_text,
get_sample_query_vector,
setup_mock_redis, setup_mock_redis,
) )
def test_milvus_vector(setup_mock_redis) -> None: class TestMilvusVector(AbstractTestVector):
dataset_id = str(uuid.uuid4()) def __init__(self):
vector = MilvusVector( super().__init__()
collection_name=Dataset.gen_collection_name_by_id(dataset_id), self.vector = MilvusVector(
collection_name=self.collection_name,
config=MilvusConfig( config=MilvusConfig(
host='localhost', host='localhost',
port=19530, port=19530,
@ -22,17 +19,11 @@ def test_milvus_vector(setup_mock_redis) -> None:
) )
) )
# create vector def search_by_full_text(self):
vector.create(
texts=[get_sample_document(dataset_id)],
embeddings=[get_sample_embedding()],
)
# search by vector
hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
assert len(hits_by_vector) >= 1
# milvus does not support full text searching yet in < 2.3.x # milvus does not support full text searching yet in < 2.3.x
hits_by_full_text = self.vector.search_by_full_text(query=get_sample_text())
assert len(hits_by_full_text) == 0
# delete vector
vector.delete() def test_milvus_vector(setup_mock_redis):
TestMilvusVector().run_all_test()

View File

@ -1,40 +1,23 @@
import uuid
from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector
from models.dataset import Dataset
from tests.integration_tests.vdb.test_vector_store import ( from tests.integration_tests.vdb.test_vector_store import (
get_sample_document, AbstractTestVector,
get_sample_embedding,
get_sample_query_vector,
get_sample_text,
setup_mock_redis, setup_mock_redis,
) )
def test_qdrant_vector(setup_mock_redis)-> None: class TestQdrantVector(AbstractTestVector):
dataset_id = str(uuid.uuid4()) def __init__(self):
vector = QdrantVector( super().__init__()
collection_name=Dataset.gen_collection_name_by_id(dataset_id), self.attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash']
group_id=dataset_id, self.vector = QdrantVector(
collection_name=self.collection_name,
group_id=self.dataset_id,
config=QdrantConfig( config=QdrantConfig(
endpoint='http://localhost:6333', endpoint='http://localhost:6333',
api_key='difyai123456', api_key='difyai123456',
) )
) )
# create vector
vector.create(
texts=[get_sample_document(dataset_id)],
embeddings=[get_sample_embedding()],
)
# search by vector def test_qdrant_vector(setup_mock_redis):
hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector()) TestQdrantVector().run_all_test()
assert len(hits_by_vector) >= 1
# search by full text
hits_by_full_text = vector.search_by_full_text(query=get_sample_text())
assert len(hits_by_full_text) >= 1
# delete vector
vector.delete()

View File

@ -1,9 +1,11 @@
import uuid
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest import pytest
from core.rag.models.document import Document from core.rag.models.document import Document
from extensions import ext_redis from extensions import ext_redis
from models.dataset import Dataset
def get_sample_text() -> str: def get_sample_text() -> str:
@ -44,3 +46,33 @@ def setup_mock_redis() -> None:
mock_redis_lock.__enter__ = MagicMock() mock_redis_lock.__enter__ = MagicMock()
mock_redis_lock.__exit__ = MagicMock() mock_redis_lock.__exit__ = MagicMock()
ext_redis.redis_client.lock = mock_redis_lock ext_redis.redis_client.lock = mock_redis_lock
class AbstractTestVector:
    """Shared scenario for vector-store integration tests.

    Subclasses assign a concrete store to ``self.vector`` in their own
    ``__init__`` (after calling ``super().__init__()``) and may override any
    individual step, e.g. ``search_by_full_text`` for a backend whose
    full-text search is expected to return no hits.
    """

    def __init__(self):
        # Placeholder; the concrete store is set by the subclass.
        self.vector = None
        # A fresh dataset id per instance, and the collection name derived
        # from it, so each run works against its own collection.
        self.dataset_id = str(uuid.uuid4())
        self.collection_name = Dataset.gen_collection_name_by_id(self.dataset_id)

    def create_vector(self) -> None:
        """Create the store from the sample document and sample embedding."""
        self.vector.create(
            texts=[get_sample_document(self.dataset_id)],
            embeddings=[get_sample_embedding()],
        )

    def search_by_vector(self) -> None:
        """Assert that searching with the sample query vector yields a hit."""
        hits_by_vector = self.vector.search_by_vector(query_vector=get_sample_query_vector())
        assert len(hits_by_vector) >= 1

    def search_by_full_text(self) -> None:
        """Assert that a full-text search for the sample text yields a hit."""
        hits_by_full_text = self.vector.search_by_full_text(query=get_sample_text())
        assert len(hits_by_full_text) >= 1

    def delete_vector(self) -> None:
        """Delete what ``create_vector`` created in the store."""
        self.vector.delete()

    def run_all_test(self) -> None:
        """Run create -> search by vector -> search by full text -> delete.

        Once creation has succeeded, deletion always runs, even when one of
        the search steps raises, so a failing assertion does not leave the
        created data behind. The original exception still propagates.
        """
        # Creation stays outside the ``try``: if it fails there may be
        # nothing to delete, and its error should surface unchanged.
        self.create_vector()
        try:
            self.search_by_vector()
            self.search_by_full_text()
        finally:
            self.delete_vector()

View File

@ -1,41 +1,24 @@
import uuid
from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector
from models.dataset import Dataset from models.dataset import Dataset
from tests.integration_tests.vdb.test_vector_store import ( from tests.integration_tests.vdb.test_vector_store import (
get_sample_document, AbstractTestVector,
get_sample_embedding,
get_sample_query_vector,
get_sample_text,
setup_mock_redis, setup_mock_redis,
) )
def test_weaviate_vector(setup_mock_redis) -> None: class TestWeaviateVector(AbstractTestVector):
attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash'] def __init__(self):
dataset_id = str(uuid.uuid4()) super().__init__()
vector = WeaviateVector( self.attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash']
collection_name=Dataset.gen_collection_name_by_id(dataset_id), self.vector = WeaviateVector(
collection_name=self.collection_name,
config=WeaviateConfig( config=WeaviateConfig(
endpoint='http://localhost:8080', endpoint='http://localhost:8080',
api_key='WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih', api_key='WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih',
), ),
attributes=attributes attributes=self.attributes
) )
# create vector
vector.create(
texts=[get_sample_document(dataset_id)],
embeddings=[get_sample_embedding()],
)
# search by vector def test_weaviate_vector(setup_mock_redis):
hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector()) TestWeaviateVector().run_all_test()
assert len(hits_by_vector) >= 1
# search by full text
hits_by_full_text = vector.search_by_full_text(query=get_sample_text())
assert len(hits_by_full_text) >= 1
# delete vector
vector.delete()