diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml index 2786cf67d3..6dde4d71c6 100644 --- a/.github/workflows/api-tests.yml +++ b/.github/workflows/api-tests.yml @@ -37,6 +37,27 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Set up Weaviate + uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: docker/docker-compose.middleware.yaml + services: weaviate + + - name: Set up Qdrant + uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: docker/docker-compose.qdrant.yaml + services: qdrant + + - name: Set up Milvus + uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: docker/docker-compose.milvus.yaml + services: | + etcd + minio + milvus-standalone + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -49,6 +70,9 @@ jobs: - name: Install dependencies run: pip install -r ./api/requirements.txt -r ./api/requirements-dev.txt + - name: Run Unit tests + run: dev/pytest/pytest_unit_tests.sh + - name: Run ModelRuntime run: dev/pytest/pytest_model_runtime.sh @@ -58,5 +82,5 @@ jobs: - name: Run Workflow run: dev/pytest/pytest_workflow.sh - - name: Run Unit tests - run: dev/pytest/pytest_unit_tests.sh + - name: Run Vector Stores + run: dev/pytest/pytest_vdb.sh diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py index aa3625a5a5..63cf502149 100644 --- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py +++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py @@ -250,7 +250,7 @@ class MilvusVector(BaseVector): # Create the collection collection_name = self._collection_name - self._client.create_collection(collection_name=collection_name, + self._client.create_collection_with_schema(collection_name=collection_name, schema=schema, index_param=index_params, consistency_level=self._consistency_level) redis_client.set(collection_exist_cache_key, 1, ex=3600) diff --git 
a/api/requirements.txt b/api/requirements.txt index 6dd3df8d22..c63b1fcf6d 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -55,7 +55,7 @@ xinference-client==0.9.4 safetensors~=0.4.3 zhipuai==1.0.7 werkzeug~=3.0.1 -pymilvus~=2.3.7 +pymilvus==2.3.1 qdrant-client==1.7.3 cohere~=5.2.4 pyyaml~=6.0.1 diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/milvus/__init__.py b/api/tests/integration_tests/vdb/__init__.py similarity index 100% rename from api/tests/unit_tests/core/rag/datasource/vdb/milvus/__init__.py rename to api/tests/integration_tests/vdb/__init__.py diff --git a/api/tests/integration_tests/vdb/milvus/__init__.py b/api/tests/integration_tests/vdb/milvus/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/vdb/milvus/test_milvus.py b/api/tests/integration_tests/vdb/milvus/test_milvus.py new file mode 100644 index 0000000000..22ed73987b --- /dev/null +++ b/api/tests/integration_tests/vdb/milvus/test_milvus.py @@ -0,0 +1,38 @@ +import uuid + +from core.rag.datasource.vdb.milvus.milvus_vector import MilvusConfig, MilvusVector +from models.dataset import Dataset +from tests.integration_tests.vdb.test_vector_store import ( + get_sample_document, + get_sample_embedding, + get_sample_query_vector, + setup_mock_redis, +) + + +def test_milvus_vector(setup_mock_redis) -> None: + dataset_id = str(uuid.uuid4()) + vector = MilvusVector( + collection_name=Dataset.gen_collection_name_by_id(dataset_id), + config=MilvusConfig( + host='localhost', + port=19530, + user='root', + password='Milvus', + ) + ) + + # create vector + vector.create( + texts=[get_sample_document(dataset_id)], + embeddings=[get_sample_embedding()], + ) + + # search by vector + hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector()) + assert len(hits_by_vector) >= 1 + + # milvus does not support full text searching yet in < 2.3.x + + # delete vector + vector.delete() diff --git 
a/api/tests/integration_tests/vdb/qdrant/__init__.py b/api/tests/integration_tests/vdb/qdrant/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/vdb/qdrant/test_qdrant.py b/api/tests/integration_tests/vdb/qdrant/test_qdrant.py new file mode 100644 index 0000000000..33e9d55dcf --- /dev/null +++ b/api/tests/integration_tests/vdb/qdrant/test_qdrant.py @@ -0,0 +1,40 @@ +import uuid + +from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector +from models.dataset import Dataset +from tests.integration_tests.vdb.test_vector_store import ( + get_sample_document, + get_sample_embedding, + get_sample_query_vector, + get_sample_text, + setup_mock_redis, +) + + +def test_qdrant_vector(setup_mock_redis)-> None: + dataset_id = str(uuid.uuid4()) + vector = QdrantVector( + collection_name=Dataset.gen_collection_name_by_id(dataset_id), + group_id=dataset_id, + config=QdrantConfig( + endpoint='http://localhost:6333', + api_key='difyai123456', + ) + ) + + # create vector + vector.create( + texts=[get_sample_document(dataset_id)], + embeddings=[get_sample_embedding()], + ) + + # search by vector + hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector()) + assert len(hits_by_vector) >= 1 + + # search by full text + hits_by_full_text = vector.search_by_full_text(query=get_sample_text()) + assert len(hits_by_full_text) >= 1 + + # delete vector + vector.delete() diff --git a/api/tests/integration_tests/vdb/test_vector_store.py b/api/tests/integration_tests/vdb/test_vector_store.py new file mode 100644 index 0000000000..536f3c735d --- /dev/null +++ b/api/tests/integration_tests/vdb/test_vector_store.py @@ -0,0 +1,46 @@ +from unittest.mock import MagicMock + +import pytest + +from core.rag.models.document import Document +from extensions import ext_redis + + +def get_sample_text() -> str: + return 'test_text' + + +def get_sample_embedding() -> list[float]: + return [1.1, 2.2, 3.3] + + +def 
get_sample_query_vector() -> list[float]: + return get_sample_embedding() + + +def get_sample_document(sample_dataset_id: str) -> Document: + doc = Document( + page_content=get_sample_text(), + metadata={ + "doc_id": sample_dataset_id, + "doc_hash": sample_dataset_id, + "document_id": sample_dataset_id, + "dataset_id": sample_dataset_id, + } + ) + return doc + + +@pytest.fixture +def setup_mock_redis() -> None: + # get + ext_redis.redis_client.get = MagicMock(return_value=None) + + # set + ext_redis.redis_client.set = MagicMock(return_value=None) + + # lock + mock_redis_lock = MagicMock() + mock_redis_lock.__enter__ = MagicMock() + mock_redis_lock.__exit__ = MagicMock() + ext_redis.redis_client.lock = mock_redis_lock diff --git a/api/tests/integration_tests/vdb/weaviate/__init__.py b/api/tests/integration_tests/vdb/weaviate/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/vdb/weaviate/test_weaviate.py b/api/tests/integration_tests/vdb/weaviate/test_weaviate.py new file mode 100644 index 0000000000..1a07d86924 --- /dev/null +++ b/api/tests/integration_tests/vdb/weaviate/test_weaviate.py @@ -0,0 +1,41 @@ +import uuid + +from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector +from models.dataset import Dataset +from tests.integration_tests.vdb.test_vector_store import ( + get_sample_document, + get_sample_embedding, + get_sample_query_vector, + get_sample_text, + setup_mock_redis, +) + + +def test_weaviate_vector(setup_mock_redis) -> None: + attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash'] + dataset_id = str(uuid.uuid4()) + vector = WeaviateVector( + collection_name=Dataset.gen_collection_name_by_id(dataset_id), + config=WeaviateConfig( + endpoint='http://localhost:8080', + api_key='WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih', + ), + attributes=attributes + ) + + # create vector + vector.create( + texts=[get_sample_document(dataset_id)], + 
embeddings=[get_sample_embedding()], + ) + + # search by vector + hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector()) + assert len(hits_by_vector) >= 1 + + # search by full text + hits_by_full_text = vector.search_by_full_text(query=get_sample_text()) + assert len(hits_by_full_text) >= 1 + + # delete vector + vector.delete() diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/milvus/test_milvus.py b/api/tests/unit_tests/core/rag/datasource/vdb/milvus/test_milvus.py index 73257dd338..9abc07fae5 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/milvus/test_milvus.py +++ b/api/tests/unit_tests/core/rag/datasource/vdb/milvus/test_milvus.py @@ -18,7 +18,7 @@ def test_default_value(): with pytest.raises(ValidationError) as e: MilvusConfig(**config) assert e.value.errors()[1]['msg'] == f'config MILVUS_{key.upper()} is required' - + config = MilvusConfig(**valid_config) assert config.secure is False assert config.database == 'default' diff --git a/dev/pytest/pytest_vdb.sh b/dev/pytest/pytest_vdb.sh new file mode 100755 index 0000000000..7b212150a4 --- /dev/null +++ b/dev/pytest/pytest_vdb.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -x + +pytest api/tests/integration_tests/vdb/ diff --git a/docker/milvus-standalone-docker-compose.yml b/docker/docker-compose.milvus.yaml similarity index 98% rename from docker/milvus-standalone-docker-compose.yml rename to docker/docker-compose.milvus.yaml index ae2846c817..c422efbf4b 100644 --- a/docker/milvus-standalone-docker-compose.yml +++ b/docker/docker-compose.milvus.yaml @@ -36,7 +36,7 @@ services: timeout: 20s retries: 3 - standalone: + milvus-standalone: container_name: milvus-standalone image: milvusdb/milvus:v2.3.1 command: ["milvus", "run", "standalone"] diff --git a/docker/docker-compose.qdrant.yaml b/docker/docker-compose.qdrant.yaml new file mode 100644 index 0000000000..0f72b5f10f --- /dev/null +++ b/docker/docker-compose.qdrant.yaml @@ -0,0 +1,12 @@ +version: '3' +services: + # Qdrant 
vector store. + qdrant: + image: langgenius/qdrant:v1.7.3 + restart: always + volumes: + - ./volumes/qdrant:/qdrant/storage + environment: + QDRANT_API_KEY: 'difyai123456' + ports: + - "6333:6333"