feat: integrate opendal storage (#11508)

Signed-off-by: -LAN- <laipz8200@outlook.com>
-LAN- authored on 2024-12-11 14:50:54 +08:00; committed by GitHub
parent 1765fe2a29
commit 8d4bb9b40d
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
17 changed files with 798 additions and 597 deletions


@@ -56,20 +56,36 @@ DB_DATABASE=dify
# Storage configuration
# use for store upload files, private keys...
-# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
-STORAGE_TYPE=local
-STORAGE_LOCAL_PATH=storage
+# storage type: opendal, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
+STORAGE_TYPE=opendal
+# Apache OpenDAL storage configuration, refer to https://github.com/apache/opendal
+STORAGE_OPENDAL_SCHEME=fs
+# OpenDAL FS
+OPENDAL_FS_ROOT=storage
+# OpenDAL S3
+OPENDAL_S3_ROOT=/
+OPENDAL_S3_BUCKET=your-bucket-name
+OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
+OPENDAL_S3_ACCESS_KEY_ID=your-access-key
+OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
+OPENDAL_S3_REGION=your-region
+OPENDAL_S3_SERVER_SIDE_ENCRYPTION=
+# S3 Storage configuration
S3_USE_AWS_MANAGED_IAM=false
S3_ENDPOINT=https://your-bucket-name.storage.s3.clooudflare.com
S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region
# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key
AZURE_BLOB_CONTAINER_NAME=yout-container-name
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net
# Aliyun oss Storage configuration
ALIYUN_OSS_BUCKET_NAME=your-bucket-name
ALIYUN_OSS_ACCESS_KEY=your-access-key
@@ -79,6 +95,7 @@ ALIYUN_OSS_AUTH_VERSION=v1
ALIYUN_OSS_REGION=your-region
# Don't start with '/'. OSS doesn't support leading slash in object names.
ALIYUN_OSS_PATH=your-path
# Google Storage configuration
GOOGLE_STORAGE_BUCKET_NAME=yout-bucket-name
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string
@@ -125,8 +142,8 @@ SUPABASE_URL=your-server-url
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
-# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
+# Vector database configuration
+# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
VECTOR_STORE=weaviate
# Weaviate configuration
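For orientation (not part of the commit): with the new defaults above, STORAGE_OPENDAL_SCHEME=fs and OPENDAL_FS_ROOT=storage amount to roughly the following OpenDAL operator, sketched here with an illustrative path and payload.

import opendal

# roughly what STORAGE_OPENDAL_SCHEME=fs with OPENDAL_FS_ROOT=storage resolves to
op = opendal.Operator(scheme="fs", root="storage")
op.write(path="hello.txt", bs=b"hello")  # path and payload are placeholders
print(op.read(path="hello.txt"))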


@@ -1,54 +1,69 @@
-from typing import Any, Optional
+from typing import Any, Literal, Optional
from urllib.parse import quote_plus

from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
from pydantic_settings import BaseSettings

-from configs.middleware.cache.redis_config import RedisConfig
+from .cache.redis_config import RedisConfig
-from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
+from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
-from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
+from .storage.amazon_s3_storage_config import S3StorageConfig
-from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
+from .storage.azure_blob_storage_config import AzureBlobStorageConfig
-from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
+from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
-from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
+from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
-from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
+from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
-from configs.middleware.storage.oci_storage_config import OCIStorageConfig
+from .storage.oci_storage_config import OCIStorageConfig
+from .storage.opendal_storage_config import OpenDALStorageConfig
-from configs.middleware.storage.supabase_storage_config import SupabaseStorageConfig
+from .storage.supabase_storage_config import SupabaseStorageConfig
-from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
+from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
-from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
+from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
-from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
+from .vdb.analyticdb_config import AnalyticdbConfig
-from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
+from .vdb.baidu_vector_config import BaiduVectorDBConfig
-from configs.middleware.vdb.chroma_config import ChromaConfig
+from .vdb.chroma_config import ChromaConfig
-from configs.middleware.vdb.couchbase_config import CouchbaseConfig
+from .vdb.couchbase_config import CouchbaseConfig
-from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
+from .vdb.elasticsearch_config import ElasticsearchConfig
-from configs.middleware.vdb.lindorm_config import LindormConfig
+from .vdb.lindorm_config import LindormConfig
-from configs.middleware.vdb.milvus_config import MilvusConfig
+from .vdb.milvus_config import MilvusConfig
-from configs.middleware.vdb.myscale_config import MyScaleConfig
+from .vdb.myscale_config import MyScaleConfig
-from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
+from .vdb.oceanbase_config import OceanBaseVectorConfig
-from configs.middleware.vdb.opensearch_config import OpenSearchConfig
+from .vdb.opensearch_config import OpenSearchConfig
-from configs.middleware.vdb.oracle_config import OracleConfig
+from .vdb.oracle_config import OracleConfig
-from configs.middleware.vdb.pgvector_config import PGVectorConfig
+from .vdb.pgvector_config import PGVectorConfig
-from configs.middleware.vdb.pgvectors_config import PGVectoRSConfig
+from .vdb.pgvectors_config import PGVectoRSConfig
-from configs.middleware.vdb.qdrant_config import QdrantConfig
+from .vdb.qdrant_config import QdrantConfig
-from configs.middleware.vdb.relyt_config import RelytConfig
+from .vdb.relyt_config import RelytConfig
-from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
+from .vdb.tencent_vector_config import TencentVectorDBConfig
-from configs.middleware.vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
+from .vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
-from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
+from .vdb.tidb_vector_config import TiDBVectorConfig
-from configs.middleware.vdb.upstash_config import UpstashConfig
+from .vdb.upstash_config import UpstashConfig
-from configs.middleware.vdb.vikingdb_config import VikingDBConfig
+from .vdb.vikingdb_config import VikingDBConfig
-from configs.middleware.vdb.weaviate_config import WeaviateConfig
+from .vdb.weaviate_config import WeaviateConfig


class StorageConfig(BaseSettings):
-    STORAGE_TYPE: str = Field(
+    STORAGE_TYPE: Literal[
+        "opendal",
+        "s3",
+        "aliyun-oss",
+        "azure-blob",
+        "baidu-obs",
+        "google-storage",
+        "huawei-obs",
+        "oci-storage",
+        "tencent-cos",
+        "volcengine-tos",
+        "supabase",
+        "local",
+    ] = Field(
        description="Type of storage to use."
-        " Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
-        "'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
-        default="local",
+        " Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
+        "'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
+        default="opendal",
    )

    STORAGE_LOCAL_PATH: str = Field(
        description="Path for local storage when STORAGE_TYPE is set to 'local'.",
        default="storage",
+        deprecated=True,
    )
@@ -235,6 +250,7 @@ class MiddlewareConfig(
    GoogleCloudStorageConfig,
    HuaweiCloudOBSStorageConfig,
    OCIStorageConfig,
+    OpenDALStorageConfig,
    S3StorageConfig,
    SupabaseStorageConfig,
    TencentCloudCOSStorageConfig,
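A quick illustration (not part of the commit) of what the tighter Literal type buys: unsupported values now fail Pydantic validation instead of passing through silently. It assumes only the StorageConfig class shown above and an environment without STORAGE_TYPE set.

from pydantic import ValidationError

from configs.middleware import StorageConfig

print(StorageConfig().STORAGE_TYPE)    # "opendal" unless STORAGE_TYPE is set in the environment

try:
    StorageConfig(STORAGE_TYPE="ftp")  # "ftp" is not in the Literal, so validation rejects it
except ValidationError as exc:
    print(exc)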


@@ -0,0 +1,51 @@
from enum import StrEnum
from typing import Literal

from pydantic import Field
from pydantic_settings import BaseSettings


class OpenDALScheme(StrEnum):
    FS = "fs"
    S3 = "s3"


class OpenDALStorageConfig(BaseSettings):
    STORAGE_OPENDAL_SCHEME: str = Field(
        default=OpenDALScheme.FS.value,
        description="OpenDAL scheme.",
    )

    # FS
    OPENDAL_FS_ROOT: str = Field(
        default="storage",
        description="Root path for local storage.",
    )

    # S3
    OPENDAL_S3_ROOT: str = Field(
        default="/",
        description="Root path for S3 storage.",
    )
    OPENDAL_S3_BUCKET: str = Field(
        default="",
        description="S3 bucket name.",
    )
    OPENDAL_S3_ENDPOINT: str = Field(
        default="https://s3.amazonaws.com",
        description="S3 endpoint URL.",
    )
    OPENDAL_S3_ACCESS_KEY_ID: str = Field(
        default="",
        description="S3 access key ID.",
    )
    OPENDAL_S3_SECRET_ACCESS_KEY: str = Field(
        default="",
        description="S3 secret access key.",
    )
    OPENDAL_S3_REGION: str = Field(
        default="",
        description="S3 region.",
    )
    OPENDAL_S3_SERVER_SIDE_ENCRYPTION: Literal["aws:kms", ""] = Field(
        default="",
        description="S3 server-side encryption.",
    )
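As a minimal sketch (not from the commit) of how these settings behave: pydantic-settings resolves each field from an environment variable of the same name. The values below are placeholders for illustration only.

import os

from configs.middleware.storage.opendal_storage_config import OpenDALStorageConfig

os.environ["STORAGE_OPENDAL_SCHEME"] = "s3"         # hypothetical values
os.environ["OPENDAL_S3_BUCKET"] = "example-bucket"
os.environ["OPENDAL_S3_REGION"] = "us-east-1"

config = OpenDALStorageConfig()
print(config.STORAGE_OPENDAL_SCHEME, config.OPENDAL_S3_BUCKET, config.OPENDAL_S3_REGION)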


@@ -34,7 +34,6 @@ else
      --workers ${SERVER_WORKER_AMOUNT:-1} \
      --worker-class ${SERVER_WORKER_CLASS:-gevent} \
      --timeout ${GUNICORN_TIMEOUT:-200} \
-      --preload \
      app:app
  fi
fi


@@ -1,31 +1,43 @@
import logging
-from collections.abc import Generator
+from collections.abc import Callable, Generator, Mapping
from typing import Union

from flask import Flask

from configs import dify_config
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from dify_app import DifyApp
from extensions.storage.base_storage import BaseStorage
from extensions.storage.storage_type import StorageType

+logger = logging.getLogger(__name__)


class Storage:
-    def __init__(self):
-        self.storage_runner = None
-
    def init_app(self, app: Flask):
        storage_factory = self.get_storage_factory(dify_config.STORAGE_TYPE)
        with app.app_context():
            self.storage_runner = storage_factory()

    @staticmethod
-    def get_storage_factory(storage_type: str) -> type[BaseStorage]:
+    def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]:
        match storage_type:
            case StorageType.S3:
-                from extensions.storage.aws_s3_storage import AwsS3Storage
-
-                return AwsS3Storage
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                kwargs = _load_s3_storage_kwargs()
+                return lambda: OpenDALStorage(scheme=OpenDALScheme.S3, **kwargs)
+            case StorageType.OPENDAL:
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                scheme = OpenDALScheme(dify_config.STORAGE_OPENDAL_SCHEME)
+                kwargs = _load_opendal_storage_kwargs(scheme)
+                return lambda: OpenDALStorage(scheme=scheme, **kwargs)
+            case StorageType.LOCAL:
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                kwargs = _load_local_storage_kwargs()
+                return lambda: OpenDALStorage(scheme=OpenDALScheme.FS, **kwargs)
            case StorageType.AZURE_BLOB:
                from extensions.storage.azure_blob_storage import AzureBlobStorage
@@ -62,16 +74,14 @@ class Storage:
                from extensions.storage.supabase_storage import SupabaseStorage

                return SupabaseStorage
-            case StorageType.LOCAL | _:
-                from extensions.storage.local_fs_storage import LocalFsStorage
-
-                return LocalFsStorage
+            case _:
+                raise ValueError(f"Unsupported storage type {storage_type}")

    def save(self, filename, data):
        try:
            self.storage_runner.save(filename, data)
        except Exception as e:
-            logging.exception(f"Failed to save file {filename}")
+            logger.exception(f"Failed to save file {filename}")
            raise e

    def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
@@ -81,45 +91,120 @@ class Storage:
            else:
                return self.load_once(filename)
        except Exception as e:
-            logging.exception(f"Failed to load file {filename}")
+            logger.exception(f"Failed to load file {filename}")
            raise e

    def load_once(self, filename: str) -> bytes:
        try:
            return self.storage_runner.load_once(filename)
        except Exception as e:
-            logging.exception(f"Failed to load_once file {filename}")
+            logger.exception(f"Failed to load_once file {filename}")
            raise e

    def load_stream(self, filename: str) -> Generator:
        try:
            return self.storage_runner.load_stream(filename)
        except Exception as e:
-            logging.exception(f"Failed to load_stream file {filename}")
+            logger.exception(f"Failed to load_stream file {filename}")
            raise e

    def download(self, filename, target_filepath):
        try:
            self.storage_runner.download(filename, target_filepath)
        except Exception as e:
-            logging.exception(f"Failed to download file {filename}")
+            logger.exception(f"Failed to download file {filename}")
            raise e

    def exists(self, filename):
        try:
            return self.storage_runner.exists(filename)
        except Exception as e:
-            logging.exception(f"Failed to check file exists {filename}")
+            logger.exception(f"Failed to check file exists {filename}")
            raise e

    def delete(self, filename):
        try:
            return self.storage_runner.delete(filename)
        except Exception as e:
-            logging.exception(f"Failed to delete file {filename}")
+            logger.exception(f"Failed to delete file {filename}")
            raise e

+def _load_s3_storage_kwargs() -> Mapping[str, str]:
+    """
+    Load the kwargs for S3 storage based on dify_config.
+    Handles special cases like AWS managed IAM and R2.
+    """
+    kwargs = {
+        "root": "/",
+        "bucket": dify_config.S3_BUCKET_NAME,
+        "endpoint": dify_config.S3_ENDPOINT,
+        "access_key_id": dify_config.S3_ACCESS_KEY,
+        "secret_access_key": dify_config.S3_SECRET_KEY,
+        "region": dify_config.S3_REGION,
+    }
+    kwargs = {k: v for k, v in kwargs.items() if isinstance(v, str)}
+
+    # For AWS managed IAM
+    if dify_config.S3_USE_AWS_MANAGED_IAM:
+        from extensions.storage.opendal_storage import S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS
+
+        logger.debug("Using AWS managed IAM role for S3")
+        kwargs = {**kwargs, **{k: v for k, v in S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS.items() if k not in kwargs}}
+
+    # For Cloudflare R2
+    if kwargs.get("endpoint"):
+        from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
+
+        if is_r2_endpoint(kwargs["endpoint"]):
+            logger.debug("Using R2 for OpenDAL S3")
+            kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
+
+    return kwargs
+
+
+def _load_local_storage_kwargs() -> Mapping[str, str]:
+    """
+    Load the kwargs for local storage based on dify_config.
+    """
+    return {
+        "root": dify_config.STORAGE_LOCAL_PATH,
+    }
+
+
+def _load_opendal_storage_kwargs(scheme: OpenDALScheme) -> Mapping[str, str]:
+    """
+    Load the kwargs for OpenDAL storage based on the given scheme.
+    """
+    match scheme:
+        case OpenDALScheme.FS:
+            kwargs = {
+                "root": dify_config.OPENDAL_FS_ROOT,
+            }
+        case OpenDALScheme.S3:
+            # Load OpenDAL S3-related configs
+            kwargs = {
+                "root": dify_config.OPENDAL_S3_ROOT,
+                "bucket": dify_config.OPENDAL_S3_BUCKET,
+                "endpoint": dify_config.OPENDAL_S3_ENDPOINT,
+                "access_key_id": dify_config.OPENDAL_S3_ACCESS_KEY_ID,
+                "secret_access_key": dify_config.OPENDAL_S3_SECRET_ACCESS_KEY,
+                "region": dify_config.OPENDAL_S3_REGION,
+            }
+
+            # For Cloudflare R2
+            if kwargs.get("endpoint"):
+                from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
+
+                if is_r2_endpoint(kwargs["endpoint"]):
+                    logger.debug("Using R2 for OpenDAL S3")
+                    kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
+        case _:
+            logger.warning(f"Unrecognized OpenDAL scheme: {scheme}, will fall back to default.")
+            kwargs = {}
+
+    return kwargs

storage = Storage()
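For context, a small usage sketch (not part of the commit) of the module-level facade after this change; the file path and payload are made up, and a configured Flask app is assumed:

from flask import Flask

from extensions.ext_storage import storage

app = Flask(__name__)
storage.init_app(app)                         # selects the backend from dify_config.STORAGE_TYPE
storage.save("examples/hello.txt", b"hello")  # hypothetical object key
print(storage.load_once("examples/hello.txt"))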


@@ -7,9 +7,6 @@ from collections.abc import Generator
class BaseStorage(ABC):
    """Interface for file storage."""

-    def __init__(self):  # noqa: B027
-        pass
-
    @abstractmethod
    def save(self, filename, data):
        raise NotImplementedError


@@ -1,62 +0,0 @@
import os
import shutil
from collections.abc import Generator
from pathlib import Path

from flask import current_app

from configs import dify_config
from extensions.storage.base_storage import BaseStorage


class LocalFsStorage(BaseStorage):
    """Implementation for local filesystem storage."""

    def __init__(self):
        super().__init__()
        folder = dify_config.STORAGE_LOCAL_PATH
        if not os.path.isabs(folder):
            folder = os.path.join(current_app.root_path, folder)
        self.folder = folder

    def _build_filepath(self, filename: str) -> str:
        """Build the full file path based on the folder and filename."""
        if not self.folder or self.folder.endswith("/"):
            return self.folder + filename
        else:
            return self.folder + "/" + filename

    def save(self, filename, data):
        filepath = self._build_filepath(filename)
        folder = os.path.dirname(filepath)
        os.makedirs(folder, exist_ok=True)
        Path(os.path.join(os.getcwd(), filepath)).write_bytes(data)

    def load_once(self, filename: str) -> bytes:
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        return Path(filepath).read_bytes()

    def load_stream(self, filename: str) -> Generator:
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        with open(filepath, "rb") as f:
            while chunk := f.read(4096):  # Read in chunks of 4KB
                yield chunk

    def download(self, filename, target_filepath):
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        shutil.copyfile(filepath, target_filepath)

    def exists(self, filename):
        filepath = self._build_filepath(filename)
        return os.path.exists(filepath)

    def delete(self, filename):
        filepath = self._build_filepath(filename)
        if os.path.exists(filepath):
            os.remove(filepath)


@@ -0,0 +1,66 @@
from collections.abc import Generator
from pathlib import Path
from urllib.parse import urlparse

import opendal

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.base_storage import BaseStorage

S3_R2_HOSTNAME = "r2.cloudflarestorage.com"
S3_R2_COMPATIBLE_KWARGS = {
    "delete_max_size": "700",
    "disable_stat_with_override": "true",
    "region": "auto",
}
S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS = {
    "server_side_encryption": "aws:kms",
}


def is_r2_endpoint(endpoint: str) -> bool:
    if not endpoint:
        return False

    parsed_url = urlparse(endpoint)
    return bool(parsed_url.hostname and parsed_url.hostname.endswith(S3_R2_HOSTNAME))


class OpenDALStorage(BaseStorage):
    def __init__(self, scheme: OpenDALScheme, **kwargs):
        if scheme == OpenDALScheme.FS:
            Path(kwargs["root"]).mkdir(parents=True, exist_ok=True)

        self.op = opendal.Operator(scheme=scheme, **kwargs)

    def save(self, filename: str, data: bytes) -> None:
        self.op.write(path=filename, bs=data)

    def load_once(self, filename: str) -> bytes:
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        return self.op.read(path=filename)

    def load_stream(self, filename: str) -> Generator:
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        batch_size = 4096
        file = self.op.open(path=filename, mode="rb")
        while chunk := file.read(batch_size):
            yield chunk

    def download(self, filename: str, target_filepath: str):
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        with Path(target_filepath).open("wb") as f:
            f.write(self.op.read(path=filename))

    def exists(self, filename: str):
        return self.op.stat(path=filename).mode.is_file()

    def delete(self, filename: str):
        if self.exists(filename):
            self.op.delete(path=filename)
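As a rough usage sketch (not part of the commit), the fs scheme only needs a root directory, which the constructor creates if missing; the root path and object key below are placeholders:

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.opendal_storage import OpenDALStorage

store = OpenDALStorage(scheme=OpenDALScheme.FS, root="./storage")  # hypothetical root
store.save("greetings/hello.txt", b"hello opendal")
assert store.exists("greetings/hello.txt")
print(store.load_once("greetings/hello.txt"))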


@@ -9,6 +9,7 @@ class StorageType(StrEnum):
    HUAWEI_OBS = "huawei-obs"
    LOCAL = "local"
    OCI_STORAGE = "oci-storage"
+    OPENDAL = "opendal"
    S3 = "s3"
    TENCENT_COS = "tencent-cos"
    VOLCENGINE_TOS = "volcengine-tos"
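For reference, a tiny sketch (not from the commit) of how the STORAGE_TYPE string ties into this enum in the factory's match statement:

from extensions.storage.storage_type import StorageType

assert StorageType("opendal") is StorageType.OPENDAL  # value lookup returns the member
assert StorageType.OPENDAL == "opendal"               # StrEnum members compare equal to plain strings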

api/poetry.lock (generated, 886 changed lines): file diff suppressed because it is too large.


@@ -134,6 +134,7 @@ bce-python-sdk = "~0.9.23"
cos-python-sdk-v5 = "1.9.30"
esdk-obs-python = "3.24.6.1"
google-cloud-storage = "2.16.0"
+opendal = "~0.45.12"
oss2 = "2.18.5"
supabase = "~2.8.1"
tos = "~2.7.1"


@@ -0,0 +1,20 @@
import pytest

from extensions.storage.opendal_storage import is_r2_endpoint


@pytest.mark.parametrize(
    ("endpoint", "expected"),
    [
        ("https://bucket.r2.cloudflarestorage.com", True),
        ("https://custom-domain.r2.cloudflarestorage.com/", True),
        ("https://bucket.r2.cloudflarestorage.com/path", True),
        ("https://s3.amazonaws.com", False),
        ("https://storage.googleapis.com", False),
        ("http://localhost:9000", False),
        ("invalid-url", False),
        ("", False),
    ],
)
def test_is_r2_endpoint(endpoint: str, expected: bool):
    assert is_r2_endpoint(endpoint) == expected


@@ -6,7 +6,7 @@ from extensions.storage.base_storage import BaseStorage
def get_example_folder() -> str:
-    return "/dify"
+    return "~/dify"


def get_example_bucket() -> str:

@@ -22,14 +22,14 @@ def get_example_data() -> bytes:
def get_example_filepath() -> str:
-    return "/test"
+    return "~/test"


class BaseStorageTest:
    @pytest.fixture(autouse=True)
-    def setup_method(self):
+    def setup_method(self, *args, **kwargs):
        """Should be implemented in child classes to setup specific storage."""
-        self.storage = BaseStorage()
+        self.storage: BaseStorage

    def test_save(self):
        """Test saving data."""


@@ -1,18 +0,0 @@
from collections.abc import Generator

import pytest

from extensions.storage.local_fs_storage import LocalFsStorage
from tests.unit_tests.oss.__mock.base import (
    BaseStorageTest,
    get_example_folder,
)
from tests.unit_tests.oss.__mock.local import setup_local_fs_mock


class TestLocalFS(BaseStorageTest):
    @pytest.fixture(autouse=True)
    def setup_method(self, setup_local_fs_mock):
        """Executed before each test method."""
        self.storage = LocalFsStorage()
        self.storage.folder = get_example_folder()


@@ -0,0 +1,19 @@
import pytest

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.opendal_storage import OpenDALStorage
from tests.unit_tests.oss.__mock.base import (
    BaseStorageTest,
    get_example_folder,
)
from tests.unit_tests.oss.__mock.local import setup_local_fs_mock


class TestOpenDAL(BaseStorageTest):
    @pytest.fixture(autouse=True)
    def setup_method(self, *args, **kwargs):
        """Executed before each test method."""
        self.storage = OpenDALStorage(
            scheme=OpenDALScheme.FS,
            root=get_example_folder(),
        )


@@ -281,10 +281,23 @@ CONSOLE_CORS_ALLOW_ORIGINS=*
# ------------------------------
# The type of storage to use for storing user files.
-# Supported values are `local` , `s3` , `azure-blob` , `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
-# Default: `local`
-STORAGE_TYPE=local
-STORAGE_LOCAL_PATH=storage
+# Supported values are `opendal` , `s3` , `azure-blob` , `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
+# Default: `opendal`
+STORAGE_TYPE=opendal
+# Apache OpenDAL Configuration, refer to https://github.com/apache/opendal
+# The scheme for the OpenDAL storage.
+STORAGE_OPENDAL_SCHEME=fs
+# OpenDAL FS
+OPENDAL_FS_ROOT=storage
+# OpenDAL S3
+OPENDAL_S3_ROOT=/
+OPENDAL_S3_BUCKET=your-bucket-name
+OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
+OPENDAL_S3_ACCESS_KEY_ID=your-access-key
+OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
+OPENDAL_S3_REGION=your-region
+OPENDAL_S3_SERVER_SIDE_ENCRYPTION=

# S3 Configuration
# Whether to use AWS managed IAM roles for authenticating with the S3 service.