feat: integrate opendal storage (#11508)
Signed-off-by: -LAN- <laipz8200@outlook.com>
parent 1765fe2a29
commit 8d4bb9b40d
api/.env.example
@@ -56,20 +56,36 @@ DB_DATABASE=dify

 # Storage configuration
 # use for store upload files, private keys...
-# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
-STORAGE_TYPE=local
-STORAGE_LOCAL_PATH=storage
+# storage type: opendal, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
+STORAGE_TYPE=opendal
+
+# Apache OpenDAL storage configuration, refer to https://github.com/apache/opendal
+STORAGE_OPENDAL_SCHEME=fs
+# OpenDAL FS
+OPENDAL_FS_ROOT=storage
+# OpenDAL S3
+OPENDAL_S3_ROOT=/
+OPENDAL_S3_BUCKET=your-bucket-name
+OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
+OPENDAL_S3_ACCESS_KEY_ID=your-access-key
+OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
+OPENDAL_S3_REGION=your-region
+OPENDAL_S3_SERVER_SIDE_ENCRYPTION=

 # S3 Storage configuration
 S3_USE_AWS_MANAGED_IAM=false
 S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
 S3_BUCKET_NAME=your-bucket-name
 S3_ACCESS_KEY=your-access-key
 S3_SECRET_KEY=your-secret-key
 S3_REGION=your-region

 # Azure Blob Storage configuration
 AZURE_BLOB_ACCOUNT_NAME=your-account-name
 AZURE_BLOB_ACCOUNT_KEY=your-account-key
 AZURE_BLOB_CONTAINER_NAME=your-container-name
 AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net

 # Aliyun OSS Storage configuration
 ALIYUN_OSS_BUCKET_NAME=your-bucket-name
 ALIYUN_OSS_ACCESS_KEY=your-access-key
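The OPENDAL_* variables above map directly onto opendal Operator configuration keys. A minimal sketch of the `fs` scheme under that assumption (requires the opendal Python package; paths and values are illustrative):

import opendal

# STORAGE_OPENDAL_SCHEME=fs with OPENDAL_FS_ROOT=storage translates to:
op = opendal.Operator("fs", root="storage")
op.write("hello.txt", b"hello")  # stored under ./storage/hello.txt
print(op.read("hello.txt"))      # b'hello'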
@@ -79,6 +95,7 @@ ALIYUN_OSS_AUTH_VERSION=v1
 ALIYUN_OSS_REGION=your-region
+# Don't start with '/'. OSS doesn't support leading slash in object names.
 ALIYUN_OSS_PATH=your-path

 # Google Storage configuration
 GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name
 GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string
@@ -125,8 +142,8 @@ SUPABASE_URL=your-server-url
 WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
 CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*


-# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
+# Vector database configuration
+# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
 VECTOR_STORE=weaviate

 # Weaviate configuration
api/configs/middleware/__init__.py
@@ -1,54 +1,69 @@
-from typing import Any, Optional
+from typing import Any, Literal, Optional
 from urllib.parse import quote_plus

 from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
 from pydantic_settings import BaseSettings

-from configs.middleware.cache.redis_config import RedisConfig
-from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
-from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
-from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
-from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
-from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
-from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
-from configs.middleware.storage.oci_storage_config import OCIStorageConfig
-from configs.middleware.storage.supabase_storage_config import SupabaseStorageConfig
-from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
-from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
-from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
-from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
-from configs.middleware.vdb.chroma_config import ChromaConfig
-from configs.middleware.vdb.couchbase_config import CouchbaseConfig
-from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
-from configs.middleware.vdb.lindorm_config import LindormConfig
-from configs.middleware.vdb.milvus_config import MilvusConfig
-from configs.middleware.vdb.myscale_config import MyScaleConfig
-from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
-from configs.middleware.vdb.opensearch_config import OpenSearchConfig
-from configs.middleware.vdb.oracle_config import OracleConfig
-from configs.middleware.vdb.pgvector_config import PGVectorConfig
-from configs.middleware.vdb.pgvectors_config import PGVectoRSConfig
-from configs.middleware.vdb.qdrant_config import QdrantConfig
-from configs.middleware.vdb.relyt_config import RelytConfig
-from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
-from configs.middleware.vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
-from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
-from configs.middleware.vdb.upstash_config import UpstashConfig
-from configs.middleware.vdb.vikingdb_config import VikingDBConfig
-from configs.middleware.vdb.weaviate_config import WeaviateConfig
+from .cache.redis_config import RedisConfig
+from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
+from .storage.amazon_s3_storage_config import S3StorageConfig
+from .storage.azure_blob_storage_config import AzureBlobStorageConfig
+from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
+from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
+from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
+from .storage.oci_storage_config import OCIStorageConfig
+from .storage.opendal_storage_config import OpenDALStorageConfig
+from .storage.supabase_storage_config import SupabaseStorageConfig
+from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
+from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
+from .vdb.analyticdb_config import AnalyticdbConfig
+from .vdb.baidu_vector_config import BaiduVectorDBConfig
+from .vdb.chroma_config import ChromaConfig
+from .vdb.couchbase_config import CouchbaseConfig
+from .vdb.elasticsearch_config import ElasticsearchConfig
+from .vdb.lindorm_config import LindormConfig
+from .vdb.milvus_config import MilvusConfig
+from .vdb.myscale_config import MyScaleConfig
+from .vdb.oceanbase_config import OceanBaseVectorConfig
+from .vdb.opensearch_config import OpenSearchConfig
+from .vdb.oracle_config import OracleConfig
+from .vdb.pgvector_config import PGVectorConfig
+from .vdb.pgvectors_config import PGVectoRSConfig
+from .vdb.qdrant_config import QdrantConfig
+from .vdb.relyt_config import RelytConfig
+from .vdb.tencent_vector_config import TencentVectorDBConfig
+from .vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
+from .vdb.tidb_vector_config import TiDBVectorConfig
+from .vdb.upstash_config import UpstashConfig
+from .vdb.vikingdb_config import VikingDBConfig
+from .vdb.weaviate_config import WeaviateConfig


 class StorageConfig(BaseSettings):
-    STORAGE_TYPE: str = Field(
+    STORAGE_TYPE: Literal[
+        "opendal",
+        "s3",
+        "aliyun-oss",
+        "azure-blob",
+        "baidu-obs",
+        "google-storage",
+        "huawei-obs",
+        "oci-storage",
+        "tencent-cos",
+        "volcengine-tos",
+        "supabase",
+        "local",
+    ] = Field(
         description="Type of storage to use."
-        " Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
-        "'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
-        default="local",
+        " Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
+        "'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
+        default="opendal",
     )

     STORAGE_LOCAL_PATH: str = Field(
         description="Path for local storage when STORAGE_TYPE is set to 'local'.",
         default="storage",
+        deprecated=True,
     )
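The switch from `str` to a `Literal[...]` type means pydantic now rejects unknown storage backends at startup rather than failing later at dispatch time. A standalone sketch of that behavior (a toy class, not the project's actual config):

import os
from typing import Literal

from pydantic import Field, ValidationError
from pydantic_settings import BaseSettings


class ToyStorageConfig(BaseSettings):
    STORAGE_TYPE: Literal["opendal", "s3", "local"] = Field(default="opendal")


os.environ["STORAGE_TYPE"] = "s3"
print(ToyStorageConfig().STORAGE_TYPE)  # s3

os.environ["STORAGE_TYPE"] = "ftp"  # not in the Literal
try:
    ToyStorageConfig()
except ValidationError as e:
    print(f"rejected with {e.error_count()} validation error(s)")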
@@ -235,6 +250,7 @@ class MiddlewareConfig(
     GoogleCloudStorageConfig,
     HuaweiCloudOBSStorageConfig,
     OCIStorageConfig,
+    OpenDALStorageConfig,
     S3StorageConfig,
     SupabaseStorageConfig,
     TencentCloudCOSStorageConfig,
api/configs/middleware/storage/opendal_storage_config.py (new file, 51 lines)
@@ -0,0 +1,51 @@
+from enum import StrEnum
+from typing import Literal
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class OpenDALScheme(StrEnum):
+    FS = "fs"
+    S3 = "s3"
+
+
+class OpenDALStorageConfig(BaseSettings):
+    STORAGE_OPENDAL_SCHEME: str = Field(
+        default=OpenDALScheme.FS.value,
+        description="OpenDAL scheme.",
+    )
+    # FS
+    OPENDAL_FS_ROOT: str = Field(
+        default="storage",
+        description="Root path for local storage.",
+    )
+    # S3
+    OPENDAL_S3_ROOT: str = Field(
+        default="/",
+        description="Root path for S3 storage.",
+    )
+    OPENDAL_S3_BUCKET: str = Field(
+        default="",
+        description="S3 bucket name.",
+    )
+    OPENDAL_S3_ENDPOINT: str = Field(
+        default="https://s3.amazonaws.com",
+        description="S3 endpoint URL.",
+    )
+    OPENDAL_S3_ACCESS_KEY_ID: str = Field(
+        default="",
+        description="S3 access key ID.",
+    )
+    OPENDAL_S3_SECRET_ACCESS_KEY: str = Field(
+        default="",
+        description="S3 secret access key.",
+    )
+    OPENDAL_S3_REGION: str = Field(
+        default="",
+        description="S3 region.",
+    )
+    OPENDAL_S3_SERVER_SIDE_ENCRYPTION: Literal["aws:kms", ""] = Field(
+        default="",
+        description="S3 server-side encryption.",
+    )
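Because OpenDALStorageConfig is a BaseSettings subclass, its fields populate from the environment; a quick sketch of that (values illustrative):

import os

from configs.middleware.storage.opendal_storage_config import OpenDALStorageConfig

os.environ["STORAGE_OPENDAL_SCHEME"] = "s3"
os.environ["OPENDAL_S3_BUCKET"] = "my-bucket"

cfg = OpenDALStorageConfig()
print(cfg.STORAGE_OPENDAL_SCHEME, cfg.OPENDAL_S3_BUCKET)  # s3 my-bucket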
api/docker/entrypoint.sh
@@ -34,7 +34,6 @@ else
     --workers ${SERVER_WORKER_AMOUNT:-1} \
     --worker-class ${SERVER_WORKER_CLASS:-gevent} \
     --timeout ${GUNICORN_TIMEOUT:-200} \
-    --preload \
     app:app
   fi
 fi
api/extensions/ext_storage.py
@@ -1,31 +1,43 @@
 import logging
-from collections.abc import Generator
+from collections.abc import Callable, Generator, Mapping
 from typing import Union

 from flask import Flask

 from configs import dify_config
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
 from dify_app import DifyApp
 from extensions.storage.base_storage import BaseStorage
 from extensions.storage.storage_type import StorageType

+logger = logging.getLogger(__name__)
+

 class Storage:
     def __init__(self):
         self.storage_runner = None

     def init_app(self, app: Flask):
         storage_factory = self.get_storage_factory(dify_config.STORAGE_TYPE)
         with app.app_context():
             self.storage_runner = storage_factory()

     @staticmethod
-    def get_storage_factory(storage_type: str) -> type[BaseStorage]:
+    def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]:
         match storage_type:
             case StorageType.S3:
-                from extensions.storage.aws_s3_storage import AwsS3Storage
+                from extensions.storage.opendal_storage import OpenDALStorage

-                return AwsS3Storage
+                kwargs = _load_s3_storage_kwargs()
+                return lambda: OpenDALStorage(scheme=OpenDALScheme.S3, **kwargs)
+            case StorageType.OPENDAL:
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                scheme = OpenDALScheme(dify_config.STORAGE_OPENDAL_SCHEME)
+                kwargs = _load_opendal_storage_kwargs(scheme)
+                return lambda: OpenDALStorage(scheme=scheme, **kwargs)
+            case StorageType.LOCAL:
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                kwargs = _load_local_storage_kwargs()
+                return lambda: OpenDALStorage(scheme=OpenDALScheme.FS, **kwargs)
             case StorageType.AZURE_BLOB:
                 from extensions.storage.azure_blob_storage import AzureBlobStorage

@@ -62,16 +74,14 @@ class Storage:
                 from extensions.storage.supabase_storage import SupabaseStorage

                 return SupabaseStorage
-            case StorageType.LOCAL | _:
-                from extensions.storage.local_fs_storage import LocalFsStorage
-
-                return LocalFsStorage
+            case _:
+                raise ValueError(f"Unsupported storage type {storage_type}")

     def save(self, filename, data):
         try:
             self.storage_runner.save(filename, data)
         except Exception as e:
-            logging.exception(f"Failed to save file {filename}")
+            logger.exception(f"Failed to save file {filename}")
             raise e

     def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
@@ -81,45 +91,120 @@ class Storage:
             else:
                 return self.load_once(filename)
         except Exception as e:
-            logging.exception(f"Failed to load file {filename}")
+            logger.exception(f"Failed to load file {filename}")
             raise e

     def load_once(self, filename: str) -> bytes:
         try:
             return self.storage_runner.load_once(filename)
         except Exception as e:
-            logging.exception(f"Failed to load_once file {filename}")
+            logger.exception(f"Failed to load_once file {filename}")
             raise e

     def load_stream(self, filename: str) -> Generator:
         try:
             return self.storage_runner.load_stream(filename)
         except Exception as e:
-            logging.exception(f"Failed to load_stream file {filename}")
+            logger.exception(f"Failed to load_stream file {filename}")
             raise e

     def download(self, filename, target_filepath):
         try:
             self.storage_runner.download(filename, target_filepath)
         except Exception as e:
-            logging.exception(f"Failed to download file {filename}")
+            logger.exception(f"Failed to download file {filename}")
             raise e

     def exists(self, filename):
         try:
             return self.storage_runner.exists(filename)
         except Exception as e:
-            logging.exception(f"Failed to check file exists {filename}")
+            logger.exception(f"Failed to check file exists {filename}")
             raise e

     def delete(self, filename):
         try:
             return self.storage_runner.delete(filename)
         except Exception as e:
-            logging.exception(f"Failed to delete file {filename}")
+            logger.exception(f"Failed to delete file {filename}")
             raise e

+
+def _load_s3_storage_kwargs() -> Mapping[str, str]:
+    """
+    Load the kwargs for S3 storage based on dify_config.
+    Handles special cases like AWS managed IAM and R2.
+    """
+    kwargs = {
+        "root": "/",
+        "bucket": dify_config.S3_BUCKET_NAME,
+        "endpoint": dify_config.S3_ENDPOINT,
+        "access_key_id": dify_config.S3_ACCESS_KEY,
+        "secret_access_key": dify_config.S3_SECRET_KEY,
+        "region": dify_config.S3_REGION,
+    }
+    kwargs = {k: v for k, v in kwargs.items() if isinstance(v, str)}
+
+    # For AWS managed IAM
+    if dify_config.S3_USE_AWS_MANAGED_IAM:
+        from extensions.storage.opendal_storage import S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS
+
+        logger.debug("Using AWS managed IAM role for S3")
+        kwargs = {**kwargs, **{k: v for k, v in S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS.items() if k not in kwargs}}
+
+    # For Cloudflare R2
+    if kwargs.get("endpoint"):
+        from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
+
+        if is_r2_endpoint(kwargs["endpoint"]):
+            logger.debug("Using R2 for OpenDAL S3")
+            kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
+
+    return kwargs
+
+
+def _load_local_storage_kwargs() -> Mapping[str, str]:
+    """
+    Load the kwargs for local storage based on dify_config.
+    """
+    return {
+        "root": dify_config.STORAGE_LOCAL_PATH,
+    }
+
+
+def _load_opendal_storage_kwargs(scheme: OpenDALScheme) -> Mapping[str, str]:
+    """
+    Load the kwargs for OpenDAL storage based on the given scheme.
+    """
+    match scheme:
+        case OpenDALScheme.FS:
+            kwargs = {
+                "root": dify_config.OPENDAL_FS_ROOT,
+            }
+        case OpenDALScheme.S3:
+            # Load OpenDAL S3-related configs
+            kwargs = {
+                "root": dify_config.OPENDAL_S3_ROOT,
+                "bucket": dify_config.OPENDAL_S3_BUCKET,
+                "endpoint": dify_config.OPENDAL_S3_ENDPOINT,
+                "access_key_id": dify_config.OPENDAL_S3_ACCESS_KEY_ID,
+                "secret_access_key": dify_config.OPENDAL_S3_SECRET_ACCESS_KEY,
+                "region": dify_config.OPENDAL_S3_REGION,
+            }
+
+            # For Cloudflare R2
+            if kwargs.get("endpoint"):
+                from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
+
+                if is_r2_endpoint(kwargs["endpoint"]):
+                    logger.debug("Using R2 for OpenDAL S3")
+                    kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
+        case _:
+            logger.warning(f"Unrecognized OpenDAL scheme: {scheme}, will fall back to default.")
+            kwargs = {}
+    return kwargs
+
+
 storage = Storage()
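The return-type change from `type[BaseStorage]` to `Callable[[], BaseStorage]` works because the call site only ever invokes `storage_factory()` with no arguments, so a class object and a zero-argument lambda are interchangeable; the lambda additionally lets the factory pre-bind scheme and kwargs. A toy demonstration of the pattern (names are illustrative, not the project's):

from collections.abc import Callable


class Runner:
    def __init__(self, scheme: str = "fs", **kwargs: str) -> None:
        self.scheme, self.kwargs = scheme, kwargs


def get_factory(kind: str) -> Callable[[], Runner]:
    if kind == "opendal":
        kwargs = {"root": "storage"}        # configuration captured by the closure
        return lambda: Runner("fs", **kwargs)
    return Runner                            # a class is itself a zero-arg callable here


runner = get_factory("opendal")()            # instantiation deferred to the caller
print(runner.scheme, runner.kwargs)          # fs {'root': 'storage'}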
api/extensions/storage/base_storage.py
@@ -7,9 +7,6 @@ from collections.abc import Generator
 class BaseStorage(ABC):
     """Interface for file storage."""

-    def __init__(self):  # noqa: B027
-        pass
-
     @abstractmethod
     def save(self, filename, data):
         raise NotImplementedError
api/extensions/storage/local_fs_storage.py (deleted, 62 lines)
@@ -1,62 +0,0 @@
-import os
-import shutil
-from collections.abc import Generator
-from pathlib import Path
-
-from flask import current_app
-
-from configs import dify_config
-from extensions.storage.base_storage import BaseStorage
-
-
-class LocalFsStorage(BaseStorage):
-    """Implementation for local filesystem storage."""
-
-    def __init__(self):
-        super().__init__()
-        folder = dify_config.STORAGE_LOCAL_PATH
-        if not os.path.isabs(folder):
-            folder = os.path.join(current_app.root_path, folder)
-        self.folder = folder
-
-    def _build_filepath(self, filename: str) -> str:
-        """Build the full file path based on the folder and filename."""
-        if not self.folder or self.folder.endswith("/"):
-            return self.folder + filename
-        else:
-            return self.folder + "/" + filename
-
-    def save(self, filename, data):
-        filepath = self._build_filepath(filename)
-        folder = os.path.dirname(filepath)
-        os.makedirs(folder, exist_ok=True)
-        Path(os.path.join(os.getcwd(), filepath)).write_bytes(data)
-
-    def load_once(self, filename: str) -> bytes:
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        return Path(filepath).read_bytes()
-
-    def load_stream(self, filename: str) -> Generator:
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        with open(filepath, "rb") as f:
-            while chunk := f.read(4096):  # Read in chunks of 4KB
-                yield chunk
-
-    def download(self, filename, target_filepath):
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        shutil.copyfile(filepath, target_filepath)
-
-    def exists(self, filename):
-        filepath = self._build_filepath(filename)
-        return os.path.exists(filepath)
-
-    def delete(self, filename):
-        filepath = self._build_filepath(filename)
-        if os.path.exists(filepath):
-            os.remove(filepath)
api/extensions/storage/opendal_storage.py (new file, 66 lines)
@@ -0,0 +1,66 @@
+from collections.abc import Generator
+from pathlib import Path
+from urllib.parse import urlparse
+
+import opendal
+
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
+from extensions.storage.base_storage import BaseStorage
+
+S3_R2_HOSTNAME = "r2.cloudflarestorage.com"
+S3_R2_COMPATIBLE_KWARGS = {
+    "delete_max_size": "700",
+    "disable_stat_with_override": "true",
+    "region": "auto",
+}
+S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS = {
+    "server_side_encryption": "aws:kms",
+}
+
+
+def is_r2_endpoint(endpoint: str) -> bool:
+    if not endpoint:
+        return False
+
+    parsed_url = urlparse(endpoint)
+    return bool(parsed_url.hostname and parsed_url.hostname.endswith(S3_R2_HOSTNAME))
+
+
+class OpenDALStorage(BaseStorage):
+    def __init__(self, scheme: OpenDALScheme, **kwargs):
+        if scheme == OpenDALScheme.FS:
+            Path(kwargs["root"]).mkdir(parents=True, exist_ok=True)
+
+        self.op = opendal.Operator(scheme=scheme, **kwargs)
+
+    def save(self, filename: str, data: bytes) -> None:
+        self.op.write(path=filename, bs=data)
+
+    def load_once(self, filename: str) -> bytes:
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        return self.op.read(path=filename)
+
+    def load_stream(self, filename: str) -> Generator:
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        batch_size = 4096
+        file = self.op.open(path=filename, mode="rb")
+        while chunk := file.read(batch_size):
+            yield chunk
+
+    def download(self, filename: str, target_filepath: str):
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        with Path(target_filepath).open("wb") as f:
+            f.write(self.op.read(path=filename))
+
+    def exists(self, filename: str):
+        return self.op.stat(path=filename).mode.is_file()
+
+    def delete(self, filename: str):
+        if self.exists(filename):
+            self.op.delete(path=filename)
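End to end, the class above can be exercised directly; a sketch with the `fs` scheme (paths illustrative):

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.opendal_storage import OpenDALStorage, is_r2_endpoint

storage = OpenDALStorage(scheme=OpenDALScheme.FS, root="storage")  # creates ./storage if missing
storage.save("greeting.txt", b"hello")
print(storage.load_once("greeting.txt"))                        # b'hello'
print(is_r2_endpoint("https://acct.r2.cloudflarestorage.com"))  # True: hostname suffix match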
api/extensions/storage/storage_type.py
@@ -9,6 +9,7 @@ class StorageType(StrEnum):
     HUAWEI_OBS = "huawei-obs"
     LOCAL = "local"
     OCI_STORAGE = "oci-storage"
+    OPENDAL = "opendal"
     S3 = "s3"
     TENCENT_COS = "tencent-cos"
     VOLCENGINE_TOS = "volcengine-tos"
api/poetry.lock (generated, 886 lines changed; diff suppressed because it is too large)
@ -134,6 +134,7 @@ bce-python-sdk = "~0.9.23"
|
||||
cos-python-sdk-v5 = "1.9.30"
|
||||
esdk-obs-python = "3.24.6.1"
|
||||
google-cloud-storage = "2.16.0"
|
||||
opendal = "~0.45.12"
|
||||
oss2 = "2.18.5"
|
||||
supabase = "~2.8.1"
|
||||
tos = "~2.7.1"
|
||||
|
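A quick way to confirm the new pin resolves in an installed environment (assumes dependencies have been synced):

from importlib.metadata import version

print(version("opendal"))  # expect a 0.45.x release, per the ~0.45.12 pin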
api/tests/unit_tests/configs/test_opendal_config_parse.py (new file, 20 lines)
@@ -0,0 +1,20 @@
+import pytest
+
+from extensions.storage.opendal_storage import is_r2_endpoint
+
+
+@pytest.mark.parametrize(
+    ("endpoint", "expected"),
+    [
+        ("https://bucket.r2.cloudflarestorage.com", True),
+        ("https://custom-domain.r2.cloudflarestorage.com/", True),
+        ("https://bucket.r2.cloudflarestorage.com/path", True),
+        ("https://s3.amazonaws.com", False),
+        ("https://storage.googleapis.com", False),
+        ("http://localhost:9000", False),
+        ("invalid-url", False),
+        ("", False),
+    ],
+)
+def test_is_r2_endpoint(endpoint: str, expected: bool):
+    assert is_r2_endpoint(endpoint) == expected
api/tests/unit_tests/oss/__mock/base.py
@@ -6,7 +6,7 @@ from extensions.storage.base_storage import BaseStorage


 def get_example_folder() -> str:
-    return "/dify"
+    return "~/dify"


 def get_example_bucket() -> str:
@@ -22,14 +22,14 @@ def get_example_data() -> bytes:


 def get_example_filepath() -> str:
-    return "/test"
+    return "~/test"


 class BaseStorageTest:
     @pytest.fixture(autouse=True)
-    def setup_method(self):
+    def setup_method(self, *args, **kwargs):
         """Should be implemented in child classes to setup specific storage."""
-        self.storage = BaseStorage()
+        self.storage: BaseStorage

     def test_save(self):
         """Test saving data."""
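One plausible reading of the fixture change: BaseStorage is an ABC with abstract methods, so `BaseStorage()` cannot be instantiated; a bare annotation declares the attribute's type without constructing anything. A toy illustration (not the project's class):

from abc import ABC, abstractmethod


class Iface(ABC):
    @abstractmethod
    def save(self, name: str, data: bytes) -> None: ...


try:
    Iface()  # abstract classes cannot be instantiated
except TypeError as exc:
    print(exc)  # e.g. "Can't instantiate abstract class Iface ..."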
api/tests/unit_tests/oss/local/test_local_fs.py (deleted, 18 lines)
@@ -1,18 +0,0 @@
-from collections.abc import Generator
-
-import pytest
-
-from extensions.storage.local_fs_storage import LocalFsStorage
-from tests.unit_tests.oss.__mock.base import (
-    BaseStorageTest,
-    get_example_folder,
-)
-from tests.unit_tests.oss.__mock.local import setup_local_fs_mock
-
-
-class TestLocalFS(BaseStorageTest):
-    @pytest.fixture(autouse=True)
-    def setup_method(self, setup_local_fs_mock):
-        """Executed before each test method."""
-        self.storage = LocalFsStorage()
-        self.storage.folder = get_example_folder()
api/tests/unit_tests/oss/opendal/test_opendal.py (new file, 19 lines)
@@ -0,0 +1,19 @@
+import pytest
+
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
+from extensions.storage.opendal_storage import OpenDALStorage
+from tests.unit_tests.oss.__mock.base import (
+    BaseStorageTest,
+    get_example_folder,
+)
+from tests.unit_tests.oss.__mock.local import setup_local_fs_mock
+
+
+class TestOpenDAL(BaseStorageTest):
+    @pytest.fixture(autouse=True)
+    def setup_method(self, *args, **kwargs):
+        """Executed before each test method."""
+        self.storage = OpenDALStorage(
+            scheme=OpenDALScheme.FS,
+            root=get_example_folder(),
+        )
docker/.env.example
@@ -281,10 +281,23 @@ CONSOLE_CORS_ALLOW_ORIGINS=*
 # ------------------------------

 # The type of storage to use for storing user files.
-# Supported values are `local`, `s3`, `azure-blob`, `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
-# Default: `local`
-STORAGE_TYPE=local
-STORAGE_LOCAL_PATH=storage
+# Supported values are `opendal`, `s3`, `azure-blob`, `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
+# Default: `opendal`
+STORAGE_TYPE=opendal
+
+# Apache OpenDAL Configuration, refer to https://github.com/apache/opendal
+# The scheme for the OpenDAL storage.
+STORAGE_OPENDAL_SCHEME=fs
+# OpenDAL FS
+OPENDAL_FS_ROOT=storage
+# OpenDAL S3
+OPENDAL_S3_ROOT=/
+OPENDAL_S3_BUCKET=your-bucket-name
+OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
+OPENDAL_S3_ACCESS_KEY_ID=your-access-key
+OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
+OPENDAL_S3_REGION=your-region
+OPENDAL_S3_SERVER_SIDE_ENCRYPTION=

 # S3 Configuration
 # Whether to use AWS managed IAM roles for authenticating with the S3 service.
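For the S3 scheme shown above, the same values flow into OpenDALStorage; an illustrative wiring with placeholder credentials:

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.opendal_storage import OpenDALStorage

storage = OpenDALStorage(
    scheme=OpenDALScheme.S3,
    root="/",
    bucket="your-bucket-name",
    endpoint="https://s3.amazonaws.com",
    access_key_id="your-access-key",      # placeholder, mirrors OPENDAL_S3_ACCESS_KEY_ID
    secret_access_key="your-secret-key",  # placeholder, mirrors OPENDAL_S3_SECRET_ACCESS_KEY
    region="your-region",
)
storage.save("hello.txt", b"hello")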