Add azure blob storage support (#2919)

Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
Jyong 2024-03-20 20:49:58 +08:00 committed by GitHub
parent 9a3d5729bb
commit 86e474fff1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 64 additions and 3 deletions

View File

@ -39,7 +39,7 @@ DB_DATABASE=dify
# Storage configuration
# use for store upload files, private keys...
# storage type: local, s3
# storage type: local, s3, azure-blob
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
@ -47,6 +47,11 @@ S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region
# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key
AZURE_BLOB_CONTAINER_NAME=your-container-name
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net
# CORS configuration
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*

View File

@ -180,6 +180,10 @@ class Config:
self.S3_ACCESS_KEY = get_env('S3_ACCESS_KEY')
self.S3_SECRET_KEY = get_env('S3_SECRET_KEY')
self.S3_REGION = get_env('S3_REGION')
self.AZURE_BLOB_ACCOUNT_NAME = get_env('AZURE_BLOB_ACCOUNT_NAME')
self.AZURE_BLOB_ACCOUNT_KEY = get_env('AZURE_BLOB_ACCOUNT_KEY')
self.AZURE_BLOB_CONTAINER_NAME = get_env('AZURE_BLOB_CONTAINER_NAME')
self.AZURE_BLOB_ACCOUNT_URL = get_env('AZURE_BLOB_ACCOUNT_URL')
# ------------------------
# Vector Store Configurations.

View File

@ -2,9 +2,11 @@ import os
import shutil
from collections.abc import Generator
from contextlib import closing
from datetime import datetime, timedelta
from typing import Union
import boto3
from azure.storage.blob import AccountSasPermissions, BlobServiceClient, ResourceTypes, generate_account_sas
from botocore.exceptions import ClientError
from flask import Flask
@ -27,6 +29,18 @@ class Storage:
endpoint_url=app.config.get('S3_ENDPOINT'),
region_name=app.config.get('S3_REGION')
)
elif self.storage_type == 'azure-blob':
self.bucket_name = app.config.get('AZURE_BLOB_CONTAINER_NAME')
sas_token = generate_account_sas(
account_name=app.config.get('AZURE_BLOB_ACCOUNT_NAME'),
account_key=app.config.get('AZURE_BLOB_ACCOUNT_KEY'),
resource_types=ResourceTypes(service=True, container=True, object=True),
permission=AccountSasPermissions(read=True, write=True, delete=True, list=True, add=True, create=True),
expiry=datetime.utcnow() + timedelta(hours=1)
)
self.client = BlobServiceClient(account_url=app.config.get('AZURE_BLOB_ACCOUNT_URL'),
credential=sas_token)
else:
self.folder = app.config.get('STORAGE_LOCAL_PATH')
if not os.path.isabs(self.folder):
@ -35,6 +49,9 @@ class Storage:
def save(self, filename, data):
if self.storage_type == 's3':
self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)
elif self.storage_type == 'azure-blob':
blob_container = self.client.get_container_client(container=self.bucket_name)
blob_container.upload_blob(filename, data)
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
@ -63,6 +80,10 @@ class Storage:
raise FileNotFoundError("File not found")
else:
raise
elif self.storage_type == 'azure-blob':
blob = self.client.get_container_client(container=self.bucket_name)
blob = blob.get_blob_client(blob=filename)
data = blob.download_blob().readall()
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
@ -90,6 +111,11 @@ class Storage:
raise FileNotFoundError("File not found")
else:
raise
elif self.storage_type == 'azure-blob':
blob = self.client.get_blob_client(container=self.bucket_name, blob=filename)
with closing(blob.download_blob()) as blob_stream:
while chunk := blob_stream.readall(4096):
yield chunk
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
@ -109,6 +135,11 @@ class Storage:
if self.storage_type == 's3':
with closing(self.client) as client:
client.download_file(self.bucket_name, filename, target_filepath)
elif self.storage_type == 'azure-blob':
blob = self.client.get_blob_client(container=self.bucket_name, blob=filename)
with open(target_filepath, "wb") as my_blob:
blob_data = blob.download_blob()
blob_data.readinto(my_blob)
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
@ -128,6 +159,9 @@ class Storage:
return True
except:
return False
elif self.storage_type == 'azure-blob':
blob = self.client.get_blob_client(container=self.bucket_name, blob=filename)
return blob.exists()
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename

View File

@ -72,3 +72,5 @@ arxiv==2.1.0
yarl~=1.9.4
twilio==9.0.0
qrcode~=7.4.2
azure-storage-blob==12.9.0
azure-identity==1.15.0

View File

@ -70,7 +70,7 @@ services:
# If you want to enable cross-origin support,
# you must use the HTTPS protocol and set the configuration to `SameSite=None, Secure=true, HttpOnly=true`.
#
# The type of storage to use for storing user files. Supported values are `local` and `s3`, Default: `local`
# The type of storage to use for storing user files. Supported values are `local`, `s3`, and `azure-blob`. Default: `local`
STORAGE_TYPE: local
# The path to the local storage directory, the directory relative the root path of API service codes or absolute path. Default: `storage` or `/home/john/storage`.
# only available when STORAGE_TYPE is `local`.
@ -81,6 +81,11 @@ services:
S3_ACCESS_KEY: 'ak-difyai'
S3_SECRET_KEY: 'sk-difyai'
S3_REGION: 'us-east-1'
# The Azure Blob storage configurations, only available when STORAGE_TYPE is `azure-blob`.
AZURE_BLOB_ACCOUNT_NAME: 'difyai'
AZURE_BLOB_ACCOUNT_KEY: 'difyai'
AZURE_BLOB_CONTAINER_NAME: 'difyai-container'
AZURE_BLOB_ACCOUNT_URL: 'https://<your_account_name>.blob.core.windows.net'
# The type of vector store to use. Supported values are `weaviate`, `qdrant`, `milvus`.
VECTOR_STORE: weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
@ -164,9 +169,20 @@ services:
REDIS_USE_SSL: 'false'
# The configurations of celery broker.
CELERY_BROKER_URL: redis://:difyai123456@redis:6379/1
# The type of storage to use for storing user files. Supported values are `local` and `s3`, Default: `local`
# The type of storage to use for storing user files. Supported values are `local`, `s3`, and `azure-blob`. Default: `local`
STORAGE_TYPE: local
STORAGE_LOCAL_PATH: storage
# The S3 storage configurations, only available when STORAGE_TYPE is `s3`.
S3_ENDPOINT: 'https://xxx.r2.cloudflarestorage.com'
S3_BUCKET_NAME: 'difyai'
S3_ACCESS_KEY: 'ak-difyai'
S3_SECRET_KEY: 'sk-difyai'
S3_REGION: 'us-east-1'
# The Azure Blob storage configurations, only available when STORAGE_TYPE is `azure-blob`.
AZURE_BLOB_ACCOUNT_NAME: 'difyai'
AZURE_BLOB_ACCOUNT_KEY: 'difyai'
AZURE_BLOB_CONTAINER_NAME: 'difyai-container'
AZURE_BLOB_ACCOUNT_URL: 'https://<your_account_name>.blob.core.windows.net'
# The type of vector store to use. Supported values are `weaviate`, `qdrant`, `milvus`.
VECTOR_STORE: weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.