Improve storage engine (#4341)

### What problem does this PR solve?

- Restore the use of `STORAGE_IMPL` in `rag/svr/cache_file_svr.py`, replacing the direct `MINIOs` import
- Simplify the storage connection when working with AWS S3 by dropping the `endpoint` setting and the custom botocore `Config`

### Type of change

- [x] Refactoring
This commit is contained in:
Kenny Dizi 2025-01-06 11:06:24 +07:00 committed by GitHub
parent 9c6cf12137
commit bad764bcda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 6 additions and 20 deletions

View File

@ -34,7 +34,6 @@ redis:
# max_connections: 100
# stale_timeout: 30
# s3:
# endpoint: 'endpoint'
# access_key: 'access_key'
# secret_key: 'secret_key'
# region: 'region'

View File

@ -34,7 +34,6 @@ redis:
# max_connections: 100
# stale_timeout: 30
# s3:
# endpoint: 'endpoint'
# access_key: 'access_key'
# secret_key: 'secret_key'
# region: 'region'

View File

@ -19,7 +19,7 @@ import traceback
from api.db.db_models import close_connection
from api.db.services.task_service import TaskService
from rag.utils.minio_conn import MINIOs
from rag.utils.storage_factory import STORAGE_IMPL
from rag.utils.redis_conn import REDIS_CONN
@ -44,7 +44,7 @@ def main():
key = "{}/{}".format(kb_id, loc)
if REDIS_CONN.exist(key):
continue
file_bin = MINIOs.get(kb_id, loc)
file_bin = STORAGE_IMPL.get(kb_id, loc)
REDIS_CONN.transaction(key, file_bin, 12 * 60)
logging.info("CACHE: {}".format(loc))
except Exception as e:

View File

@ -1,7 +1,6 @@
import logging
import boto3
from botocore.exceptions import ClientError
from botocore.client import Config
import time
from io import BytesIO
from rag.utils import singleton
@ -12,7 +11,6 @@ class RAGFlowS3(object):
def __init__(self):
self.conn = None
self.s3_config = settings.S3
self.endpoint = self.s3_config.get('endpoint', None)
self.access_key = self.s3_config.get('access_key', None)
self.secret_key = self.s3_config.get('secret_key', None)
self.region = self.s3_config.get('region', None)
@ -26,24 +24,14 @@ class RAGFlowS3(object):
pass
try:
config = Config(
s3={
'addressing_style': 'virtual'
}
)
self.conn = boto3.client(
's3',
endpoint_url=self.endpoint,
region_name=self.region,
aws_access_key_id=self.access_key,
aws_secret_access_key=self.secret_key,
config=config
aws_secret_access_key=self.secret_key
)
except Exception:
logging.exception(
"Fail to connect %s" % self.endpoint)
logging.exception(f"Fail to connect at region {self.region}")
def __close__(self):
del self.conn