From bad764bcdad4f4a2c0d0e520601053d825820d26 Mon Sep 17 00:00:00 2001 From: Kenny Dizi Date: Mon, 6 Jan 2025 11:06:24 +0700 Subject: [PATCH] Improve storage engine (#4341) ### What problem does this PR solve? - Bring `STORAGE_IMPL` back in `rag/svr/cache_file_svr.py` - Simplify storage connection when working with AWS S3 ### Type of change - [x] Refactoring --- conf/service_conf.yaml | 3 +-- docker/service_conf.yaml.template | 3 +-- rag/svr/cache_file_svr.py | 4 ++-- rag/utils/s3_conn.py | 16 ++-------------- 4 files changed, 6 insertions(+), 20 deletions(-) diff --git a/conf/service_conf.yaml b/conf/service_conf.yaml index bb17ff56f..18b41e164 100644 --- a/conf/service_conf.yaml +++ b/conf/service_conf.yaml @@ -22,7 +22,7 @@ infinity: db_name: 'default_db' redis: db: 1 - password: 'infini_rag_flow' + password: 'infini_rag_flow' host: 'redis:6379' # postgres: @@ -34,7 +34,6 @@ redis: # max_connections: 100 # stale_timeout: 30 # s3: -# endpoint: 'endpoint' # access_key: 'access_key' # secret_key: 'secret_key' # region: 'region' diff --git a/docker/service_conf.yaml.template b/docker/service_conf.yaml.template index 674ac9109..f4acd8bc5 100644 --- a/docker/service_conf.yaml.template +++ b/docker/service_conf.yaml.template @@ -22,7 +22,7 @@ infinity: db_name: 'default_db' redis: db: 1 - password: '${REDIS_PASSWORD:-infini_rag_flow}' + password: '${REDIS_PASSWORD:-infini_rag_flow}' host: '${REDIS_HOST:-redis}:6379' # postgres: @@ -34,7 +34,6 @@ redis: # max_connections: 100 # stale_timeout: 30 # s3: -# endpoint: 'endpoint' # access_key: 'access_key' # secret_key: 'secret_key' # region: 'region' diff --git a/rag/svr/cache_file_svr.py b/rag/svr/cache_file_svr.py index 81be82f0b..4bbadf0f8 100644 --- a/rag/svr/cache_file_svr.py +++ b/rag/svr/cache_file_svr.py @@ -19,7 +19,7 @@ import traceback from api.db.db_models import close_connection from api.db.services.task_service import TaskService -from rag.utils.minio_conn import MINIOs +from rag.utils.storage_factory import STORAGE_IMPL from rag.utils.redis_conn import REDIS_CONN @@ -44,7 +44,7 @@ def main(): key = "{}/{}".format(kb_id, loc) if REDIS_CONN.exist(key): continue - file_bin = MINIOs.get(kb_id, loc) + file_bin = STORAGE_IMPL.get(kb_id, loc) REDIS_CONN.transaction(key, file_bin, 12 * 60) logging.info("CACHE: {}".format(loc)) except Exception as e: diff --git a/rag/utils/s3_conn.py b/rag/utils/s3_conn.py index abc08db6b..03690d8ad 100644 --- a/rag/utils/s3_conn.py +++ b/rag/utils/s3_conn.py @@ -1,7 +1,6 @@ import logging import boto3 from botocore.exceptions import ClientError -from botocore.client import Config import time from io import BytesIO from rag.utils import singleton @@ -12,7 +11,6 @@ class RAGFlowS3(object): def __init__(self): self.conn = None self.s3_config = settings.S3 - self.endpoint = self.s3_config.get('endpoint', None) self.access_key = self.s3_config.get('access_key', None) self.secret_key = self.s3_config.get('secret_key', None) self.region = self.s3_config.get('region', None) @@ -26,24 +24,14 @@ class RAGFlowS3(object): pass try: - - config = Config( - s3={ - 'addressing_style': 'virtual' - } - ) - self.conn = boto3.client( 's3', - endpoint_url=self.endpoint, region_name=self.region, aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key, - config=config + aws_secret_access_key=self.secret_key ) except Exception: - logging.exception( - "Fail to connect %s" % self.endpoint) + logging.exception(f"Fail to connect at region {self.region}") def __close__(self): del self.conn