Improve storage engine (#4341)

### What problem does this PR solve?

- Bring `STORAGE_IMPL` back in `rag/svr/cache_file_svr.py`
- Simplify storage connection when working with AWS S3

### Type of change

- [x] Refactoring
This commit is contained in:
Kenny Dizi 2025-01-06 11:06:24 +07:00 committed by GitHub
parent 9c6cf12137
commit bad764bcda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 6 additions and 20 deletions

View File

@@ -22,7 +22,7 @@ infinity:
   db_name: 'default_db'
 redis:
   db: 1
   password: 'infini_rag_flow'
   host: 'redis:6379'
 # postgres:
@@ -34,7 +34,6 @@ redis:
 #   max_connections: 100
 #   stale_timeout: 30
 # s3:
-#   endpoint: 'endpoint'
 #   access_key: 'access_key'
 #   secret_key: 'secret_key'
 #   region: 'region'

View File

@@ -22,7 +22,7 @@ infinity:
   db_name: 'default_db'
 redis:
   db: 1
   password: '${REDIS_PASSWORD:-infini_rag_flow}'
   host: '${REDIS_HOST:-redis}:6379'
 # postgres:
@@ -34,7 +34,6 @@ redis:
 #   max_connections: 100
 #   stale_timeout: 30
 # s3:
-#   endpoint: 'endpoint'
 #   access_key: 'access_key'
 #   secret_key: 'secret_key'
 #   region: 'region'

View File

@@ -19,7 +19,7 @@ import traceback
 from api.db.db_models import close_connection
 from api.db.services.task_service import TaskService
-from rag.utils.minio_conn import MINIOs
+from rag.utils.storage_factory import STORAGE_IMPL
 from rag.utils.redis_conn import REDIS_CONN
@@ -44,7 +44,7 @@ def main():
             key = "{}/{}".format(kb_id, loc)
             if REDIS_CONN.exist(key):
                 continue
-            file_bin = MINIOs.get(kb_id, loc)
+            file_bin = STORAGE_IMPL.get(kb_id, loc)
             REDIS_CONN.transaction(key, file_bin, 12 * 60)
             logging.info("CACHE: {}".format(loc))
         except Exception as e:

View File

@@ -1,7 +1,6 @@
 import logging
 import boto3
 from botocore.exceptions import ClientError
-from botocore.client import Config
 import time
 from io import BytesIO
 from rag.utils import singleton
@@ -12,7 +11,6 @@ class RAGFlowS3(object):
     def __init__(self):
         self.conn = None
         self.s3_config = settings.S3
-        self.endpoint = self.s3_config.get('endpoint', None)
         self.access_key = self.s3_config.get('access_key', None)
         self.secret_key = self.s3_config.get('secret_key', None)
         self.region = self.s3_config.get('region', None)
@@ -26,24 +24,14 @@ class RAGFlowS3(object):
             pass
         try:
-            config = Config(
-                s3={
-                    'addressing_style': 'virtual'
-                }
-            )
             self.conn = boto3.client(
                 's3',
-                endpoint_url=self.endpoint,
                 region_name=self.region,
                 aws_access_key_id=self.access_key,
-                aws_secret_access_key=self.secret_key,
-                config=config
+                aws_secret_access_key=self.secret_key
             )
         except Exception:
-            logging.exception(
-                "Fail to connect %s" % self.endpoint)
+            logging.exception(f"Fail to connect at region {self.region}")

     def __close__(self):
         del self.conn