mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-04-22 06:00:00 +08:00

### What problem does this PR solve? #3531 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
210 lines
6.7 KiB
Python
210 lines
6.7 KiB
Python
#
|
|
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
import os
|
|
from datetime import date
|
|
from enum import IntEnum, Enum
|
|
import rag.utils.es_conn
|
|
import rag.utils.infinity_conn
|
|
|
|
import rag.utils
|
|
from rag.nlp import search
|
|
from graphrag import search as kg_search
|
|
from api.utils import get_base_config, decrypt_database_config
|
|
from api.constants import RAG_FLOW_SERVICE_NAME
|
|
|
|
# Module-level settings. Most of these are placeholders that
# init_settings() overwrites; only LIGHTEN and the database settings are
# computed at import time.

# Integer flag read from the environment; init_settings() tests it for
# truthiness before assigning model defaults.
LIGHTEN = int(os.environ.get("LIGHTEN", "0"))

# Default-LLM configuration.
LLM = LLM_FACTORY = LLM_BASE_URL = None

# Default model identifiers.
CHAT_MDL = EMBEDDING_MDL = RERANK_MDL = ASR_MDL = IMAGE2TEXT_MDL = ""

# API key, parser list and service endpoint/secret.
API_KEY = PARSERS = None
HOST_IP = HOST_PORT = SECRET_KEY = None

# Database backend name (defaults to "mysql") and its decrypted config.
DATABASE_TYPE = os.environ.get("DB_TYPE", "mysql")
DATABASE = decrypt_database_config(name=DATABASE_TYPE)

# authentication
AUTHENTICATION_CONF = None

# client
CLIENT_AUTHENTICATION = HTTP_APP_KEY = None
GITHUB_OAUTH = FEISHU_OAUTH = None

# Document engine name and the connection object created for it.
DOC_ENGINE = docStoreConn = None

# Retrievers built on top of docStoreConn.
retrievaler = kg_retrievaler = None
|
|
|
|
|
|
def init_settings():
    """Load runtime configuration into this module's global settings.

    Re-reads the ``LIGHTEN`` and ``DB_TYPE`` environment variables, reads the
    ``user_default_llm``, service, ``authentication`` and ``oauth`` sections
    through ``get_base_config``, and creates the document-store connection
    selected by the ``DOC_ENGINE`` environment variable (default
    ``"elasticsearch"``) together with the two retrievers that wrap it.

    Built-in default model names are only assigned when ``LIGHTEN`` is zero.
    A configured factory that has no entry in the built-in table is logged
    and leaves the chat/ASR/image2text defaults untouched instead of raising
    ``KeyError``.

    Raises:
        Exception: if ``DOC_ENGINE`` is neither ``"elasticsearch"`` nor
            ``"infinity"``.
    """
    import logging

    global LLM, LLM_FACTORY, LLM_BASE_URL, LIGHTEN, DATABASE_TYPE, DATABASE
    LIGHTEN = int(os.environ.get('LIGHTEN', "0"))
    DATABASE_TYPE = os.getenv("DB_TYPE", 'mysql')
    DATABASE = decrypt_database_config(name=DATABASE_TYPE)
    LLM = get_base_config("user_default_llm", {})
    LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
    LLM_BASE_URL = LLM.get("base_url")

    global CHAT_MDL, EMBEDDING_MDL, RERANK_MDL, ASR_MDL, IMAGE2TEXT_MDL
    if not LIGHTEN:
        # Built-in default model names, keyed by factory name.
        default_llm = {
            "Tongyi-Qianwen": {
                "chat_model": "qwen-plus",
                "embedding_model": "text-embedding-v2",
                "image2text_model": "qwen-vl-max",
                "asr_model": "paraformer-realtime-8k-v1",
            },
            "OpenAI": {
                "chat_model": "gpt-3.5-turbo",
                "embedding_model": "text-embedding-ada-002",
                "image2text_model": "gpt-4-vision-preview",
                "asr_model": "whisper-1",
            },
            "Azure-OpenAI": {
                "chat_model": "gpt-35-turbo",
                "embedding_model": "text-embedding-ada-002",
                "image2text_model": "gpt-4-vision-preview",
                "asr_model": "whisper-1",
            },
            "ZHIPU-AI": {
                "chat_model": "glm-3-turbo",
                "embedding_model": "embedding-2",
                "image2text_model": "glm-4v",
                "asr_model": "",
            },
            "Ollama": {
                "chat_model": "qwen-14B-chat",
                "embedding_model": "flag-embedding",
                "image2text_model": "",
                "asr_model": "",
            },
            "Moonshot": {
                "chat_model": "moonshot-v1-8k",
                "embedding_model": "",
                "image2text_model": "",
                "asr_model": "",
            },
            "DeepSeek": {
                "chat_model": "deepseek-chat",
                "embedding_model": "",
                "image2text_model": "",
                "asr_model": "",
            },
            "VolcEngine": {
                "chat_model": "",
                "embedding_model": "",
                "image2text_model": "",
                "asr_model": "",
            },
            "BAAI": {
                "chat_model": "",
                "embedding_model": "BAAI/bge-large-zh-v1.5",
                "image2text_model": "",
                "asr_model": "",
                "rerank_model": "BAAI/bge-reranker-v2-m3",
            }
        }

        if LLM_FACTORY:
            # The factory comes from configuration and may name a provider
            # that is missing from the table above; look it up defensively
            # so an unknown factory does not abort start-up with KeyError.
            factory_defaults = default_llm.get(LLM_FACTORY)
            if factory_defaults is None:
                logging.warning(
                    "No built-in default models for LLM factory %r; "
                    "leaving chat/asr/image2text defaults unchanged.",
                    LLM_FACTORY)
            else:
                CHAT_MDL = factory_defaults["chat_model"] + f"@{LLM_FACTORY}"
                ASR_MDL = factory_defaults["asr_model"] + f"@{LLM_FACTORY}"
                IMAGE2TEXT_MDL = factory_defaults["image2text_model"] + f"@{LLM_FACTORY}"
        # Embedding and rerank defaults always come from the BAAI entry.
        EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"] + "@BAAI"
        RERANK_MDL = default_llm["BAAI"]["rerank_model"] + "@BAAI"

    global API_KEY, PARSERS, HOST_IP, HOST_PORT, SECRET_KEY
    API_KEY = LLM.get("api_key", "")
    PARSERS = LLM.get(
        "parsers",
        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email")

    # Read the service section once instead of once per setting.
    service_conf = get_base_config(RAG_FLOW_SERVICE_NAME, {})
    HOST_IP = service_conf.get("host", "127.0.0.1")
    HOST_PORT = service_conf.get("http_port")

    # NOTE(review): the fallback secret is today's date, which is guessable;
    # deployments should set `secret_key` explicitly in the service config.
    SECRET_KEY = service_conf.get("secret_key", str(date.today()))

    global AUTHENTICATION_CONF, CLIENT_AUTHENTICATION, HTTP_APP_KEY, GITHUB_OAUTH, FEISHU_OAUTH
    # authentication
    AUTHENTICATION_CONF = get_base_config("authentication", {})

    # client
    client_conf = AUTHENTICATION_CONF.get("client", {})
    CLIENT_AUTHENTICATION = client_conf.get("switch", False)
    HTTP_APP_KEY = client_conf.get("http_app_key")

    oauth_conf = get_base_config("oauth", {})
    GITHUB_OAUTH = oauth_conf.get("github")
    FEISHU_OAUTH = oauth_conf.get("feishu")

    global DOC_ENGINE, docStoreConn, retrievaler, kg_retrievaler
    DOC_ENGINE = os.environ.get('DOC_ENGINE', "elasticsearch")
    if DOC_ENGINE == "elasticsearch":
        docStoreConn = rag.utils.es_conn.ESConnection()
    elif DOC_ENGINE == "infinity":
        docStoreConn = rag.utils.infinity_conn.InfinityConnection()
    else:
        raise Exception(f"Not supported doc engine: {DOC_ENGINE}")

    # Both retrievers share the same document-store connection.
    retrievaler = search.Dealer(docStoreConn)
    kg_retrievaler = kg_search.KGSearch(docStoreConn)
|
|
|
|
|
|
class CustomEnum(Enum):
    """Enum base class with helpers for validating and listing members."""

    @classmethod
    def valid(cls, value):
        """Return True if *value* is the value of a member of this enum."""
        try:
            cls(value)
        except (ValueError, TypeError):
            # A failed enum lookup raises ValueError (TypeError covers a
            # misbehaving `_missing_` hook). Anything else, such as
            # KeyboardInterrupt or SystemExit, must propagate.
            return False
        return True

    @classmethod
    def values(cls):
        """Return the values of all entries in ``__members__``, in definition order."""
        return [member.value for member in cls.__members__.values()]

    @classmethod
    def names(cls):
        """Return the member names for all entries in ``__members__``, in definition order."""
        return [member.name for member in cls.__members__.values()]
|
|
|
|
|
|
class RetCode(IntEnum, CustomEnum):
    """Integer return codes.

    Members are plain ints (via ``IntEnum``) and inherit the ``valid``,
    ``values`` and ``names`` helpers from ``CustomEnum``. Declaration order
    is kept as-is because it determines the order of ``values()`` and
    ``names()``.
    """

    # Low-numbered codes.
    SUCCESS = 0
    NOT_EFFECTIVE = 10

    # Codes in the 100 range.
    EXCEPTION_ERROR = 100
    ARGUMENT_ERROR = 101
    DATA_ERROR = 102
    OPERATING_ERROR = 103
    CONNECTION_ERROR = 105
    RUNNING = 106
    PERMISSION_ERROR = 108
    AUTHENTICATION_ERROR = 109

    # Codes whose values coincide with HTTP status codes.
    UNAUTHORIZED = 401
    SERVER_ERROR = 500
    FORBIDDEN = 403
    NOT_FOUND = 404
|