add lighten control (#2567)
### What problem does this PR solve?

#2295

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
Parent: 9251fb39af · Commit: 7bb28ca2bd
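The commit threads a new `LIGHTEN` switch, read from the environment in `api/settings.py`, through the server: the model-list endpoint hides the locally deployed embedding factories, the default LLM table and default model names are only populated in full mode, and the PDF parser plus the default embedding and rerank wrappers defer their `torch` and `FlagEmbedding` imports so a lighten deployment can run without them.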
```diff
@@ -18,6 +18,7 @@ import json
 from flask import request
 from flask_login import login_required, current_user
 from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
+from api.settings import LIGHTEN
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
 from api.db import StatusEnum, LLMType
 from api.db.db_models import TenantLLM
@@ -319,13 +320,14 @@ def my_llms():
 @login_required
 def list_app():
     self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"]
+    weighted = ["Youdao","FastEmbed", "BAAI"] if LIGHTEN else []
     model_type = request.args.get("model_type")
     try:
         objs = TenantLLMService.query(tenant_id=current_user.id)
         facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
         llms = LLMService.get_all()
         llms = [m.to_dict()
-                for m in llms if m.status == StatusEnum.VALID.value]
+                for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted]
         for m in llms:
             m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied
```
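In lighten mode the model-list endpoint now hides the locally hosted embedding factories ("Youdao", "FastEmbed", "BAAI"). A minimal standalone sketch of the new filter, with hypothetical model rows in place of the `LLMService` query (`"1"` stands in for `StatusEnum.VALID.value`):

```python
# Hypothetical rows standing in for LLMService.get_all(); only the filter
# expression mirrors the diff above.
LIGHTEN = True
weighted = ["Youdao", "FastEmbed", "BAAI"] if LIGHTEN else []

llms = [
    {"fid": "OpenAI", "llm_name": "gpt-3.5-turbo", "status": "1"},
    {"fid": "BAAI", "llm_name": "bge-large-zh-v1.5", "status": "1"},
]
llms = [m for m in llms if m["status"] == "1" and m["fid"] not in weighted]
print([m["fid"] for m in llms])  # ['OpenAI'] -- BAAI is hidden in lighten mode
```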
api/settings.py (134 changed lines)
```diff
@@ -42,6 +42,7 @@ RAG_FLOW_SERVICE_NAME = "ragflow"
 SERVER_MODULE = "rag_flow_server.py"
 TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
 RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
+LIGHTEN = os.environ.get('LIGHTEN')
 
 SUBPROCESS_STD_LOG_NAME = "std.log"
 
@@ -57,77 +58,76 @@ REQUEST_MAX_WAIT_SEC = 300
 
 USE_REGISTRY = get_base_config("use_registry")
 
-default_llm = {
-    "Tongyi-Qianwen": {
-        "chat_model": "qwen-plus",
-        "embedding_model": "text-embedding-v2",
-        "image2text_model": "qwen-vl-max",
-        "asr_model": "paraformer-realtime-8k-v1",
-    },
-    "OpenAI": {
-        "chat_model": "gpt-3.5-turbo",
-        "embedding_model": "text-embedding-ada-002",
-        "image2text_model": "gpt-4-vision-preview",
-        "asr_model": "whisper-1",
-    },
-    "Azure-OpenAI": {
-        "chat_model": "azure-gpt-35-turbo",
-        "embedding_model": "azure-text-embedding-ada-002",
-        "image2text_model": "azure-gpt-4-vision-preview",
-        "asr_model": "azure-whisper-1",
-    },
-    "ZHIPU-AI": {
-        "chat_model": "glm-3-turbo",
-        "embedding_model": "embedding-2",
-        "image2text_model": "glm-4v",
-        "asr_model": "",
-    },
-    "Ollama": {
-        "chat_model": "qwen-14B-chat",
-        "embedding_model": "flag-embedding",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "Moonshot": {
-        "chat_model": "moonshot-v1-8k",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "DeepSeek": {
-        "chat_model": "deepseek-chat",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "VolcEngine": {
-        "chat_model": "",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "BAAI": {
-        "chat_model": "",
-        "embedding_model": "BAAI/bge-large-zh-v1.5",
-        "image2text_model": "",
-        "asr_model": "",
-        "rerank_model": "BAAI/bge-reranker-v2-m3",
-    }
-}
 LLM = get_base_config("user_default_llm", {})
 LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
 LLM_BASE_URL = LLM.get("base_url")
 
-if LLM_FACTORY not in default_llm:
-    print(
-        "\33[91m【ERROR】\33[0m:",
-        f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
-    LLM_FACTORY = "Tongyi-Qianwen"
-CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
-EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
-RERANK_MDL = default_llm["BAAI"]["rerank_model"]
-ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
-IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
+if not LIGHTEN:
+    default_llm = {
+        "Tongyi-Qianwen": {
+            "chat_model": "qwen-plus",
+            "embedding_model": "text-embedding-v2",
+            "image2text_model": "qwen-vl-max",
+            "asr_model": "paraformer-realtime-8k-v1",
+        },
+        "OpenAI": {
+            "chat_model": "gpt-3.5-turbo",
+            "embedding_model": "text-embedding-ada-002",
+            "image2text_model": "gpt-4-vision-preview",
+            "asr_model": "whisper-1",
+        },
+        "Azure-OpenAI": {
+            "chat_model": "azure-gpt-35-turbo",
+            "embedding_model": "azure-text-embedding-ada-002",
+            "image2text_model": "azure-gpt-4-vision-preview",
+            "asr_model": "azure-whisper-1",
+        },
+        "ZHIPU-AI": {
+            "chat_model": "glm-3-turbo",
+            "embedding_model": "embedding-2",
+            "image2text_model": "glm-4v",
+            "asr_model": "",
+        },
+        "Ollama": {
+            "chat_model": "qwen-14B-chat",
+            "embedding_model": "flag-embedding",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "Moonshot": {
+            "chat_model": "moonshot-v1-8k",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "DeepSeek": {
+            "chat_model": "deepseek-chat",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "VolcEngine": {
+            "chat_model": "",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "BAAI": {
+            "chat_model": "",
+            "embedding_model": "BAAI/bge-large-zh-v1.5",
+            "image2text_model": "",
+            "asr_model": "",
+            "rerank_model": "BAAI/bge-reranker-v2-m3",
+        }
+    }
+
+    CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
+    EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
+    RERANK_MDL = default_llm["BAAI"]["rerank_model"] if not LIGHTEN else ""
+    ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
+    IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
+else:
+    CHAT_MDL = EMBEDDING_MDL = RERANK_MDL = ASR_MDL = IMAGE2TEXT_MDL = ""
 
 API_KEY = LLM.get("api_key", "")
 PARSERS = LLM.get(
```
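One detail worth noting: `os.environ.get('LIGHTEN')` returns the raw string (or `None`), so any non-empty value enables lighten mode, including `LIGHTEN=0` or `LIGHTEN=false`. A quick illustration of that truthiness behavior:

```python
import os

for value in (None, "", "1", "0", "false"):
    # simulate what os.environ.get('LIGHTEN') returns for each setting
    if value is None:
        os.environ.pop("LIGHTEN", None)
    else:
        os.environ["LIGHTEN"] = value
    LIGHTEN = os.environ.get("LIGHTEN")
    print(repr(value), "-> lighten mode" if LIGHTEN else "-> full mode")
# only None and "" leave the server in full mode; "0" and "false" are truthy
```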
```diff
@@ -16,7 +16,6 @@ import random
 
 import xgboost as xgb
 from io import BytesIO
-import torch
 import re
 import pdfplumber
 import logging
@@ -25,6 +24,7 @@ import numpy as np
 from timeit import default_timer as timer
 from pypdf import PdfReader as pdf2_read
 
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_project_base_directory
 from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
 from rag.nlp import rag_tokenizer
@@ -44,8 +44,10 @@ class RAGFlowPdfParser:
         self.tbl_det = TableStructureRecognizer()
 
         self.updown_cnt_mdl = xgb.Booster()
-        if torch.cuda.is_available():
-            self.updown_cnt_mdl.set_param({"device": "cuda"})
+        if not LIGHTEN:
+            import torch
+            if torch.cuda.is_available():
+                self.updown_cnt_mdl.set_param({"device": "cuda"})
         try:
             model_dir = os.path.join(
                 get_project_base_directory(),
```
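The parser keeps its `xgb.Booster`, but the module-level `import torch` becomes a deferred import at construction time, so a lighten install never has to load it. A minimal sketch of the pattern, with a hypothetical `Parser` class standing in for `RAGFlowPdfParser`:

```python
import os

LIGHTEN = os.environ.get("LIGHTEN")

class Parser:
    def __init__(self):
        self.device = "cpu"
        if not LIGHTEN:
            # deferred import: torch need not even be installed in lighten mode
            import torch
            if torch.cuda.is_available():
                self.device = "cuda"
```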
```diff
@@ -25,10 +25,10 @@ from abc import ABC
 from ollama import Client
 import dashscope
 from openai import OpenAI
-from FlagEmbedding import FlagModel
-import torch
 import numpy as np
 import asyncio
+
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
 import google.generativeai as genai
@@ -60,8 +60,10 @@ class DefaultEmbedding(Base):
         ^_-
 
         """
-        if not DefaultEmbedding._model:
+        if not LIGHTEN and not DefaultEmbedding._model:
             with DefaultEmbedding._model_lock:
+                from FlagEmbedding import FlagModel
+                import torch
                 if not DefaultEmbedding._model:
                     try:
                         DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)),
```
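`DefaultEmbedding` already cached the model in a class attribute; the change adds the `LIGHTEN` guard and moves the `FlagEmbedding` and `torch` imports inside the lock. The loading discipline is classic double-checked locking, sketched here with a hypothetical `LazyModel` and a stand-in for the expensive load:

```python
import threading

class LazyModel:
    _model = None
    _model_lock = threading.Lock()

    def __init__(self, lighten=False):
        if not lighten and LazyModel._model is None:
            with LazyModel._model_lock:
                if LazyModel._model is None:  # re-check once the lock is held
                    LazyModel._model = object()  # stand-in for the FlagModel load
```

The second check matters: two threads can both pass the first test, but only the first to take the lock performs the load; the loser sees the populated attribute and skips it.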
```diff
@@ -14,14 +14,14 @@
 # limitations under the License.
 #
 import re
 import threading
 import requests
-import torch
-from FlagEmbedding import FlagReranker
 from huggingface_hub import snapshot_download
 import os
 from abc import ABC
 import numpy as np
+
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
 import json
@@ -53,7 +53,9 @@ class DefaultRerank(Base):
         ^_-
 
         """
-        if not DefaultRerank._model:
+        if not LIGHTEN and not DefaultRerank._model:
+            import torch
+            from FlagEmbedding import FlagReranker
             with DefaultRerank._model_lock:
                 if not DefaultRerank._model:
                     try:
```
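The rerank loader mirrors the embedding loader above: the same `LIGHTEN` guard in front of the cached-model check, with the `torch` and `FlagEmbedding` imports deferred until a load is actually attempted. Taken together, these changes should let a lighten deployment start without `torch`, `FlagEmbedding`, or the bundled BAAI models installed.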