diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py index 878c6a0bd..ca0333ac5 100644 --- a/api/apps/llm_app.py +++ b/api/apps/llm_app.py @@ -18,6 +18,7 @@ import json from flask import request from flask_login import login_required, current_user from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService +from api.settings import LIGHTEN from api.utils.api_utils import server_error_response, get_data_error_result, validate_request from api.db import StatusEnum, LLMType from api.db.db_models import TenantLLM @@ -319,13 +320,14 @@ def my_llms(): @login_required def list_app(): self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"] + weighted = ["Youdao","FastEmbed", "BAAI"] if LIGHTEN else [] model_type = request.args.get("model_type") try: objs = TenantLLMService.query(tenant_id=current_user.id) facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key]) llms = LLMService.get_all() llms = [m.to_dict() - for m in llms if m.status == StatusEnum.VALID.value] + for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted] for m in llms: m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied diff --git a/api/settings.py b/api/settings.py index fdb9bb595..ab73557ef 100644 --- a/api/settings.py +++ b/api/settings.py @@ -42,6 +42,7 @@ RAG_FLOW_SERVICE_NAME = "ragflow" SERVER_MODULE = "rag_flow_server.py" TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp") RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf") +LIGHTEN = os.environ.get('LIGHTEN') SUBPROCESS_STD_LOG_NAME = "std.log" @@ -57,77 +58,76 @@ REQUEST_MAX_WAIT_SEC = 300 USE_REGISTRY = get_base_config("use_registry") -default_llm = { - "Tongyi-Qianwen": { - "chat_model": "qwen-plus", - "embedding_model": "text-embedding-v2", - "image2text_model": "qwen-vl-max", - "asr_model": "paraformer-realtime-8k-v1", - }, - "OpenAI": { - "chat_model": "gpt-3.5-turbo", - "embedding_model": "text-embedding-ada-002", - "image2text_model": "gpt-4-vision-preview", - "asr_model": "whisper-1", - }, - "Azure-OpenAI": { - "chat_model": "azure-gpt-35-turbo", - "embedding_model": "azure-text-embedding-ada-002", - "image2text_model": "azure-gpt-4-vision-preview", - "asr_model": "azure-whisper-1", - }, - "ZHIPU-AI": { - "chat_model": "glm-3-turbo", - "embedding_model": "embedding-2", - "image2text_model": "glm-4v", - "asr_model": "", - }, - "Ollama": { - "chat_model": "qwen-14B-chat", - "embedding_model": "flag-embedding", - "image2text_model": "", - "asr_model": "", - }, - "Moonshot": { - "chat_model": "moonshot-v1-8k", - "embedding_model": "", - "image2text_model": "", - "asr_model": "", - }, - "DeepSeek": { - "chat_model": "deepseek-chat", - "embedding_model": "", - "image2text_model": "", - "asr_model": "", - }, - "VolcEngine": { - "chat_model": "", - "embedding_model": "", - "image2text_model": "", - "asr_model": "", - }, - "BAAI": { - "chat_model": "", - "embedding_model": "BAAI/bge-large-zh-v1.5", - "image2text_model": "", - "asr_model": "", - "rerank_model": "BAAI/bge-reranker-v2-m3", - } -} LLM = get_base_config("user_default_llm", {}) LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen") LLM_BASE_URL = LLM.get("base_url") -if LLM_FACTORY not in default_llm: - print( - "\33[91m【ERROR】\33[0m:", - f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.") - LLM_FACTORY = "Tongyi-Qianwen" -CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"] -EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"] -RERANK_MDL = default_llm["BAAI"]["rerank_model"] -ASR_MDL = default_llm[LLM_FACTORY]["asr_model"] -IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"] +if not LIGHTEN: + default_llm = { + "Tongyi-Qianwen": { + "chat_model": "qwen-plus", + "embedding_model": "text-embedding-v2", + "image2text_model": "qwen-vl-max", + "asr_model": "paraformer-realtime-8k-v1", + }, + "OpenAI": { + "chat_model": "gpt-3.5-turbo", + "embedding_model": "text-embedding-ada-002", + "image2text_model": "gpt-4-vision-preview", + "asr_model": "whisper-1", + }, + "Azure-OpenAI": { + "chat_model": "azure-gpt-35-turbo", + "embedding_model": "azure-text-embedding-ada-002", + "image2text_model": "azure-gpt-4-vision-preview", + "asr_model": "azure-whisper-1", + }, + "ZHIPU-AI": { + "chat_model": "glm-3-turbo", + "embedding_model": "embedding-2", + "image2text_model": "glm-4v", + "asr_model": "", + }, + "Ollama": { + "chat_model": "qwen-14B-chat", + "embedding_model": "flag-embedding", + "image2text_model": "", + "asr_model": "", + }, + "Moonshot": { + "chat_model": "moonshot-v1-8k", + "embedding_model": "", + "image2text_model": "", + "asr_model": "", + }, + "DeepSeek": { + "chat_model": "deepseek-chat", + "embedding_model": "", + "image2text_model": "", + "asr_model": "", + }, + "VolcEngine": { + "chat_model": "", + "embedding_model": "", + "image2text_model": "", + "asr_model": "", + }, + "BAAI": { + "chat_model": "", + "embedding_model": "BAAI/bge-large-zh-v1.5", + "image2text_model": "", + "asr_model": "", + "rerank_model": "BAAI/bge-reranker-v2-m3", + } + } + + CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"] + EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"] + RERANK_MDL = default_llm["BAAI"]["rerank_model"] if not LIGHTEN else "" + ASR_MDL = default_llm[LLM_FACTORY]["asr_model"] + IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"] +else: + CHAT_MDL = EMBEDDING_MDL = RERANK_MDL = ASR_MDL = IMAGE2TEXT_MDL = "" API_KEY = LLM.get("api_key", "") PARSERS = LLM.get( diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 5723ad618..2eab41298 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -16,7 +16,6 @@ import random import xgboost as xgb from io import BytesIO -import torch import re import pdfplumber import logging @@ -25,6 +24,7 @@ import numpy as np from timeit import default_timer as timer from pypdf import PdfReader as pdf2_read +from api.settings import LIGHTEN from api.utils.file_utils import get_project_base_directory from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer from rag.nlp import rag_tokenizer @@ -44,8 +44,10 @@ class RAGFlowPdfParser: self.tbl_det = TableStructureRecognizer() self.updown_cnt_mdl = xgb.Booster() - if torch.cuda.is_available(): - self.updown_cnt_mdl.set_param({"device": "cuda"}) + if not LIGHTEN: + import torch + if torch.cuda.is_available(): + self.updown_cnt_mdl.set_param({"device": "cuda"}) try: model_dir = os.path.join( get_project_base_directory(), diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py index 6f7c4b89d..3ea9faf9f 100644 --- a/rag/llm/embedding_model.py +++ b/rag/llm/embedding_model.py @@ -25,10 +25,10 @@ from abc import ABC from ollama import Client import dashscope from openai import OpenAI -from FlagEmbedding import FlagModel -import torch import numpy as np import asyncio + +from api.settings import LIGHTEN from api.utils.file_utils import get_home_cache_dir from rag.utils import num_tokens_from_string, truncate import google.generativeai as genai @@ -60,8 +60,10 @@ class DefaultEmbedding(Base): ^_- """ - if not DefaultEmbedding._model: + if not LIGHTEN and not DefaultEmbedding._model: with DefaultEmbedding._model_lock: + from FlagEmbedding import FlagModel + import torch if not DefaultEmbedding._model: try: DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)), diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py index fcda163cc..53ed18145 100644 --- a/rag/llm/rerank_model.py +++ b/rag/llm/rerank_model.py @@ -14,14 +14,14 @@ # limitations under the License. # import re -import threading +import threading import requests -import torch -from FlagEmbedding import FlagReranker from huggingface_hub import snapshot_download import os from abc import ABC import numpy as np + +from api.settings import LIGHTEN from api.utils.file_utils import get_home_cache_dir from rag.utils import num_tokens_from_string, truncate import json @@ -53,7 +53,9 @@ class DefaultRerank(Base): ^_- """ - if not DefaultRerank._model: + if not LIGHTEN and not DefaultRerank._model: + import torch + from FlagEmbedding import FlagReranker with DefaultRerank._model_lock: if not DefaultRerank._model: try: