From 9a6dc89156497e6555b745eb5d0fedf12d75229b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E8=85=BE?= <101850389+hangters@users.noreply.github.com> Date: Mon, 12 Aug 2024 10:11:50 +0800 Subject: [PATCH] add support for PerfXCloud (#1883) ### What problem does this PR solve? #1853 add support for PerfXCloud ### Type of change - [x] New Feature (non-breaking change which adds functionality) Co-authored-by: Zhedong Cen --- conf/llm_factories.json | 154 +++++++++++++++++- rag/llm/__init__.py | 6 +- rag/llm/chat_model.py | 9 +- rag/llm/embedding_model.py | 7 + web/src/assets/svg/llm/perfx-cloud.svg | 11 ++ .../user-setting/setting-model/constant.ts | 1 + 6 files changed, 184 insertions(+), 4 deletions(-) create mode 100644 web/src/assets/svg/llm/perfx-cloud.svg diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 42302c84e..c4566bbda 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -2442,6 +2442,158 @@ "model_type": "chat" } ] - } + }, + { + "name": "PerfXCloud", + "logo": "", + "tags": "LLM,TEXT EMBEDDING", + "status": "1", + "llm": [ + { + "llm_name": "deepseek-v2-chat", + "tags": "LLM,CHAT,4k", + "max_tokens": 4096, + "model_type": "chat" + }, + { + "llm_name": "llama3.1:405b", + "tags": "LLM,CHAT,128k", + "max_tokens": 131072, + "model_type": "chat" + }, + { + "llm_name": "Qwen2-72B-Instruct", + "tags": "LLM,CHAT,128k", + "max_tokens": 131072, + "model_type": "chat" + }, + { + "llm_name": "Qwen2-72B-Instruct-GPTQ-Int4", + "tags": "LLM,CHAT,2k", + "max_tokens": 2048, + "model_type": "chat" + }, + { + "llm_name": "Qwen2-72B-Instruct-awq-int4", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Llama3-Chinese_v2", + "tags": "LLM,CHAT,8k", + "max_tokens": 8192, + "model_type": "chat" + }, + { + "llm_name": "Yi-1_5-9B-Chat-16K", + "tags": "LLM,CHAT,16k", + "max_tokens": 16384, + "model_type": "chat" + }, + { + "llm_name": "Qwen1.5-72B-Chat-GPTQ-Int4", + "tags": "LLM,CHAT,2k", + 
"max_tokens": 2048, + "model_type": "chat" + }, + { + "llm_name": "Meta-Llama-3.1-8B-Instruct", + "tags": "LLM,CHAT,4k", + "max_tokens": 4096, + "model_type": "chat" + }, + { + "llm_name": "Qwen2-7B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "deepseek-v2-lite-chat", + "tags": "LLM,CHAT,2k", + "max_tokens": 2048, + "model_type": "chat" + }, + { + "llm_name": "Qwen2-7B", + "tags": "LLM,CHAT,128k", + "max_tokens": 131072, + "model_type": "chat" + }, + { + "llm_name": "chatglm3-6b", + "tags": "LLM,CHAT,8k", + "max_tokens": 8192, + "model_type": "chat" + }, + { + "llm_name": "Meta-Llama-3-70B-Instruct-GPTQ-Int4", + "tags": "LLM,CHAT,1k", + "max_tokens": 1024, + "model_type": "chat" + }, + { + "llm_name": "Meta-Llama-3-8B-Instruct", + "tags": "LLM,CHAT,8k", + "max_tokens": 8192, + "model_type": "chat" + }, + { + "llm_name": "Mistral-7B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "MindChat-Qwen-7B-v2", + "tags": "LLM,CHAT,2k", + "max_tokens": 2048, + "model_type": "chat" + }, + { + "llm_name": "phi-2", + "tags": "LLM,CHAT,2k", + "max_tokens": 2048, + "model_type": "chat" + }, + { + "llm_name": "SOLAR-10_7B-Instruct", + "tags": "LLM,CHAT,4k", + "max_tokens": 4096, + "model_type": "chat" + }, + { + "llm_name": "Mixtral-8x7B-Instruct-v0.1-GPTQ", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen1.5-7B", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "BAAI/bge-large-en-v1.5", + "tags": "TEXT EMBEDDING", + "max_tokens": 512, + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-large-zh-v1.5", + "tags": "TEXT EMBEDDING", + "max_tokens": 1024, + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-m3", + "tags": "TEXT EMBEDDING", + "max_tokens": 8192, + "model_type": "embedding" + } + ] + } ] } diff --git a/rag/llm/__init__.py 
b/rag/llm/__init__.py index eebe8bfa9..dcc9f5523 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -38,7 +38,8 @@ EmbeddingModel = { "NVIDIA": NvidiaEmbed, "LM-Studio": LmStudioEmbed, "OpenAI-API-Compatible": OpenAI_APIEmbed, - "cohere": CoHereEmbed + "cohere": CoHereEmbed, + "PerfXCloud": PerfXCloudEmbed, } @@ -84,7 +85,8 @@ ChatModel = { "LM-Studio": LmStudioChat, "OpenAI-API-Compatible": OpenAI_APIChat, "cohere": CoHereChat, - "LeptonAI": LeptonAIChat + "LeptonAI": LeptonAIChat, + "PerfXCloud": PerfXCloudChat } diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 04463e931..dc908c40d 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -987,4 +987,11 @@ class LeptonAIChat(Base): def __init__(self, key, model_name, base_url=None): if not base_url: base_url = os.path.join("https://"+model_name+".lepton.run","api","v1") - super().__init__(key, model_name, base_url) \ No newline at end of file + super().__init__(key, model_name, base_url) + + +class PerfXCloudChat(Base): + def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1"): + if not base_url: + base_url = "https://cloud.perfxlab.cn/v1" + super().__init__(key, model_name, base_url) diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py index f69ae8484..4ceef04f6 100644 --- a/rag/llm/embedding_model.py +++ b/rag/llm/embedding_model.py @@ -553,3 +553,10 @@ class CoHereEmbed(Base): return np.array([d for d in res.embeddings.float]), int( res.meta.billed_units.input_tokens ) + + +class PerfXCloudEmbed(OpenAIEmbed): + def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1"): + if not base_url: + base_url = "https://cloud.perfxlab.cn/v1" + super().__init__(key, model_name, base_url) diff --git a/web/src/assets/svg/llm/perfx-cloud.svg b/web/src/assets/svg/llm/perfx-cloud.svg new file mode 100644 index 000000000..edb014702 --- /dev/null +++ b/web/src/assets/svg/llm/perfx-cloud.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git 
a/web/src/pages/user-setting/setting-model/constant.ts b/web/src/pages/user-setting/setting-model/constant.ts index b7badc91d..db5b03077 100644 --- a/web/src/pages/user-setting/setting-model/constant.ts +++ b/web/src/pages/user-setting/setting-model/constant.ts @@ -25,6 +25,7 @@ export const IconMap = { 'OpenAI-API-Compatible': 'openai-api', cohere: 'cohere', Lepton: 'lepton', + PerfXCloud: 'perfx-cloud', }; export const BedrockRegionList = [