From e013ac52af3ad22ad00f8750a32defcf265547d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E8=85=BE?= <101850389+hangters@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:09:10 +0800 Subject: [PATCH] add support for SILICONFLOW (#1926) ### What problem does this PR solve? #1853 add support for SILICONFLOW ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Co-authored-by: Zhedong Cen --- conf/llm_factories.json | 286 +++++++++++++++++- rag/llm/__init__.py | 9 +- rag/llm/chat_model.py | 8 +- rag/llm/embedding_model.py | 7 + rag/llm/rerank_model.py | 37 ++- web/src/assets/svg/llm/siliconflow.svg | 6 + .../user-setting/setting-model/constant.ts | 3 +- 7 files changed, 349 insertions(+), 7 deletions(-) create mode 100644 web/src/assets/svg/llm/siliconflow.svg diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 2b90ca076..2e556edb6 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -2767,6 +2767,290 @@ "model_type": "chat" } ] - } + }, + { + "name": "SILICONFLOW", + "logo": "", + "tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK", + "status": "1", + "llm": [ + { + "llm_name": "Qwen/Qwen2-7B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen2-1.5B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen1.5-7B-Chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "THUDM/glm-4-9b-chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "THUDM/chatglm3-6b", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "01-ai/Yi-1.5-9B-Chat-16K", + "tags": "LLM,CHAT,16k", + "max_tokens": 16384, + "model_type": "chat" + }, + { + "llm_name": "01-ai/Yi-1.5-6B-Chat", + "tags": "LLM,CHAT,4k", + "max_tokens": 4096, + "model_type": "chat" + }, + { + 
"llm_name": "google/gemma-2-9b-it", + "tags": "LLM,CHAT,8k", + "max_tokens": 8192, + "model_type": "chat" + }, + { + "llm_name": "internlm/internlm2_5-7b-chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Meta-Llama-3-8B-Instruct", + "tags": "LLM,CHAT,8k", + "max_tokens": 8192, + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "tags": "LLM,CHAT,8k", + "max_tokens": 8192, + "model_type": "chat" + }, + { + "llm_name": "mistralai/Mistral-7B-Instruct-v0.2", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Pro/Qwen/Qwen2-7B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Pro/Qwen/Qwen2-1.5B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Pro/Qwen/Qwen1.5-7B-Chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Pro/THUDM/glm-4-9b-chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Pro/THUDM/chatglm3-6b", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Pro/01-ai/Yi-1.5-9B-Chat-16K", + "tags": "LLM,CHAT,16k", + "max_tokens": 16384, + "model_type": "chat" + }, + { + "llm_name": "Pro/01-ai/Yi-1.5-6B-Chat", + "tags": "LLM,CHAT,4k", + "max_tokens": 4096, + "model_type": "chat" + }, + { + "llm_name": "Pro/internlm/internlm2_5-7b-chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Pro/google/gemma-2-9b-it", + "tags": "LLM,CHAT,8k", + "max_tokens": 8192, + "model_type": "chat" + }, + { + "llm_name": "Pro/meta-llama/Meta-Llama-3.1-8B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Pro/meta-llama/Meta-Llama-3-8B-Instruct", + "tags": "LLM,CHAT,8k", + "max_tokens": 
8192, + "model_type": "chat" + }, + { + "llm_name": "Pro/mistralai/Mistral-7B-Instruct-v0.2", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen2-72B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen2-Math-72B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen2-57B-A14B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen1.5-110B-Chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen1.5-32B-Chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "Qwen/Qwen1.5-14B-Chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "01-ai/Yi-1.5-34B-Chat-16K", + "tags": "LLM,CHAT,16k", + "max_tokens": 16384, + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-Coder-V2-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/DeepSeek-V2-Chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "deepseek-ai/deepseek-llm-67b-chat", + "tags": "LLM,CHAT,4k", + "max_tokens": 4096, + "model_type": "chat" + }, + { + "llm_name": "internlm/internlm2_5-20b-chat", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "meta-llama/Meta-Llama-3-70B-Instruct", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": 
"mistralai/Mixtral-8x7B-Instruct-v0.1", + "tags": "LLM,CHAT,32k", + "max_tokens": 32768, + "model_type": "chat" + }, + { + "llm_name": "google/gemma-2-27b-it", + "tags": "LLM,CHAT,8k", + "max_tokens": 8192, + "model_type": "chat" + }, + { + "llm_name": "BAAI/bge-m3", + "tags": "TEXT EMBEDDING,8K", + "max_tokens": 8192, + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-large-en-v1.5", + "tags": "TEXT EMBEDDING,512", + "max_tokens": 512, + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-large-zh-v1.5", + "tags": "TEXT EMBEDDING,512", + "max_tokens": 512, + "model_type": "embedding" + }, + { + "llm_name": "netease-youdao/bce-embedding-base_v1", + "tags": "TEXT EMBEDDING,512", + "max_tokens": 512, + "model_type": "embedding" + }, + { + "llm_name": "BAAI/bge-reranker-v2-m3", + "tags": "RE-RANK, 512", + "max_tokens": 1024, + "model_type": "rerank" + }, + { + "llm_name": "netease-youdao/bce-reranker-base-v1", + "tags": "RE-RANK, 512", + "max_tokens": 1024, + "model_type": "rerank" + } + ] + } ] } diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py index 49a22aa15..1756051d3 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -41,7 +41,8 @@ EmbeddingModel = { "cohere": CoHereEmbed, "TogetherAI": TogetherAIEmbed, "PerfXCloud": PerfXCloudEmbed, - "Upstage": UpstageEmbed + "Upstage": UpstageEmbed, + "SILICONFLOW": SILICONFLOWEmbed } @@ -92,7 +93,8 @@ ChatModel = { "TogetherAI": TogetherAIChat, "PerfXCloud": PerfXCloudChat, "Upstage":UpstageChat, - "novita.ai": NovitaAIChat + "novita.ai": NovitaAIChat, + "SILICONFLOW": SILICONFLOWChat } @@ -105,7 +107,8 @@ RerankModel = { "LM-Studio": LmStudioRerank, "OpenAI-API-Compatible": OpenAI_APIRerank, "cohere": CoHereRerank, - "TogetherAI": TogetherAIRerank + "TogetherAI": TogetherAIRerank, + "SILICONFLOW": SILICONFLOWRerank } diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 64fc13426..4696499a5 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -1016,4 +1016,10 @@ 
class NovitaAIChat(Base): if not base_url: base_url = "https://api.novita.ai/v3/openai" super().__init__(key, model_name, base_url) - \ No newline at end of file + + +class SILICONFLOWChat(Base): + def __init__(self, key, model_name, base_url="https://api.siliconflow.cn/v1"): + if not base_url: + base_url = "https://api.siliconflow.cn/v1" + super().__init__(key, model_name, base_url) \ No newline at end of file diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py index db5f450a4..d0e011197 100644 --- a/rag/llm/embedding_model.py +++ b/rag/llm/embedding_model.py @@ -574,3 +574,10 @@ class UpstageEmbed(OpenAIEmbed): if not base_url: base_url = "https://api.upstage.ai/v1/solar" super().__init__(key, model_name, base_url) + + +class SILICONFLOWEmbed(OpenAIEmbed): + def __init__(self, key, model_name, base_url="https://api.siliconflow.cn/v1"): + if not base_url: + base_url = "https://api.siliconflow.cn/v1" + super().__init__(key, model_name, base_url) \ No newline at end of file diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py index 7f39f7a6a..f452ea6c7 100644 --- a/rag/llm/rerank_model.py +++ b/rag/llm/rerank_model.py @@ -252,4 +252,39 @@ class TogetherAIRerank(Base): pass def similarity(self, query: str, texts: list): - raise NotImplementedError("The api has not been implement") \ No newline at end of file + raise NotImplementedError("The api has not been implement") + + +class SILICONFLOWRerank(Base): + def __init__( + self, key, model_name, base_url="https://api.siliconflow.cn/v1/rerank" + ): + if not base_url: + base_url = "https://api.siliconflow.cn/v1/rerank" + self.model_name = model_name + self.base_url = base_url + self.headers = { + "accept": "application/json", + "content-type": "application/json", + "authorization": f"Bearer {key}", + } + + def similarity(self, query: str, texts: list): + payload = { + "model": self.model_name, + "query": query, + "documents": texts, + "top_n": len(texts), + "return_documents": False, + 
"max_chunks_per_doc": 1024, + "overlap_tokens": 80, + } + response = requests.post( + self.base_url, json=payload, headers=self.headers + ).json() + # Results may not be in input order; put each score at its document's index. + rank = np.zeros(len(texts), dtype=float) + for d in response["results"]: + rank[d["index"]] = d["relevance_score"] + tokens = response["meta"]["tokens"] + return rank, tokens["input_tokens"] + tokens["output_tokens"] diff --git a/web/src/assets/svg/llm/siliconflow.svg b/web/src/assets/svg/llm/siliconflow.svg new file mode 100644 index 000000000..4ce6323dc --- /dev/null +++ b/web/src/assets/svg/llm/siliconflow.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/web/src/pages/user-setting/setting-model/constant.ts b/web/src/pages/user-setting/setting-model/constant.ts index b2a79ee09..196fc6476 100644 --- a/web/src/pages/user-setting/setting-model/constant.ts +++ b/web/src/pages/user-setting/setting-model/constant.ts @@ -28,7 +28,8 @@ export const IconMap = { TogetherAI:'together-ai', PerfXCould: 'perfx-could', Upstage: 'upstage', - "novita.ai": 'novita-ai' + "novita.ai": 'novita-ai', + "SILICONFLOW": 'siliconflow' }; export const BedrockRegionList = [