From cdcaae17c6b8c3a9bdad7e6f571ef98df47b0054 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Wed, 26 Feb 2025 16:04:53 +0800 Subject: [PATCH] Feat: add VLLM (#5380) ### What problem does this PR solve? Ready to add VLLM. ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- api/apps/llm_app.py | 4 ++++ conf/llm_factories.json | 7 +++++++ docs/references/supported_models.mdx | 1 + rag/llm/__init__.py | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py index 4a1ff183a..44d7d5b96 100644 --- a/api/apps/llm_app.py +++ b/api/apps/llm_app.py @@ -172,6 +172,10 @@ def add_llm(): llm_name = req["llm_name"] + "___OpenAI-API" api_key = req.get("api_key", "xxxxxxxxxxxxxxx") + elif factory == "VLLM": + llm_name = req["llm_name"] + "___OpenAI-API" + api_key = req.get("api_key", "xxxxxxxxxxxxxxx") + elif factory == "XunFei Spark": llm_name = req["llm_name"] if req["model_type"] == "chat": diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 469bde9fa..db7864a76 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -316,6 +316,13 @@ "status": "1", "llm": [] }, + { + "name": "VLLM", + "logo": "", + "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION", + "status": "1", + "llm": [] + }, { "name": "Moonshot", "logo": "", diff --git a/docs/references/supported_models.mdx b/docs/references/supported_models.mdx index ebe9bb691..decc29511 100644 --- a/docs/references/supported_models.mdx +++ b/docs/references/supported_models.mdx @@ -42,6 +42,7 @@ A complete list of models supported by RAGFlow, which will continue to expand. 
| Ollama | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | | | | OpenAI | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | OpenAI-API-Compatible | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | | +| VLLM | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | | | OpenRouter | :heavy_check_mark: | | | :heavy_check_mark: | | | | PerfXCloud | :heavy_check_mark: | :heavy_check_mark: | | | | | | Replicate | :heavy_check_mark: | :heavy_check_mark: | | | | | diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py index 2c514d4b5..ef0863f57 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -160,6 +160,7 @@ EmbeddingModel = { "NVIDIA": NvidiaEmbed, "LM-Studio": LmStudioEmbed, "OpenAI-API-Compatible": OpenAI_APIEmbed, + "VLLM": OpenAI_APIEmbed, "Cohere": CoHereEmbed, "TogetherAI": TogetherAIEmbed, "PerfXCloud": PerfXCloudEmbed, @@ -188,6 +189,7 @@ CvModel = { "LM-Studio": LmStudioCV, "StepFun": StepFunCV, "OpenAI-API-Compatible": OpenAI_APICV, + "VLLM": OpenAI_APICV, "TogetherAI": TogetherAICV, "01.AI": YiCV, "Tencent Hunyuan": HunyuanCV, @@ -215,6 +217,7 @@ ChatModel = { "NVIDIA": NvidiaChat, "LM-Studio": LmStudioChat, "OpenAI-API-Compatible": OpenAI_APIChat, + "VLLM": OpenAI_APIChat, "Cohere": CoHereChat, "LeptonAI": LeptonAIChat, "TogetherAI": TogetherAIChat, @@ -244,6 +247,7 @@ RerankModel = { "NVIDIA": NvidiaRerank, "LM-Studio": LmStudioRerank, "OpenAI-API-Compatible": OpenAI_APIRerank, + "VLLM": OpenAI_APIRerank, "Cohere": CoHereRerank, "TogetherAI": TogetherAIRerank, "SILICONFLOW": SILICONFLOWRerank,