Feat: support vision llm for gpustack (#6636)

### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/6138

This PR adds vision LLM support for GPUStack and changes the API URL path from `/v1-openai` to `/v1`.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-07-15 11:41:50 +08:00 · 2025-03-31 15:33:52 +08:00 · 2025-03-31 15:33:52 +08:00 · 46b5e32cd7
commit 46b5e32cd7
parent 7d9dd1e5d3
6 changed files with 22 additions and 11 deletions
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@ -107,7 +107,8 @@ from .cv_model import (
    YiCV,
    HunyuanCV,
    AnthropicCV,
-    SILICONFLOWCV
+    SILICONFLOWCV,
+    GPUStackCV,
 )

 from .rerank_model import (
@ -145,7 +146,7 @@ from .tts_model import (
    SparkTTS,
    XinferenceTTS,
    GPUStackTTS,
-    SILICONFLOWTTS
+    SILICONFLOWTTS,
 )

 EmbeddingModel = {
@ -202,6 +203,7 @@ CvModel = {
    "Tencent Hunyuan": HunyuanCV,
    "Anthropic": AnthropicCV,
    "SILICONFLOW": SILICONFLOWCV,
+    "GPUStack": GPUStackCV,
 }

 ChatModel = {
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@ -1586,6 +1586,6 @@ class GPUStackChat(Base):
    def __init__(self, key=None, model_name="", base_url=""):
        if not base_url:
            raise ValueError("Local llm url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
        super().__init__(key, model_name, base_url)
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@ -1028,4 +1028,14 @@ class AnthropicCV(Base):
        except Exception as e:
            yield ans + "\n**ERROR**: " + str(e)

-        yield total_tokens
+        yield total_tokens
+
+class GPUStackCV(GptV4):
+    def __init__(self, key, model_name, lang="Chinese", base_url=""):
+        if not base_url:
+            raise ValueError("Local llm url cannot be None")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
+        self.client = OpenAI(api_key=key, base_url=base_url)
+        self.model_name = model_name
+        self.lang = lang
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@ -832,9 +832,8 @@ class GPUStackEmbed(OpenAIEmbed):
    def __init__(self, key, model_name, base_url):
        if not base_url:
            raise ValueError("url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")

-        print(key,base_url)
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name
--- a/rag/llm/sequence2txt_model.py
+++ b/rag/llm/sequence2txt_model.py
@ -198,8 +198,8 @@ class GPUStackSeq2txt(Base):
    def __init__(self, key, model_name, base_url):
        if not base_url:
            raise ValueError("url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
        self.base_url = base_url
        self.model_name = model_name
        self.key = key
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@ -378,7 +378,7 @@ class GPUStackTTS:
        }

        response = requests.post(
-            f"{self.base_url}/v1-openai/audio/speech",
+            f"{self.base_url}/v1/audio/speech",
            headers=self.headers,
            json=payload,
            stream=stream