add step-1v-8k cv model (#1686)

### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: lijianyong <lijianyong@stepfun.com>
2025-08-16 20:15:58 +08:00 · 2024-07-30 16:57:27 +08:00 · 2024-07-30 16:57:27 +08:00 · 9169643157
commit 9169643157
parent 5cff780ec4
3 changed files with 23 additions and 2 deletions
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@ -1920,7 +1920,7 @@
                {
                    "llm_name": "step-1v-8k",
                    "tags": "LLM,CHAT,IMAGE2TEXT",
-                    "max_tokens": 8000,
+                    "max_tokens": 8192,
                    "model_type": "image2text"
                }
            ]
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@ -52,7 +52,8 @@ CvModel = {
    "OpenRouter": OpenRouterCV,
    "LocalAI": LocalAICV,
    "NVIDIA": NvidiaCV,
-    "LM-Studio": LmStudioCV
+    "LM-Studio": LmStudioCV,
    "StepFun":StepFunCV
 }
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@ -622,6 +622,26 @@ class NvidiaCV(Base):
            }
        ]
 class StepFunCV(Base):
    """Image-description client for StepFun vision models.

    Requests are issued through an ``OpenAI`` client pointed at the
    configured base URL.
    """

    def __init__(self, key, model_name="step-1v-8k", lang="Chinese", base_url="https://api.stepfun.com/v1"):
        """Build the API client and remember the model settings.

        Args:
            key: API key handed to the ``OpenAI`` client.
            model_name: Model identifier sent with every request.
            lang: Stored on the instance as ``self.lang``.
            base_url: API endpoint. A falsy value (``None``, ``""``) falls
                back to the default StepFun URL.
        """
        endpoint = base_url or "https://api.stepfun.com/v1"
        self.client = OpenAI(api_key=key, base_url=endpoint)
        self.model_name = model_name
        self.lang = lang

    def describe(self, image, max_tokens=4096):
        """Ask the model to describe ``image``.

        Args:
            image: Image passed to ``self.image2base64``.
            max_tokens: Completion token limit forwarded to the API call.

        Returns:
            A ``(text, total_tokens)`` tuple: the stripped content of the
            first choice and the total token usage reported by the response.
        """
        messages = self.prompt(self.image2base64(image))

        # Tag every content part that carries a "text" key with type "text".
        for message in messages:
            for part in message["content"]:
                if "text" in part:
                    part["type"] = "text"

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            max_tokens=max_tokens,
        )
        answer = response.choices[0].message.content.strip()
        return answer, response.usage.total_tokens
 class LmStudioCV(GptV4):
    def __init__(self, key, model_name, base_url, lang="Chinese"):