feat(ark): support doubao vision series models (#11740)

2025-08-14 10:45:52 +08:00 · 2024-12-17 15:43:11 +08:00 · 2024-12-17 15:43:11 +08:00 · 99430a5931
commit 99430a5931
parent c9b4029ce7
4 changed files with 30 additions and 4 deletions
--- a/api/core/model_runtime/model_providers/volcengine_maas/client.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/client.py
@ -1,4 +1,3 @@
-import re
 from collections.abc import Generator
 from typing import Optional, cast

@ -104,17 +103,16 @@ class ArkClientV3:
                    if message_content.type == PromptMessageContentType.TEXT:
                        content.append(
                            ChatCompletionContentPartTextParam(
-                                text=message_content.text,
+                                text=message_content.data,
                                type="text",
                            )
                        )
                    elif message_content.type == PromptMessageContentType.IMAGE:
                        message_content = cast(ImagePromptMessageContent, message_content)
-                        image_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", message_content.data)
                        content.append(
                            ChatCompletionContentPartImageParam(
                                image_url=ImageURL(
-                                    url=image_data,
+                                    url=message_content.data,
                                    detail=message_content.detail.value,
                                ),
                                type="image_url",
--- a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
@ -132,6 +132,14 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
        messages_dict = [ArkClientV3.convert_prompt_message(m) for m in messages]
        for message in messages_dict:
            for key, value in message.items():
+                # For list-valued (multimodal) content, count only the text parts; non-text parts such as images are skipped
+                if isinstance(value, list):
+                    text = ""
+                    for item in value:
+                        if isinstance(item, dict) and item["type"] == "text":
+                            text += item["text"]
+
+                    value = text
                num_tokens += self._get_num_tokens_by_gpt2(str(key))
                num_tokens += self._get_num_tokens_by_gpt2(str(value))

--- a/api/core/model_runtime/model_providers/volcengine_maas/llm/models.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/llm/models.py
@ -16,6 +16,14 @@ class ModelConfig(BaseModel):


 configs: dict[str, ModelConfig] = {
+    "Doubao-vision-pro-32k": ModelConfig(
+        properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
+        features=[ModelFeature.VISION],
+    ),
+    "Doubao-vision-lite-32k": ModelConfig(
+        properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
+        features=[ModelFeature.VISION],
+    ),
    "Doubao-pro-4k": ModelConfig(
        properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT),
        features=[ModelFeature.TOOL_CALL],
--- a/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml
+++ b/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml
@ -118,6 +118,18 @@ model_credential_schema:
      type: select
      required: true
      options:
+        - label:
+            en_US: Doubao-vision-pro-32k
+          value: Doubao-vision-pro-32k
+          show_on:
+            - variable: __model_type
+              value: llm
+        - label:
+            en_US: Doubao-vision-lite-32k
+          value: Doubao-vision-lite-32k
+          show_on:
+            - variable: __model_type
+              value: llm
        - label:
            en_US: Doubao-pro-4k
          value: Doubao-pro-4k