feat(ark): support doubao vision series models (#11740)

This commit is contained in:
sino 2024-12-17 15:43:11 +08:00 committed by GitHub
parent c9b4029ce7
commit 99430a5931
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 30 additions and 4 deletions

View File

@@ -1,4 +1,3 @@
import re
from collections.abc import Generator from collections.abc import Generator
from typing import Optional, cast from typing import Optional, cast
@@ -104,17 +103,16 @@ class ArkClientV3:
if message_content.type == PromptMessageContentType.TEXT: if message_content.type == PromptMessageContentType.TEXT:
content.append( content.append(
ChatCompletionContentPartTextParam( ChatCompletionContentPartTextParam(
text=message_content.text, text=message_content.data,
type="text", type="text",
) )
) )
elif message_content.type == PromptMessageContentType.IMAGE: elif message_content.type == PromptMessageContentType.IMAGE:
message_content = cast(ImagePromptMessageContent, message_content) message_content = cast(ImagePromptMessageContent, message_content)
image_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", message_content.data)
content.append( content.append(
ChatCompletionContentPartImageParam( ChatCompletionContentPartImageParam(
image_url=ImageURL( image_url=ImageURL(
url=image_data, url=message_content.data,
detail=message_content.detail.value, detail=message_content.detail.value,
), ),
type="image_url", type="image_url",

View File

@@ -132,6 +132,14 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
messages_dict = [ArkClientV3.convert_prompt_message(m) for m in messages] messages_dict = [ArkClientV3.convert_prompt_message(m) for m in messages]
for message in messages_dict: for message in messages_dict:
for key, value in message.items(): for key, value in message.items():
# Ignore tokens for image type
if isinstance(value, list):
text = ""
for item in value:
if isinstance(item, dict) and item["type"] == "text":
text += item["text"]
value = text
num_tokens += self._get_num_tokens_by_gpt2(str(key)) num_tokens += self._get_num_tokens_by_gpt2(str(key))
num_tokens += self._get_num_tokens_by_gpt2(str(value)) num_tokens += self._get_num_tokens_by_gpt2(str(value))

View File

@@ -16,6 +16,14 @@ class ModelConfig(BaseModel):
configs: dict[str, ModelConfig] = { configs: dict[str, ModelConfig] = {
"Doubao-vision-pro-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.VISION],
),
"Doubao-vision-lite-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.VISION],
),
"Doubao-pro-4k": ModelConfig( "Doubao-pro-4k": ModelConfig(
properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT), properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL], features=[ModelFeature.TOOL_CALL],

View File

@@ -118,6 +118,18 @@ model_credential_schema:
type: select type: select
required: true required: true
options: options:
- label:
en_US: Doubao-vision-pro-32k
value: Doubao-vision-pro-32k
show_on:
- variable: __model_type
value: llm
- label:
en_US: Doubao-vision-lite-32k
value: Doubao-vision-lite-32k
show_on:
- variable: __model_type
value: llm
- label: - label:
en_US: Doubao-pro-4k en_US: Doubao-pro-4k
value: Doubao-pro-4k value: Doubao-pro-4k