diff --git a/api/core/model_runtime/model_providers/volcengine_maas/client.py b/api/core/model_runtime/model_providers/volcengine_maas/client.py index cfe21e4b9f..1cffd902c7 100644 --- a/api/core/model_runtime/model_providers/volcengine_maas/client.py +++ b/api/core/model_runtime/model_providers/volcengine_maas/client.py @@ -1,4 +1,3 @@ -import re from collections.abc import Generator from typing import Optional, cast @@ -104,17 +103,16 @@ class ArkClientV3: if message_content.type == PromptMessageContentType.TEXT: content.append( ChatCompletionContentPartTextParam( - text=message_content.text, + text=message_content.data, type="text", ) ) elif message_content.type == PromptMessageContentType.IMAGE: message_content = cast(ImagePromptMessageContent, message_content) - image_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", message_content.data) content.append( ChatCompletionContentPartImageParam( image_url=ImageURL( - url=image_data, + url=message_content.data, detail=message_content.detail.value, ), type="image_url", diff --git a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py index 1c776cec7e..9e19b7deda 100644 --- a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py +++ b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py @@ -132,6 +132,14 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel): messages_dict = [ArkClientV3.convert_prompt_message(m) for m in messages] for message in messages_dict: for key, value in message.items(): + # Ignore tokens for image type + if isinstance(value, list): + text = "" + for item in value: + if isinstance(item, dict) and item["type"] == "text": + text += item["text"] + + value = text num_tokens += self._get_num_tokens_by_gpt2(str(key)) num_tokens += self._get_num_tokens_by_gpt2(str(value)) diff --git a/api/core/model_runtime/model_providers/volcengine_maas/llm/models.py b/api/core/model_runtime/model_providers/volcengine_maas/llm/models.py index d8be14b024..f7698f9443 100644 --- a/api/core/model_runtime/model_providers/volcengine_maas/llm/models.py +++ b/api/core/model_runtime/model_providers/volcengine_maas/llm/models.py @@ -16,6 +16,14 @@ class ModelConfig(BaseModel): configs: dict[str, ModelConfig] = { + "Doubao-vision-pro-32k": ModelConfig( + properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT), + features=[ModelFeature.VISION], + ), + "Doubao-vision-lite-32k": ModelConfig( + properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT), + features=[ModelFeature.VISION], + ), "Doubao-pro-4k": ModelConfig( properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT), features=[ModelFeature.TOOL_CALL], diff --git a/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml b/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml index 13e00da76f..57492b6d9f 100644 --- a/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml +++ b/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml @@ -118,6 +118,18 @@ model_credential_schema: type: select required: true options: + - label: + en_US: Doubao-vision-pro-32k + value: Doubao-vision-pro-32k + show_on: + - variable: __model_type + value: llm + - label: + en_US: Doubao-vision-lite-32k + value: Doubao-vision-lite-32k + show_on: + - variable: __model_type + value: llm - label: en_US: Doubao-pro-4k value: Doubao-pro-4k