feat(ark): support doubao vision series models (#11740)

This commit is contained in:
sino 2024-12-17 15:43:11 +08:00 committed by GitHub
parent c9b4029ce7
commit 99430a5931
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 30 additions and 4 deletions

View File

@@ -1,4 +1,3 @@
import re
from collections.abc import Generator from collections.abc import Generator
from typing import Optional, cast from typing import Optional, cast
@@ -104,17 +103,16 @@ class ArkClientV3:
if message_content.type == PromptMessageContentType.TEXT: if message_content.type == PromptMessageContentType.TEXT:
content.append( content.append(
ChatCompletionContentPartTextParam( ChatCompletionContentPartTextParam(
text=message_content.text, text=message_content.data,
type="text", type="text",
) )
) )
elif message_content.type == PromptMessageContentType.IMAGE: elif message_content.type == PromptMessageContentType.IMAGE:
message_content = cast(ImagePromptMessageContent, message_content) message_content = cast(ImagePromptMessageContent, message_content)
image_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", message_content.data)
content.append( content.append(
ChatCompletionContentPartImageParam( ChatCompletionContentPartImageParam(
image_url=ImageURL( image_url=ImageURL(
url=image_data, url=message_content.data,
detail=message_content.detail.value, detail=message_content.detail.value,
), ),
type="image_url", type="image_url",

View File

@@ -132,6 +132,14 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
messages_dict = [ArkClientV3.convert_prompt_message(m) for m in messages] messages_dict = [ArkClientV3.convert_prompt_message(m) for m in messages]
for message in messages_dict: for message in messages_dict:
for key, value in message.items(): for key, value in message.items():
# Ignore tokens for image type
if isinstance(value, list):
text = ""
for item in value:
if isinstance(item, dict) and item["type"] == "text":
text += item["text"]
value = text
num_tokens += self._get_num_tokens_by_gpt2(str(key)) num_tokens += self._get_num_tokens_by_gpt2(str(key))
num_tokens += self._get_num_tokens_by_gpt2(str(value)) num_tokens += self._get_num_tokens_by_gpt2(str(value))

View File

@@ -16,6 +16,14 @@ class ModelConfig(BaseModel):
configs: dict[str, ModelConfig] = { configs: dict[str, ModelConfig] = {
"Doubao-vision-pro-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.VISION],
),
"Doubao-vision-lite-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.VISION],
),
"Doubao-pro-4k": ModelConfig( "Doubao-pro-4k": ModelConfig(
properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT), properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL], features=[ModelFeature.TOOL_CALL],

View File

@@ -118,6 +118,18 @@ model_credential_schema:
type: select type: select
required: true required: true
options: options:
- label:
en_US: Doubao-vision-pro-32k
value: Doubao-vision-pro-32k
show_on:
- variable: __model_type
value: llm
- label:
en_US: Doubao-vision-lite-32k
value: Doubao-vision-lite-32k
show_on:
- variable: __model_type
value: llm
- label: - label:
en_US: Doubao-pro-4k en_US: Doubao-pro-4k
value: Doubao-pro-4k value: Doubao-pro-4k