feat: add deepseek support for tongyi (#13445)

Fei He authored 2025-02-10 10:26:03 +08:00, committed by GitHub
parent 939a9ecd21
commit 75113c26c6
6 changed files with 128 additions and 3 deletions


@@ -1,3 +1,7 @@
+- deepseek-r1
+- deepseek-r1-distill-qwen-14b
+- deepseek-r1-distill-qwen-32b
+- deepseek-v3
 - qwen-vl-max-0809
 - qwen-vl-max-0201
 - qwen-vl-max


@@ -0,0 +1,21 @@
model: deepseek-r1-distill-qwen-14b
label:
zh_Hans: DeepSeek-R1-Distill-Qwen-14B
en_US: DeepSeek-R1-Distill-Qwen-14B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.001"
output: "0.003"
unit: "0.001"
currency: RMB
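
A note on the pricing block: `unit` is the multiplier applied per token, so with unit 0.001 the quoted input/output prices read as RMB per 1,000 tokens. A minimal cost sketch under that assumption (the estimate_cost helper and PRICING dict are illustrative, not part of this commit):

from decimal import Decimal

# Pricing block from the deepseek-r1-distill-qwen-14b manifest above.
PRICING = {
    "input": Decimal("0.001"),
    "output": Decimal("0.003"),
    "unit": Decimal("0.001"),
    "currency": "RMB",
}

def estimate_cost(prompt_tokens: int, completion_tokens: int) -> Decimal:
    # tokens * unit * price: with unit 0.001, prices apply per 1,000 tokens.
    input_cost = Decimal(prompt_tokens) * PRICING["unit"] * PRICING["input"]
    output_cost = Decimal(completion_tokens) * PRICING["unit"] * PRICING["output"]
    return input_cost + output_cost

# 1M prompt tokens and 1M completion tokens -> 1 + 3 = 4 RMB.
print(estimate_cost(1_000_000, 1_000_000))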


@@ -0,0 +1,21 @@
model: deepseek-r1-distill-qwen-32b
label:
zh_Hans: DeepSeek-R1-Distill-Qwen-32B
en_US: DeepSeek-R1-Distill-Qwen-32B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.002"
output: "0.006"
unit: "0.001"
currency: RMB


@@ -0,0 +1,21 @@
model: deepseek-r1
label:
zh_Hans: DeepSeek-R1
en_US: DeepSeek-R1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.004"
output: "0.016"
unit: "0.001"
currency: RMB


@@ -0,0 +1,52 @@
model: deepseek-v3
label:
zh_Hans: DeepSeek-V3
en_US: DeepSeek-V3
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: Specify the format that the model must output.
required: false
options:
- text
- json_object
pricing:
input: "0.002"
output: "0.008"
unit: "0.001"
currency: RMB
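
Once these manifests ship, the models are reachable through DashScope's Generation API, the same entry point the provider code below patches. A quick smoke test, assuming the dashscope SDK is installed and an API key is configured (the prompt text is illustrative):

from dashscope import Generation

# Stream deepseek-v3 through Tongyi with incremental (delta) output,
# mirroring the parameters used by the provider code in the hunks below.
responses = Generation.call(
    model="deepseek-v3",
    messages=[{"role": "user", "content": "Briefly explain unified diffs."}],
    result_format="message",
    stream=True,
    incremental_output=True,
)
for chunk in responses:
    if chunk.status_code == 200:
        print(chunk.output.choices[0].message.content, end="", flush=True)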


@@ -197,8 +197,7 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         else:
             # nothing different between chat model and completion model in tongyi
             params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-            response = Generation.call(**params, result_format="message", stream=stream)
+            response = Generation.call(**params, result_format="message", stream=stream, incremental_output=True)
         if stream:
             return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
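
The switch to incremental_output=True changes the streaming contract: each chunk now carries only the newly generated delta rather than the full text so far, so the stream handler can append chunks directly instead of replacing its accumulated text. A minimal sketch of the difference (the chunk values are hypothetical, not actual model output):

# incremental_output=False: every chunk repeats the full text generated so far.
cumulative_chunks = ["Hel", "Hello, wo", "Hello, world"]
full_text = ""
for chunk in cumulative_chunks:
    full_text = chunk  # replace, or slice off the already-seen prefix

# incremental_output=True: each chunk is only the new delta.
delta_chunks = ["Hel", "lo, wo", "rld"]
full_text = ""
for chunk in delta_chunks:
    full_text += chunk  # simple append, as the handler does with resp_content

assert full_text == "Hello, world"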
@@ -258,6 +257,9 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         """
         full_text = ""
         tool_calls = []
+        is_reasoning_started = False
+        # for index, response in enumerate(responses):
+        index = 0
         for index, response in enumerate(responses):
             if response.status_code not in {200, HTTPStatus.OK}:
                 raise ServiceUnavailableError(
@@ -311,7 +313,11 @@
                     ),
                 )
             else:
-                resp_content = response.output.choices[0].message.content
+                message = response.output.choices[0].message
+                resp_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
+                    message, is_reasoning_started
+                )
                 if not resp_content:
                     if "tool_calls" in response.output.choices[0].message:
                         tool_calls = response.output.choices[0].message["tool_calls"]
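
The new helper _wrap_thinking_by_reasoning_content is not part of the visible hunks. DeepSeek reasoning models stream their chain of thought in a separate reasoning_content field, and the call site above suggests the helper folds that stream into the normal content wrapped in <think> tags, using the is_reasoning_started flag to open and close the tag exactly once. A sketch of that behavior, inferred from the call site and not the committed code:

def _wrap_thinking_by_reasoning_content(self, message: dict, is_reasoning_started: bool) -> tuple[str, bool]:
    # Assumed behavior: message is the dict-like delta from the stream;
    # merge its reasoning_content into the content stream inside <think> tags.
    content = message.get("content", "") or ""
    reasoning = message.get("reasoning_content", "") or ""
    if reasoning:
        if not is_reasoning_started:
            # First reasoning delta: open the think block.
            return "<think>\n" + reasoning, True
        return reasoning, True
    if is_reasoning_started and content:
        # First answer delta after reasoning: close the think block.
        return "\n</think>" + content, False
    return content, is_reasoning_started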