mirror of https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 06:05:51 +08:00

Feat: add deepseek support for tongyi (#13445)
parent 939a9ecd21
commit 75113c26c6

@@ -1,3 +1,7 @@
+- deepseek-r1
+- deepseek-r1-distill-qwen-14b
+- deepseek-r1-distill-qwen-32b
+- deepseek-v3
 - qwen-vl-max-0809
 - qwen-vl-max-0201
 - qwen-vl-max

@@ -0,0 +1,21 @@
+model: deepseek-r1-distill-qwen-14b
+label:
+  zh_Hans: DeepSeek-R1-Distill-Qwen-14B
+  en_US: DeepSeek-R1-Distill-Qwen-14B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.001"
+  output: "0.003"
+  unit: "0.001"
+  currency: RMB
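
Note on the pricing blocks in these files: the assumed semantics are cost = tokens × unit × price, so with unit "0.001" the quoted figures are effectively per 1,000 tokens. A minimal sketch of that arithmetic (variable names are illustrative, not Dify's billing code):

# Sketch of the assumed pricing arithmetic: cost = tokens * unit * price.
# With unit "0.001", the quoted prices are effectively per 1,000 tokens.
from decimal import Decimal

input_price = Decimal("0.001")  # RMB per pricing unit of input tokens
unit = Decimal("0.001")         # converts a raw token count into pricing units
input_tokens = 32_000

cost = input_tokens * unit * input_price
print(f"{cost} RMB")  # 0.032000 RMB for a full 32k-token input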

@@ -0,0 +1,21 @@
+model: deepseek-r1-distill-qwen-32b
+label:
+  zh_Hans: DeepSeek-R1-Distill-Qwen-32B
+  en_US: DeepSeek-R1-Distill-Qwen-32B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.002"
+  output: "0.006"
+  unit: "0.001"
+  currency: RMB

@@ -0,0 +1,21 @@
+model: deepseek-r1
+label:
+  zh_Hans: DeepSeek-R1
+  en_US: DeepSeek-R1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.004"
+  output: "0.016"
+  unit: "0.001"
+  currency: RMB

@@ -0,0 +1,52 @@
+model: deepseek-v3
+label:
+  zh_Hans: DeepSeek-V3
+  en_US: DeepSeek-V3
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specifies the format that the model must output.
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: "0.002"
+  output: "0.008"
+  unit: "0.001"
+  currency: RMB
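
The parameter_rules above drive both the parameter UI and request validation. A rough sketch of applying the max_tokens rule to a requested value (the file name and clamp-to-bounds behaviour are illustrative assumptions, not Dify's actual validation code):

# Rough sketch: apply the max_tokens rule's bounds to a requested value.
# Assumes the YAML above is saved as deepseek-v3.yaml; clamping (rather than
# rejecting) out-of-range values is an illustrative choice.
import yaml

with open("deepseek-v3.yaml") as f:
    schema = yaml.safe_load(f)

rules = {rule["name"]: rule for rule in schema["parameter_rules"]}
max_tokens_rule = rules["max_tokens"]

requested = 10_000
effective = max(max_tokens_rule["min"], min(requested, max_tokens_rule["max"]))
print(effective)  # 4096: requests above the rule's max are clamped to it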

@@ -197,8 +197,7 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         else:
             # nothing different between chat model and completion model in tongyi
             params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-            response = Generation.call(**params, result_format="message", stream=stream)
-
+            response = Generation.call(**params, result_format="message", stream=stream, incremental_output=True)
         if stream:
             return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
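
The key change in this hunk is incremental_output=True: DashScope then streams only the newly generated delta in each chunk instead of the accumulated text so far, which is why the stream handler below switches to appending. A self-contained sketch of consuming such a stream (model name and prompt are placeholders; assumes the dashscope SDK with DASHSCOPE_API_KEY set):

# Minimal sketch: consume a DashScope stream with incremental_output=True.
# Each chunk now carries only the new delta, so the consumer appends.
from http import HTTPStatus

from dashscope import Generation

responses = Generation.call(
    model="deepseek-r1",  # placeholder model name
    messages=[{"role": "user", "content": "Hello"}],
    result_format="message",
    stream=True,
    incremental_output=True,
)

full_text = ""
for response in responses:
    if response.status_code != HTTPStatus.OK:
        raise RuntimeError(f"{response.code}: {response.message}")
    full_text += response.output.choices[0].message.content  # delta only
print(full_text)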

@@ -258,6 +257,9 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         """
         full_text = ""
         tool_calls = []
+        is_reasoning_started = False
+        # for index, response in enumerate(responses):
+        index = 0
         for index, response in enumerate(responses):
             if response.status_code not in {200, HTTPStatus.OK}:
                 raise ServiceUnavailableError(

@@ -311,7 +313,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
                     ),
                 )
             else:
-                resp_content = response.output.choices[0].message.content
+                message = response.output.choices[0].message
+
+                resp_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
+                    message, is_reasoning_started
+                )
                 if not resp_content:
                     if "tool_calls" in response.output.choices[0].message:
                         tool_calls = response.output.choices[0].message["tool_calls"]
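
Here _wrap_thinking_by_reasoning_content replaces the direct message.content read so that DeepSeek's reasoning deltas are folded into the visible stream; the helper itself is outside this hunk. A sketch of what a wrapper with the (message, is_reasoning_started) signature plausibly does (the reasoning_content field and the <think> markup are assumptions, not confirmed by this diff):

# Sketch only: fold reasoning deltas into the text stream. The
# "reasoning_content" field and the <think> markup are assumptions.
def _wrap_thinking_by_reasoning_content(message: dict, is_reasoning_started: bool) -> tuple[str, bool]:
    content = message.get("content") or ""
    reasoning = message.get("reasoning_content") or ""
    if reasoning and not is_reasoning_started:
        # first reasoning delta: open the thinking block
        return "<think>\n" + reasoning, True
    if reasoning:
        # later reasoning deltas pass through inside the open block
        return reasoning, True
    if is_reasoning_started and content:
        # first answer delta after reasoning: close the thinking block
        return "\n</think>\n\n" + content, False
    return content, is_reasoning_started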