From 75113c26c668084f5725d076b06537c4f0bc4e6e Mon Sep 17 00:00:00 2001
From: Fei He
Date: Mon, 10 Feb 2025 10:26:03 +0800
Subject: [PATCH] Feat : add deepseek support for tongyi (#13445)

---
 .../model_providers/tongyi/llm/_position.yaml |  4 ++
 .../llm/deepseek-r1-distill-qwen-14B.yaml     | 21 ++++++++
 .../llm/deepseek-r1-distill-qwen-32B.yaml     | 21 ++++++++
 .../tongyi/llm/deepseek-r1.yaml               | 21 ++++++++
 .../tongyi/llm/deepseek-v3.yaml               | 52 +++++++++++++++++++
 .../model_providers/tongyi/llm/llm.py         | 12 +++-
 6 files changed, 128 insertions(+), 3 deletions(-)
 create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml
 create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml
 create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml
 create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml

diff --git a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
index d7ba51e1d9..785ddeb494 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
@@ -1,3 +1,7 @@
+- deepseek-r1
+- deepseek-r1-distill-qwen-14b
+- deepseek-r1-distill-qwen-32b
+- deepseek-v3
 - qwen-vl-max-0809
 - qwen-vl-max-0201
 - qwen-vl-max
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml
new file mode 100644
index 0000000000..2bce8805c6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml
@@ -0,0 +1,21 @@
+model: deepseek-r1-distill-qwen-14b
+label:
+  zh_Hans: DeepSeek-R1-Distill-Qwen-14B
+  en_US: DeepSeek-R1-Distill-Qwen-14B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.001"
+  output: "0.003"
+  unit: "0.001"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml
new file mode 100644
index 0000000000..dfc155ff6a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml
@@ -0,0 +1,21 @@
+model: deepseek-r1-distill-qwen-32b
+label:
+  zh_Hans: DeepSeek-R1-Distill-Qwen-32B
+  en_US: DeepSeek-R1-Distill-Qwen-32B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.002"
+  output: "0.006"
+  unit: "0.001"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml
new file mode 100644
index 0000000000..742e7f0001
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml
@@ -0,0 +1,21 @@
+model: deepseek-r1
+label:
+  zh_Hans: DeepSeek-R1
+  en_US: DeepSeek-R1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.004"
+  output: "0.016"
+  unit: '0.001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml
new file mode 100644
index 0000000000..23f38d60d3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml
@@ -0,0 +1,52 @@
+model: deepseek-v3
+label:
+  zh_Hans: DeepSeek-V3
+  en_US: DeepSeek-V3
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: "0.002"
+  output: "0.008"
+  unit: "0.001"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index 75c62a9080..1dce372bba 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -197,8 +197,7 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         else:
             # nothing different between chat model and completion model in tongyi
             params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-            response = Generation.call(**params, result_format="message", stream=stream)
-
+            response = Generation.call(**params, result_format="message", stream=stream, incremental_output=True)
         if stream:
             return self._handle_generate_stream_response(model, credentials, response, prompt_messages)

@@ -258,6 +257,9 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         """
         full_text = ""
         tool_calls = []
+        is_reasoning_started = False
+        # for index, response in enumerate(responses):
+        index = 0
         for index, response in enumerate(responses):
             if response.status_code not in {200, HTTPStatus.OK}:
                 raise ServiceUnavailableError(
@@ -311,7 +313,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
                     ),
                 )
             else:
-                resp_content = response.output.choices[0].message.content
+                message = response.output.choices[0].message
+
+                resp_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
+                    message, is_reasoning_started
+                )
                 if not resp_content:
                     if "tool_calls" in response.output.choices[0].message:
                         tool_calls = response.output.choices[0].message["tool_calls"]