From 75113c26c668084f5725d076b06537c4f0bc4e6e Mon Sep 17 00:00:00 2001
From: Fei He
Date: Mon, 10 Feb 2025 10:26:03 +0800
Subject: [PATCH] Feat : add deepseek support for tongyi (#13445)

---
 .../model_providers/tongyi/llm/_position.yaml |  4 ++
 .../llm/deepseek-r1-distill-qwen-14B.yaml     | 21 ++++++++
 .../llm/deepseek-r1-distill-qwen-32B.yaml     | 21 ++++++++
 .../tongyi/llm/deepseek-r1.yaml               | 21 ++++++++
 .../tongyi/llm/deepseek-v3.yaml               | 52 +++++++++++++++++++
 .../model_providers/tongyi/llm/llm.py         | 12 +++-
 6 files changed, 128 insertions(+), 3 deletions(-)
 create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml
 create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml
 create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml
 create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml

diff --git a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
index d7ba51e1d9..785ddeb494 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
@@ -1,3 +1,7 @@
+- deepseek-r1
+- deepseek-r1-distill-qwen-14b
+- deepseek-r1-distill-qwen-32b
+- deepseek-v3
 - qwen-vl-max-0809
 - qwen-vl-max-0201
 - qwen-vl-max
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml
new file mode 100644
index 0000000000..2bce8805c6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml
@@ -0,0 +1,21 @@
+model: deepseek-r1-distill-qwen-14b
+label:
+  zh_Hans: DeepSeek-R1-Distill-Qwen-14B
+  en_US: DeepSeek-R1-Distill-Qwen-14B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.001"
+  output: "0.003"
+  unit: "0.001"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml
new file mode 100644
index 0000000000..dfc155ff6a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml
@@ -0,0 +1,21 @@
+model: deepseek-r1-distill-qwen-32b
+label:
+  zh_Hans: DeepSeek-R1-Distill-Qwen-32B
+  en_US: DeepSeek-R1-Distill-Qwen-32B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.002"
+  output: "0.006"
+  unit: "0.001"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml
new file mode 100644
index 0000000000..742e7f0001
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml
@@ -0,0 +1,21 @@
+model: deepseek-r1
+label:
+  zh_Hans: DeepSeek-R1
+  en_US: DeepSeek-R1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.004"
+  output: "0.016"
+  unit: '0.001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml
new file mode 100644
index 0000000000..23f38d60d3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml
@@ -0,0 +1,52 @@
+model: deepseek-v3
+label:
+  zh_Hans: DeepSeek-V3
+  en_US: DeepSeek-V3
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: "0.002"
+  output: "0.008"
+  unit: "0.001"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index 75c62a9080..1dce372bba 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -197,8 +197,7 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         else:
             # nothing different between chat model and completion model in tongyi
             params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-            response = Generation.call(**params, result_format="message", stream=stream)
-
+            response = Generation.call(**params, result_format="message", stream=stream, incremental_output=True)
         if stream:
             return self._handle_generate_stream_response(model, credentials, response, prompt_messages)

@@ -258,6 +257,9 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         """
         full_text = ""
         tool_calls = []
+        is_reasoning_started = False
+        # for index, response in enumerate(responses):
+        index = 0
         for index, response in enumerate(responses):
             if response.status_code not in {200, HTTPStatus.OK}:
                 raise ServiceUnavailableError(
@@ -311,7 +313,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
                     ),
                 )
             else:
-                resp_content = response.output.choices[0].message.content
+                message = response.output.choices[0].message
+
+                resp_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
+                    message, is_reasoning_started
+                )
                 if not resp_content:
                     if "tool_calls" in response.output.choices[0].message:
                         tool_calls = response.output.choices[0].message["tool_calls"]