From 939a9ecd210c522b9935a8fb42a790e2173d97fb Mon Sep 17 00:00:00 2001
From: xhe
Date: Mon, 10 Feb 2025 10:25:07 +0800
Subject: [PATCH] chore: use the wrap thinking api for volcengine (#13432)

Signed-off-by: xhe
---
 .../__base/large_language_model.py            |  4 ++-
 .../volcengine_maas/llm/llm.py                | 29 ++++++++-----------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/api/core/model_runtime/model_providers/__base/large_language_model.py b/api/core/model_runtime/model_providers/__base/large_language_model.py
index f377f12919..710e168e4f 100644
--- a/api/core/model_runtime/model_providers/__base/large_language_model.py
+++ b/api/core/model_runtime/model_providers/__base/large_language_model.py
@@ -419,7 +419,9 @@ if you are not sure about the structure.
                 is_reasoning = True
             else:
                 content = reasoning_content
-        elif is_reasoning:
+        elif is_reasoning and content:
+            # do not end reasoning when content is empty
+            # there may be more reasoning_content later that follows previous reasoning closely
             content = "\n" + content
             is_reasoning = False
         return content, is_reasoning
diff --git a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
index 83c534add8..d513d6b548 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
@@ -1,5 +1,4 @@
 import logging
-import re
 from collections.abc import Generator
 from typing import Optional
 
@@ -231,6 +230,17 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
             return _handle_chat_response()
         return _handle_stream_chat_response()
 
+    def wrap_thinking(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
+        content = ""
+        reasoning_content = None
+        if hasattr(delta, "content"):
+            content = delta.content
+        if hasattr(delta, "reasoning_content"):
+            reasoning_content = delta.reasoning_content
+        return self._wrap_thinking_by_reasoning_content(
+            {"content": content, "reasoning_content": reasoning_content}, is_reasoning
+        )
+
     def _generate_v3(
         self,
         model: str,
@@ -253,22 +263,7 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
                 content = ""
                 if chunk.choices:
                     delta = chunk.choices[0].delta
-                    if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content:
-                        content = ""
-                    elif hasattr(delta, "reasoning_content"):
-                        if not is_reasoning_started:
-                            is_reasoning_started = True
-                            content = "> 💭 " + delta.reasoning_content
-                        else:
-                            content = delta.reasoning_content
-
-                        if "\n" in content:
-                            content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
-                    elif is_reasoning_started:
-                        content = "\n\n" + delta.content
-                        is_reasoning_started = False
-                    else:
-                        content = delta.content
+                    content, is_reasoning_started = self.wrap_thinking(delta, is_reasoning_started)
 
                 yield LLMResultChunk(
                     model=model,