chore: use the wrap thinking api for volcengine (#13432)

Signed-off-by: xhe <xw897002528@gmail.com>
xhe 2025-02-10 10:25:07 +08:00 committed by GitHub
parent f307c7cd88
commit 939a9ecd21
2 changed files with 15 additions and 18 deletions


@@ -419,7 +419,9 @@ if you are not sure about the structure.
                 is_reasoning = True
             else:
                 content = reasoning_content
-        elif is_reasoning:
+        elif is_reasoning and content:
+            # do not end reasoning when content is empty;
+            # more reasoning_content that continues the previous reasoning may still arrive
             content = "\n</think>" + content
             is_reasoning = False
         return content, is_reasoning
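
For readers without the full file: below is a minimal sketch of the shared helper this hunk patches, reconstructed from the visible tail above. The "<think>\n" opener and the dict-based access are assumptions inferred from the "\n</think>" closer and the call site in the second file; the name mirrors the method referenced there, not the exact upstream source.

    def wrap_thinking_by_reasoning_content(delta: dict, is_reasoning: bool) -> tuple[str, bool]:
        content = delta.get("content") or ""
        reasoning_content = delta.get("reasoning_content")
        if reasoning_content:
            if not is_reasoning:
                # first reasoning chunk: open the think block
                content = "<think>\n" + reasoning_content
                is_reasoning = True
            else:
                content = reasoning_content
        elif is_reasoning and content:
            # the guard added in this commit: only close the think block on a
            # chunk that carries real content; an empty chunk may just be a gap
            # between two reasoning_content chunks
            content = "\n</think>" + content
            is_reasoning = False
        return content, is_reasoning

Before the guard, an empty content chunk arriving mid-reasoning would emit "\n</think>" and flip is_reasoning off, so a single contiguous reasoning run could be split into two think blocks.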


@@ -1,5 +1,4 @@
 import logging
-import re
 from collections.abc import Generator
 from typing import Optional
@@ -231,6 +230,17 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
             return _handle_chat_response()
         return _handle_stream_chat_response()
 
+    def wrap_thinking(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
+        content = ""
+        reasoning_content = None
+        if hasattr(delta, "content"):
+            content = delta.content
+        if hasattr(delta, "reasoning_content"):
+            reasoning_content = delta.reasoning_content
+        return self._wrap_thinking_by_reasoning_content(
+            {"content": content, "reasoning_content": reasoning_content}, is_reasoning
+        )
+
     def _generate_v3(
         self,
         model: str,
@@ -253,22 +263,7 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
                 content = ""
                 if chunk.choices:
                     delta = chunk.choices[0].delta
-                    if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content:
-                        content = ""
-                    elif hasattr(delta, "reasoning_content"):
-                        if not is_reasoning_started:
-                            is_reasoning_started = True
-                            content = "> 💭 " + delta.reasoning_content
-                        else:
-                            content = delta.reasoning_content
-                        if "\n" in content:
-                            content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
-                    elif is_reasoning_started:
-                        content = "\n\n" + delta.content
-                        is_reasoning_started = False
-                    else:
-                        content = delta.content
+                    content, is_reasoning_started = self.wrap_thinking(delta, is_reasoning_started)
                 yield LLMResultChunk(
                     model=model,
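
Taken together, the new streaming path reduces to: read the SDK delta's attributes defensively, hand the shared helper a plain dict, and let it manage the think-block state. A hedged end-to-end sketch, assuming the wrap_thinking_by_reasoning_content reconstruction above is in scope and using SimpleNamespace as a stand-in for the SDK's streamed delta objects:

    from types import SimpleNamespace

    # stand-ins for chunk.choices[0].delta objects from the volcengine stream
    deltas = [
        SimpleNamespace(content="", reasoning_content="Let me check the docs."),
        SimpleNamespace(content=""),  # empty keep-alive chunk mid-reasoning
        SimpleNamespace(content="", reasoning_content=" Looks right."),
        SimpleNamespace(content="The answer is 42."),
    ]

    def wrap_thinking(delta, is_reasoning: bool) -> tuple[str, bool]:
        # mirrors the new adapter method: hasattr-guarded reads, then delegation
        content = delta.content if hasattr(delta, "content") else ""
        reasoning_content = delta.reasoning_content if hasattr(delta, "reasoning_content") else None
        return wrap_thinking_by_reasoning_content(
            {"content": content, "reasoning_content": reasoning_content}, is_reasoning
        )

    is_reasoning = False
    pieces = []
    for delta in deltas:
        piece, is_reasoning = wrap_thinking(delta, is_reasoning)
        pieces.append(piece)
    print("".join(pieces))
    # <think>
    # Let me check the docs. Looks right.
    # </think>The answer is 42.

The net effect of the commit: the provider-specific "> 💭 " blockquote formatting and its regex-based line rewriting are gone in favor of the shared <think> wrapping, which is also why import re could be dropped.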