chore: use the wrap thinking api for volcengine (#13432)

Signed-off-by: xhe <xw897002528@gmail.com>
This commit is contained in:
xhe 2025-02-10 10:25:07 +08:00 committed by GitHub
parent f307c7cd88
commit 939a9ecd21
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 15 additions and 18 deletions

View File

@@ -419,7 +419,9 @@ if you are not sure about the structure.
is_reasoning = True is_reasoning = True
else: else:
content = reasoning_content content = reasoning_content
elif is_reasoning: elif is_reasoning and content:
# do not end reasoning when content is empty
# there may be more reasoning_content later that follows previous reasoning closely
content = "\n</think>" + content content = "\n</think>" + content
is_reasoning = False is_reasoning = False
return content, is_reasoning return content, is_reasoning

View File

@@ -1,5 +1,4 @@
import logging import logging
import re
from collections.abc import Generator from collections.abc import Generator
from typing import Optional from typing import Optional
@@ -231,6 +230,17 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
return _handle_chat_response() return _handle_chat_response()
return _handle_stream_chat_response() return _handle_stream_chat_response()
def wrap_thinking(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
    """Normalize a streaming delta and delegate <think>-tag wrapping.

    Pulls ``content`` and ``reasoning_content`` off the delta object
    (falling back to ``""`` / ``None`` when the attribute is absent) and
    hands them to the shared reasoning-wrap helper.

    :param delta: streaming chunk delta from the model response
    :param is_reasoning: whether the stream is currently inside a
        reasoning (thinking) section
    :return: tuple of (wrapped content, updated is_reasoning flag)
    """
    payload = {
        "content": getattr(delta, "content", ""),
        "reasoning_content": getattr(delta, "reasoning_content", None),
    }
    return self._wrap_thinking_by_reasoning_content(payload, is_reasoning)
def _generate_v3( def _generate_v3(
self, self,
model: str, model: str,
@@ -253,22 +263,7 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
content = "" content = ""
if chunk.choices: if chunk.choices:
delta = chunk.choices[0].delta delta = chunk.choices[0].delta
if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content: content, is_reasoning_started = self.wrap_thinking(delta, is_reasoning_started)
content = ""
elif hasattr(delta, "reasoning_content"):
if not is_reasoning_started:
is_reasoning_started = True
content = "> 💭 " + delta.reasoning_content
else:
content = delta.reasoning_content
if "\n" in content:
content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
elif is_reasoning_started:
content = "\n\n" + delta.content
is_reasoning_started = False
else:
content = delta.content
yield LLMResultChunk( yield LLMResultChunk(
model=model, model=model,