chore: use the wrap thinking api for volcengine (#13432)

Signed-off-by: xhe <xw897002528@gmail.com>
xhe 2025-02-10 10:25:07 +08:00 committed by GitHub
parent f307c7cd88
commit 939a9ecd21
2 changed files with 15 additions and 18 deletions


@@ -419,7 +419,9 @@ if you are not sure about the structure.
                 is_reasoning = True
             else:
                 content = reasoning_content
-        elif is_reasoning:
+        elif is_reasoning and content:
+            # do not end reasoning when content is empty;
+            # more reasoning_content that continues the previous reasoning may still arrive
             content = "\n</think>" + content
             is_reasoning = False
         return content, is_reasoning
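
For readers without the full file: below is a minimal sketch of the shared helper this hunk patches, reconstructed from the visible tail above. The "<think>\n" opener and the dict-based access are assumptions inferred from the "\n</think>" closer and the call site in the second file; the name mirrors the method referenced there, not the exact upstream source.

    def wrap_thinking_by_reasoning_content(delta: dict, is_reasoning: bool) -> tuple[str, bool]:
        content = delta.get("content") or ""
        reasoning_content = delta.get("reasoning_content")
        if reasoning_content:
            if not is_reasoning:
                # first reasoning chunk: open the think block
                content = "<think>\n" + reasoning_content
                is_reasoning = True
            else:
                content = reasoning_content
        elif is_reasoning and content:
            # the guard added in this commit: only close the think block on a
            # chunk that carries real content; an empty chunk may just be a gap
            # between two reasoning_content chunks
            content = "\n</think>" + content
            is_reasoning = False
        return content, is_reasoning

Before the guard, an empty content chunk arriving mid-reasoning would emit "\n</think>" and flip is_reasoning off, so a single contiguous reasoning run could be split into two think blocks.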


@@ -1,5 +1,4 @@
 import logging
-import re
 from collections.abc import Generator
 from typing import Optional
@@ -231,6 +230,17 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
             return _handle_chat_response()
         return _handle_stream_chat_response()
 
+    def wrap_thinking(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
+        content = ""
+        reasoning_content = None
+        if hasattr(delta, "content"):
+            content = delta.content
+        if hasattr(delta, "reasoning_content"):
+            reasoning_content = delta.reasoning_content
+        return self._wrap_thinking_by_reasoning_content(
+            {"content": content, "reasoning_content": reasoning_content}, is_reasoning
+        )
+
     def _generate_v3(
         self,
         model: str,
@@ -253,22 +263,7 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
                 content = ""
                 if chunk.choices:
                     delta = chunk.choices[0].delta
-                    if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content:
-                        content = ""
-                    elif hasattr(delta, "reasoning_content"):
-                        if not is_reasoning_started:
-                            is_reasoning_started = True
-                            content = "> 💭 " + delta.reasoning_content
-                        else:
-                            content = delta.reasoning_content
-                        if "\n" in content:
-                            content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
-                    elif is_reasoning_started:
-                        content = "\n\n" + delta.content
-                        is_reasoning_started = False
-                    else:
-                        content = delta.content
+                    content, is_reasoning_started = self.wrap_thinking(delta, is_reasoning_started)
                 yield LLMResultChunk(
                     model=model,
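
Taken together, the new streaming path reduces to: read the SDK delta's attributes defensively, hand the shared helper a plain dict, and let it manage the think-block state. A hedged end-to-end sketch, assuming the wrap_thinking_by_reasoning_content reconstruction above is in scope and using SimpleNamespace as a stand-in for the SDK's streamed delta objects:

    from types import SimpleNamespace

    # stand-ins for chunk.choices[0].delta objects from the volcengine stream
    deltas = [
        SimpleNamespace(content="", reasoning_content="Let me check the docs."),
        SimpleNamespace(content=""),  # empty keep-alive chunk mid-reasoning
        SimpleNamespace(content="", reasoning_content=" Looks right."),
        SimpleNamespace(content="The answer is 42."),
    ]

    def wrap_thinking(delta, is_reasoning: bool) -> tuple[str, bool]:
        # mirrors the new adapter method: hasattr-guarded reads, then delegation
        content = delta.content if hasattr(delta, "content") else ""
        reasoning_content = delta.reasoning_content if hasattr(delta, "reasoning_content") else None
        return wrap_thinking_by_reasoning_content(
            {"content": content, "reasoning_content": reasoning_content}, is_reasoning
        )

    is_reasoning = False
    pieces = []
    for delta in deltas:
        piece, is_reasoning = wrap_thinking(delta, is_reasoning)
        pieces.append(piece)
    print("".join(pieces))
    # <think>
    # Let me check the docs. Looks right.
    # </think>The answer is 42.

The net effect of the commit: the provider-specific "> 💭 " blockquote formatting and its regex-based line rewriting are gone in favor of the shared <think> wrapping, which is also why import re could be dropped.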