diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
index 734cf28b15..a0d9c450d5 100644
--- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
@@ -473,6 +473,8 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
         finish_reason = None  # The default value of finish_reason is None
         message_id, usage = None, None
+        is_reasoning_started = False
+        is_reasoning_started_tag = False
 
         for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
             chunk = chunk.strip()
             if chunk:
@@ -514,6 +516,33 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
 
                 delta = choice["delta"]
                 delta_content = delta.get("content")
+                if not is_reasoning_started_tag and "<think>" in delta_content:
+                    is_reasoning_started_tag = True
+                    delta_content = "> 💭 " + delta_content.replace("<think>", "")
+                elif is_reasoning_started_tag and "</think>" in delta_content:
+                    delta_content = delta_content.replace("</think>", "") + "\n\n"
+                    is_reasoning_started_tag = False
+                elif is_reasoning_started_tag:
+                    if "\n\n" in delta_content:
+                        delta_content = delta_content.replace("\n\n", "\n> ")
+                    elif "\n" in delta_content:
+                        delta_content = delta_content.replace("\n", "\n> ")
+
+                reasoning_content = delta.get("reasoning_content")
+                if reasoning_content:
+                    if not is_reasoning_started:
+                        delta_content = "> 💭 " + reasoning_content
+                        is_reasoning_started = True
+                    elif "\n\n" in reasoning_content:
+                        delta_content = reasoning_content.replace("\n\n", "\n> ")
+                    elif "\n" in reasoning_content:
+                        delta_content = reasoning_content.replace("\n", "\n> ")
+                elif is_reasoning_started:
+                    # If we were in reasoning mode but now getting regular content,
+                    # add \n\n to close the reasoning block
+                    delta_content = "\n\n" + delta_content
+                    is_reasoning_started = False
+
                 assistant_message_tool_calls = None
                 if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
diff --git a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
index f0b2b101b7..40c3777c5c 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
@@ -247,15 +247,36 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
             req_params["tools"] = tools
 
         def _handle_stream_chat_response(chunks: Generator[ChatCompletionChunk]) -> Generator:
+            is_reasoning_started = False
             for chunk in chunks:
+                content = ""
+                if chunk.choices:
+                    if hasattr(chunk.choices[0].delta, "reasoning_content"):
+                        delta_content = ""
+                        if not is_reasoning_started:
+                            is_reasoning_started = True
+                            delta_content = "> 💭 " + chunk.choices[0].delta.reasoning_content
+                        else:
+                            delta_content = chunk.choices[0].delta.reasoning_content
+
+                        if "\n\n" in delta_content:
+                            delta_content = delta_content.replace("\n\n", "\n> ")
+                        elif "\n" in delta_content:
+                            delta_content = delta_content.replace("\n", "\n> ")
+
+                        content = delta_content
+                    elif is_reasoning_started:
+                        content = "\n\n" + chunk.choices[0].delta.content
+                        is_reasoning_started = False
+                    else:
+                        content = chunk.choices[0].delta.content
+
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
                     delta=LLMResultChunkDelta(
                         index=0,
-                        message=AssistantPromptMessage(
-                            content=chunk.choices[0].delta.content if chunk.choices else "", tool_calls=[]
-                        ),
+                        message=AssistantPromptMessage(content=content, tool_calls=[]),
                         usage=self._calc_response_usage(
                             model=model,
                             credentials=credentials,
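
Both hunks apply the same streaming transform: reasoning tokens are rendered as a Markdown blockquote opened with "> 💭 ", any newline inside the quote is re-prefixed with "> " so the whole chain of thought stays quoted, and a blank line closes the quote once regular content resumes. The following is a minimal standalone sketch of that rule, not part of the diff; format_chunks and the tuple-based stream are hypothetical names introduced only for illustration, under the assumption that each chunk carries either reasoning text or regular content.

def format_chunks(chunks):
    """chunks: iterable of (reasoning, content) pairs; either element may be None."""
    in_reasoning = False
    for reasoning, content in chunks:
        if reasoning:
            # Open the Markdown blockquote on the first reasoning chunk.
            text = reasoning if in_reasoning else "> 💭 " + reasoning
            in_reasoning = True
            # Keep subsequent lines inside the blockquote, checking the
            # paragraph break first, as the diff does.
            if "\n\n" in text:
                text = text.replace("\n\n", "\n> ")
            elif "\n" in text:
                text = text.replace("\n", "\n> ")
            yield text
        elif content:
            if in_reasoning:
                # A blank line closes the blockquote before normal output resumes.
                content = "\n\n" + content
                in_reasoning = False
            yield content

stream = [("Let me think.", None), ("\nStep 2.", None), (None, "Final answer.")]
print("".join(format_chunks(stream)))
# > 💭 Let me think.
# > Step 2.
#
# Final answer.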