From 04a0ae3aa957884617bab1c3121beb47c4e39183 Mon Sep 17 00:00:00 2001
From: Novice <857526207@qq.com>
Date: Mon, 17 Mar 2025 16:47:10 +0800
Subject: [PATCH] feat: add llm blocking invoke (#15732)

---
 api/core/plugin/backwards_invocation/model.py | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/api/core/plugin/backwards_invocation/model.py b/api/core/plugin/backwards_invocation/model.py
index 61e0f12680..490a475c16 100644
--- a/api/core/plugin/backwards_invocation/model.py
+++ b/api/core/plugin/backwards_invocation/model.py
@@ -3,7 +3,7 @@ from binascii import hexlify, unhexlify
 from collections.abc import Generator
 
 from core.model_manager import ModelManager
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import (
     PromptMessage,
     SystemPromptMessage,
@@ -46,7 +46,7 @@ class PluginModelBackwardsInvocation(BaseBackwardsInvocation):
             model_parameters=payload.completion_params,
             tools=payload.tools,
             stop=payload.stop,
-            stream=payload.stream or True,
+            stream=True if payload.stream is None else payload.stream,
             user=user_id,
         )
 
@@ -64,7 +64,21 @@
         else:
             if response.usage:
                 LLMNode.deduct_llm_quota(tenant_id=tenant.id, model_instance=model_instance, usage=response.usage)
-            return response
+
+            def handle_non_streaming(response: LLMResult) -> Generator[LLMResultChunk, None, None]:
+                yield LLMResultChunk(
+                    model=response.model,
+                    prompt_messages=response.prompt_messages,
+                    system_fingerprint=response.system_fingerprint,
+                    delta=LLMResultChunkDelta(
+                        index=0,
+                        message=response.message,
+                        usage=response.usage,
+                        finish_reason="",
+                    ),
+                )
+
+            return handle_non_streaming(response)
 
     @classmethod
     def invoke_text_embedding(cls, user_id: str, tenant: Tenant, payload: RequestInvokeTextEmbedding):
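
Note on the `stream` default (a minimal standalone sketch, not part of the
patch): the old expression `payload.stream or True` coerces an explicit
`stream=False` back to `True`, because `False or True` evaluates to `True`,
so blocking mode was unreachable. The new conditional applies the default
only when the caller omits the flag (`None`):

    # Standalone Python sketch comparing the two defaulting expressions.
    # `requested` stands in for payload.stream: None, True, or False.
    for requested in (None, True, False):
        old = requested or True                         # False -> True (bug)
        new = True if requested is None else requested  # False stays False
        print(f"requested={requested!r}: old={old!r}, new={new!r}")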
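
Note on the blocking branch (a sketch under assumptions, not part of the
patch): wrapping the single LLMResult in a one-chunk generator means
invoke_llm now returns Generator[LLMResultChunk, None, None] in both modes,
so a caller can consume either mode with one code path:

    # Hypothetical caller sketch -- `collect_text` and its variable names
    # are illustrative, not part of this patch or a documented Dify API.
    # The same loop consumes many chunks (stream=True) or exactly one
    # chunk (stream=False).
    def collect_text(chunks) -> str:
        return "".join(
            chunk.delta.message.content  # assumes plain-string message content
            for chunk in chunks
        )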