diff --git a/api/core/plugin/backwards_invocation/model.py b/api/core/plugin/backwards_invocation/model.py index 5ec9620f22..17cfaf2edf 100644 --- a/api/core/plugin/backwards_invocation/model.py +++ b/api/core/plugin/backwards_invocation/model.py @@ -58,6 +58,7 @@ class PluginModelBackwardsInvocation(BaseBackwardsInvocation): LLMNode.deduct_llm_quota( tenant_id=tenant.id, model_instance=model_instance, usage=chunk.delta.usage ) + chunk.prompt_messages = [] yield chunk return handle() @@ -68,7 +69,7 @@ class PluginModelBackwardsInvocation(BaseBackwardsInvocation): def handle_non_streaming(response: LLMResult) -> Generator[LLMResultChunk, None, None]: yield LLMResultChunk( model=response.model, - prompt_messages=response.prompt_messages, + prompt_messages=[], system_fingerprint=response.system_fingerprint, delta=LLMResultChunkDelta( index=0,