diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py
index 1b412b8639..203aca3384 100644
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@@ -309,7 +309,7 @@ class AppRunner:
             if not prompt_messages:
                 prompt_messages = result.prompt_messages
 
-            if not usage and result.delta.usage:
+            if result.delta.usage:
                 usage = result.delta.usage
 
         if not usage:
diff --git a/api/core/model_runtime/model_providers/spark/llm/llm.py b/api/core/model_runtime/model_providers/spark/llm/llm.py
index 57193dc031..1181ba699a 100644
--- a/api/core/model_runtime/model_providers/spark/llm/llm.py
+++ b/api/core/model_runtime/model_providers/spark/llm/llm.py
@@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
         :param prompt_messages: prompt messages
         :return: llm response chunk generator result
         """
+        completion = ""
         for index, content in enumerate(client.subscribe()):
             if isinstance(content, dict):
                 delta = content["data"]
             else:
                 delta = content
-
+            completion += delta
             assistant_prompt_message = AssistantPromptMessage(
                 content=delta or "",
             )
-
+            temp_assistant_prompt_message = AssistantPromptMessage(
+                content=completion,
+            )
             prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
-            completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+            completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
 
             # transform usage
             usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
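
Note on the second hunk: the Spark streaming handler previously counted completion tokens against only the latest delta, so the usage attached to each chunk reflected a single fragment rather than everything generated so far. The fix accumulates the streamed text and counts tokens over the running completion. The first hunk complements this: since each chunk's usage now covers the whole reply so far, the app runner keeps the most recent delta's usage instead of only the first non-empty one. Below is a minimal, self-contained sketch of the accumulation pattern, not Dify's actual code: stream_with_usage and count_tokens are hypothetical stand-ins for the Spark chunk generator and self.get_num_tokens(), and the whitespace split is only a placeholder for a real tokenizer.

    def count_tokens(text: str) -> int:
        # Hypothetical stand-in for self.get_num_tokens(); a real
        # tokenizer would be used here, not a whitespace split.
        return len(text.split())

    def stream_with_usage(deltas):
        # Mirrors the fixed loop: keep a running `completion` and count
        # tokens over the accumulated text, not just the latest delta.
        completion = ""
        for delta in deltas:
            completion += delta
            yield delta, count_tokens(completion)

    if __name__ == "__main__":
        for chunk, completion_tokens in stream_with_usage(["Hello ", "brave new ", "world"]):
            print(repr(chunk), completion_tokens)
        # 'Hello ' 1
        # 'brave new ' 3
        # 'world' 4

The trade-off is re-tokenizing the full completion on every chunk; the benefit is that the usage carried by the final chunk already reflects the entire reply, with no separate end-of-stream accounting pass.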