From d0e0111f88da0fc972b07b1f97893df18cbc0522 Mon Sep 17 00:00:00 2001
From: cherryhuahua <68722306+cherryhuahua@users.noreply.github.com>
Date: Wed, 25 Sep 2024 14:51:42 +0800
Subject: [PATCH] fix: Spark's large language model token calculation error
 #7911 (#8755)

---
 api/core/app/apps/base_app_runner.py                    | 2 +-
 api/core/model_runtime/model_providers/spark/llm/llm.py | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py
index 1b412b8639..203aca3384 100644
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@@ -309,7 +309,7 @@ class AppRunner:
             if not prompt_messages:
                 prompt_messages = result.prompt_messages
 
-            if not usage and result.delta.usage:
+            if result.delta.usage:
                 usage = result.delta.usage
 
         if not usage:
diff --git a/api/core/model_runtime/model_providers/spark/llm/llm.py b/api/core/model_runtime/model_providers/spark/llm/llm.py
index 57193dc031..1181ba699a 100644
--- a/api/core/model_runtime/model_providers/spark/llm/llm.py
+++ b/api/core/model_runtime/model_providers/spark/llm/llm.py
@@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
         :param prompt_messages: prompt messages
         :return: llm response chunk generator result
         """
+        completion = ""
         for index, content in enumerate(client.subscribe()):
             if isinstance(content, dict):
                 delta = content["data"]
             else:
                 delta = content
-
+            completion += delta
             assistant_prompt_message = AssistantPromptMessage(
                 content=delta or "",
             )
-
+            temp_assistant_prompt_message = AssistantPromptMessage(
+                content=completion,
+            )
             prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
-            completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+            completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
 
             # transform usage
             usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
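
Note on the fix: the second hunk is the substantive change. During streaming, completion_tokens was computed from only the latest delta, so the reported usage reflected the final chunk rather than the whole generated text; the patch accumulates the completion and counts tokens over it. The first hunk complements this by letting each later, more complete usage report overwrite the earlier one instead of freezing the first. Below is a minimal standalone sketch of the difference; it is not Dify code, and count_tokens is a hypothetical whitespace tokenizer standing in for the model's real one.

# Minimal sketch (hypothetical, not Dify code): why streaming usage must be
# computed over the accumulated completion rather than the latest delta alone.

def count_tokens(text: str) -> int:
    # Crude stand-in for the model's real tokenizer.
    return len(text.split())

def stream_usage(chunks: list[str]) -> tuple[int, int]:
    completion = ""
    last_delta_tokens = 0   # pre-fix behaviour: count the latest chunk only
    total_tokens = 0        # post-fix behaviour: count the full text so far
    for delta in chunks:
        completion += delta
        last_delta_tokens = count_tokens(delta)
        total_tokens = count_tokens(completion)
    return last_delta_tokens, total_tokens

if __name__ == "__main__":
    buggy, fixed = stream_usage(["The quick ", "brown fox ", "jumps"])
    print(buggy)  # 1 -- only the final delta ("jumps") was counted
    print(fixed)  # 5 -- every token of the completion is counted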