diff --git a/api/core/app_runner/app_runner.py b/api/core/app_runner/app_runner.py
index 2b8ddc5d4e..f9678b372f 100644
--- a/api/core/app_runner/app_runner.py
+++ b/api/core/app_runner/app_runner.py
@@ -84,7 +84,7 @@ class AppRunner:
 
         return rest_tokens
 
-    def recale_llm_max_tokens(self, model_config: ModelConfigEntity,
+    def recalc_llm_max_tokens(self, model_config: ModelConfigEntity,
                                prompt_messages: list[PromptMessage]):
         # recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
         model_type_instance = model_config.provider_model_bundle.model_type_instance
diff --git a/api/core/app_runner/basic_app_runner.py b/api/core/app_runner/basic_app_runner.py
index 99df249ddf..83f4f6929a 100644
--- a/api/core/app_runner/basic_app_runner.py
+++ b/api/core/app_runner/basic_app_runner.py
@@ -181,7 +181,7 @@ class BasicApplicationRunner(AppRunner):
             return
 
         # Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
-        self.recale_llm_max_tokens(
+        self.recalc_llm_max_tokens(
             model_config=app_orchestration_config.model_config,
             prompt_messages=prompt_messages
         )
diff --git a/api/core/features/assistant_cot_runner.py b/api/core/features/assistant_cot_runner.py
index 809834c8cb..09ab27109b 100644
--- a/api/core/features/assistant_cot_runner.py
+++ b/api/core/features/assistant_cot_runner.py
@@ -131,7 +131,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
             )
 
             # recale llm max tokens
-            self.recale_llm_max_tokens(self.model_config, prompt_messages)
+            self.recalc_llm_max_tokens(self.model_config, prompt_messages)
             # invoke model
             chunks: Generator[LLMResultChunk, None, None] = model_instance.invoke_llm(
                 prompt_messages=prompt_messages,
diff --git a/api/core/features/assistant_fc_runner.py b/api/core/features/assistant_fc_runner.py
index 7ad9d7bd2a..afb312341d 100644
--- a/api/core/features/assistant_fc_runner.py
+++ b/api/core/features/assistant_fc_runner.py
@@ -106,7 +106,7 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
             )
 
             # recale llm max tokens
-            self.recale_llm_max_tokens(self.model_config, prompt_messages)
+            self.recalc_llm_max_tokens(self.model_config, prompt_messages)
             # invoke model
             chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
                 prompt_messages=prompt_messages,
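
For context on what the renamed helper does: the comment in the first hunk ("recalc max_tokens if sum(prompt_token + max_tokens) over model token limit") describes clamping the completion budget so that the prompt plus the requested `max_tokens` fits inside the model's context window. Below is a minimal standalone sketch of that idea; the argument names (`context_limit`, `prompt_tokens`, `max_tokens`) and the 16-token floor are illustrative assumptions, not the actual values the real method reads from `ModelConfigEntity` and the provider's model schema.

```python
def recalc_llm_max_tokens(context_limit: int, prompt_tokens: int, max_tokens: int) -> int:
    """Clamp max_tokens so prompt_tokens + max_tokens never exceeds the model's context limit.

    Hypothetical free function for illustration only; the real method pulls these
    values from ModelConfigEntity and the provider model bundle.
    """
    if prompt_tokens + max_tokens > context_limit:
        # shrink the completion budget, keeping a small floor so the call can still return something
        max_tokens = max(context_limit - prompt_tokens, 16)
    return max_tokens


# e.g. a 4096-token model with a 3900-token prompt leaves room for at most 196 completion tokens
assert recalc_llm_max_tokens(context_limit=4096, prompt_tokens=3900, max_tokens=512) == 196
```

The diff itself only renames the method from `recale_llm_max_tokens` to `recalc_llm_max_tokens` and updates its three call sites; the clamping behavior is unchanged.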