diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 0000000000..cb4aa7ebd8 --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,3 @@ +{ + "MD024": false +} diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cdc9f6ac5..57c6180068 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to Dify will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.15.7] - 2025-04-27 + +### Added + +- Added support for GPT-4.1 in model providers (#18912) +- Added support for Amazon Bedrock DeepSeek-R1 model (#18908) +- Added support for Amazon Bedrock Claude Sonnet 3.7 model (#18788) +- Refined version compatibility logic in app DSL service + +### Fixed + +- Fixed issue with creating apps from template categories (#18807, #18868) +- Fixed DSL version check when creating apps from explore templates (#18872, #18878) + ## [0.15.6] - 2025-04-22 ### Security diff --git a/api/configs/packaging/__init__.py b/api/configs/packaging/__init__.py index 00918242b3..9bdaa53cc3 100644 --- a/api/configs/packaging/__init__.py +++ b/api/configs/packaging/__init__.py @@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings): CURRENT_VERSION: str = Field( description="Dify version", - default="0.15.6", + default="0.15.7", ) COMMIT_SHA: str = Field( diff --git a/api/controllers/console/auth/forgot_password.py b/api/controllers/console/auth/forgot_password.py index 541eeb17c7..5b89005671 100644 --- a/api/controllers/console/auth/forgot_password.py +++ b/api/controllers/console/auth/forgot_password.py @@ -6,13 +6,9 @@ from flask_restful import Resource, reqparse # type: ignore from constants.languages import languages from controllers.console import api -from controllers.console.auth.error import (EmailCodeError, InvalidEmailError, - InvalidTokenError, - PasswordMismatchError) -from controllers.console.error import (AccountInFreezeError, AccountNotFound, - EmailSendIpLimitError) -from controllers.console.wraps import (email_password_login_enabled, - setup_required) +from controllers.console.auth.error import EmailCodeError, InvalidEmailError, InvalidTokenError, PasswordMismatchError +from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError +from controllers.console.wraps import email_password_login_enabled, setup_required from events.tenant_event import tenant_was_created from extensions.ext_database import db from libs.helper import email, extract_remote_ip diff --git a/api/controllers/console/wraps.py b/api/controllers/console/wraps.py index 11b7140b1b..b8aece3beb 100644 --- a/api/controllers/console/wraps.py +++ b/api/controllers/console/wraps.py @@ -11,8 +11,7 @@ from models.model import DifySetup from services.feature_service import FeatureService, LicenseStatus from services.operation_service import OperationService -from .error import (NotInitValidateError, NotSetupError, - UnauthorizedAndForceLogout) +from .error import NotInitValidateError, NotSetupError, UnauthorizedAndForceLogout def account_initialization_required(view): diff --git a/api/core/agent/cot_agent_runner.py b/api/core/agent/cot_agent_runner.py index bbe1865daf..1b14eb1f54 100644 --- a/api/core/agent/cot_agent_runner.py +++ b/api/core/agent/cot_agent_runner.py @@ -104,7 +104,6 @@ class CotAgentRunner(BaseAgentRunner, ABC): # recalc llm max tokens prompt_messages = self._organize_prompt_messages() - 
self.recalc_llm_max_tokens(self.model_config, prompt_messages) # invoke model chunks = model_instance.invoke_llm( prompt_messages=prompt_messages, diff --git a/api/core/agent/fc_agent_runner.py b/api/core/agent/fc_agent_runner.py index b862c96072..9340812024 100644 --- a/api/core/agent/fc_agent_runner.py +++ b/api/core/agent/fc_agent_runner.py @@ -84,7 +84,6 @@ class FunctionCallAgentRunner(BaseAgentRunner): # recalc llm max tokens prompt_messages = self._organize_prompt_messages() - self.recalc_llm_max_tokens(self.model_config, prompt_messages) # invoke model chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm( prompt_messages=prompt_messages, diff --git a/api/core/app/apps/agent_chat/app_runner.py b/api/core/app/apps/agent_chat/app_runner.py index c670536140..8b4d535680 100644 --- a/api/core/app/apps/agent_chat/app_runner.py +++ b/api/core/app/apps/agent_chat/app_runner.py @@ -55,20 +55,6 @@ class AgentChatAppRunner(AppRunner): query = application_generate_entity.query files = application_generate_entity.files - # Pre-calculate the number of tokens of the prompt messages, - # and return the rest number of tokens by model context token size limit and max token size limit. - # If the rest number of tokens is not enough, raise exception. - # Include: prompt template, inputs, query(optional), files(optional) - # Not Include: memory, external data, dataset context - self.get_pre_calculate_rest_tokens( - app_record=app_record, - model_config=application_generate_entity.model_conf, - prompt_template_entity=app_config.prompt_template, - inputs=inputs, - files=files, - query=query, - ) - memory = None if application_generate_entity.conversation_id: # get memory of conversation (read-only) diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py index 07a248d77a..1f78c7d099 100644 --- a/api/core/app/apps/base_app_runner.py +++ b/api/core/app/apps/base_app_runner.py @@ -15,10 +15,8 @@ from core.app.features.annotation_reply.annotation_reply import AnnotationReplyF from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature from core.external_data_tool.external_data_fetch import ExternalDataFetch from core.memory.token_buffer_memory import TokenBufferMemory -from core.model_manager import ModelInstance from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage -from core.model_runtime.entities.model_entities import ModelPropertyKey from core.model_runtime.errors.invoke import InvokeBadRequestError from core.moderation.input_moderation import InputModeration from core.prompt.advanced_prompt_transform import AdvancedPromptTransform @@ -31,106 +29,6 @@ if TYPE_CHECKING: class AppRunner: - def get_pre_calculate_rest_tokens( - self, - app_record: App, - model_config: ModelConfigWithCredentialsEntity, - prompt_template_entity: PromptTemplateEntity, - inputs: Mapping[str, str], - files: Sequence["File"], - query: Optional[str] = None, - ) -> int: - """ - Get pre calculate rest tokens - :param app_record: app record - :param model_config: model config entity - :param prompt_template_entity: prompt template entity - :param inputs: inputs - :param files: files - :param query: query - :return: - """ - # Invoke model - model_instance = ModelInstance( - provider_model_bundle=model_config.provider_model_bundle, model=model_config.model - ) - - model_context_tokens = 
model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE) - - max_tokens = 0 - for parameter_rule in model_config.model_schema.parameter_rules: - if parameter_rule.name == "max_tokens" or ( - parameter_rule.use_template and parameter_rule.use_template == "max_tokens" - ): - max_tokens = ( - model_config.parameters.get(parameter_rule.name) - or model_config.parameters.get(parameter_rule.use_template or "") - ) or 0 - - if model_context_tokens is None: - return -1 - - if max_tokens is None: - max_tokens = 0 - - # get prompt messages without memory and context - prompt_messages, stop = self.organize_prompt_messages( - app_record=app_record, - model_config=model_config, - prompt_template_entity=prompt_template_entity, - inputs=inputs, - files=files, - query=query, - ) - - prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages) - - rest_tokens: int = model_context_tokens - max_tokens - prompt_tokens - if rest_tokens < 0: - raise InvokeBadRequestError( - "Query or prefix prompt is too long, you can reduce the prefix prompt, " - "or shrink the max token, or switch to a llm with a larger token limit size." - ) - - return rest_tokens - - def recalc_llm_max_tokens( - self, model_config: ModelConfigWithCredentialsEntity, prompt_messages: list[PromptMessage] - ): - # recalc max_tokens if sum(prompt_token + max_tokens) over model token limit - model_instance = ModelInstance( - provider_model_bundle=model_config.provider_model_bundle, model=model_config.model - ) - - model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE) - - max_tokens = 0 - for parameter_rule in model_config.model_schema.parameter_rules: - if parameter_rule.name == "max_tokens" or ( - parameter_rule.use_template and parameter_rule.use_template == "max_tokens" - ): - max_tokens = ( - model_config.parameters.get(parameter_rule.name) - or model_config.parameters.get(parameter_rule.use_template or "") - ) or 0 - - if model_context_tokens is None: - return -1 - - if max_tokens is None: - max_tokens = 0 - - prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages) - - if prompt_tokens + max_tokens > model_context_tokens: - max_tokens = max(model_context_tokens - prompt_tokens, 16) - - for parameter_rule in model_config.model_schema.parameter_rules: - if parameter_rule.name == "max_tokens" or ( - parameter_rule.use_template and parameter_rule.use_template == "max_tokens" - ): - model_config.parameters[parameter_rule.name] = max_tokens - def organize_prompt_messages( self, app_record: App, diff --git a/api/core/app/apps/chat/app_runner.py b/api/core/app/apps/chat/app_runner.py index 425f1ab7ef..878d562bb0 100644 --- a/api/core/app/apps/chat/app_runner.py +++ b/api/core/app/apps/chat/app_runner.py @@ -50,20 +50,6 @@ class ChatAppRunner(AppRunner): query = application_generate_entity.query files = application_generate_entity.files - # Pre-calculate the number of tokens of the prompt messages, - # and return the rest number of tokens by model context token size limit and max token size limit. - # If the rest number of tokens is not enough, raise exception. 
- # Include: prompt template, inputs, query(optional), files(optional) - # Not Include: memory, external data, dataset context - self.get_pre_calculate_rest_tokens( - app_record=app_record, - model_config=application_generate_entity.model_conf, - prompt_template_entity=app_config.prompt_template, - inputs=inputs, - files=files, - query=query, - ) - memory = None if application_generate_entity.conversation_id: # get memory of conversation (read-only) @@ -194,9 +180,6 @@ class ChatAppRunner(AppRunner): if hosting_moderation_result: return - # Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit - self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages) - # Invoke model model_instance = ModelInstance( provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle, diff --git a/api/core/app/apps/completion/app_runner.py b/api/core/app/apps/completion/app_runner.py index 41278b75b4..79fcaf15d2 100644 --- a/api/core/app/apps/completion/app_runner.py +++ b/api/core/app/apps/completion/app_runner.py @@ -43,20 +43,6 @@ class CompletionAppRunner(AppRunner): query = application_generate_entity.query files = application_generate_entity.files - # Pre-calculate the number of tokens of the prompt messages, - # and return the rest number of tokens by model context token size limit and max token size limit. - # If the rest number of tokens is not enough, raise exception. - # Include: prompt template, inputs, query(optional), files(optional) - # Not Include: memory, external data, dataset context - self.get_pre_calculate_rest_tokens( - app_record=app_record, - model_config=application_generate_entity.model_conf, - prompt_template_entity=app_config.prompt_template, - inputs=inputs, - files=files, - query=query, - ) - # organize all inputs and template to prompt messages # Include: prompt template, inputs, query(optional), files(optional) prompt_messages, stop = self.organize_prompt_messages( @@ -152,9 +138,6 @@ class CompletionAppRunner(AppRunner): if hosting_moderation_result: return - # Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit - self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages) - # Invoke model model_instance = ModelInstance( provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle, diff --git a/api/core/memory/token_buffer_memory.py b/api/core/memory/token_buffer_memory.py index 003a0c85b1..2a67024e1d 100644 --- a/api/core/memory/token_buffer_memory.py +++ b/api/core/memory/token_buffer_memory.py @@ -26,7 +26,7 @@ class TokenBufferMemory: self.model_instance = model_instance def get_history_prompt_messages( - self, max_token_limit: int = 2000, message_limit: Optional[int] = None + self, max_token_limit: int = 100000, message_limit: Optional[int] = None ) -> Sequence[PromptMessage]: """ Get history prompt messages. 
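Note on the removals above: the deleted AppRunner helpers (get_pre_calculate_rest_tokens and recalc_llm_max_tokens) budgeted the prompt against the model's context window before every LLM call, which is also why TokenBufferMemory's history limit now falls back to a fixed 100000-token default rather than a computed rest-token value. The snippet below is a minimal standalone sketch of the arithmetic the removed code performed, reconstructed from the deleted lines; the function names and example numbers are illustrative and not part of the codebase.

    # Sketch of the token-budget arithmetic removed in this release (illustrative names).
    def remaining_prompt_budget(context_size: int, max_tokens: int, prompt_tokens: int) -> int:
        """Mirrors the deleted get_pre_calculate_rest_tokens: reserve the completion
        budget, then fail if the prompt does not fit in what is left."""
        rest = context_size - max_tokens - prompt_tokens
        if rest < 0:
            raise ValueError("Query or prefix prompt is too long for the model's context window.")
        return rest

    def clamp_max_tokens(context_size: int, max_tokens: int, prompt_tokens: int) -> int:
        """Mirrors the deleted recalc_llm_max_tokens: shrink max_tokens (floor of 16)
        when prompt + completion would exceed the context window."""
        if prompt_tokens + max_tokens > context_size:
            return max(context_size - prompt_tokens, 16)
        return max_tokens

    # Example: a 200k-context model with an 8192-token completion budget.
    print(remaining_prompt_budget(200_000, 8_192, 100_000))  # 91808 tokens left for memory/context
    print(clamp_max_tokens(200_000, 8_192, 195_000))         # completion budget clamped to 5000

With these helpers gone, oversized prompts are left for the provider to reject rather than being pre-checked or clamped up front.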
diff --git a/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3.7-sonnet-v1.yaml b/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3.7-sonnet-v1.yaml
new file mode 100644
index 0000000000..6f95a91f0f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3.7-sonnet-v1.yaml
@@ -0,0 +1,115 @@
+model: eu.anthropic.claude-3-7-sonnet-20250219-v1:0
+label:
+  en_US: Claude 3.7 Sonnet(EU.Cross Region Inference)
+icon: icon_s_en.svg
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+  - name: enable_cache
+    label:
+      zh_Hans: 启用提示缓存
+      en_US: Enable Prompt Cache
+    type: boolean
+    required: false
+    default: true
+    help:
+      zh_Hans: 启用提示缓存可以提高性能并降低成本。Claude 3.7 Sonnet支持在system、messages和tools字段中使用缓存检查点。
+      en_US: Enable prompt caching to improve performance and reduce costs. Claude 3.7 Sonnet supports cache checkpoints in system, messages, and tools fields.
+  - name: reasoning_type
+    label:
+      zh_Hans: 推理配置
+      en_US: Reasoning Type
+    type: boolean
+    required: false
+    default: false
+    placeholder:
+      zh_Hans: 设置推理配置
+      en_US: Set reasoning configuration
+    help:
+      zh_Hans: 控制模型的推理能力。启用时,temperature将固定为1且top_p将被禁用。
+      en_US: Controls the model's reasoning capability. When enabled, temperature will be fixed to 1 and top_p will be disabled.
+  - name: reasoning_budget
+    show_on:
+      - variable: reasoning_type
+        value: true
+    label:
+      zh_Hans: 推理预算
+      en_US: Reasoning Budget
+    type: int
+    default: 1024
+    min: 0
+    max: 128000
+    help:
+      zh_Hans: 推理的预算限制(最小1024),必须小于max_tokens。仅在推理类型为enabled时可用。
+      en_US: Budget limit for reasoning (minimum 1024), must be less than max_tokens. Only available when reasoning type is enabled.
+
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    label:
+      zh_Hans: 最大token数
+      en_US: Max Tokens
+    type: int
+    default: 8192
+    min: 1
+    max: 128000
+    help:
+      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+  - name: temperature
+    use_template: temperature
+    required: false
+    label:
+      zh_Hans: 模型温度
+      en_US: Model Temperature
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
+    help:
+      zh_Hans: 生成内容的随机性。当推理功能启用时,该值将被固定为1。
+      en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
+  - name: top_p
+    show_on:
+      - variable: reasoning_type
+        value: disabled
+    use_template: top_p
+    label:
+      zh_Hans: Top P
+      en_US: Top P
+    required: false
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
+    help:
+      zh_Hans: 在核采样中的概率阈值。当推理功能启用时,该参数将被禁用。
+      en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    required: false
+    type: int
+    default: 0
+    min: 0
+    # tip docs from aws has error, max value is 500
+    max: 500
+    help:
+      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+ - name: response_format + use_template: response_format +pricing: + input: '0.003' + output: '0.015' + unit: '0.001' + currency: USD \ No newline at end of file diff --git a/api/core/model_runtime/model_providers/bedrock/llm/llm.py b/api/core/model_runtime/model_providers/bedrock/llm/llm.py index 29bd673d57..8d93d39ab5 100644 --- a/api/core/model_runtime/model_providers/bedrock/llm/llm.py +++ b/api/core/model_runtime/model_providers/bedrock/llm/llm.py @@ -58,6 +58,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel): # TODO There is invoke issue: context limit on Cohere Model, will add them after fixed. CONVERSE_API_ENABLED_MODEL_INFO = [ {"prefix": "anthropic.claude-v2", "support_system_prompts": True, "support_tool_use": False}, + {"prefix": "us.deepseek", "support_system_prompts": True, "support_tool_use": False}, {"prefix": "anthropic.claude-v1", "support_system_prompts": True, "support_tool_use": False}, {"prefix": "us.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True}, {"prefix": "eu.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True}, diff --git a/api/core/model_runtime/model_providers/bedrock/llm/us.deepseek-r1.yaml b/api/core/model_runtime/model_providers/bedrock/llm/us.deepseek-r1.yaml new file mode 100644 index 0000000000..1191e3bd79 --- /dev/null +++ b/api/core/model_runtime/model_providers/bedrock/llm/us.deepseek-r1.yaml @@ -0,0 +1,63 @@ +model: us.deepseek.r1-v1:0 +label: + en_US: DeepSeek-R1(US.Cross Region Inference) +icon: icon_s_en.svg +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: max_tokens + use_template: max_tokens + required: true + label: + zh_Hans: 最大token数 + en_US: Max Tokens + type: int + default: 8192 + min: 1 + max: 128000 + help: + zh_Hans: 停止前生成的最大令牌数。 + en_US: The maximum number of tokens to generate before stopping. + - name: temperature + use_template: temperature + required: false + label: + zh_Hans: 模型温度 + en_US: Model Temperature + type: float + default: 1 + min: 0.0 + max: 1.0 + help: + zh_Hans: 生成内容的随机性。当推理功能启用时,该值将被固定为1。 + en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1. + - name: top_p + show_on: + - variable: reasoning_type + value: disabled + use_template: top_p + label: + zh_Hans: Top P + en_US: Top P + required: false + type: float + default: 0.999 + min: 0.000 + max: 1.000 + help: + zh_Hans: 在核采样中的概率阈值。当推理功能启用时,该参数将被禁用。 + en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.001' + output: '0.005' + unit: '0.001' + currency: USD \ No newline at end of file diff --git a/api/core/model_runtime/model_providers/openai/llm/_position.yaml b/api/core/model_runtime/model_providers/openai/llm/_position.yaml index 0d3143c2ae..688d4dbd09 100644 --- a/api/core/model_runtime/model_providers/openai/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/openai/llm/_position.yaml @@ -1,3 +1,4 @@ +- gpt-4.1 - o1 - o1-2024-12-17 - o1-mini diff --git a/api/core/model_runtime/model_providers/openai/llm/gpt-4-1.yaml b/api/core/model_runtime/model_providers/openai/llm/gpt-4-1.yaml new file mode 100644 index 0000000000..f9f6edb4e6 --- /dev/null +++ b/api/core/model_runtime/model_providers/openai/llm/gpt-4-1.yaml @@ -0,0 +1,60 @@ +model: gpt-4.1 +label: + zh_Hans: gpt-4.1 + en_US: gpt-4.1 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call + - vision +model_properties: + mode: chat + context_size: 1047576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 32768 + - name: reasoning_effort + label: + zh_Hans: 推理工作 + en_US: Reasoning Effort + type: string + help: + zh_Hans: 限制推理模型的推理工作 + en_US: Constrains effort on reasoning for reasoning models + required: false + options: + - low + - medium + - high + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object + - json_schema + - name: json_schema + use_template: json_schema +pricing: + input: '2.00' + output: '8.00' + unit: '0.000001' + currency: USD \ No newline at end of file diff --git a/api/core/model_runtime/model_providers/openai/llm/llm.py b/api/core/model_runtime/model_providers/openai/llm/llm.py index e9d59a4ae4..37f7b33514 100644 --- a/api/core/model_runtime/model_providers/openai/llm/llm.py +++ b/api/core/model_runtime/model_providers/openai/llm/llm.py @@ -1057,7 +1057,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel): model = "gpt-4o" try: - encoding = tiktoken.encoding_for_model(model) + encoding = tiktoken.get_encoding(model) except KeyError: logger.warning("Warning: model not found. 
Using cl100k_base encoding.") model = "cl100k_base" diff --git a/api/core/workflow/nodes/code/code_node.py b/api/core/workflow/nodes/code/code_node.py index 2f82bf8c38..f52835e835 100644 --- a/api/core/workflow/nodes/code/code_node.py +++ b/api/core/workflow/nodes/code/code_node.py @@ -195,7 +195,7 @@ class CodeNode(BaseNode[CodeNodeData]): if output_config.type == "object": # check if output is object if not isinstance(result.get(output_name), dict): - if isinstance(result.get(output_name), type(None)): + if result.get(output_name) is None: transformed_result[output_name] = None else: raise OutputValidationError( @@ -223,7 +223,7 @@ class CodeNode(BaseNode[CodeNodeData]): elif output_config.type == "array[number]": # check if array of number available if not isinstance(result[output_name], list): - if isinstance(result[output_name], type(None)): + if result[output_name] is None: transformed_result[output_name] = None else: raise OutputValidationError( @@ -244,7 +244,7 @@ class CodeNode(BaseNode[CodeNodeData]): elif output_config.type == "array[string]": # check if array of string available if not isinstance(result[output_name], list): - if isinstance(result[output_name], type(None)): + if result[output_name] is None: transformed_result[output_name] = None else: raise OutputValidationError( @@ -265,7 +265,7 @@ class CodeNode(BaseNode[CodeNodeData]): elif output_config.type == "array[object]": # check if array of object available if not isinstance(result[output_name], list): - if isinstance(result[output_name], type(None)): + if result[output_name] is None: transformed_result[output_name] = None else: raise OutputValidationError( diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index 7e28aa7a3f..baa02c86f4 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -968,14 +968,12 @@ def _handle_memory_chat_mode( *, memory: TokenBufferMemory | None, memory_config: MemoryConfig | None, - model_config: ModelConfigWithCredentialsEntity, + model_config: ModelConfigWithCredentialsEntity, # TODO(-LAN-): Needs to remove ) -> Sequence[PromptMessage]: memory_messages: Sequence[PromptMessage] = [] # Get messages from memory for chat model if memory and memory_config: - rest_tokens = _calculate_rest_token(prompt_messages=[], model_config=model_config) memory_messages = memory.get_history_prompt_messages( - max_token_limit=rest_tokens, message_limit=memory_config.window.size if memory_config.window.enabled else None, ) return memory_messages diff --git a/api/poetry.lock b/api/poetry.lock index 1fdc292b92..9f66d07b5f 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -10473,44 +10473,44 @@ client = ["SQLAlchemy (>=1.4,<3)"] [[package]] name = "tiktoken" -version = "0.8.0" +version = "0.9.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.9" groups = ["main"] markers = "python_version == \"3.11\" or python_version >= \"3.12\"" files = [ - {file = "tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e"}, - {file = "tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9269348cb650726f44dd3bbb3f9110ac19a8dcc8f54949ad3ef652ca22a38e21"}, - {file = "tiktoken-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e13f37bc4ef2d012731e93e0fef21dc3b7aea5bb9009618de9a4026844e560"}, - {file = 
"tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d13c981511331eac0d01a59b5df7c0d4060a8be1e378672822213da51e0a2"}, - {file = "tiktoken-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6b2ddbc79a22621ce8b1166afa9f9a888a664a579350dc7c09346a3b5de837d9"}, - {file = "tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005"}, - {file = "tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1"}, - {file = "tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a"}, - {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d"}, - {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47"}, - {file = "tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419"}, - {file = "tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99"}, - {file = "tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586"}, - {file = "tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b"}, - {file = "tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab"}, - {file = "tiktoken-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2908c0d043a7d03ebd80347266b0e58440bdef5564f84f4d29fb235b5df3b04"}, - {file = "tiktoken-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:294440d21a2a51e12d4238e68a5972095534fe9878be57d905c476017bff99fc"}, - {file = "tiktoken-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:d8f3192733ac4d77977432947d563d7e1b310b96497acd3c196c9bddb36ed9db"}, - {file = "tiktoken-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:02be1666096aff7da6cbd7cdaa8e7917bfed3467cd64b38b1f112e96d3b06a24"}, - {file = "tiktoken-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94ff53c5c74b535b2cbf431d907fc13c678bbd009ee633a2aca269a04389f9a"}, - {file = "tiktoken-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b231f5e8982c245ee3065cd84a4712d64692348bc609d84467c57b4b72dcbc5"}, - {file = "tiktoken-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4177faa809bd55f699e88c96d9bb4635d22e3f59d635ba6fd9ffedf7150b9953"}, - {file = "tiktoken-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5376b6f8dc4753cd81ead935c5f518fa0fbe7e133d9e25f648d8c4dabdd4bad7"}, - {file = "tiktoken-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:18228d624807d66c87acd8f25fc135665617cab220671eb65b50f5d70fa51f69"}, - {file = "tiktoken-0.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e17807445f0cf1f25771c9d86496bd8b5c376f7419912519699f3cc4dc5c12e"}, - {file = "tiktoken-0.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:886f80bd339578bbdba6ed6d0567a0d5c6cfe198d9e587ba6c447654c65b8edc"}, - {file = 
"tiktoken-0.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6adc8323016d7758d6de7313527f755b0fc6c72985b7d9291be5d96d73ecd1e1"}, - {file = "tiktoken-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b591fb2b30d6a72121a80be24ec7a0e9eb51c5500ddc7e4c2496516dd5e3816b"}, - {file = "tiktoken-0.8.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:845287b9798e476b4d762c3ebda5102be87ca26e5d2c9854002825d60cdb815d"}, - {file = "tiktoken-0.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:1473cfe584252dc3fa62adceb5b1c763c1874e04511b197da4e6de51d6ce5a02"}, - {file = "tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2"}, + {file = "tiktoken-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:586c16358138b96ea804c034b8acf3f5d3f0258bd2bc3b0227af4af5d622e382"}, + {file = "tiktoken-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9c59ccc528c6c5dd51820b3474402f69d9a9e1d656226848ad68a8d5b2e5108"}, + {file = "tiktoken-0.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0968d5beeafbca2a72c595e8385a1a1f8af58feaebb02b227229b69ca5357fd"}, + {file = "tiktoken-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92a5fb085a6a3b7350b8fc838baf493317ca0e17bd95e8642f95fc69ecfed1de"}, + {file = "tiktoken-0.9.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:15a2752dea63d93b0332fb0ddb05dd909371ededa145fe6a3242f46724fa7990"}, + {file = "tiktoken-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:26113fec3bd7a352e4b33dbaf1bd8948de2507e30bd95a44e2b1156647bc01b4"}, + {file = "tiktoken-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f32cc56168eac4851109e9b5d327637f15fd662aa30dd79f964b7c39fbadd26e"}, + {file = "tiktoken-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:45556bc41241e5294063508caf901bf92ba52d8ef9222023f83d2483a3055348"}, + {file = "tiktoken-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03935988a91d6d3216e2ec7c645afbb3d870b37bcb67ada1943ec48678e7ee33"}, + {file = "tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b3d80aad8d2c6b9238fc1a5524542087c52b860b10cbf952429ffb714bc1136"}, + {file = "tiktoken-0.9.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b2a21133be05dc116b1d0372af051cd2c6aa1d2188250c9b553f9fa49301b336"}, + {file = "tiktoken-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:11a20e67fdf58b0e2dea7b8654a288e481bb4fc0289d3ad21291f8d0849915fb"}, + {file = "tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03"}, + {file = "tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210"}, + {file = "tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794"}, + {file = "tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22"}, + {file = "tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2"}, + {file = "tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16"}, + {file = "tiktoken-0.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:2b0e8e05a26eda1249e824156d537015480af7ae222ccb798e5234ae0285dbdb"}, + {file = "tiktoken-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:27d457f096f87685195eea0165a1807fae87b97b2161fe8c9b1df5bd74ca6f63"}, + {file = "tiktoken-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cf8ded49cddf825390e36dd1ad35cd49589e8161fdcb52aa25f0583e90a3e01"}, + {file = "tiktoken-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc156cb314119a8bb9748257a2eaebd5cc0753b6cb491d26694ed42fc7cb3139"}, + {file = "tiktoken-0.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cd69372e8c9dd761f0ab873112aba55a0e3e506332dd9f7522ca466e817b1b7a"}, + {file = "tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95"}, + {file = "tiktoken-0.9.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c6386ca815e7d96ef5b4ac61e0048cd32ca5a92d5781255e13b31381d28667dc"}, + {file = "tiktoken-0.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:75f6d5db5bc2c6274b674ceab1615c1778e6416b14705827d19b40e6355f03e0"}, + {file = "tiktoken-0.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e15b16f61e6f4625a57a36496d28dd182a8a60ec20a534c5343ba3cafa156ac7"}, + {file = "tiktoken-0.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebcec91babf21297022882344c3f7d9eed855931466c3311b1ad6b64befb3df"}, + {file = "tiktoken-0.9.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e5fd49e7799579240f03913447c0cdfa1129625ebd5ac440787afc4345990427"}, + {file = "tiktoken-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:26242ca9dc8b58e875ff4ca078b9a94d2f0813e6a535dcd2205df5d49d927cc7"}, + {file = "tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d"}, ] [package.dependencies] @@ -12389,4 +12389,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "d197cdff507a70323c1d6aca11609188f54970f67715af744fe6def15b7776fd" +content-hash = "0df8aef68385b6596306fd18af317a835023d648eb5028cd57ec463f176e4c0f" diff --git a/api/pyproject.toml b/api/pyproject.toml index 72ec6d287e..e41e8ba86f 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -85,7 +85,7 @@ sentry-sdk = { version = "~1.44.1", extras = ["flask"] } sqlalchemy = "~2.0.29" starlette = "0.41.0" tencentcloud-sdk-python-hunyuan = "~3.0.1294" -tiktoken = "~0.8.0" +tiktoken = "^0.9.0" tokenizers = "~0.15.0" transformers = "~4.35.0" unstructured = { version = "~0.16.1", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] } diff --git a/api/services/app_dsl_service.py b/api/services/app_dsl_service.py index 15119247f8..8378f3b02d 100644 --- a/api/services/app_dsl_service.py +++ b/api/services/app_dsl_service.py @@ -55,13 +55,19 @@ def _check_version_compatibility(imported_version: str) -> ImportStatus: except version.InvalidVersion: return ImportStatus.FAILED - # Compare major version and minor version - if current_ver.major != imported_ver.major or current_ver.minor != imported_ver.minor: + # If imported version is newer than current, always return PENDING + if imported_ver > current_ver: return ImportStatus.PENDING - if current_ver.micro != imported_ver.micro: + # If imported version is older than current's major, return PENDING + if imported_ver.major < current_ver.major: + return ImportStatus.PENDING + + # If imported version is older than current's minor, return COMPLETED_WITH_WARNINGS + if 
imported_ver.minor < current_ver.minor: return ImportStatus.COMPLETED_WITH_WARNINGS + # If imported version equals or is older than current's micro, return COMPLETED return ImportStatus.COMPLETED diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index d760ac679d..fc6bf106cb 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env services: # API service api: - image: langgenius/dify-api:0.15.6 + image: langgenius/dify-api:0.15.7 restart: always environment: # Use the shared environment variables. @@ -25,7 +25,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:0.15.6 + image: langgenius/dify-api:0.15.7 restart: always environment: # Use the shared environment variables. @@ -47,7 +47,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:0.15.6 + image: langgenius/dify-web:0.15.7 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index a37527f8b6..72a42c9841 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -394,7 +394,7 @@ x-shared-env: &shared-api-worker-env services: # API service api: - image: langgenius/dify-api:0.15.6 + image: langgenius/dify-api:0.15.7 restart: always environment: # Use the shared environment variables. @@ -417,7 +417,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:0.15.6 + image: langgenius/dify-api:0.15.7 restart: always environment: # Use the shared environment variables. @@ -439,7 +439,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:0.15.6 + image: langgenius/dify-web:0.15.7 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} diff --git a/web/package.json b/web/package.json index b8905fc6f1..be8e70e62c 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "dify-web", - "version": "0.15.6", + "version": "0.15.7", "private": true, "engines": { "node": ">=18.17.0"
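The refined version-compatibility logic in api/services/app_dsl_service.py above is easiest to read as a small decision ladder. The sketch below mirrors the new branch order in isolation, assuming packaging's version parser as in the service itself; the ImportStatus enum is replaced by plain strings, the InvalidVersion handling is omitted, and the helper name and example versions are illustrative.

    from packaging import version

    def check_version_compatibility(imported: str, current: str) -> str:
        """Standalone mirror of the refined _check_version_compatibility branches."""
        imported_ver = version.parse(imported)
        current_ver = version.parse(current)
        if imported_ver > current_ver:
            return "PENDING"                      # DSL newer than this deployment
        if imported_ver.major < current_ver.major:
            return "PENDING"                      # older major version
        if imported_ver.minor < current_ver.minor:
            return "COMPLETED_WITH_WARNINGS"      # older minor version
        return "COMPLETED"                        # same major.minor, any micro

    print(check_version_compatibility("0.16.0", "0.15.7"))  # PENDING
    print(check_version_compatibility("0.14.9", "0.15.7"))  # COMPLETED_WITH_WARNINGS
    print(check_version_compatibility("0.15.3", "0.15.7"))  # COMPLETED

Compared with the previous strict major/minor equality check, older DSL files within the same major version now import with a warning instead of being held as pending, and micro-level differences import cleanly.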