diff --git a/api/core/model_runtime/model_providers/bedrock/bedrock.py b/api/core/model_runtime/model_providers/bedrock/bedrock.py index 96cb90280e..e99bc52ff8 100644 --- a/api/core/model_runtime/model_providers/bedrock/bedrock.py +++ b/api/core/model_runtime/model_providers/bedrock/bedrock.py @@ -17,9 +17,11 @@ class BedrockProvider(ModelProvider): """ try: model_instance = self.get_model_instance(ModelType.LLM) - bedrock_validate_model_name = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1') + + # Use `amazon.titan-text-lite-v1` model by default for validating credentials + model_for_validation = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1') model_instance.validate_credentials( - model=bedrock_validate_model_name, + model=model_for_validation, credentials=credentials ) except CredentialsValidateFailedError as ex: diff --git a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml index e1923f8f8a..35374c69ba 100644 --- a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml +++ b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml @@ -74,7 +74,7 @@ provider_credential_schema: label: en_US: Available Model Name zh_Hans: 可用模型名称 - type: secret-input + type: text-input placeholder: en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation. zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如:amazon.titan-text-lite-v1)
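To make the fallback concrete, here is a minimal sketch of how the renamed `model_for_validation` credential resolves at validation time. Only `model_for_validation` and its default come from this diff; the other credential key is a hypothetical placeholder.

```python
# Minimal sketch: resolution of the optional `model_for_validation` field.
# `aws_region` is a hypothetical placeholder key, not part of this diff.
credentials = {
    "aws_region": "us-east-1",
    "model_for_validation": "anthropic.claude-v2:1",  # any model the account can invoke
}

model_for_validation = credentials.get("model_for_validation", "amazon.titan-text-lite-v1")
assert model_for_validation == "anthropic.claude-v2:1"

# With the field omitted, validation falls back to the default model.
assert {}.get("model_for_validation", "amazon.titan-text-lite-v1") == "amazon.titan-text-lite-v1"
```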
diff --git a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-instant-v1.yaml b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-instant-v1.yaml index 94b741f50d..8422f079c5 100644 --- a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-instant-v1.yaml +++ b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-instant-v1.yaml @@ -1,33 +1,50 @@ model: anthropic.claude-instant-v1 label: - en_US: Claude Instant V1 + en_US: Claude Instant 1 model_type: llm model_properties: mode: chat context_size: 100000 parameter_rules: - - name: temperature - use_template: temperature - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top K - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. - required: false - default: 250 - min: 0 - max: 500 - - name: max_tokens_to_sample + - name: max_tokens use_template: max_tokens required: true + type: int default: 4096 min: 1 max: 4096 + help: + zh_Hans: 停止前生成的最大令牌数。请注意，Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。 + en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter. + - name: temperature + use_template: temperature + required: false + type: float + default: 1 + min: 0.0 + max: 1.0 + help: + zh_Hans: 生成内容的随机性。 + en_US: The amount of randomness injected into the response. + - name: top_p + required: false + type: float + default: 0.999 + min: 0.000 + max: 1.000 + help: + zh_Hans: 在核采样中，Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布，并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p，但不能同时更改两者。 + en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both. + - name: top_k + required: false + type: int + default: 0 + min: 0 + # the AWS docs have an error here; the max value is 500 + max: 500 + help: + zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。 + en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses. pricing: input: '0.0008' output: '0.0024' diff --git a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v1.yaml b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v1.yaml index d2eba135f9..cb2271d401 100644 --- a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v1.yaml +++ b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v1.yaml @@ -1,33 +1,50 @@ model: anthropic.claude-v1 label: - en_US: Claude V1 + en_US: Claude 1 model_type: llm model_properties: mode: chat context_size: 100000 parameter_rules: - - name: temperature - use_template: temperature - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top K - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. - required: false - default: 250 - min: 0 - max: 500 - - name: max_tokens_to_sample + - name: max_tokens use_template: max_tokens required: true + type: int default: 4096 min: 1 max: 4096 + help: + zh_Hans: 停止前生成的最大令牌数。请注意，Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。 + en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter. + - name: temperature + use_template: temperature + required: false + type: float + default: 1 + min: 0.0 + max: 1.0 + help: + zh_Hans: 生成内容的随机性。 + en_US: The amount of randomness injected into the response. + - name: top_p + required: false + type: float + default: 0.999 + min: 0.000 + max: 1.000 + help: + zh_Hans: 在核采样中，Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布，并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p，但不能同时更改两者。 + en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both. + - name: top_k + required: false + type: int + default: 0 + min: 0 + # the AWS docs have an error here; the max value is 500 + max: 500 + help: + zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。 + en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses. pricing: input: '0.008' output: '0.024'
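All four Claude YAMLs in this patch (the two above and the two that follow) declare the same rewritten rule set, replacing the legacy `max_tokens_to_sample` with the Messages-style `max_tokens`. As a readability aid, here is that rule set restated as the default parameter dict it yields; the values come from the YAML, while the dict itself is only illustrative.

```python
# The defaults declared by the rewritten parameter rules, restated as a
# plain dict for readability. Values come from the YAML above; the dict
# shape is illustrative, not a schema defined by this patch.
default_model_parameters = {
    "max_tokens": 4096,   # required; 1..4096 for these Claude models
    "temperature": 1.0,   # optional; 0.0..1.0
    "top_p": 0.999,       # optional; 0.0..1.0
    "top_k": 0,           # optional; 0..500 despite what the AWS docs state
}
```

Per the help text, tune either temperature or top_p for a given call, not both.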
diff --git a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.1.yaml b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.1.yaml index 3490e57427..1a3239c85e 100644 --- a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.1.yaml +++ b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.1.yaml @@ -1,33 +1,50 @@ model: anthropic.claude-v2:1 label: - en_US: Claude V2.1 + en_US: Claude 2.1 model_type: llm model_properties: mode: chat context_size: 200000 parameter_rules: - - name: temperature - use_template: temperature - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top K - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. - required: false - default: 250 - min: 0 - max: 500 - - name: max_tokens_to_sample + - name: max_tokens use_template: max_tokens required: true + type: int default: 4096 min: 1 max: 4096 + help: + zh_Hans: 停止前生成的最大令牌数。请注意，Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。 + en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter. + - name: temperature + use_template: temperature + required: false + type: float + default: 1 + min: 0.0 + max: 1.0 + help: + zh_Hans: 生成内容的随机性。 + en_US: The amount of randomness injected into the response. + - name: top_p + required: false + type: float + default: 0.999 + min: 0.000 + max: 1.000 + help: + zh_Hans: 在核采样中，Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布，并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p，但不能同时更改两者。 + en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both. + - name: top_k + required: false + type: int + default: 0 + min: 0 + # the AWS docs have an error here; the max value is 500 + max: 500 + help: + zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。 + en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses. pricing: input: '0.008' output: '0.024'
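Since the flattened pricing blocks are easy to misread, a quick cost check against the Claude 2.1 numbers above. The per-1,000-token scale is an assumption: the pricing `unit` and `currency` fields are not visible in these hunks.

```python
# Back-of-envelope cost for one Claude 2.1 call using the pricing above.
# Assumption: prices are USD per 1,000 tokens; the pricing `unit` and
# `currency` fields are not shown in this hunk, so the scale is inferred.
input_price, output_price = 0.008, 0.024
prompt_tokens, completion_tokens = 1200, 300

cost = prompt_tokens / 1000 * input_price + completion_tokens / 1000 * output_price
print(f"${cost:.4f}")  # -> $0.0168
```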
diff --git a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.yaml b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.yaml index f2ac2e013d..0343e3bbec 100644 --- a/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.yaml +++ b/api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.yaml @@ -1,33 +1,50 @@ model: anthropic.claude-v2 label: - en_US: Claude V2 + en_US: Claude 2 model_type: llm model_properties: mode: chat context_size: 100000 parameter_rules: - - name: temperature - use_template: temperature - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top K - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. - required: false - default: 250 - min: 0 - max: 500 - - name: max_tokens_to_sample + - name: max_tokens use_template: max_tokens required: true + type: int default: 4096 min: 1 max: 4096 + help: + zh_Hans: 停止前生成的最大令牌数。请注意，Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。 + en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter. + - name: temperature + use_template: temperature + required: false + type: float + default: 1 + min: 0.0 + max: 1.0 + help: + zh_Hans: 生成内容的随机性。 + en_US: The amount of randomness injected into the response. + - name: top_p + required: false + type: float + default: 0.999 + min: 0.000 + max: 1.000 + help: + zh_Hans: 在核采样中，Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布，并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p，但不能同时更改两者。 + en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both. + - name: top_k + required: false + type: int + default: 0 + min: 0 + # the AWS docs have an error here; the max value is 500 + max: 500 + help: + zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。 + en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses. pricing: input: '0.008' output: '0.024' diff --git a/api/core/model_runtime/model_providers/bedrock/llm/llm.py b/api/core/model_runtime/model_providers/bedrock/llm/llm.py index b274cec35f..0e256999c0 100644 --- a/api/core/model_runtime/model_providers/bedrock/llm/llm.py +++ b/api/core/model_runtime/model_providers/bedrock/llm/llm.py @@ -72,16 +72,16 @@ class BedrockLargeLanguageModel(LargeLanguageModel): :return: full response or stream response chunk generator result """ - # invoke claude 3 models via anthropic official SDK - if "anthropic.claude-3" in model: - return self._invoke_claude3(model, credentials, prompt_messages, model_parameters, stop, stream, user) - # invoke model + # invoke anthropic models via anthropic official SDK + if "anthropic" in model: + return self._generate_anthropic(model, credentials, prompt_messages, model_parameters, stop, stream, user) + # invoke other models via boto3 client return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user) - def _invoke_claude3(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, + def _generate_anthropic(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]: """ - Invoke Claude3 large language model + Invoke Anthropic large language model :param model: model name :param credentials: model credentials @@ -114,7 +114,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel): # ref: https://github.com/anthropics/anthropic-sdk-python/blob/e84645b07ca5267066700a104b4d8d6a8da1383d/src/anthropic/resources/messages.py#L465 # extra_model_kwargs['metadata'] = message_create_params.Metadata(user_id=user) - system, prompt_message_dicts = self._convert_claude3_prompt_messages(prompt_messages) + system, prompt_message_dicts = self._convert_claude_prompt_messages(prompt_messages) if system: extra_model_kwargs['system'] = system
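The first llm.py hunk is the behavioral core of the patch: dispatch now keys on the substring "anthropic" rather than "anthropic.claude-3", so Claude 1, 2, 2.1, and Instant take the Anthropic SDK path alongside Claude 3. A toy sketch of the widened routing; the handler names match this diff, but the standalone function is illustrative.

```python
# Toy illustration of the widened dispatch in _invoke. The handler names
# match this diff; the routing function itself is a stand-in.
def route(model: str) -> str:
    if "anthropic" in model:          # previously: "anthropic.claude-3"
        return "_generate_anthropic"  # official Anthropic SDK path
    return "_generate"                # boto3 invoke_model path

assert route("anthropic.claude-v2:1") == "_generate_anthropic"  # newly covered
assert route("anthropic.claude-3-sonnet-20240229-v1:0") == "_generate_anthropic"
assert route("amazon.titan-text-lite-v1") == "_generate"
```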
@@ -128,11 +128,11 @@ class BedrockLargeLanguageModel(LargeLanguageModel): ) if stream: - return self._handle_claude3_stream_response(model, credentials, response, prompt_messages) + return self._handle_claude_stream_response(model, credentials, response, prompt_messages) - return self._handle_claude3_response(model, credentials, response, prompt_messages) + return self._handle_claude_response(model, credentials, response, prompt_messages) - def _handle_claude3_response(self, model: str, credentials: dict, response: Message, + def _handle_claude_response(self, model: str, credentials: dict, response: Message, prompt_messages: list[PromptMessage]) -> LLMResult: """ Handle llm chat response @@ -172,7 +172,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel): return response - def _handle_claude3_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent], + def _handle_claude_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent], prompt_messages: list[PromptMessage], ) -> Generator: """ Handle llm chat stream response @@ -231,7 +231,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel): except Exception as ex: raise InvokeError(str(ex)) - def _calc_claude3_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage: + def _calc_claude_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage: """ Calculate response usage @@ -275,7 +275,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel): return usage - def _convert_claude3_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]: + def _convert_claude_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]: """ Convert prompt messages to dict list and system """ @@ -295,11 +295,11 @@ class BedrockLargeLanguageModel(LargeLanguageModel): prompt_message_dicts = [] for message in prompt_messages: if not isinstance(message, SystemPromptMessage): - prompt_message_dicts.append(self._convert_claude3_prompt_message_to_dict(message)) + prompt_message_dicts.append(self._convert_claude_prompt_message_to_dict(message)) return system, prompt_message_dicts - def _convert_claude3_prompt_message_to_dict(self, message: PromptMessage) -> dict: + def _convert_claude_prompt_message_to_dict(self, message: PromptMessage) -> dict: """ Convert PromptMessage to dict """ @@ -405,7 +405,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel): if "anthropic.claude-3" in model: try: - self._invoke_claude3(model=model, + self._generate_anthropic(model=model, credentials=credentials, prompt_messages=[{"role": "user", "content": "ping"}], model_parameters={},
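For readers unfamiliar with why `_convert_claude_prompt_messages` returns a `system` string separately: the Anthropic Messages API takes the system prompt as a top-level field rather than as a message in the list. A simplified sketch of that split follows; plain dicts stand in for Dify's PromptMessage classes, and multimodal content is ignored.

```python
# Simplified sketch of the system/messages split performed by
# _convert_claude_prompt_messages. Plain dicts stand in for Dify's
# PromptMessage classes; multimodal content is ignored here.
def convert(prompt_messages: list[dict]) -> tuple[str, list[dict]]:
    system = "\n".join(m["content"] for m in prompt_messages if m["role"] == "system")
    others = [m for m in prompt_messages if m["role"] != "system"]
    return system, others

system, message_dicts = convert([
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "ping"},
])
assert system == "You are terse."
assert message_dicts == [{"role": "user", "content": "ping"}]
```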