let claude models in bedrock support the response_format parameter (#8220)
Co-authored-by: duyalei <>
parent fe8191b899
commit f515af2232
@@ -52,6 +52,8 @@ parameter_rules:
     help:
       zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
       en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+  - name: response_format
+    use_template: response_format
 pricing:
   input: '0.00025'
   output: '0.00125'

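Each of the six YAML hunks in this commit makes the same two-line addition to a different Claude model file: a `response_format` rule appended to `parameter_rules`, with its definition delegated to Dify's shared `use_template: response_format` parameter template (only the per-file pricing differs). As a sketch of where the lines land; the surrounding fields are assumed from typical Dify model configs, not shown in this diff:

```yaml
parameter_rules:
  # ... existing rules such as temperature, top_p, max_tokens ...
  - name: top_k
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format            # added by this commit
    use_template: response_format    # label, type, and options come from the shared template
pricing:
  input: '0.00025'
  output: '0.00125'
```
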
@@ -52,6 +52,8 @@ parameter_rules:
     help:
       zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
       en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+  - name: response_format
+    use_template: response_format
 pricing:
   input: '0.015'
   output: '0.075'

@@ -51,6 +51,8 @@ parameter_rules:
     help:
       zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
       en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+  - name: response_format
+    use_template: response_format
 pricing:
   input: '0.003'
   output: '0.015'

@@ -51,6 +51,8 @@ parameter_rules:
     help:
       zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
       en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+  - name: response_format
+    use_template: response_format
 pricing:
   input: '0.003'
   output: '0.015'

@@ -45,6 +45,8 @@ parameter_rules:
     help:
       zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
       en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+  - name: response_format
+    use_template: response_format
 pricing:
   input: '0.008'
   output: '0.024'

@@ -45,6 +45,8 @@ parameter_rules:
     help:
       zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
       en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+  - name: response_format
+    use_template: response_format
 pricing:
   input: '0.008'
   output: '0.024'

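With the rule declared, a caller passes `response_format` through `model_parameters` like any other tuned parameter; Dify's `LargeLanguageModel` base class routes such calls into the `_code_block_mode_wrapper` added below. A minimal invocation sketch, with placeholder credentials and model id (the wrapper is normally reached via `invoke()`, not called directly):

```python
from core.model_runtime.entities.message_entities import (
    SystemPromptMessage,
    UserPromptMessage,
)

# Sketch only: credential keys and the model id are placeholders.
model = BedrockLargeLanguageModel()
result = model.invoke(
    model="anthropic.claude-3-haiku-20240307-v1:0",  # placeholder Bedrock model id
    credentials={"aws_region": "us-east-1"},         # placeholder credentials
    prompt_messages=[
        SystemPromptMessage(content='Answer with {"answer": "<city name>"}.'),
        UserPromptMessage(content="What is the capital of France?"),
    ],
    model_parameters={"response_format": "JSON"},    # triggers code block mode
    stream=False,
)
```
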
@@ -20,6 +20,7 @@ from botocore.exceptions import (
 from PIL.Image import Image

 # local import
+from core.model_runtime.callbacks.base_callback import Callback
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import (
     AssistantPromptMessage,

@@ -44,6 +45,14 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel

 logger = logging.getLogger(__name__)

+ANTHROPIC_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
+The structure of the {{block}} object you can found in the instructions, use {"answer": "$your_answer"} as the default structure
+if you are not sure about the structure.
+
+<instructions>
+{{instructions}}
+</instructions>
+"""

 class BedrockLargeLanguageModel(LargeLanguageModel):

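The wrapper added below fills this template with two plain `str.replace` calls, substituting `{{instructions}}` first and `{{block}}` second. An illustrative substitution (the instruction text is made up):

```python
# Mirrors the two .replace() calls in _code_block_mode_wrapper below.
prompt = ANTHROPIC_BLOCK_MODE_PROMPT.replace(
    "{{instructions}}", 'Answer with {"answer": "<city name>"}.'
).replace("{{block}}", "JSON")
# prompt now begins:
#   You should always follow the instructions and output a valid JSON object.
```
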
@@ -70,6 +79,40 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
         logger.info(f"current model id: {model_id} did not support by Converse API")
         return None

+    def _code_block_mode_wrapper(
+        self,
+        model: str,
+        credentials: dict,
+        prompt_messages: list[PromptMessage],
+        model_parameters: dict,
+        tools: Optional[list[PromptMessageTool]] = None,
+        stop: Optional[list[str]] = None,
+        stream: bool = True,
+        user: Optional[str] = None,
+        callbacks: list[Callback] = None,
+    ) -> Union[LLMResult, Generator]:
+        """
+        Code block mode wrapper for invoking large language model
+        """
+        if model_parameters.get("response_format"):
+            stop = stop or []
+            if "```\n" not in stop:
+                stop.append("```\n")
+            if "\n```" not in stop:
+                stop.append("\n```")
+            response_format = model_parameters.pop("response_format")
+            format_prompt = SystemPromptMessage(
+                content=ANTHROPIC_BLOCK_MODE_PROMPT.replace("{{instructions}}", prompt_messages[0].content).replace(
+                    "{{block}}", response_format
+                )
+            )
+            if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
+                prompt_messages[0] = format_prompt
+            else:
+                prompt_messages.insert(0, format_prompt)
+            prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))
+        return self._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+
     def _invoke(
         self,
         model: str,
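Mechanically, the wrapper rewrites the conversation before delegating to `_invoke`: it swaps in the block-mode system prompt (or prepends one if the first message is not a system message), registers the closing code fence, with leading or trailing newline, as stop sequences, and pre-fills an assistant turn that opens a fenced block tagged with the requested format, so Claude continues inside the fence and generation halts at the closing fence. A hedged sketch of what the model sees and how a caller might recover the payload (the helper is hypothetical, not part of this commit):

```python
import json

# After the wrapper runs with response_format="JSON", the prompt ends with a
# pre-filled assistant turn, so the model completes inside the fence:
#
#   system:    You should always follow the instructions and output a valid JSON object. ...
#   user:      What is the capital of France?
#   assistant: \n```JSON        <- prefill appended by the wrapper
#
# The "```\n" / "\n```" stop sequences end generation at the closing fence.

def extract_block_body(completion: str) -> dict:
    """Hypothetical helper: parse the fenced JSON body produced in block mode."""
    body = completion.split("```")[0]  # stop sequences usually trim the fence; split defensively
    return json.loads(body)

print(extract_block_body('{"answer": "Paris"}\n```'))  # {'answer': 'Paris'}
```
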