Support new Claude-3.5 Sonnet max token limit (#6335)

Richards Tu 2024-07-18 07:47:06 +08:00 committed by GitHub
parent f55876bcc5
commit 4782fb50c4
2 changed files with 10 additions and 3 deletions


@@ -27,9 +27,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     required: true
-    default: 4096
+    default: 8192
     min: 1
-    max: 4096
+    max: 8192
   - name: response_format
     use_template: response_format
 pricing:
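
For context, the raised 8192-token output cap on claude-3-5-sonnet-20240620 is only honored when a request carries the anthropic-beta: max-tokens-3-5-sonnet-2024-07-15 header, which the Python change below threads through to the client call. A minimal standalone sketch of the same idea against the official anthropic Python SDK follows; the extra_headers argument, the prompt, and the client setup are illustrative assumptions, not the Dify code path.

# Minimal sketch (not the Dify code path): requesting the raised 8192-token
# output limit directly via the official anthropic Python SDK.
# extra_headers is the SDK's per-request header override; prompt and client
# setup here are illustrative.
import anthropic

client = anthropic.Anthropic()  # API key read from the ANTHROPIC_API_KEY env var

message = client.messages.create(
    model="claude-3-5-sonnet-20240620",
    max_tokens=8192,  # above the old 4096 cap, so the beta header is required
    extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
    messages=[{"role": "user", "content": "Summarize unified diff notation."}],
)
print(message.content[0].text)

Without the header, output for this model stays capped at 4096 tokens, which is why the parameter bounds above and the header logic below ship in the same commit.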


@@ -113,6 +113,11 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
         if system:
             extra_model_kwargs['system'] = system
 
+        # Add the new header for claude-3-5-sonnet-20240620 model
+        headers = {}
+        if model == "claude-3-5-sonnet-20240620":
+            headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"
+
         if tools:
             extra_model_kwargs['tools'] = [
                 self._transform_tool_prompt(tool) for tool in tools
@@ -121,6 +126,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
                 model=model,
                 messages=prompt_message_dicts,
                 stream=stream,
+                headers=headers,
                 **model_parameters,
                 **extra_model_kwargs
             )
@@ -130,6 +136,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
                 model=model,
                 messages=prompt_message_dicts,
                 stream=stream,
+                headers=headers,
                 **model_parameters,
                 **extra_model_kwargs
             )
@@ -138,7 +145,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
             return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages)
 
         return self._handle_chat_generate_response(model, credentials, response, prompt_messages)
 
     def _code_block_mode_wrapper(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                                  model_parameters: dict, tools: Optional[list[PromptMessageTool]] = None,
                                  stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None,