fix error with xinference tool calling with qwen2-instruct and add timeout and retry settings for xinference (#11012)

Co-authored-by: crazywoola <427733928@qq.com>
This commit is contained in:
cyflhn 2024-11-24 15:29:30 +08:00 committed by GitHub
parent ae3a2cb272
commit 03ba4bc760
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 31 additions and 3 deletions

View File

@ -63,6 +63,9 @@ from core.model_runtime.model_providers.xinference.xinference_helper import (
) )
from core.model_runtime.utils import helper from core.model_runtime.utils import helper
DEFAULT_MAX_RETRIES = 3
DEFAULT_INVOKE_TIMEOUT = 60
class XinferenceAILargeLanguageModel(LargeLanguageModel): class XinferenceAILargeLanguageModel(LargeLanguageModel):
def _invoke( def _invoke(
@ -315,7 +318,12 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
message_dict = {"role": "system", "content": message.content} message_dict = {"role": "system", "content": message.content}
elif isinstance(message, ToolPromptMessage): elif isinstance(message, ToolPromptMessage):
message = cast(ToolPromptMessage, message) message = cast(ToolPromptMessage, message)
message_dict = {"tool_call_id": message.tool_call_id, "role": "tool", "content": message.content} message_dict = {
"tool_call_id": message.tool_call_id,
"role": "tool",
"content": message.content,
"name": message.name,
}
else: else:
raise ValueError(f"Unknown message type {type(message)}") raise ValueError(f"Unknown message type {type(message)}")
@ -466,8 +474,8 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
client = OpenAI( client = OpenAI(
base_url=f'{credentials["server_url"]}/v1', base_url=f'{credentials["server_url"]}/v1',
api_key=api_key, api_key=api_key,
max_retries=3, max_retries=int(credentials.get("max_retries") or DEFAULT_MAX_RETRIES),
timeout=60, timeout=int(credentials.get("invoke_timeout") or DEFAULT_INVOKE_TIMEOUT),
) )
xinference_client = Client( xinference_client = Client(

View File

@ -56,3 +56,23 @@ model_credential_schema:
placeholder: placeholder:
zh_Hans: 在此输入您的API密钥 zh_Hans: 在此输入您的API密钥
en_US: Enter the api key en_US: Enter the api key
- variable: invoke_timeout
label:
zh_Hans: 调用超时时间 (单位:秒)
en_US: invoke timeout (unit:second)
type: text-input
required: true
default: '60'
placeholder:
zh_Hans: 在此输入调用超时时间
en_US: Enter invoke timeout value
- variable: max_retries
label:
zh_Hans: 调用重试次数
en_US: max retries
type: text-input
required: true
default: '3'
placeholder:
zh_Hans: 在此输入调用重试次数
en_US: Enter max retries