mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-13 23:35:53 +08:00
fix error with xinference tool calling with qwen2-instruct and add timeout and retry settings for xinference (#11012)
Co-authored-by: crazywoola <427733928@qq.com>
This commit is contained in:
parent
ae3a2cb272
commit
03ba4bc760
@ -63,6 +63,9 @@ from core.model_runtime.model_providers.xinference.xinference_helper import (
|
||||
)
|
||||
from core.model_runtime.utils import helper
|
||||
|
||||
DEFAULT_MAX_RETRIES = 3
|
||||
DEFAULT_INVOKE_TIMEOUT = 60
|
||||
|
||||
|
||||
class XinferenceAILargeLanguageModel(LargeLanguageModel):
|
||||
def _invoke(
|
||||
@ -315,7 +318,12 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
|
||||
message_dict = {"role": "system", "content": message.content}
|
||||
elif isinstance(message, ToolPromptMessage):
|
||||
message = cast(ToolPromptMessage, message)
|
||||
message_dict = {"tool_call_id": message.tool_call_id, "role": "tool", "content": message.content}
|
||||
message_dict = {
|
||||
"tool_call_id": message.tool_call_id,
|
||||
"role": "tool",
|
||||
"content": message.content,
|
||||
"name": message.name,
|
||||
}
|
||||
else:
|
||||
raise ValueError(f"Unknown message type {type(message)}")
|
||||
|
||||
@ -466,8 +474,8 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
|
||||
client = OpenAI(
|
||||
base_url=f'{credentials["server_url"]}/v1',
|
||||
api_key=api_key,
|
||||
max_retries=3,
|
||||
timeout=60,
|
||||
max_retries=int(credentials.get("max_retries") or DEFAULT_MAX_RETRIES),
|
||||
timeout=int(credentials.get("invoke_timeout") or DEFAULT_INVOKE_TIMEOUT),
|
||||
)
|
||||
|
||||
xinference_client = Client(
|
||||
|
@ -56,3 +56,23 @@ model_credential_schema:
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的API密钥
|
||||
en_US: Enter the api key
|
||||
- variable: invoke_timeout
|
||||
label:
|
||||
zh_Hans: 调用超时时间 (单位:秒)
|
||||
en_US: invoke timeout (unit:second)
|
||||
type: text-input
|
||||
required: true
|
||||
default: '60'
|
||||
placeholder:
|
||||
zh_Hans: 在此输入调用超时时间
|
||||
en_US: Enter invoke timeout value
|
||||
- variable: max_retries
|
||||
label:
|
||||
zh_Hans: 调用重试次数
|
||||
en_US: max retries
|
||||
type: text-input
|
||||
required: true
|
||||
default: '3'
|
||||
placeholder:
|
||||
zh_Hans: 在此输入调用重试次数
|
||||
en_US: Enter max retries
|
||||
|
Loading…
x
Reference in New Issue
Block a user