mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 23:15:59 +08:00
Fix error with xinference tool calling with qwen2-instruct and add timeout and retry settings for xinference (#11012)
Co-authored-by: crazywoola <427733928@qq.com>
This commit is contained in:
parent
ae3a2cb272
commit
03ba4bc760
@ -63,6 +63,9 @@ from core.model_runtime.model_providers.xinference.xinference_helper import (
|
|||||||
)
|
)
|
||||||
from core.model_runtime.utils import helper
|
from core.model_runtime.utils import helper
|
||||||
|
|
||||||
|
DEFAULT_MAX_RETRIES = 3
|
||||||
|
DEFAULT_INVOKE_TIMEOUT = 60
|
||||||
|
|
||||||
|
|
||||||
class XinferenceAILargeLanguageModel(LargeLanguageModel):
|
class XinferenceAILargeLanguageModel(LargeLanguageModel):
|
||||||
def _invoke(
|
def _invoke(
|
||||||
@ -315,7 +318,12 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
|
|||||||
message_dict = {"role": "system", "content": message.content}
|
message_dict = {"role": "system", "content": message.content}
|
||||||
elif isinstance(message, ToolPromptMessage):
|
elif isinstance(message, ToolPromptMessage):
|
||||||
message = cast(ToolPromptMessage, message)
|
message = cast(ToolPromptMessage, message)
|
||||||
message_dict = {"tool_call_id": message.tool_call_id, "role": "tool", "content": message.content}
|
message_dict = {
|
||||||
|
"tool_call_id": message.tool_call_id,
|
||||||
|
"role": "tool",
|
||||||
|
"content": message.content,
|
||||||
|
"name": message.name,
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown message type {type(message)}")
|
raise ValueError(f"Unknown message type {type(message)}")
|
||||||
|
|
||||||
@ -466,8 +474,8 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
|
|||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
base_url=f'{credentials["server_url"]}/v1',
|
base_url=f'{credentials["server_url"]}/v1',
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
max_retries=3,
|
max_retries=int(credentials.get("max_retries") or DEFAULT_MAX_RETRIES),
|
||||||
timeout=60,
|
timeout=int(credentials.get("invoke_timeout") or DEFAULT_INVOKE_TIMEOUT),
|
||||||
)
|
)
|
||||||
|
|
||||||
xinference_client = Client(
|
xinference_client = Client(
|
||||||
|
@ -56,3 +56,23 @@ model_credential_schema:
|
|||||||
placeholder:
|
placeholder:
|
||||||
zh_Hans: 在此输入您的API密钥
|
zh_Hans: 在此输入您的API密钥
|
||||||
en_US: Enter the api key
|
en_US: Enter the api key
|
||||||
|
- variable: invoke_timeout
|
||||||
|
label:
|
||||||
|
zh_Hans: 调用超时时间 (单位:秒)
|
||||||
|
en_US: invoke timeout (unit:second)
|
||||||
|
type: text-input
|
||||||
|
required: true
|
||||||
|
default: '60'
|
||||||
|
placeholder:
|
||||||
|
zh_Hans: 在此输入调用超时时间
|
||||||
|
en_US: Enter invoke timeout value
|
||||||
|
- variable: max_retries
|
||||||
|
label:
|
||||||
|
zh_Hans: 调用重试次数
|
||||||
|
en_US: max retries
|
||||||
|
type: text-input
|
||||||
|
required: true
|
||||||
|
default: '3'
|
||||||
|
placeholder:
|
||||||
|
zh_Hans: 在此输入调用重试次数
|
||||||
|
en_US: Enter max retries
|
||||||
|
Loading…
x
Reference in New Issue
Block a user