From 03ba4bc7608de396679a8e1c3f02e470f1197c4f Mon Sep 17 00:00:00 2001 From: cyflhn Date: Sun, 24 Nov 2024 15:29:30 +0800 Subject: [PATCH] fix error with xinference tool calling with qwen2-instruct and add timeout retry settings for xinference (#11012) Co-authored-by: crazywoola <427733928@qq.com> --- .../model_providers/xinference/llm/llm.py | 14 ++++++++++--- .../xinference/xinference.yaml | 20 +++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py index b82f0430c5..8d86d6937d 100644 --- a/api/core/model_runtime/model_providers/xinference/llm/llm.py +++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py @@ -63,6 +63,9 @@ from core.model_runtime.model_providers.xinference.xinference_helper import ( ) from core.model_runtime.utils import helper +DEFAULT_MAX_RETRIES = 3 +DEFAULT_INVOKE_TIMEOUT = 60 + class XinferenceAILargeLanguageModel(LargeLanguageModel): def _invoke( @@ -315,7 +318,12 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel): message_dict = {"role": "system", "content": message.content} elif isinstance(message, ToolPromptMessage): message = cast(ToolPromptMessage, message) - message_dict = {"tool_call_id": message.tool_call_id, "role": "tool", "content": message.content} + message_dict = { + "tool_call_id": message.tool_call_id, + "role": "tool", + "content": message.content, + "name": message.name, + } else: raise ValueError(f"Unknown message type {type(message)}") @@ -466,8 +474,8 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel): client = OpenAI( base_url=f'{credentials["server_url"]}/v1', api_key=api_key, - max_retries=3, - timeout=60, + max_retries=int(credentials.get("max_retries") or DEFAULT_MAX_RETRIES), + timeout=int(credentials.get("invoke_timeout") or DEFAULT_INVOKE_TIMEOUT), ) xinference_client = Client( diff --git 
a/api/core/model_runtime/model_providers/xinference/xinference.yaml b/api/core/model_runtime/model_providers/xinference/xinference.yaml index be9073c1ca..3500136693 100644 --- a/api/core/model_runtime/model_providers/xinference/xinference.yaml +++ b/api/core/model_runtime/model_providers/xinference/xinference.yaml @@ -56,3 +56,23 @@ model_credential_schema: placeholder: zh_Hans: 在此输入您的API密钥 en_US: Enter the api key + - variable: invoke_timeout + label: + zh_Hans: 调用超时时间 (单位:秒) + en_US: invoke timeout (unit:second) + type: text-input + required: true + default: '60' + placeholder: + zh_Hans: 在此输入调用超时时间 + en_US: Enter invoke timeout value + - variable: max_retries + label: + zh_Hans: 调用重试次数 + en_US: max retries + type: text-input + required: true + default: '3' + placeholder: + zh_Hans: 在此输入调用重试次数 + en_US: Enter max retries