feat: refactor tongyi models (#3496)

takatost 2024-04-15 22:28:32 +08:00 committed by GitHub
parent fd90d99cd0
commit 5b447d61a6
29 changed files with 639 additions and 177 deletions

View File

@@ -1,7 +1,5 @@
import os
-from werkzeug.exceptions import Unauthorized
if not os.environ.get("DEBUG") or os.environ.get("DEBUG").lower() != 'true':
    from gevent import monkey
@@ -11,10 +9,6 @@ if not os.environ.get("DEBUG") or os.environ.get("DEBUG").lower() != 'true':
        grpc.experimental.gevent.init_gevent()
-import langchain
-langchain.verbose = True
import json
import logging
import threading
@@ -24,6 +18,7 @@ import warnings
from flask import Flask, Response, request
from flask_cors import CORS
+from werkzeug.exceptions import Unauthorized
from commands import register_commands
from config import CloudEditionConfig, Config
from extensions import (

View File

@@ -1,4 +1,5 @@
import logging
+import os
import threading
import uuid
from collections.abc import Generator
@@ -189,6 +190,8 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
            logger.exception("Validation Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except (ValueError, InvokeError) as e:
+           if os.environ.get("DEBUG") and os.environ.get("DEBUG").lower() == 'true':
+               logger.exception("Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except Exception as e:
            logger.exception("Unknown Error when generating")

View File

@@ -1,4 +1,5 @@
import logging
+import os
import threading
import uuid
from collections.abc import Generator
@@ -198,6 +199,8 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
            logger.exception("Validation Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except (ValueError, InvokeError) as e:
+           if os.environ.get("DEBUG") and os.environ.get("DEBUG").lower() == 'true':
+               logger.exception("Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except Exception as e:
            logger.exception("Unknown Error when generating")

View File

@@ -1,4 +1,5 @@
import logging
+import os
import threading
import uuid
from collections.abc import Generator
@@ -195,6 +196,8 @@ class ChatAppGenerator(MessageBasedAppGenerator):
            logger.exception("Validation Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except (ValueError, InvokeError) as e:
+           if os.environ.get("DEBUG") and os.environ.get("DEBUG").lower() == 'true':
+               logger.exception("Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except Exception as e:
            logger.exception("Unknown Error when generating")

View File

@@ -1,4 +1,5 @@
import logging
+import os
import threading
import uuid
from collections.abc import Generator
@@ -184,6 +185,8 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
            logger.exception("Validation Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except (ValueError, InvokeError) as e:
+           if os.environ.get("DEBUG") and os.environ.get("DEBUG").lower() == 'true':
+               logger.exception("Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except Exception as e:
            logger.exception("Unknown Error when generating")

View File

@@ -1,4 +1,5 @@
import logging
+import os
import threading
import uuid
from collections.abc import Generator
@@ -137,6 +138,8 @@ class WorkflowAppGenerator(BaseAppGenerator):
            logger.exception("Validation Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except (ValueError, InvokeError) as e:
+           if os.environ.get("DEBUG") and os.environ.get("DEBUG").lower() == 'true':
+               logger.exception("Error when generating")
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
        except Exception as e:
            logger.exception("Unknown Error when generating")

View File

@@ -602,7 +602,7 @@ class CohereLargeLanguageModel(LargeLanguageModel):
        parameter_definitions = {}
        for p_key, p_val in properties.items():
            required = False
-           if property in required_properties:
+           if p_key in required_properties:
                required = True
            desc = p_val['description']
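Note: the original condition tested the Python builtin property rather than the loop variable, so the membership check was always false and no parameter was ever marked required; comparing p_key against required_properties restores the intended behavior.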

View File

@@ -90,9 +90,9 @@ model_credential_schema:
      options:
        - value: 'true'
          label:
-           en_US: Yes
+           en_US: 'Yes'
            zh_Hans: 是
        - value: 'false'
          label:
-           en_US: No
+           en_US: 'No'
            zh_Hans: 否

View File

@@ -1,82 +0,0 @@
from typing import Any, Optional

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms import Tongyi
from langchain.llms.tongyi import generate_with_retry, stream_generate_with_retry
from langchain.schema import Generation, LLMResult


class EnhanceTongyi(Tongyi):
    @property
    def _default_params(self) -> dict[str, Any]:
        """Get the default parameters for calling OpenAI API."""
        normal_params = {
            "top_p": self.top_p,
            "api_key": self.dashscope_api_key
        }

        return {**normal_params, **self.model_kwargs}

    def _generate(
        self,
        prompts: list[str],
        stop: Optional[list[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        generations = []
        params: dict[str, Any] = {
            **{"model": self.model_name},
            **self._default_params,
            **kwargs,
        }
        if self.streaming:
            if len(prompts) > 1:
                raise ValueError("Cannot stream results with multiple prompts.")
            params["stream"] = True
            text = ''
            for stream_resp in stream_generate_with_retry(
                self, prompt=prompts[0], **params
            ):
                if not generations:
                    current_text = stream_resp["output"]["text"]
                else:
                    current_text = stream_resp["output"]["text"][len(text):]

                text = stream_resp["output"]["text"]

                generations.append(
                    [
                        Generation(
                            text=current_text,
                            generation_info=dict(
                                finish_reason=stream_resp["output"]["finish_reason"],
                            ),
                        )
                    ]
                )

                if run_manager:
                    run_manager.on_llm_new_token(
                        current_text,
                        verbose=self.verbose,
                        logprobs=None,
                    )
        else:
            for prompt in prompts:
                completion = generate_with_retry(
                    self,
                    prompt=prompt,
                    **params,
                )
                generations.append(
                    [
                        Generation(
                            text=completion["output"]["text"],
                            generation_info=dict(
                                finish_reason=completion["output"]["finish_reason"],
                            ),
                        )
                    ]
                )
        return LLMResult(generations=generations)
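Note: the deleted EnhanceTongyi wrapper above routed requests through LangChain's retry helpers; after this commit the provider calls the DashScope SDK directly. A rough, illustrative sketch of the new call path (model name and key are placeholders):

from dashscope import Generation

# result_format='message' returns OpenAI-style choices, matching the new
# response.output.choices[0].message.content access in llm.py below.
response = Generation.call(
    model='qwen-max',
    api_key='YOUR_DASHSCOPE_API_KEY',  # placeholder credential
    messages=[{'role': 'user', 'content': 'Hello'}],
    result_format='message',
    stream=False,
)
print(response.output.choices[0].message.content)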

View File

@@ -1,8 +1,13 @@
+import base64
+import os
+import tempfile
+import uuid
from collections.abc import Generator
-from typing import Optional, Union
+from http import HTTPStatus
+from typing import Optional, Union, cast
-from dashscope import get_tokenizer
+from dashscope import Generation, MultiModalConversation, get_tokenizer
-from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
+from dashscope.api_entities.dashscope_response import GenerationResponse
from dashscope.common.error import (
    AuthenticationError,
    InvalidParameter,
@@ -11,17 +16,21 @@ from dashscope.common.error import (
    UnsupportedHTTPMethod,
    UnsupportedModel,
)
-from langchain.llms.tongyi import generate_with_retry, stream_generate_with_retry
from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
+   ImagePromptMessageContent,
    PromptMessage,
+   PromptMessageContentType,
    PromptMessageTool,
    SystemPromptMessage,
+   TextPromptMessageContent,
+   ToolPromptMessage,
    UserPromptMessage,
)
+from core.model_runtime.entities.model_entities import ModelFeature
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
@@ -33,10 +42,9 @@ from core.model_runtime.errors.invoke import (
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
-from ._client import EnhanceTongyi

class TongyiLargeLanguageModel(LargeLanguageModel):
+   tokenizers = {}

    def _invoke(self, model: str, credentials: dict,
                prompt_messages: list[PromptMessage], model_parameters: dict,
@@ -57,13 +65,13 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
        :return: full response or stream response chunk generator result
        """
        # invoke model
-       return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user)
+       return self._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)

    def _code_block_mode_wrapper(self, model: str, credentials: dict,
                                 prompt_messages: list[PromptMessage], model_parameters: dict,
                                 tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
                                 stream: bool = True, user: str | None = None, callbacks: list[Callback] = None) \
            -> LLMResult | Generator:
        """
        Wrapper for code block mode
        """
@@ -88,7 +96,7 @@ if you are not sure about the structure.
                stream=stream,
                user=user
            )

        model_parameters.pop("response_format")
        stop = stop or []
        stop.extend(["\n```", "```\n"])
@@ -99,13 +107,13 @@ if you are not sure about the structure.
            # override the system message
            prompt_messages[0] = SystemPromptMessage(
                content=block_prompts
                .replace("{{instructions}}", prompt_messages[0].content)
            )
        else:
            # insert the system message
            prompt_messages.insert(0, SystemPromptMessage(
                content=block_prompts
                .replace("{{instructions}}", f"Please output a valid {code_block} object.")
            ))

        mode = self.get_model_mode(model, credentials)
@@ -138,7 +146,7 @@ if you are not sure about the structure.
                prompt_messages=prompt_messages,
                input_generator=response
            )

        return response

    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
@@ -152,7 +160,14 @@ if you are not sure about the structure.
        :param tools: tools for tool calling
        :return:
        """
-       tokenizer = get_tokenizer(model)
+       if model in ['qwen-turbo-chat', 'qwen-plus-chat']:
+           model = model.replace('-chat', '')
+
+       if model in self.tokenizers:
+           tokenizer = self.tokenizers[model]
+       else:
+           tokenizer = get_tokenizer(model)
+           self.tokenizers[model] = tokenizer

        # convert string to token ids
        tokens = tokenizer.encode(self._convert_messages_to_prompt(prompt_messages))
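Note: the class-level tokenizers dict added above memoizes get_tokenizer, so repeated token counts reuse one tokenizer per model, with the '-chat' aliases mapped to their base models. A standalone sketch of the same idea (helper name is illustrative):

from dashscope import get_tokenizer

_tokenizers = {}

def count_tokens(model: str, text: str) -> int:
    # the '-chat' aliases share the base model's tokenizer, as in get_num_tokens above
    if model in ('qwen-turbo-chat', 'qwen-plus-chat'):
        model = model.replace('-chat', '')
    if model not in _tokenizers:
        # loading a tokenizer is relatively expensive, so cache it per model
        _tokenizers[model] = get_tokenizer(model)
    return len(_tokenizers[model].encode(text))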
@@ -184,6 +199,7 @@ if you are not sure about the structure.
    def _generate(self, model: str, credentials: dict,
                  prompt_messages: list[PromptMessage], model_parameters: dict,
+                 tools: Optional[list[PromptMessageTool]] = None,
                  stop: Optional[list[str]] = None, stream: bool = True,
                  user: Optional[str] = None) -> Union[LLMResult, Generator]:
        """
@@ -192,24 +208,27 @@ if you are not sure about the structure.
        :param model: model name
        :param credentials: credentials
        :param prompt_messages: prompt messages
+       :param tools: tools for tool calling
        :param model_parameters: model parameters
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """
-       extra_model_kwargs = {}
-       if stop:
-           extra_model_kwargs['stop'] = stop
        # transform credentials to kwargs for model instance
        credentials_kwargs = self._to_credential_kwargs(credentials)
-       client = EnhanceTongyi(
-           model_name=model,
-           streaming=stream,
-           dashscope_api_key=credentials_kwargs['api_key'],
-       )
+       mode = self.get_model_mode(model, credentials)
+
+       if model in ['qwen-turbo-chat', 'qwen-plus-chat']:
+           model = model.replace('-chat', '')
+
+       extra_model_kwargs = {}
+       if tools:
+           extra_model_kwargs['tools'] = self._convert_tools(tools)
+
+       if stop:
+           extra_model_kwargs['stop'] = stop

        params = {
            'model': model,
@@ -218,30 +237,27 @@ if you are not sure about the structure.
            **extra_model_kwargs,
        }

-       mode = self.get_model_mode(model, credentials)
-
-       if mode == LLMMode.CHAT:
-           params['messages'] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-       else:
-           params['prompt'] = self._convert_messages_to_prompt(prompt_messages)
+       model_schema = self.get_model_schema(model, credentials)
+       if ModelFeature.VISION in (model_schema.features or []):
+           params['messages'] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages, rich_content=True)
+
+           response = MultiModalConversation.call(**params, stream=stream)
+       else:
+           if mode == LLMMode.CHAT:
+               params['messages'] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
+           else:
+               params['prompt'] = prompt_messages[0].content.rstrip()
+
+           response = Generation.call(**params,
+                                      result_format='message',
+                                      stream=stream)

        if stream:
-           responses = stream_generate_with_retry(
-               client,
-               stream=True,
-               incremental_output=True,
-               **params
-           )
-
-           return self._handle_generate_stream_response(model, credentials, responses, prompt_messages)
-
-       response = generate_with_retry(
-           client,
-           **params,
-       )
+           return self._handle_generate_stream_response(model, credentials, response, prompt_messages)

        return self._handle_generate_response(model, credentials, response, prompt_messages)

-   def _handle_generate_response(self, model: str, credentials: dict, response: DashScopeAPIResponse,
+   def _handle_generate_response(self, model: str, credentials: dict, response: GenerationResponse,
                                  prompt_messages: list[PromptMessage]) -> LLMResult:
        """
        Handle llm response
@@ -254,7 +270,7 @@ if you are not sure about the structure.
        """
        # transform assistant message to prompt message
        assistant_prompt_message = AssistantPromptMessage(
-           content=response.output.text
+           content=response.output.choices[0].message.content,
        )

        # transform usage
@@ -270,32 +286,65 @@ if you are not sure about the structure.
        return result

-   def _handle_generate_stream_response(self, model: str, credentials: dict, responses: Generator,
+   def _handle_generate_stream_response(self, model: str, credentials: dict,
+                                        responses: Generator[GenerationResponse, None, None],
                                         prompt_messages: list[PromptMessage]) -> Generator:
        """
        Handle llm stream response

        :param model: model name
        :param credentials: credentials
-       :param response: response
+       :param responses: response
        :param prompt_messages: prompt messages
        :return: llm response chunk generator result
        """
+       full_text = ''
+       tool_calls = []
        for index, response in enumerate(responses):
-           resp_finish_reason = response.output.finish_reason
-           resp_content = response.output.text
-           usage = response.usage
-
-           if resp_finish_reason is None and (resp_content is None or resp_content == ''):
-               continue
-
-           # transform assistant message to prompt message
-           assistant_prompt_message = AssistantPromptMessage(
-               content=resp_content if resp_content else '',
-           )
-
-           if resp_finish_reason is not None:
+           if response.status_code != 200 and response.status_code != HTTPStatus.OK:
+               raise ServiceUnavailableError(
+                   f"Failed to invoke model {model}, status code: {response.status_code}, "
+                   f"message: {response.message}"
+               )
+
+           resp_finish_reason = response.output.choices[0].finish_reason
+
+           if resp_finish_reason is not None and resp_finish_reason != 'null':
+               resp_content = response.output.choices[0].message.content
+
+               assistant_prompt_message = AssistantPromptMessage(
+                   content='',
+               )
+
+               if 'tool_calls' in response.output.choices[0].message:
+                   tool_calls = response.output.choices[0].message['tool_calls']
+               elif resp_content:
+                   # special for qwen-vl
+                   if isinstance(resp_content, list):
+                       resp_content = resp_content[0]['text']
+
+                   # transform assistant message to prompt message
+                   assistant_prompt_message.content = resp_content.replace(full_text, '', 1)
+                   full_text = resp_content
+
+               if tool_calls:
+                   message_tool_calls = []
+                   for tool_call_obj in tool_calls:
+                       message_tool_call = AssistantPromptMessage.ToolCall(
+                           id=tool_call_obj['function']['name'],
+                           type='function',
+                           function=AssistantPromptMessage.ToolCall.ToolCallFunction(
+                               name=tool_call_obj['function']['name'],
+                               arguments=tool_call_obj['function']['arguments']
+                           )
+                       )
+                       message_tool_calls.append(message_tool_call)
+
+                   assistant_prompt_message.tool_calls = message_tool_calls

                # transform usage
+               usage = response.usage
                usage = self._calc_response_usage(model, credentials, usage.input_tokens, usage.output_tokens)

                yield LLMResultChunk(
@@ -309,6 +358,23 @@ if you are not sure about the structure.
                    )
                )
            else:
+               resp_content = response.output.choices[0].message.content
+               if not resp_content:
+                   if 'tool_calls' in response.output.choices[0].message:
+                       tool_calls = response.output.choices[0].message['tool_calls']
+
+                   continue
+
+               # special for qwen-vl
+               if isinstance(resp_content, list):
+                   resp_content = resp_content[0]['text']
+
+               # transform assistant message to prompt message
+               assistant_prompt_message = AssistantPromptMessage(
+                   content=resp_content.replace(full_text, '', 1),
+               )
+
+               full_text = resp_content
+
                yield LLMResultChunk(
                    model=model,
                    prompt_messages=prompt_messages,
@@ -343,11 +409,20 @@ if you are not sure about the structure.
            content = message.content

            if isinstance(message, UserPromptMessage):
-               message_text = f"{human_prompt} {content}"
+               if isinstance(content, str):
+                   message_text = f"{human_prompt} {content}"
+               else:
+                   message_text = ""
+                   for sub_message in content:
+                       if sub_message.type == PromptMessageContentType.TEXT:
+                           message_text = f"{human_prompt} {sub_message.data}"
+                           break
            elif isinstance(message, AssistantPromptMessage):
                message_text = f"{ai_prompt} {content}"
            elif isinstance(message, SystemPromptMessage):
                message_text = content
+           elif isinstance(message, ToolPromptMessage):
+               message_text = content
            else:
                raise ValueError(f"Got unknown type {message}")
@@ -370,7 +445,8 @@ if you are not sure about the structure.
        # trim off the trailing ' ' that might come from the "Assistant: "
        return text.rstrip()

-   def _convert_prompt_messages_to_tongyi_messages(self, prompt_messages: list[PromptMessage]) -> list[dict]:
+   def _convert_prompt_messages_to_tongyi_messages(self, prompt_messages: list[PromptMessage],
+                                                   rich_content: bool = False) -> list[dict]:
        """
        Convert prompt messages to tongyi messages
@@ -382,23 +458,118 @@ if you are not sure about the structure.
            if isinstance(prompt_message, SystemPromptMessage):
                tongyi_messages.append({
                    'role': 'system',
-                   'content': prompt_message.content,
+                   'content': prompt_message.content if not rich_content else [{"text": prompt_message.content}],
                })
            elif isinstance(prompt_message, UserPromptMessage):
-               tongyi_messages.append({
-                   'role': 'user',
-                   'content': prompt_message.content,
-               })
+               if isinstance(prompt_message.content, str):
+                   tongyi_messages.append({
+                       'role': 'user',
+                       'content': prompt_message.content if not rich_content else [{"text": prompt_message.content}],
+                   })
+               else:
+                   sub_messages = []
+                   for message_content in prompt_message.content:
+                       if message_content.type == PromptMessageContentType.TEXT:
+                           message_content = cast(TextPromptMessageContent, message_content)
+                           sub_message_dict = {
+                               "text": message_content.data
+                           }
+                           sub_messages.append(sub_message_dict)
+                       elif message_content.type == PromptMessageContentType.IMAGE:
+                           message_content = cast(ImagePromptMessageContent, message_content)
+
+                           image_url = message_content.data
+                           if message_content.data.startswith("data:"):
+                               # convert image base64 data to file in /tmp
+                               image_url = self._save_base64_image_to_file(message_content.data)
+
+                           sub_message_dict = {
+                               "image": image_url
+                           }
+                           sub_messages.append(sub_message_dict)
+
+                   # resort sub_messages to ensure text is always at last
+                   sub_messages = sorted(sub_messages, key=lambda x: 'text' in x)
+
+                   tongyi_messages.append({
+                       'role': 'user',
+                       'content': sub_messages
+                   })
            elif isinstance(prompt_message, AssistantPromptMessage):
+               content = prompt_message.content
+               if not content:
+                   content = ' '
+
                tongyi_messages.append({
                    'role': 'assistant',
-                   'content': prompt_message.content,
+                   'content': content if not rich_content else [{"text": content}],
                })
+           elif isinstance(prompt_message, ToolPromptMessage):
+               tongyi_messages.append({
+                   "role": "tool",
+                   "content": prompt_message.content,
+                   "name": prompt_message.tool_call_id
+               })
            else:
                raise ValueError(f"Got unknown type {prompt_message}")

        return tongyi_messages

+   def _save_base64_image_to_file(self, base64_image: str) -> str:
+       """
+       Save base64 image to file
+       'data:{upload_file.mime_type};base64,{encoded_string}'
+
+       :param base64_image: base64 image data
+       :return: image file path
+       """
+       # get mime type and encoded string
+       mime_type, encoded_string = base64_image.split(',')[0].split(';')[0].split(':')[1], base64_image.split(',')[1]
+
+       # save image to file
+       temp_dir = tempfile.gettempdir()
+
+       file_path = os.path.join(temp_dir, f"{uuid.uuid4()}.{mime_type.split('/')[1]}")
+
+       with open(file_path, "wb") as image_file:
+           image_file.write(base64.b64decode(encoded_string))
+
+       return f"file://{file_path}"
+
+   def _convert_tools(self, tools: list[PromptMessageTool]) -> list[dict]:
+       """
+       Convert tools
+       """
+       tool_definitions = []
+       for tool in tools:
+           properties = tool.parameters['properties']
+           required_properties = tool.parameters['required']
+
+           properties_definitions = {}
+           for p_key, p_val in properties.items():
+               desc = p_val['description']
+               if 'enum' in p_val:
+                   desc += (f"; Only accepts one of the following predefined options: "
+                            f"[{', '.join(p_val['enum'])}]")
+
+               properties_definitions[p_key] = {
+                   'description': desc,
+                   'type': p_val['type'],
+               }
+
+           tool_definition = {
+               "type": "function",
+               "function": {
+                   "name": tool.name,
+                   "description": tool.description,
+                   "parameters": properties_definitions,
+                   "required": required_properties
+               }
+           }
+
+           tool_definitions.append(tool_definition)
+
+       return tool_definitions
+
    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """

View File

@@ -0,0 +1,81 @@
model: qwen-max-0403
label:
en_US: qwen-max-0403
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
type: float
default: 0.3
min: 0.0
max: 2.0
help:
zh_Hans: 用于控制随机性和多样性的程度。具体来说temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值使得更多的低概率词被选择生成结果更加多样化而较低的temperature值则会增强概率分布的峰值使得高概率词更容易被选择生成结果更加确定。
en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
- name: max_tokens
use_template: max_tokens
type: int
default: 2000
min: 1
max: 2000
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量它定义了生成的上限但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
- name: top_p
use_template: top_p
type: float
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
- name: top_k
type: int
min: 0
max: 99
label:
zh_Hans: 取样数量
en_US: Top k
help:
zh_Hans: 生成时采样候选集的大小。例如取值为50时仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大生成的随机性越高取值越小生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
- name: seed
required: false
type: int
default: 1234
label:
zh_Hans: 随机种子
en_US: Random seed
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: repetition_penalty
required: false
type: float
default: 1.1
label:
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
input: '0.12'
output: '0.12'
unit: '0.001'
currency: RMB

View File

@@ -2,6 +2,10 @@ model: qwen-max-1201
label:
  en_US: qwen-max-1201
model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
model_properties:
  mode: chat
  context_size: 8192
@@ -9,7 +13,7 @@ parameter_rules:
  - name: temperature
    use_template: temperature
    type: float
-   default: 0.85
+   default: 0.3
    min: 0.0
    max: 2.0
    help:

View File

@@ -2,6 +2,10 @@ model: qwen-max-longcontext
label:
  en_US: qwen-max-longcontext
model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
model_properties:
  mode: chat
  context_size: 32768
@@ -9,7 +13,7 @@ parameter_rules:
  - name: temperature
    use_template: temperature
    type: float
-   default: 0.85
+   default: 0.3
    min: 0.0
    max: 2.0
    help:

View File

@@ -2,6 +2,10 @@ model: qwen-max
label:
  en_US: qwen-max
model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
model_properties:
  mode: chat
  context_size: 8192
@@ -9,7 +13,7 @@ parameter_rules:
  - name: temperature
    use_template: temperature
    type: float
-   default: 0.85
+   default: 0.3
    min: 0.0
    max: 2.0
    help:

View File

@@ -0,0 +1,81 @@
model: qwen-plus-chat
label:
en_US: qwen-plus-chat
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
type: float
default: 0.3
min: 0.0
max: 2.0
help:
zh_Hans: 用于控制随机性和多样性的程度。具体来说temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值使得更多的低概率词被选择生成结果更加多样化而较低的temperature值则会增强概率分布的峰值使得高概率词更容易被选择生成结果更加确定。
en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
- name: max_tokens
use_template: max_tokens
type: int
default: 1500
min: 1
max: 1500
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量它定义了生成的上限但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
- name: top_p
use_template: top_p
type: float
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
- name: top_k
type: int
min: 0
max: 99
label:
zh_Hans: 取样数量
en_US: Top k
help:
zh_Hans: 生成时采样候选集的大小。例如取值为50时仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大生成的随机性越高取值越小生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
- name: seed
required: false
type: int
default: 1234
label:
zh_Hans: 随机种子
en_US: Random seed
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: repetition_penalty
required: false
type: float
default: 1.1
label:
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
input: '0.02'
output: '0.02'
unit: '0.001'
currency: RMB

View File

@@ -2,6 +2,8 @@ model: qwen-plus
label:
  en_US: qwen-plus
model_type: llm
+features:
+  - agent-thought
model_properties:
  mode: completion
  context_size: 32768
@@ -9,7 +11,7 @@ parameter_rules:
  - name: temperature
    use_template: temperature
    type: float
-   default: 0.85
+   default: 0.3
    min: 0.0
    max: 2.0
    help:

View File

@@ -0,0 +1,81 @@
model: qwen-turbo-chat
label:
en_US: qwen-turbo-chat
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
type: float
default: 0.3
min: 0.0
max: 2.0
help:
zh_Hans: 用于控制随机性和多样性的程度。具体来说temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值使得更多的低概率词被选择生成结果更加多样化而较低的temperature值则会增强概率分布的峰值使得高概率词更容易被选择生成结果更加确定。
en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
- name: max_tokens
use_template: max_tokens
type: int
default: 1500
min: 1
max: 1500
help:
zh_Hans: 用于指定模型在生成内容时token的最大数量它定义了生成的上限但不保证每次都会生成到这个数量。
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
- name: top_p
use_template: top_p
type: float
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
- name: top_k
type: int
min: 0
max: 99
label:
zh_Hans: 取样数量
en_US: Top k
help:
zh_Hans: 生成时采样候选集的大小。例如取值为50时仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大生成的随机性越高取值越小生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
- name: seed
required: false
type: int
default: 1234
label:
zh_Hans: 随机种子
en_US: Random seed
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: repetition_penalty
required: false
type: float
default: 1.1
label:
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
input: '0.008'
output: '0.008'
unit: '0.001'
currency: RMB

View File

@@ -2,6 +2,8 @@ model: qwen-turbo
label:
  en_US: qwen-turbo
model_type: llm
+features:
+  - agent-thought
model_properties:
  mode: completion
  context_size: 8192
@@ -9,7 +11,7 @@ parameter_rules:
  - name: temperature
    use_template: temperature
    type: float
-   default: 0.85
+   default: 0.3
    min: 0.0
    max: 2.0
    help:

View File

@@ -0,0 +1,47 @@
model: qwen-vl-max
label:
en_US: qwen-vl-max
model_type: llm
features:
- vision
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: top_p
use_template: top_p
type: float
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
- name: top_k
type: int
min: 0
max: 99
label:
zh_Hans: 取样数量
en_US: Top k
help:
zh_Hans: 生成时采样候选集的大小。例如取值为50时仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大生成的随机性越高取值越小生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
- name: seed
required: false
type: int
default: 1234
label:
zh_Hans: 随机种子
en_US: Random seed
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
pricing:
input: '0.02'
output: '0.02'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,47 @@
model: qwen-vl-plus
label:
en_US: qwen-vl-plus
model_type: llm
features:
- vision
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: top_p
use_template: top_p
type: float
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
- name: top_k
type: int
min: 0
max: 99
label:
zh_Hans: 取样数量
en_US: Top k
help:
zh_Hans: 生成时采样候选集的大小。例如取值为50时仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大生成的随机性越高取值越小生成的确定性越高。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
- name: seed
required: false
type: int
default: 1234
label:
zh_Hans: 随机种子
en_US: Random seed
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
pricing:
input: '0.008'
output: '0.008'
unit: '0.001'
currency: RMB

View File

@@ -37,8 +37,11 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
        :return: embeddings result
        """
        credentials_kwargs = self._to_credential_kwargs(credentials)
-       dashscope.api_key = credentials_kwargs["dashscope_api_key"]
-       embeddings, embedding_used_tokens = self.embed_documents(model, texts)
+       embeddings, embedding_used_tokens = self.embed_documents(
+           credentials_kwargs=credentials_kwargs,
+           model=model,
+           texts=texts
+       )

        return TextEmbeddingResult(
            embeddings=embeddings,
@@ -74,17 +77,19 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
        try:
            # transform credentials to kwargs for model instance
            credentials_kwargs = self._to_credential_kwargs(credentials)
-           dashscope.api_key = credentials_kwargs["dashscope_api_key"]

            # call embedding model
-           self.embed_documents(model=model, texts=["ping"])
+           self.embed_documents(credentials_kwargs=credentials_kwargs, model=model, texts=["ping"])
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @staticmethod
-   def embed_documents(model: str, texts: list[str]) -> tuple[list[list[float]], int]:
+   def embed_documents(credentials_kwargs: dict, model: str, texts: list[str]) -> tuple[list[list[float]], int]:
        """Call out to Tongyi's embedding endpoint.

        Args:
+           credentials_kwargs: The credentials to use for the call.
+           model: The model to use for embedding.
            texts: The list of texts to embed.

        Returns:
@@ -93,7 +98,12 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
        embeddings = []
        embedding_used_tokens = 0
        for text in texts:
-           response = dashscope.TextEmbedding.call(model=model, input=text, text_type="document")
+           response = dashscope.TextEmbedding.call(
+               api_key=credentials_kwargs["dashscope_api_key"],
+               model=model,
+               input=text,
+               text_type="document"
+           )
            data = response.output["embeddings"][0]
            embeddings.append(data["embedding"])
            embedding_used_tokens += response.usage["total_tokens"]
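Note: with the API key now passed per request instead of being set on the module-global dashscope.api_key, a single embedding call can be exercised in isolation, roughly as follows (model name and key are placeholders):

import dashscope

response = dashscope.TextEmbedding.call(
    api_key='YOUR_DASHSCOPE_API_KEY',  # placeholder credential
    model='text-embedding-v1',         # assumed embedding model name
    input='ping',
    text_type='document',
)
embedding = response.output["embeddings"][0]["embedding"]
used_tokens = response.usage["total_tokens"]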

View File

@@ -118,7 +118,6 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
        :param content_text: text content to be translated
        :return: text translated to audio file
        """
-       dashscope.api_key = credentials.get('dashscope_api_key')
        word_limit = self._get_model_word_limit(model, credentials)
        audio_type = self._get_model_audio_type(model, credentials)
        tts_file_id = self._get_file_name(content_text)
@@ -127,6 +126,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
            for sentence in sentences:
                response = dashscope.audio.tts.SpeechSynthesizer.call(model=voice, sample_rate=48000,
+                                                                     api_key=credentials.get('dashscope_api_key'),
                                                                      text=sentence.strip(),
                                                                      format=audio_type, word_timestamp_enabled=True,
                                                                      phoneme_timestamp_enabled=True)
@@ -146,8 +146,8 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
        :param audio_type: audio file type
        :return: text translated to audio file
        """
-       dashscope.api_key = credentials.get('dashscope_api_key')
        response = dashscope.audio.tts.SpeechSynthesizer.call(model=voice, sample_rate=48000,
+                                                             api_key=credentials.get('dashscope_api_key'),
                                                              text=sentence.strip(),
                                                              format=audio_type)
        if isinstance(response.get_audio_data(), bytes):

View File

@@ -43,7 +43,7 @@ model_credential_schema:
      placeholder:
        zh_Hans: 在此输入您的上下文大小
        en_US: Enter the context size
-     default: 2048
+     default: '2048'
    - variable: completion_type
      label:
        zh_Hans: 补全类型
@@ -69,16 +69,16 @@ model_credential_schema:
        en_US: Stream output
      type: select
      required: true
-     default: true
+     default: 'true'
      placeholder:
        zh_Hans: 是否支持流式输出
        en_US: Whether to support stream output
      options:
        - label:
            zh_Hans: 是
-           en_US: Yes
+           en_US: 'Yes'
-         value: true
+         value: 'true'
        - label:
            zh_Hans: 否
-           en_US: No
+           en_US: 'No'
-         value: false
+         value: 'false'

View File

@@ -9,7 +9,6 @@ flask-restful~=0.3.10
flask-cors~=4.0.0
gunicorn~=21.2.0
gevent~=23.9.1
-langchain==0.0.250
openai~=1.13.3
tiktoken~=0.6.0
psycopg2-binary~=2.9.6
@@ -47,7 +46,7 @@ google-search-results==2.4.2
googleapis-common-protos==1.63.0
replicate~=0.22.0
websocket-client~=1.7.0
-dashscope[tokenizer]~=1.14.0
+dashscope[tokenizer]~=1.17.0
huggingface_hub~=0.16.4
transformers~=4.35.0
tokenizers~=0.15.0
@@ -79,4 +78,5 @@ azure-storage-blob==12.9.0
azure-identity==1.15.0
lxml==5.1.0
xlrd~=2.0.1
+pydantic~=1.10.0
pgvecto-rs==0.1.4