ISSUE=11042: add TTS model to the SiliconFlow provider (#11043)

This commit is contained in:
SiliconFlow, Inc 2024-11-25 11:04:13 +08:00 committed by GitHub
parent aae29e72ae
commit a4fc057a1c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 143 additions and 1 deletion

View File

@ -24,4 +24,3 @@
- meta-llama/Meta-Llama-3.1-8B-Instruct
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- deepseek-ai/DeepSeek-V2-Chat

View File

@ -18,6 +18,7 @@ supported_model_types:
- text-embedding
- rerank
- speech2text
- tts
configurate_methods:
- predefined-model
- customizable-model

View File

@ -0,0 +1,37 @@
# Model definition for SiliconFlow's fish-speech-1.4 text-to-speech model.
model: fishaudio/fish-speech-1.4
model_type: tts
model_properties:
  # Voice used when the caller does not specify one (must be one of `voices` below).
  default_voice: 'fishaudio/fish-speech-1.4:alex'
  # Available timbres; `mode` is the value sent to the API, `name` is the display label.
  voices:
    - mode: "fishaudio/fish-speech-1.4:alex"
      name: "Alex男声"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:benjamin"
      name: "Benjamin男声"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:charles"
      name: "Charles男声"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:david"
      name: "David男声"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:anna"
      name: "Anna女声"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:bella"
      name: "Bella女声"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:claire"
      name: "Claire女声"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:diana"
      name: "Diana女声"
      language: [ "zh-Hans", "en-US" ]
  # Output encoding of the synthesized audio.
  audio_type: 'mp3'
  # Upper bound on concurrent synthesis requests for long texts.
  max_workers: 5
  # stream: false
pricing:
  input: '0.015'
  output: '0'
  unit: '0.001'
  currency: RMB

View File

@ -0,0 +1,105 @@
import concurrent.futures
from typing import Any, Optional
from openai import OpenAI
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.tts_model import TTSModel
from core.model_runtime.model_providers.openai._common import _CommonOpenAI
class SiliconFlowText2SpeechModel(_CommonOpenAI, TTSModel):
    """
    Model class for SiliconFlow text-to-speech model.
    """

    def _invoke(
        self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, user: Optional[str] = None
    ) -> Any:
        """
        Invoke the text-to-speech model.

        :param model: model name
        :param tenant_id: user tenant id
        :param credentials: model credentials
        :param content_text: text content to be synthesized
        :param voice: model timbre
        :param user: unique user id
        :return: generator yielding mp3 audio chunks
        """
        # Fall back to the model's default voice when the requested one is
        # missing or not offered by this model.
        if not voice or voice not in [
            d["value"] for d in self.get_tts_model_voices(model=model, credentials=credentials)
        ]:
            voice = self._get_model_default_voice(model, credentials)
        return self._tts_invoke_streaming(model=model, credentials=credentials, content_text=content_text, voice=voice)

    def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
        """
        Validate credentials by performing a short test synthesis.

        :param model: model name
        :param credentials: model credentials
        :param user: unique user id
        :raises CredentialsValidateFailedError: if the test invocation fails
        """
        try:
            # _tts_invoke_streaming is a generator function: merely calling it
            # sends no request. Pull one chunk so credential errors actually
            # surface here instead of silently passing validation.
            audio = self._tts_invoke_streaming(
                model=model,
                credentials=credentials,
                content_text="Hello SiliconFlow!",
                voice=self._get_model_default_voice(model, credentials),
            )
            next(iter(audio), None)
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex)) from ex

    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> Any:
        """
        Stream synthesized speech for the given text.

        Long texts (> 4096 chars) are split into sentences and synthesized
        concurrently, with chunks yielded in original sentence order.

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be synthesized
        :param voice: model timbre
        :return: generator yielding mp3 audio chunks
        :raises InvokeBadRequestError: if the underlying API call fails
        """
        try:
            # doc: https://docs.siliconflow.cn/capabilities/text-to-speech
            self._add_custom_parameters(credentials)
            credentials_kwargs = self._to_credential_kwargs(credentials)
            client = OpenAI(**credentials_kwargs)
            model_support_voice = [
                x.get("value") for x in self.get_tts_model_voices(model=model, credentials=credentials)
            ]
            if not voice or voice not in model_support_voice:
                voice = self._get_model_default_voice(model, credentials)
            if len(content_text) > 4096:
                sentences = self._split_text_into_sentences(content_text, max_length=4096)
                # `with` guarantees the executor is shut down even if a
                # consumer abandons the generator mid-stream.
                with concurrent.futures.ThreadPoolExecutor(max_workers=min(3, len(sentences))) as executor:
                    futures = [
                        executor.submit(
                            client.audio.speech.with_streaming_response.create,
                            model=model,
                            response_format="mp3",
                            input=sentence,
                            voice=voice,
                        )
                        for sentence in sentences
                    ]
                    # Yield results in submission order to preserve sentence order.
                    for future in futures:
                        with future.result() as response:
                            yield from response.iter_bytes(1024)
            else:
                # Use the context manager so the HTTP response is always closed.
                with client.audio.speech.with_streaming_response.create(
                    model=model, voice=voice, response_format="mp3", input=content_text.strip()
                ) as response:
                    yield from response.iter_bytes(1024)
        except Exception as ex:
            raise InvokeBadRequestError(str(ex)) from ex

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        # Map SiliconFlow credentials onto the OpenAI-compatible client config.
        credentials["openai_api_base"] = "https://api.siliconflow.cn"
        credentials["openai_api_key"] = credentials["api_key"]