diff --git a/api/controllers/console/app/audio.py b/api/controllers/console/app/audio.py index d2c1891b65..77eaf136fc 100644 --- a/api/controllers/console/app/audio.py +++ b/api/controllers/console/app/audio.py @@ -88,7 +88,7 @@ class ChatMessageTextApi(Resource): response = AudioService.transcript_tts( tenant_id=app_model.tenant_id, text=request.form['text'], - voice=app_model.app_model_config.text_to_speech_dict.get('voice'), + voice=request.form['voice'] if request.form['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'), streaming=False ) diff --git a/api/controllers/console/explore/audio.py b/api/controllers/console/explore/audio.py index f957d38174..dc546ce0dd 100644 --- a/api/controllers/console/explore/audio.py +++ b/api/controllers/console/explore/audio.py @@ -85,7 +85,7 @@ class ChatTextApi(InstalledAppResource): response = AudioService.transcript_tts( tenant_id=app_model.tenant_id, text=request.form['text'], - voice=app_model.app_model_config.text_to_speech_dict.get('voice'), + voice=request.form['voice'] if request.form['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'), streaming=False ) return {'data': response.data.decode('latin1')} diff --git a/api/controllers/service_api/app/audio.py b/api/controllers/service_api/app/audio.py index 58ab56a292..60ca2171d5 100644 --- a/api/controllers/service_api/app/audio.py +++ b/api/controllers/service_api/app/audio.py @@ -87,7 +87,7 @@ class TextApi(Resource): tenant_id=app_model.tenant_id, text=args['text'], end_user=end_user, - voice=app_model.app_model_config.text_to_speech_dict.get('voice'), + voice=args['voice'] if args['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'), streaming=args['streaming'] ) diff --git a/api/controllers/web/audio.py b/api/controllers/web/audio.py index c628c16606..4e677ae288 100644 --- a/api/controllers/web/audio.py +++ b/api/controllers/web/audio.py @@ -84,7 +84,7 @@ class TextApi(WebApiResource): tenant_id=app_model.tenant_id, text=request.form['text'], end_user=end_user.external_user_id, - voice=app_model.app_model_config.text_to_speech_dict.get('voice'), + voice=request.form['voice'] if request.form['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'), streaming=False ) diff --git a/api/core/model_runtime/model_providers/openai/tts/tts.py b/api/core/model_runtime/model_providers/openai/tts/tts.py index b1718c063c..f5e2ec4b7c 100644 --- a/api/core/model_runtime/model_providers/openai/tts/tts.py +++ b/api/core/model_runtime/model_providers/openai/tts/tts.py @@ -34,7 +34,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel): :return: text translated to audio file """ audio_type = self._get_model_audio_type(model, credentials) - if not voice: + if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]: voice = self._get_model_default_voice(model, credentials) if streaming: return Response(stream_with_context(self._tts_invoke_streaming(model=model, diff --git a/api/core/model_runtime/model_providers/tongyi/tts/tts.py b/api/core/model_runtime/model_providers/tongyi/tts/tts.py index 6bd17684fe..937f469bdf 100644 --- a/api/core/model_runtime/model_providers/tongyi/tts/tts.py +++ b/api/core/model_runtime/model_providers/tongyi/tts/tts.py @@ -34,7 +34,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel): :return: text translated to audio file """ audio_type = self._get_model_audio_type(model, credentials) - if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials): + if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]: voice = self._get_model_default_voice(model, credentials) if streaming: return Response(stream_with_context(self._tts_invoke_streaming(model=model, diff --git a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx index 6bd40547ca..02cac061b4 100644 --- a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx +++ b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx @@ -40,6 +40,7 @@ const TextToSpeech: FC = () => { { languageInfo?.example && ( )} diff --git a/web/app/components/base/audio-btn/index.tsx b/web/app/components/base/audio-btn/index.tsx index c10755d3a1..6c9f3e253b 100644 --- a/web/app/components/base/audio-btn/index.tsx +++ b/web/app/components/base/audio-btn/index.tsx @@ -9,12 +9,14 @@ import { textToAudio } from '@/service/share' type AudioBtnProps = { value: string + voice?: string className?: string isAudition?: boolean } const AudioBtn = ({ value, + voice, className, isAudition, }: AudioBtnProps) => { @@ -27,13 +29,16 @@ const AudioBtn = ({ const pathname = usePathname() const removeCodeBlocks = (inputText: any) => { const codeBlockRegex = /```[\s\S]*?```/g - return inputText.replace(codeBlockRegex, '') + if (inputText) + return inputText.replace(codeBlockRegex, '') + return '' } const playAudio = async () => { const formData = new FormData() if (value !== '') { formData.append('text', removeCodeBlocks(value)) + formData.append('voice', removeCodeBlocks(voice)) let url = '' let isPublic = false diff --git a/web/app/components/base/chat/chat/answer/operation.tsx b/web/app/components/base/chat/chat/answer/operation.tsx index eb5dead657..8a791d82da 100644 --- a/web/app/components/base/chat/chat/answer/operation.tsx +++ b/web/app/components/base/chat/chat/answer/operation.tsx @@ -77,6 +77,7 @@ const Operation: FC = ({ {(!isOpeningStatement && config?.text_to_speech?.enabled) && ( )}