diff --git a/api/Dockerfile b/api/Dockerfile
index d060055941..ae4f7e82bb 100644
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -30,7 +30,7 @@ ENV TZ UTC
 WORKDIR /app/api
 
 RUN apt-get update \
-    && apt-get install -y --no-install-recommends bash curl wget vim nodejs \
+    && apt-get install -y --no-install-recommends bash curl wget vim nodejs ffmpeg \
     && apt-get autoremove \
     && rm -rf /var/lib/apt/lists/*
 
diff --git a/api/app.py b/api/app.py
index e46cb84bb8..caf4e8f459 100644
--- a/api/app.py
+++ b/api/app.py
@@ -124,6 +124,7 @@ def load_user_from_request(request_from_flask_login):
     else:
         return None
 
+
 @login_manager.unauthorized_handler
 def unauthorized_handler():
     """Handle unauthorized requests."""
diff --git a/api/controllers/console/app/audio.py b/api/controllers/console/app/audio.py
index ed8b36c00c..7eef2abc32 100644
--- a/api/controllers/console/app/audio.py
+++ b/api/controllers/console/app/audio.py
@@ -32,9 +32,10 @@ class ChatMessageAudioApi(Resource):
         file = request.files['file']
 
         try:
-            response = AudioService.transcript(
+            response = AudioService.transcript_asr(
                 tenant_id=app_model.tenant_id,
                 file=file,
+                prompt=app_model.app_model_config.pre_prompt
             )
 
             return response
@@ -62,6 +63,48 @@ class ChatMessageAudioApi(Resource):
         except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()
-
-api.add_resource(ChatMessageAudioApi, '/apps/<uuid:app_id>/audio-to-text')
\ No newline at end of file
+
+
+class ChatMessageTextApi(Resource):
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def post(self, app_id):
+        app_id = str(app_id)
+        app_model = _get_app(app_id, None)
+        try:
+            response = AudioService.transcript_tts(
+                tenant_id=app_model.tenant_id,
+                text=request.form['text'],
+                streaming=False
+            )
+
+            return {'data': response.data.decode('latin1')}
+        except services.errors.app_model_config.AppModelConfigBrokenError:
+            logging.exception("App model config broken.")
+            raise AppUnavailableError()
+        except NoAudioUploadedServiceError:
+            raise NoAudioUploadedError()
+        except AudioTooLargeServiceError as e:
+            raise AudioTooLargeError(str(e))
+        except UnsupportedAudioTypeServiceError:
+            raise UnsupportedAudioTypeError()
+        except ProviderNotSupportSpeechToTextServiceError:
+            raise ProviderNotSupportSpeechToTextError()
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except InvokeError as e:
+            raise CompletionRequestError(e.description)
+        except ValueError as e:
+            raise e
+        except Exception as e:
+            logging.exception("internal server error.")
+            raise InternalServerError()
+
+
+api.add_resource(ChatMessageAudioApi, '/apps/<uuid:app_id>/audio-to-text')
+api.add_resource(ChatMessageTextApi, '/apps/<uuid:app_id>/text-to-audio')
diff --git a/api/controllers/console/explore/audio.py b/api/controllers/console/explore/audio.py
index 00ae66e663..651cdf16b5 100644
--- a/api/controllers/console/explore/audio.py
+++ b/api/controllers/console/explore/audio.py
@@ -29,7 +29,7 @@ class ChatAudioApi(InstalledAppResource):
         file = request.files['file']
 
         try:
-            response = AudioService.transcript(
+            response = AudioService.transcript_asr(
                 tenant_id=app_model.tenant_id,
                 file=file,
             )
@@ -59,6 +59,48 @@ class ChatAudioApi(InstalledAppResource):
         except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()
-
-api.add_resource(ChatAudioApi, '/installed-apps/<uuid:installed_app_id>/audio-to-text', endpoint='installed_app_audio')
\ No newline at end of file
+
+
+class ChatTextApi(InstalledAppResource):
+    def post(self, installed_app):
+        app_model = installed_app.app
+        app_model_config: AppModelConfig = app_model.app_model_config
+
+        if not app_model_config.text_to_speech_dict['enabled']:
+            raise AppUnavailableError()
+
+        try:
+            response = AudioService.transcript_tts(
+                tenant_id=app_model.tenant_id,
+                text=request.form['text'],
+                streaming=False
+            )
+            return {'data': response.data.decode('latin1')}
+        except services.errors.app_model_config.AppModelConfigBrokenError:
+            logging.exception("App model config broken.")
+            raise AppUnavailableError()
+        except NoAudioUploadedServiceError:
+            raise NoAudioUploadedError()
+        except AudioTooLargeServiceError as e:
+            raise AudioTooLargeError(str(e))
+        except UnsupportedAudioTypeServiceError:
+            raise UnsupportedAudioTypeError()
+        except ProviderNotSupportSpeechToTextServiceError:
+            raise ProviderNotSupportSpeechToTextError()
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except InvokeError as e:
+            raise CompletionRequestError(e.description)
+        except ValueError as e:
+            raise e
+        except Exception as e:
+            logging.exception("internal server error.")
+            raise InternalServerError()
+
+
+api.add_resource(ChatAudioApi, '/installed-apps/<uuid:installed_app_id>/audio-to-text', endpoint='installed_app_audio')
+api.add_resource(ChatTextApi, '/installed-apps/<uuid:installed_app_id>/text-to-audio', endpoint='installed_app_text')
diff --git a/api/controllers/console/explore/parameter.py b/api/controllers/console/explore/parameter.py
index 0a76f9d58a..6bdfa34095 100644
--- a/api/controllers/console/explore/parameter.py
+++ b/api/controllers/console/explore/parameter.py
@@ -31,6 +31,7 @@ class AppParameterApi(InstalledAppResource):
         'suggested_questions': fields.Raw,
         'suggested_questions_after_answer': fields.Raw,
         'speech_to_text': fields.Raw,
+        'text_to_speech': fields.Raw,
         'retriever_resource': fields.Raw,
         'annotation_reply': fields.Raw,
         'more_like_this': fields.Raw,
@@ -51,6 +52,7 @@
         'suggested_questions': app_model_config.suggested_questions_list,
         'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict,
         'speech_to_text': app_model_config.speech_to_text_dict,
+        'text_to_speech': app_model_config.text_to_speech_dict,
         'retriever_resource': app_model_config.retriever_resource_dict,
         'annotation_reply': app_model_config.annotation_reply_dict,
         'more_like_this': app_model_config.more_like_this_dict,
diff --git a/api/controllers/service_api/app/app.py b/api/controllers/service_api/app/app.py
index 0be38d3083..aa4323a146 100644
--- a/api/controllers/service_api/app/app.py
+++ b/api/controllers/service_api/app/app.py
@@ -33,6 +33,7 @@ class AppParameterApi(AppApiResource):
         'suggested_questions': fields.Raw,
         'suggested_questions_after_answer': fields.Raw,
         'speech_to_text': fields.Raw,
+        'text_to_speech': fields.Raw,
         'retriever_resource': fields.Raw,
         'annotation_reply': fields.Raw,
         'more_like_this': fields.Raw,
@@ -52,6 +53,7 @@
         'suggested_questions': app_model_config.suggested_questions_list,
         'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict,
         'speech_to_text': app_model_config.speech_to_text_dict,
+        'text_to_speech': app_model_config.text_to_speech_dict,
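A quick sketch of how a client would exercise the route pair registered above. The host, app UUID, and auth header are placeholders (the console auth mechanism is outside this diff), and `requests` stands in for any HTTP client:

```python
import requests

# Placeholder values; the console API prefix and token scheme are assumptions.
BASE = 'http://localhost:5001/console/api'
APP_ID = '00000000-0000-0000-0000-000000000000'
HEADERS = {'Authorization': 'Bearer <console-token>'}

# audio-to-text: multipart upload, returns {"text": "..."}.
with open('question.mp3', 'rb') as f:
    r = requests.post(f'{BASE}/apps/{APP_ID}/audio-to-text',
                      headers=HEADERS, files={'file': f})
print(r.json()['text'])

# text-to-audio: form field in, latin1-encoded audio bytes out.
r = requests.post(f'{BASE}/apps/{APP_ID}/text-to-audio',
                  headers=HEADERS, data={'text': 'Hello world!'})
with open('answer.mp3', 'wb') as f:
    f.write(r.json()['data'].encode('latin1'))  # undo the server's latin1 decode
```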
'retriever_resource': app_model_config.retriever_resource_dict, 'annotation_reply': app_model_config.annotation_reply_dict, 'more_like_this': app_model_config.more_like_this_dict, diff --git a/api/controllers/service_api/app/audio.py b/api/controllers/service_api/app/audio.py index 17e9abdb55..3e642b69d3 100644 --- a/api/controllers/service_api/app/audio.py +++ b/api/controllers/service_api/app/audio.py @@ -10,6 +10,7 @@ from controllers.service_api.wraps import AppApiResource from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError from core.model_runtime.errors.invoke import InvokeError from flask import request +from flask_restful import reqparse from models.model import App, AppModelConfig from services.audio_service import AudioService from services.errors.audio import (AudioTooLargeServiceError, NoAudioUploadedServiceError, @@ -22,14 +23,15 @@ class AudioApi(AppApiResource): app_model_config: AppModelConfig = app_model.app_model_config if not app_model_config.speech_to_text_dict['enabled']: - raise AppUnavailableError() + raise AppUnavailableError() file = request.files['file'] try: - response = AudioService.transcript( + response = AudioService.transcript_asr( tenant_id=app_model.tenant_id, file=file, + end_user=end_user ) return response @@ -57,5 +59,49 @@ class AudioApi(AppApiResource): except Exception as e: logging.exception("internal server error.") raise InternalServerError() - -api.add_resource(AudioApi, '/audio-to-text') \ No newline at end of file + + +class TextApi(AppApiResource): + def post(self, app_model: App, end_user): + parser = reqparse.RequestParser() + parser.add_argument('text', type=str, required=True, nullable=False, location='json') + parser.add_argument('user', type=str, required=True, nullable=False, location='json') + args = parser.parse_args() + + try: + response = AudioService.transcript_tts( + tenant_id=app_model.tenant_id, + text=args['text'], + end_user=args['user'], + streaming=False + ) + + return response + except services.errors.app_model_config.AppModelConfigBrokenError: + logging.exception("App model config broken.") + raise AppUnavailableError() + except NoAudioUploadedServiceError: + raise NoAudioUploadedError() + except AudioTooLargeServiceError as e: + raise AudioTooLargeError(str(e)) + except UnsupportedAudioTypeServiceError: + raise UnsupportedAudioTypeError() + except ProviderNotSupportSpeechToTextServiceError: + raise ProviderNotSupportSpeechToTextError() + except ProviderTokenNotInitError as ex: + raise ProviderNotInitializeError(ex.description) + except QuotaExceededError: + raise ProviderQuotaExceededError() + except ModelCurrentlyNotSupportError: + raise ProviderModelCurrentlyNotSupportError() + except InvokeError as e: + raise CompletionRequestError(e.description) + except ValueError as e: + raise e + except Exception as e: + logging.exception("internal server error.") + raise InternalServerError() + + +api.add_resource(AudioApi, '/audio-to-text') +api.add_resource(TextApi, '/text-to-audio') diff --git a/api/controllers/web/app.py b/api/controllers/web/app.py index a51259f2e4..913f7fbee1 100644 --- a/api/controllers/web/app.py +++ b/api/controllers/web/app.py @@ -32,6 +32,7 @@ class AppParameterApi(WebApiResource): 'suggested_questions': fields.Raw, 'suggested_questions_after_answer': fields.Raw, 'speech_to_text': fields.Raw, + 'text_to_speech': fields.Raw, 'retriever_resource': fields.Raw, 'annotation_reply': fields.Raw, 'more_like_this': fields.Raw, @@ -51,6 +52,7 @@ class 
AppParameterApi(WebApiResource): 'suggested_questions': app_model_config.suggested_questions_list, 'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict, 'speech_to_text': app_model_config.speech_to_text_dict, + 'text_to_speech': app_model_config.text_to_speech_dict, 'retriever_resource': app_model_config.retriever_resource_dict, 'annotation_reply': app_model_config.annotation_reply_dict, 'more_like_this': app_model_config.more_like_this_dict, diff --git a/api/controllers/web/audio.py b/api/controllers/web/audio.py index edbe9b71b8..310374a256 100644 --- a/api/controllers/web/audio.py +++ b/api/controllers/web/audio.py @@ -28,7 +28,7 @@ class AudioApi(WebApiResource): file = request.files['file'] try: - response = AudioService.transcript( + response = AudioService.transcript_asr( tenant_id=app_model.tenant_id, file=file, ) @@ -59,4 +59,43 @@ class AudioApi(WebApiResource): logging.exception("internal server error.") raise InternalServerError() -api.add_resource(AudioApi, '/audio-to-text') \ No newline at end of file + +class TextApi(WebApiResource): + def post(self, app_model: App, end_user): + try: + response = AudioService.transcript_tts( + tenant_id=app_model.tenant_id, + text=request.form['text'], + end_user=end_user.external_user_id, + streaming=False + ) + + return {'data': response.data.decode('latin1')} + except services.errors.app_model_config.AppModelConfigBrokenError: + logging.exception("App model config broken.") + raise AppUnavailableError() + except NoAudioUploadedServiceError: + raise NoAudioUploadedError() + except AudioTooLargeServiceError as e: + raise AudioTooLargeError(str(e)) + except UnsupportedAudioTypeServiceError: + raise UnsupportedAudioTypeError() + except ProviderNotSupportSpeechToTextServiceError: + raise ProviderNotSupportSpeechToTextError() + except ProviderTokenNotInitError as ex: + raise ProviderNotInitializeError(ex.description) + except QuotaExceededError: + raise ProviderQuotaExceededError() + except ModelCurrentlyNotSupportError: + raise ProviderModelCurrentlyNotSupportError() + except InvokeError as e: + raise CompletionRequestError(e.description) + except ValueError as e: + raise e + except Exception as e: + logging.exception("internal server error.") + raise InternalServerError() + + +api.add_resource(AudioApi, '/audio-to-text') +api.add_resource(TextApi, '/text-to-audio') diff --git a/api/core/application_manager.py b/api/core/application_manager.py index 96ed0a41cb..86c0bc0d0d 100644 --- a/api/core/application_manager.py +++ b/api/core/application_manager.py @@ -555,6 +555,12 @@ class ApplicationManager: if 'enabled' in speech_to_text_dict and speech_to_text_dict['enabled']: properties['speech_to_text'] = True + # text to speech + text_to_speech_dict = copy_app_model_config_dict.get('text_to_speech') + if text_to_speech_dict: + if 'enabled' in text_to_speech_dict and text_to_speech_dict['enabled']: + properties['text_to_speech'] = True + # sensitive word avoidance sensitive_word_avoidance_dict = copy_app_model_config_dict.get('sensitive_word_avoidance') if sensitive_word_avoidance_dict: diff --git a/api/core/entities/application_entities.py b/api/core/entities/application_entities.py index 95a9d90f97..56172188e4 100644 --- a/api/core/entities/application_entities.py +++ b/api/core/entities/application_entities.py @@ -219,6 +219,7 @@ class AppOrchestrationConfigEntity(BaseModel): show_retrieve_source: bool = False more_like_this: bool = False speech_to_text: bool = False + text_to_speech: bool = False 
sensitive_word_avoidance: Optional[SensitiveWordAvoidanceEntity] = None
@@ -283,7 +284,6 @@ class ApplicationGenerateEntity(BaseModel):
     query: Optional[str] = None
     files: list[FileObj] = []
     user_id: str
-    # extras
     stream: bool
     invoke_from: InvokeFrom
diff --git a/api/core/model_manager.py b/api/core/model_manager.py
index e75f624f2e..20316510dd 100644
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@@ -12,6 +12,7 @@
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.model_runtime.model_providers.__base.moderation_model import ModerationModel
 from core.model_runtime.model_providers.__base.rerank_model import RerankModel
 from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
+from core.model_runtime.model_providers.__base.tts_model import TTSModel
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
 from core.provider_manager import ProviderManager
 
@@ -144,7 +145,7 @@ class ModelInstance:
             user=user
         )
 
-    def invoke_speech2text(self, file: IO[bytes], user: Optional[str] = None, **params) \
+    def invoke_speech2text(self, file: IO[bytes], user: Optional[str] = None) \
             -> str:
         """
         Invoke large language model
@@ -161,8 +162,29 @@
             model=self.model,
             credentials=self.credentials,
             file=file,
+            user=user
+        )
+
+    def invoke_tts(self, content_text: str, streaming: bool, user: Optional[str] = None) \
+            -> str:
+        """
+        Invoke text-to-speech model
+
+        :param content_text: text content to be converted to speech
+        :param user: unique user id
+        :param streaming: output is streaming
+        :return: audio generated for the given text
+        """
+        if not isinstance(self.model_type_instance, TTSModel):
+            raise Exception("Model type instance is not TTSModel")
+
+        self.model_type_instance = cast(TTSModel, self.model_type_instance)
+        return self.model_type_instance.invoke(
+            model=self.model,
+            credentials=self.credentials,
+            content_text=content_text,
             user=user,
-            **params
+            streaming=streaming
         )
diff --git a/api/core/model_runtime/entities/model_entities.py b/api/core/model_runtime/entities/model_entities.py
index 8ad9bd7206..23c492cedb 100644
--- a/api/core/model_runtime/entities/model_entities.py
+++ b/api/core/model_runtime/entities/model_entities.py
@@ -15,7 +15,7 @@ class ModelType(Enum):
     RERANK = "rerank"
     SPEECH2TEXT = "speech2text"
     MODERATION = "moderation"
-    # TTS = "tts"
+    TTS = "tts"
     # TEXT2IMG = "text2img"
 
     @classmethod
@@ -33,6 +33,8 @@
             return cls.RERANK
         elif origin_model_type == 'speech2text' or origin_model_type == cls.SPEECH2TEXT.value:
             return cls.SPEECH2TEXT
+        elif origin_model_type == 'tts' or origin_model_type == cls.TTS.value:
+            return cls.TTS
         elif origin_model_type == cls.MODERATION.value:
             return cls.MODERATION
         else:
@@ -52,6 +54,8 @@
             return 'reranking'
         elif self == self.SPEECH2TEXT:
             return 'speech2text'
+        elif self == self.TTS:
+            return 'tts'
         elif self == self.MODERATION:
             return 'moderation'
         else:
@@ -120,6 +124,10 @@ class ModelPropertyKey(Enum):
     FILE_UPLOAD_LIMIT = "file_upload_limit"
     SUPPORTED_FILE_EXTENSIONS = "supported_file_extensions"
     MAX_CHARACTERS_PER_CHUNK = "max_characters_per_chunk"
+    DEFAULT_VOICE = "default_voice"
+    WORD_LIMIT = "word_limit"
+    AUDIO_TYPE = "audio_type"
+    MAX_WORKERS = "max_workers"
 
 
 class ProviderModel(BaseModel):
diff --git a/api/core/model_runtime/model_providers/__base/tts_model.py b/api/core/model_runtime/model_providers/__base/tts_model.py
new file mode 100644
index 0000000000..c3f3b65fa4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/__base/tts_model.py
@@ -0,0 +1,42 @@
+from abc import abstractmethod
+from typing import Optional
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.model_providers.__base.ai_model import AIModel
+
+
+class TTSModel(AIModel):
+    """
+    Model class for text-to-speech model.
+    """
+    model_type: ModelType = ModelType.TTS
+
+    def invoke(self, model: str, credentials: dict, content_text: str, streaming: bool, user: Optional[str] = None):
+        """
+        Invoke text-to-speech model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param content_text: text content to be converted to speech
+        :param streaming: output is streaming
+        :param user: unique user id
+        :return: generated audio file
+        """
+        try:
+            return self._invoke(model=model, credentials=credentials, user=user,
+                                streaming=streaming, content_text=content_text)
+        except Exception as e:
+            raise self._transform_invoke_error(e)
+
+    @abstractmethod
+    def _invoke(self, model: str, credentials: dict, content_text: str, streaming: bool, user: Optional[str] = None):
+        """
+        Invoke text-to-speech model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param content_text: text content to be converted to speech
+        :param streaming: output is streaming
+        :param user: unique user id
+        :return: generated audio file
+        """
+        raise NotImplementedError
diff --git a/api/core/model_runtime/model_providers/openai/openai.yaml b/api/core/model_runtime/model_providers/openai/openai.yaml
index 02587576bf..3af99e107e 100644
--- a/api/core/model_runtime/model_providers/openai/openai.yaml
+++ b/api/core/model_runtime/model_providers/openai/openai.yaml
@@ -20,6 +20,7 @@ supported_model_types:
   - text-embedding
   - speech2text
   - moderation
+  - tts
 configurate_methods:
   - predefined-model
   - customizable-model
diff --git a/api/core/model_runtime/model_providers/openai/tts/__init__.py b/api/core/model_runtime/model_providers/openai/tts/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/openai/tts/tts-1-hd.yaml b/api/core/model_runtime/model_providers/openai/tts/tts-1-hd.yaml
new file mode 100644
index 0000000000..aa7ed537a4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openai/tts/tts-1-hd.yaml
@@ -0,0 +1,7 @@
+model: tts-1-hd
+model_type: tts
+model_properties:
+  default_voice: 'alloy'
+  word_limit: 120
+  audio_type: 'mp3'
+  max_workers: 5
diff --git a/api/core/model_runtime/model_providers/openai/tts/tts-1.yaml b/api/core/model_runtime/model_providers/openai/tts/tts-1.yaml
new file mode 100644
index 0000000000..96f54a7340
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openai/tts/tts-1.yaml
@@ -0,0 +1,7 @@
+model: tts-1
+model_type: tts
+model_properties:
+  default_voice: 'alloy'
+  word_limit: 120
+  audio_type: 'mp3'
+  max_workers: 5
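To make the contract concrete, here is a minimal sketch of what a third-party provider's TTS implementation could look like against the new `TTSModel` base class and the `model_properties` keys declared in the YAML files above. The provider name, endpoint, and credential key are hypothetical; only `TTSModel`, `ModelPropertyKey`, and `get_model_schema` come from this diff:

```python
from typing import Optional

import requests

from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.tts_model import TTSModel


class AcmeText2SpeechModel(TTSModel):
    """Hypothetical provider implementation of the TTS base class."""

    def _invoke(self, model: str, credentials: dict, content_text: str,
                streaming: bool, user: Optional[str] = None):
        # Read defaults declared in the model's YAML (default_voice, audio_type, ...).
        schema = self.get_model_schema(model, credentials)
        voice = schema.model_properties[ModelPropertyKey.DEFAULT_VOICE]

        # Call the provider's (made-up) synthesis endpoint and return raw audio bytes.
        resp = requests.post(
            'https://api.acme.example/v1/tts',
            headers={'Authorization': f"Bearer {credentials['api_key']}"},
            json={'model': model, 'voice': voice, 'input': content_text},
            timeout=60,
        )
        if resp.status_code != 200:
            raise InvokeBadRequestError(resp.text)
        return resp.content
```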
diff --git a/api/core/model_runtime/model_providers/openai/tts/tts.py b/api/core/model_runtime/model_providers/openai/tts/tts.py
new file mode 100644
index 0000000000..64e748ea28
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openai/tts/tts.py
@@ -0,0 +1,235 @@
+import uuid
+import hashlib
+import subprocess
+from io import BytesIO
+from typing import Any, Optional
+from functools import reduce
+from pydub import AudioSegment
+
+from core.model_runtime.entities.model_entities import ModelPropertyKey
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.errors.invoke import InvokeBadRequestError
+from core.model_runtime.model_providers.__base.tts_model import TTSModel
+from core.model_runtime.model_providers.openai._common import _CommonOpenAI
+
+from typing_extensions import Literal
+from flask import Response, stream_with_context
+from openai import OpenAI
+import concurrent.futures
+
+
+class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
+    """
+    Model class for OpenAI text-to-speech model.
+    """
+    def _invoke(self, model: str, credentials: dict, content_text: str, streaming: bool,
+                user: Optional[str] = None) -> Any:
+        """
+        _invoke text2speech model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param content_text: text content to be converted to speech
+        :param streaming: output is streaming
+        :param user: unique user id
+        :return: text converted to audio file
+        """
+        self._is_ffmpeg_installed()
+        audio_type = self._get_model_audio_type(model, credentials)
+        if streaming:
+            return Response(stream_with_context(self._tts_invoke_streaming(model=model,
+                                                                           credentials=credentials,
+                                                                           content_text=content_text,
+                                                                           user=user)),
+                            status=200, mimetype=f'audio/{audio_type}')
+        else:
+            return self._tts_invoke(model=model, credentials=credentials, content_text=content_text, user=user)
+
+    def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
+        """
+        validate credentials for text2speech model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param user: unique user id
+        :return: None
+        """
+        try:
+            self._tts_invoke(
+                model=model,
+                credentials=credentials,
+                content_text='Hello world!',
+                user=user
+            )
+        except Exception as ex:
+            raise CredentialsValidateFailedError(str(ex))
+
+    def _tts_invoke(self, model: str, credentials: dict, content_text: str, user: Optional[str] = None) -> Any:
+        """
+        _tts_invoke text2speech model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param content_text: text content to be converted to speech
+        :param user: unique user id
+        :return: text converted to audio file
+        """
+        audio_type = self._get_model_audio_type(model, credentials)
+        word_limit = self._get_model_word_limit(model, credentials)
+        max_workers = self._get_model_workers_limit(model, credentials)
+
+        try:
+            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
+            audio_bytes_list = []
+
+            # Create a thread pool and map the function to the list of sentences
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                futures = [executor.submit(self._process_sentence, sentence, model, credentials)
+                           for sentence in sentences]
+                for future in futures:
+                    try:
+                        audio_bytes_list.append(future.result())
+                    except Exception as ex:
+                        raise InvokeBadRequestError(str(ex))
+
+            audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type)
+                              for audio_bytes in audio_bytes_list if audio_bytes]
+            combined_segment = reduce(lambda x, y: x + y, audio_segments)
+            buffer: BytesIO = BytesIO()
+            combined_segment.export(buffer, format=audio_type)
+            buffer.seek(0)
+            return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}")
+        except Exception as ex:
+            raise InvokeBadRequestError(str(ex))
+
+    # Todo: To improve the streaming function
+    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
+                              user: Optional[str] = None) -> Any:
+        """
+        _tts_invoke_streaming text2speech model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param content_text: text content to be converted to speech
+        :param user: unique user id
+        :return: text converted to audio file
+        """
+        # transform credentials to kwargs for model instance
+        credentials_kwargs = self._to_credential_kwargs(credentials)
+        voice_name = self._get_model_voice(model, credentials)
+        word_limit = self._get_model_word_limit(model, credentials)
+        audio_type = self._get_model_audio_type(model, credentials)
+        tts_file_id = self._get_file_name(content_text)
+        file_path = f'storage/generate_files/{audio_type}/{tts_file_id}.{audio_type}'
+        try:
+            client = OpenAI(**credentials_kwargs)
+            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
+            for sentence in sentences:
+                response = client.audio.speech.create(model=model, voice=voice_name, input=sentence.strip())
+                response.stream_to_file(file_path)
+        except Exception as ex:
+            raise InvokeBadRequestError(str(ex))
+
+    def _get_model_voice(self, model: str, credentials: dict) -> Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]:
+        """
+        Get voice for given tts model
+
+        :param model: model name
+        :param credentials: model credentials
+        :return: voice
+        """
+        model_schema = self.get_model_schema(model, credentials)
+
+        if model_schema and ModelPropertyKey.DEFAULT_VOICE in model_schema.model_properties:
+            return model_schema.model_properties[ModelPropertyKey.DEFAULT_VOICE]
+
+    def _get_model_audio_type(self, model: str, credentials: dict) -> str:
+        """
+        Get audio type for given tts model
+
+        :param model: model name
+        :param credentials: model credentials
+        :return: audio type
+        """
+        model_schema = self.get_model_schema(model, credentials)
+
+        if model_schema and ModelPropertyKey.AUDIO_TYPE in model_schema.model_properties:
+            return model_schema.model_properties[ModelPropertyKey.AUDIO_TYPE]
+
+    def _get_model_word_limit(self, model: str, credentials: dict) -> int:
+        """
+        Get per-request word limit for given tts model
+        :return: word limit
+        """
+        model_schema = self.get_model_schema(model, credentials)
+
+        if model_schema and ModelPropertyKey.WORD_LIMIT in model_schema.model_properties:
+            return model_schema.model_properties[ModelPropertyKey.WORD_LIMIT]
+
+    def _get_model_workers_limit(self, model: str, credentials: dict) -> int:
+        """
+        Get max workers for given tts model
+        :return: max workers
+        """
+        model_schema = self.get_model_schema(model, credentials)
+
+        if model_schema and ModelPropertyKey.MAX_WORKERS in model_schema.model_properties:
+            return model_schema.model_properties[ModelPropertyKey.MAX_WORKERS]
+
+    @staticmethod
+    def _split_text_into_sentences(text: str, limit: int, delimiters=None):
+        if delimiters is None:
+            delimiters = set('。!?;\n')
+
+        buf = []
+        word_count = 0
+        for char in text:
+            buf.append(char)
+            if char in delimiters:
+                if word_count >= limit:
+                    yield ''.join(buf)
+                    buf = []
+                    word_count = 0
+                else:
+                    word_count += 1
+            else:
+                word_count += 1
+
+        if buf:
+            yield ''.join(buf)
+
+    @staticmethod
+    def _get_file_name(file_content: str) -> str:
+        hash_object = hashlib.sha256(file_content.encode())
+        hex_digest = hash_object.hexdigest()
+
+        namespace_uuid = uuid.UUID('a5da6ef9-b303-596f-8e88-bf8fa40f4b31')
+        unique_uuid = uuid.uuid5(namespace_uuid, hex_digest)
+        return str(unique_uuid)
+
+    def _process_sentence(self, sentence: str, model: str, credentials: dict):
+        """
+        _tts_invoke openai text2speech model api
+
+        :param model: model name
+        :param credentials: model credentials
+        :param sentence: text content to be converted to speech
+        :return: audio bytes for the sentence
+        """
+        # transform credentials to kwargs for model instance
+        credentials_kwargs = self._to_credential_kwargs(credentials)
+        voice_name = self._get_model_voice(model, credentials)
+
+        client = OpenAI(**credentials_kwargs)
+        response = client.audio.speech.create(model=model, voice=voice_name, input=sentence.strip())
+        # read once; a second response.read() would return empty bytes
+        audio_bytes = response.read()
+        if isinstance(audio_bytes, bytes):
+            return audio_bytes
+
+    @staticmethod
+    def _is_ffmpeg_installed():
+        try:
+            output = subprocess.check_output("ffmpeg -version", shell=True)
+            if "ffmpeg version" in output.decode("utf-8"):
+                return True
+            else:
+                raise InvokeBadRequestError("ffmpeg is not installed")
+        except Exception:
+            raise InvokeBadRequestError("ffmpeg is not installed")
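One behavioral note on `_split_text_into_sentences` above: it only cuts at a delimiter (。, !, ?, ;, or a newline — an ASCII period is not in the default set) once at least `limit` characters have accumulated, so short clauses get merged into a single synthesis request. A runnable trace, with a limit of 5 chosen purely for illustration:

```python
from core.model_runtime.model_providers.openai.tts.tts import OpenAIText2SpeechModel

text = '你好。我很好。今天天气不错。'
chunks = list(OpenAIText2SpeechModel._split_text_into_sentences(text=text, limit=5))
# The first clause is under the limit, so it merges with the next one:
print(chunks)  # ['你好。我很好。', '今天天气不错。']
```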
diff --git a/api/fields/app_fields.py b/api/fields/app_fields.py
index 9030b2fe4d..63e8f5b16a 100644
--- a/api/fields/app_fields.py
+++ b/api/fields/app_fields.py
@@ -19,6 +19,7 @@ model_config_fields = {
     'suggested_questions': fields.Raw(attribute='suggested_questions_list'),
     'suggested_questions_after_answer': fields.Raw(attribute='suggested_questions_after_answer_dict'),
     'speech_to_text': fields.Raw(attribute='speech_to_text_dict'),
+    'text_to_speech': fields.Raw(attribute='text_to_speech_dict'),
     'retriever_resource': fields.Raw(attribute='retriever_resource_dict'),
     'annotation_reply': fields.Raw(attribute='annotation_reply_dict'),
     'more_like_this': fields.Raw(attribute='more_like_this_dict'),
diff --git a/api/migrations/versions/b24be59fbb04_.py b/api/migrations/versions/b24be59fbb04_.py
new file mode 100644
index 0000000000..e19ea09e86
--- /dev/null
+++ b/api/migrations/versions/b24be59fbb04_.py
@@ -0,0 +1,32 @@
+"""empty message
+
+Revision ID: b24be59fbb04
+Revises: de95f5c77138
+Create Date: 2024-01-17 01:31:12.670556
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'b24be59fbb04'
+down_revision = 'de95f5c77138'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('app_model_configs', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('text_to_speech', sa.Text(), nullable=True))
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('app_model_configs', schema=None) as batch_op:
+        batch_op.drop_column('text_to_speech')
+
+    # ### end Alembic commands ###
diff --git a/api/models/model.py b/api/models/model.py
index f317113e8d..badaac9b57 100644
--- a/api/models/model.py
+++ b/api/models/model.py
@@ -146,6 +146,7 @@ class AppModelConfig(db.Model):
     suggested_questions = db.Column(db.Text)
     suggested_questions_after_answer = db.Column(db.Text)
     speech_to_text = db.Column(db.Text)
+    text_to_speech = db.Column(db.Text)
     more_like_this = db.Column(db.Text)
     model = db.Column(db.Text)
     user_input_form = db.Column(db.Text)
@@ -184,6 +185,11 @@
         return json.loads(self.speech_to_text) if self.speech_to_text \
             else {"enabled": False}
 
+    @property
+    def text_to_speech_dict(self) -> dict:
+        return json.loads(self.text_to_speech) if self.text_to_speech \
+            else {"enabled": False}
+
     @property
     def retriever_resource_dict(self) -> dict:
         return json.loads(self.retriever_resource) if self.retriever_resource \
@@ -263,6 +269,7 @@
         "suggested_questions": self.suggested_questions_list,
         "suggested_questions_after_answer": self.suggested_questions_after_answer_dict,
         "speech_to_text": self.speech_to_text_dict,
+        "text_to_speech": self.text_to_speech_dict,
         "retriever_resource": self.retriever_resource_dict,
         "annotation_reply": self.annotation_reply_dict,
         "more_like_this": self.more_like_this_dict,
@@ -289,6 +296,8 @@
         self.suggested_questions_after_answer = json.dumps(model_config['suggested_questions_after_answer'])
         self.speech_to_text = json.dumps(model_config['speech_to_text']) \
             if model_config.get('speech_to_text') else None
+        self.text_to_speech = json.dumps(model_config['text_to_speech']) \
+            if model_config.get('text_to_speech') else None
         self.more_like_this = json.dumps(model_config['more_like_this'])
         self.sensitive_word_avoidance = json.dumps(model_config['sensitive_word_avoidance']) \
             if model_config.get('sensitive_word_avoidance') else None
@@ -323,6 +332,7 @@
         suggested_questions=self.suggested_questions,
         suggested_questions_after_answer=self.suggested_questions_after_answer,
         speech_to_text=self.speech_to_text,
+        text_to_speech=self.text_to_speech,
         more_like_this=self.more_like_this,
         sensitive_word_avoidance=self.sensitive_word_avoidance,
         external_data_tools=self.external_data_tools,
diff --git a/api/services/account_service.py b/api/services/account_service.py
index 076edf3786..e14f46dfee 100644
--- a/api/services/account_service.py
+++ b/api/services/account_service.py
@@ -86,13 +86,13 @@ class AccountService:
 
         db.session.commit()
         return account
-    
+
     @staticmethod
     def get_account_jwt_token(account):
         payload = {
             "user_id": account.id,
             "exp": datetime.utcnow() + timedelta(days=30),
-            "iss": current_app.config['EDITION'], 
+            "iss": current_app.config['EDITION'],
             "sub": 'Console API Passport',
         }
 
@@ -345,7 +345,7 @@ class TenantService:
         }
         if action not in ['add', 'remove', 'update']:
             raise InvalidActionError("Invalid action.")
-        
+
         if member:
             if operator.id == member.id:
                 raise CannotOperateSelfError("Cannot operate self.")
@@ -546,10 +546,10 @@ class RegisterService:
             return None
 
         return {
-          'account': account,
-          'data': invitation_data,
-          'tenant': tenant,
-        }
+            'account': account,
+            'data': invitation_data,
+            'tenant': tenant,
+        }
 
     @classmethod
     def _get_invitation_by_token(cls, token: str, workspace_id: str, email: str) -> Optional[Dict[str, str]]:
diff --git a/api/services/app_model_config_service.py b/api/services/app_model_config_service.py
index f4e697f356..9c367a429e 100644
--- a/api/services/app_model_config_service.py
+++ b/api/services/app_model_config_service.py
@@ -95,6 +95,21 @@ class AppModelConfigService:
         if not isinstance(config["speech_to_text"]["enabled"], bool):
             raise ValueError("enabled in speech_to_text must be of boolean type")
 
+        # text_to_speech
+        if 'text_to_speech' not in config or not config["text_to_speech"]:
+            config["text_to_speech"] = {
+                "enabled": False
+            }
+
+        if not isinstance(config["text_to_speech"], dict):
+            raise ValueError("text_to_speech must be of dict type")
+
+        if "enabled" not in config["text_to_speech"] or not config["text_to_speech"]["enabled"]:
+            config["text_to_speech"]["enabled"] = False
+
+        if not isinstance(config["text_to_speech"]["enabled"], bool):
+            raise ValueError("enabled in text_to_speech must be of boolean type")
+
         # return retriever resource
         if 'retriever_resource' not in config or not config["retriever_resource"]:
             config["retriever_resource"] = {
@@ -317,6 +332,7 @@
         "suggested_questions": config["suggested_questions"],
         "suggested_questions_after_answer": config["suggested_questions_after_answer"],
         "speech_to_text": config["speech_to_text"],
+        "text_to_speech": config["text_to_speech"],
         "retriever_resource": config["retriever_resource"],
         "more_like_this": config["more_like_this"],
         "sensitive_word_avoidance": config["sensitive_word_avoidance"],
diff --git a/api/services/audio_service.py b/api/services/audio_service.py
index 8d9a1e3b89..44aac41880 100644
--- a/api/services/audio_service.py
+++ b/api/services/audio_service.py
@@ -1,22 +1,26 @@
 import io
+from typing import Optional
 
 from core.model_manager import ModelManager
 from core.model_runtime.entities.model_entities import ModelType
-from services.errors.audio import (AudioTooLargeServiceError, NoAudioUploadedServiceError,
-                                   ProviderNotSupportSpeechToTextServiceError, UnsupportedAudioTypeServiceError)
+from services.errors.audio import (AudioTooLargeServiceError,
+                                   NoAudioUploadedServiceError,
+                                   ProviderNotSupportTextToSpeechServiceError,
+                                   ProviderNotSupportSpeechToTextServiceError,
+                                   UnsupportedAudioTypeServiceError)
 from werkzeug.datastructures import FileStorage
 
 FILE_SIZE = 15
 FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
-ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
+ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm', 'amr']
 
 
 class AudioService:
     @classmethod
-    def transcript(cls, tenant_id: str, file: FileStorage):
+    def transcript_asr(cls, tenant_id: str, file: FileStorage, prompt: Optional[str] = None,
+                       end_user: Optional[str] = None):
+        # prompt is accepted as an optional transcription hint; it is not yet
+        # forwarded to the underlying speech2text invocation
         if file is None:
             raise NoAudioUploadedServiceError()
-        
+
         extension = file.mimetype
         if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
             raise UnsupportedAudioTypeServiceError()
@@ -33,8 +37,26 @@
             tenant_id=tenant_id,
             model_type=ModelType.SPEECH2TEXT
         )
+        if model_instance is None:
+            raise ProviderNotSupportSpeechToTextServiceError()
 
         buffer = io.BytesIO(file_content)
         buffer.name = 'temp.mp3'
 
-        return {"text": model_instance.invoke_speech2text(buffer)}
+        return {"text": model_instance.invoke_speech2text(file=buffer, user=end_user)}
+
+    @classmethod
+    def transcript_tts(cls, tenant_id: str, text: str, streaming: bool, end_user: Optional[str] = None):
+        model_manager = ModelManager()
+        model_instance = model_manager.get_default_model_instance(
+            tenant_id=tenant_id,
+            model_type=ModelType.TTS
+        )
+        if model_instance is None:
+            raise ProviderNotSupportTextToSpeechServiceError()
+
+        return model_instance.invoke_tts(content_text=text.strip(), user=end_user, streaming=streaming)
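Server-side callers now go through the `AudioService` entry points above rather than touching the model instance directly. A minimal sketch of the non-streaming TTS path — the tenant id is a placeholder (in a real handler it comes from the app model), and note the streaming path is still marked as a TODO in the OpenAI implementation:

```python
from services.audio_service import AudioService

# Placeholder tenant id for illustration only.
TENANT_ID = '00000000-0000-0000-0000-000000000000'

# Blocking call: returns a Flask Response whose body is the stitched-together audio.
response = AudioService.transcript_tts(
    tenant_id=TENANT_ID,
    text='Hello world!',
    streaming=False,
)
with open('hello.mp3', 'wb') as f:
    f.write(response.data)
```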
diff --git a/api/services/errors/audio.py b/api/services/errors/audio.py
index 8c6508936d..091ce36588 100644
--- a/api/services/errors/audio.py
+++ b/api/services/errors/audio.py
@@ -9,5 +9,10 @@ class AudioTooLargeServiceError(Exception):
 class UnsupportedAudioTypeServiceError(Exception):
     pass
 
+
 class ProviderNotSupportSpeechToTextServiceError(Exception):
-    pass
\ No newline at end of file
+    pass
+
+
+class ProviderNotSupportTextToSpeechServiceError(Exception):
+    pass
diff --git a/web/app/components/app/chat/answer/index.tsx b/web/app/components/app/chat/answer/index.tsx
index da2bf65e15..7c13a17f66 100644
--- a/web/app/components/app/chat/answer/index.tsx
+++ b/web/app/components/app/chat/answer/index.tsx
@@ -13,6 +13,7 @@ import MoreInfo from '../more-info'
 import CopyBtn from '../copy-btn'
 import Thought from '../thought'
 import Citation from '../citation'
+import AudioBtn from '@/app/components/base/audio-btn'
 import { randomString } from '@/utils'
 import type { MessageRating } from '@/models/log'
 import Tooltip from '@/app/components/base/tooltip'
@@ -53,6 +54,7 @@ export type IAnswerProps = {
   dataSets?: DataSet[]
   isShowCitation?: boolean
   isShowCitationHitInfo?: boolean
+  isShowTextToSpeech?: boolean
   // Annotation props
   supportAnnotation?: boolean
   appId?: string
@@ -75,6 +77,7 @@ const Answer: FC<IAnswerProps> = ({
   citation,
   isShowCitation,
   isShowCitationHitInfo = false,
+  isShowTextToSpeech,
   supportAnnotation,
   appId,
   question,
@@ -322,7 +325,13 @@
             className={cn(s.copyBtn, 'mr-1')}
           />
         )}
-        {(supportAnnotation && !item.isOpeningStatement) && (
+        {!item.isOpeningStatement && isShowTextToSpeech && (
+          <AudioBtn
+            value={item.content}
+            className={cn(s.playBtn, 'mr-1')}
+          />
+        )}
+        {(!item.isOpeningStatement && supportAnnotation) && (
 [...]
diff --git a/web/app/components/app/chat/index.tsx b/web/app/components/app/chat/index.tsx
@@ ... @@ const Chat: FC<IChatProps> = ({
   isShowSuggestion,
   suggestionList,
   isShowSpeechToText,
+  isShowTextToSpeech,
   isShowCitation,
   answerIcon,
   isShowConfigElem,
@@ -222,6 +224,7 @@
             dataSets={dataSets}
             isShowCitation={isShowCitation}
             isShowCitationHitInfo={isShowCitationHitInfo}
+            isShowTextToSpeech={isShowTextToSpeech}
             supportAnnotation={supportAnnotation}
             appId={appId}
             question={chatList[index - 1]?.content}
diff --git a/web/app/components/app/chat/style.module.css b/web/app/components/app/chat/style.module.css
index 46a4672625..0e0d67e35a 100644
--- a/web/app/components/app/chat/style.module.css
+++ b/web/app/components/app/chat/style.module.css
@@ -39,6 +39,7 @@
 }
 
 .copyBtn,
+.playBtn,
 .annotationBtn {
   display: none;
 }
@@ -65,6 +66,7 @@
 }
 
 .answerWrap:hover .copyBtn,
+.answerWrap:hover .playBtn,
 .answerWrap:hover .annotationBtn {
   display: block;
 }
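A note on the `{'data': response.data.decode('latin1')}` pattern used by the text-to-audio controllers earlier in this diff: it works because latin1 maps every byte value 0–255 to the same code point, so arbitrary MP3 bytes survive a JSON round trip losslessly, and a client only has to re-encode. A small self-contained check:

```python
import json

audio = bytes(range(256))  # stand-in for arbitrary MP3 bytes
wire = json.dumps({'data': audio.decode('latin1')})  # what the API returns
assert json.loads(wire)['data'].encode('latin1') == audio  # lossless round trip
```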
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/citations-and-attributions-preview@2x.png b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/citations-and-attributions-preview@2x.png
new file mode 100644
index 0000000000..ef066204ca
Binary files /dev/null and b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/citations-and-attributions-preview@2x.png differ
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/conversation-opener-preview@2x.png b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/conversation-opener-preview@2x.png
new file mode 100644
index 0000000000..15639d500d
Binary files /dev/null and b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/conversation-opener-preview@2x.png differ
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/more-like-this-preview@2x.png b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/more-like-this-preview@2x.png
new file mode 100644
index 0000000000..62671c5889
Binary files /dev/null and b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/more-like-this-preview@2x.png differ
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/next-question-suggestion-preview@2x.png b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/next-question-suggestion-preview@2x.png
new file mode 100644
index 0000000000..758708ff15
Binary files /dev/null and b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/next-question-suggestion-preview@2x.png differ
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/opening-suggestion-preview@2x.png b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/opening-suggestion-preview@2x.png
new file mode 100644
index 0000000000..8bb4add322
Binary files /dev/null and b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/opening-suggestion-preview@2x.png differ
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/speech-to-text-preview@2x.png b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/speech-to-text-preview@2x.png
new file mode 100644
index 0000000000..68df3983dc
Binary files /dev/null and b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/speech-to-text-preview@2x.png differ
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-assistant@2x.png b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-assistant@2x.png
new file mode 100644
index 0000000000..91396e72c7
Binary files /dev/null and b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-assistant@2x.png differ
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-completion@2x.png b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-completion@2x.png
new file mode 100644
index 0000000000..7558e78bd9
Binary files /dev/null and b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-completion@2x.png differ
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/style.module.css b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/style.module.css
index 63139bda56..20f0534744 100644
--- a/web/app/components/app/configuration/config/feature/choose-feature/feature-item/style.module.css
+++ b/web/app/components/app/configuration/config/feature/choose-feature/feature-item/style.module.css
@@ -29,6 +29,11 @@
   background-image: url(./preview-imgs/speech-to-text.svg);
 }
 
+.textToSpeechPreview {
+  @apply shadow-lg rounded-lg;
+  background-image: url(./preview-imgs/text-to-audio-preview-assistant@2x.png);
+}
+
 .citationPreview {
   background-image: url(./preview-imgs/citation.svg);
-}
\ No newline at end of file
+}
diff --git a/web/app/components/app/configuration/config/feature/choose-feature/index.tsx b/web/app/components/app/configuration/config/feature/choose-feature/index.tsx
index 0a9814b73d..8364f9529d 100644
--- a/web/app/components/app/configuration/config/feature/choose-feature/index.tsx
+++ b/web/app/components/app/configuration/config/feature/choose-feature/index.tsx
@@ -7,7 +7,7 @@ import MoreLikeThisIcon from '../../../base/icons/more-like-this-icon'
 import FeatureItem from './feature-item'
 import Modal from '@/app/components/base/modal'
 import SuggestedQuestionsAfterAnswerIcon from '@/app/components/app/configuration/base/icons/suggested-questions-after-answer-icon'
-import { Microphone01 } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
+import { Microphone01, Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
 import { Citations } from '@/app/components/base/icons/src/vender/solid/editor'
 import { FileSearch02 } from '@/app/components/base/icons/src/vender/solid/files'
 import { MessageFast } from '@/app/components/base/icons/src/vender/solid/communication'
@@ -16,6 +16,7 @@ type IConfig = {
   moreLikeThis: boolean
   suggestedQuestionsAfterAnswer: boolean
   speechToText: boolean
+  textToSpeech: boolean
   citation: boolean
   moderation: boolean
   annotation: boolean
@@ -27,6 +28,7 @@ export type IChooseFeatureProps = {
   config: IConfig
   isChatApp: boolean
   onChange: (key: string, value: boolean) => void
+  showTextToSpeechItem?: boolean
   showSpeechToTextItem?: boolean
 }
 
@@ -42,6 +44,7 @@ const ChooseFeature: FC<IChooseFeatureProps> = ({
   isChatApp,
   config,
   onChange,
+  showTextToSpeechItem,
   showSpeechToTextItem,
 }) => {
   const { t } = useTranslation()
@@ -78,6 +81,18 @@
             value={config.suggestedQuestionsAfterAnswer}
             onChange={value => onChange('suggestedQuestionsAfterAnswer', value)}
           />
+          {
+            showTextToSpeechItem && (
+              <FeatureItem
+                icon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
+                previewImgClassName='textToSpeechPreview'
+                title={t('appDebug.feature.textToSpeech.title')}
+                description={t('appDebug.feature.textToSpeech.description')}
+                value={config.textToSpeech}
+                onChange={value => onChange('textToSpeech', value)}
+              />
+            )
+          }
           {
            showSpeechToTextItem && (
 [...]
             value={config.moreLikeThis}
             onChange={value => onChange('moreLikeThis', value)}
           />
+          {
+            showTextToSpeechItem && (
+              <FeatureItem
+                icon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
+                previewImgClassName='textToSpeechPreview'
+                title={t('appDebug.feature.textToSpeech.title')}
+                description={t('appDebug.feature.textToSpeech.description')}
+                value={config.textToSpeech}
+                onChange={value => onChange('textToSpeech', value)}
+              />
+            )
+          }
       )}
diff --git a/web/app/components/app/configuration/config/feature/use-feature.tsx b/web/app/components/app/configuration/config/feature/use-feature.tsx
index 5ec0d8af02..190c50eab5 100644
--- a/web/app/components/app/configuration/config/feature/use-feature.tsx
+++ b/web/app/components/app/configuration/config/feature/use-feature.tsx
@@ -9,6 +9,8 @@ function useFeature({ setSuggestedQuestionsAfterAnswer, speechToText, setSpeechToText, + textToSpeech, + setTextToSpeech, citation, setCitation, annotation, @@ -24,6 +26,8 @@ function useFeature({ setSuggestedQuestionsAfterAnswer: (suggestedQuestionsAfterAnswer: boolean) => void speechToText: boolean setSpeechToText: (speechToText: boolean) => void + textToSpeech: boolean + setTextToSpeech: (textToSpeech: boolean) => void citation: boolean setCitation: (citation: boolean) => void annotation: boolean @@ -48,6 +52,7 @@ function useFeature({ moreLikeThis, suggestedQuestionsAfterAnswer, speechToText, + textToSpeech, citation, annotation, moderation, @@ -69,6 +74,9 @@ function useFeature({ case 'speechToText': setSpeechToText(value) break + case 'textToSpeech': + setTextToSpeech(value) + break case 'citation': setCitation(value) break diff --git a/web/app/components/app/configuration/config/index.tsx b/web/app/components/app/configuration/config/index.tsx index f1eb9fd703..e9d7d3eef5 100644 --- a/web/app/components/app/configuration/config/index.tsx +++ b/web/app/components/app/configuration/config/index.tsx @@ -19,7 +19,7 @@ import AdvancedModeWaring from '@/app/components/app/configuration/prompt-mode/a import ConfigContext from '@/context/debug-configuration' import ConfigPrompt from '@/app/components/app/configuration/config-prompt' import ConfigVar from '@/app/components/app/configuration/config-var' -import { type CitationConfig, type ModelConfig, type ModerationConfig, type MoreLikeThisConfig, PromptMode, type PromptVariable, type SpeechToTextConfig, type SuggestedQuestionsAfterAnswerConfig } from '@/models/debug' +import { type CitationConfig, type ModelConfig, type ModerationConfig, type MoreLikeThisConfig, PromptMode, type PromptVariable, type SpeechToTextConfig, type SuggestedQuestionsAfterAnswerConfig, type TextToSpeechConfig } from '@/models/debug' import { AppType, ModelModeType } from '@/types/app' import { useModalContext } from '@/context/modal-context' import ConfigParamModal from '@/app/components/app/configuration/toolbox/annotation/config-param-modal' @@ -51,6 +51,8 @@ const Config: FC = () => { setSuggestedQuestionsAfterAnswerConfig, speechToTextConfig, setSpeechToTextConfig, + textToSpeechConfig, + setTextToSpeechConfig, citationConfig, setCitationConfig, annotationConfig, @@ -60,6 +62,7 @@ const Config: FC = () => { } = useContext(ConfigContext) const isChatApp = mode === AppType.chat const { data: speech2textDefaultModel } = useDefaultModel(4) + const { data: text2speechDefaultModel } = useDefaultModel(5) const { setShowModerationSettingModal } = useModalContext() const promptTemplate = modelConfig.configs.prompt_template @@ -111,6 +114,12 @@ const Config: FC = () => { draft.enabled = value })) }, + textToSpeech: textToSpeechConfig.enabled, + setTextToSpeech: (value) => { + setTextToSpeechConfig(produce(textToSpeechConfig, (draft: TextToSpeechConfig) => { + draft.enabled = value + })) + }, citation: citationConfig.enabled, setCitation: (value) => { setCitationConfig(produce(citationConfig, (draft: CitationConfig) => { @@ -173,7 +182,7 @@ const Config: FC = () => { setAnnotationConfig, }) - const hasChatConfig = isChatApp && (featureConfig.openingStatement || featureConfig.suggestedQuestionsAfterAnswer || (featureConfig.speechToText && !!speech2textDefaultModel) || featureConfig.citation) + const hasChatConfig = isChatApp && (featureConfig.openingStatement || featureConfig.suggestedQuestionsAfterAnswer || (featureConfig.speechToText && 
!!speech2textDefaultModel) || featureConfig.citation)
+    const hasChatConfig = isChatApp && (featureConfig.openingStatement || featureConfig.suggestedQuestionsAfterAnswer || (featureConfig.speechToText && !!speech2textDefaultModel) || (featureConfig.textToSpeech && !!text2speechDefaultModel) || featureConfig.citation)
     const hasToolbox = moderationConfig.enabled || featureConfig.annotation
 
     const wrapRef = useRef(null)
@@ -207,6 +216,7 @@
               config={featureConfig}
               onChange={handleFeatureChange}
               showSpeechToTextItem={!!speech2textDefaultModel}
+              showTextToSpeechItem={!!text2speechDefaultModel}
             />
           )}
@@ -255,16 +265,21 @@
                 }
               }
               isShowSuggestedQuestionsAfterAnswer={featureConfig.suggestedQuestionsAfterAnswer}
+              isShowTextToSpeech={featureConfig.textToSpeech && !!text2speechDefaultModel}
               isShowSpeechText={featureConfig.speechToText && !!speech2textDefaultModel}
               isShowCitation={featureConfig.citation}
             />
           )
         }
 
-        {/* TextnGeneration config */}
-        {moreLikeThisConfig.enabled && (
-          <ExperienceEnchanceGroup />
-        )}
+        {/* Text generation config */}
+        {
+          !hasChatConfig && (
+            <ExperienceEnchanceGroup
+              isShowTextToSpeech={featureConfig.textToSpeech && !!text2speechDefaultModel}
+              isShowMoreLike={moreLikeThisConfig.enabled}
+            />
+          )
+        }
 
         {/* Toolbox */}
         {
diff --git a/web/app/components/app/configuration/debug/index.tsx b/web/app/components/app/configuration/debug/index.tsx
index 733f596333..8a8d089f37 100644
--- a/web/app/components/app/configuration/debug/index.tsx
+++ b/web/app/components/app/configuration/debug/index.tsx
@@ -56,6 +56,7 @@ const Debug: FC = ({
     suggestedQuestions,
     suggestedQuestionsAfterAnswerConfig,
     speechToTextConfig,
+    textToSpeechConfig,
     citationConfig,
     moderationConfig,
     moreLikeThisConfig,
@@ -73,6 +74,7 @@
     annotationConfig,
   } = useContext(ConfigContext)
   const { data: speech2textDefaultModel } = useDefaultModel(4)
+  const { data: text2speechDefaultModel } = useDefaultModel(5)
   const [chatList, setChatList, getChatList] = useGetState([])
   const chatListDomRef = useRef(null)
   const { data: fileUploadConfigResponse } = useSWR({ url: '/files/upload' }, fetchFileUploadConfig)
@@ -233,6 +235,9 @@
       setChatList(newListWithAnswer)
     }
     const postModelConfig: BackendModelConfig = {
+      text_to_speech: {
+        enabled: false,
+      },
       pre_prompt: !isAdvancedMode ? modelConfig.configs.prompt_template : '',
       prompt_type: promptMode,
       chat_prompt_config: {},
@@ -514,6 +519,9 @@
     const contextVar = modelConfig.configs.prompt_variables.find(item => item.is_context_var)?.key
 
     const postModelConfig: BackendModelConfig = {
+      text_to_speech: {
+        enabled: false,
+      },
       pre_prompt: !isAdvancedMode ? modelConfig.configs.prompt_template : '',
       prompt_type: promptMode,
       chat_prompt_config: {},
@@ -657,6 +665,7 @@
             isShowSuggestion={doShowSuggestion}
             suggestionList={suggestQuestions}
             isShowSpeechToText={speechToTextConfig.enabled && !!speech2textDefaultModel}
+            isShowTextToSpeech={textToSpeechConfig.enabled && !!text2speechDefaultModel}
             isShowCitation={citationConfig.enabled}
             isShowCitationHitInfo
             isShowPromptLog
@@ -682,6 +691,7 @@
             className="mt-2"
             content={completionRes}
             isLoading={!completionRes && isResponsing}
+            isShowTextToSpeech={textToSpeechConfig.enabled && !!text2speechDefaultModel}
             isResponsing={isResponsing}
             isInstalledApp={false}
             messageId={messageId}
diff --git a/web/app/components/app/configuration/features/chat-group/index.tsx b/web/app/components/app/configuration/features/chat-group/index.tsx
index 9b61f2f082..fd3cfa3a68 100644
--- a/web/app/components/app/configuration/features/chat-group/index.tsx
+++ b/web/app/components/app/configuration/features/chat-group/index.tsx
@@ -7,6 +7,7 @@ import type { IOpeningStatementProps } from './opening-statement'
 import OpeningStatement from './opening-statement'
 import SuggestedQuestionsAfterAnswer from './suggested-questions-after-answer'
 import SpeechToText from './speech-to-text'
+import TextToSpeech from './text-to-speech'
 import Citation from './citation'
 /*
  * Include
@@ -19,6 +20,7 @@ type ChatGroupProps = {
   openingStatementConfig: IOpeningStatementProps
   isShowSuggestedQuestionsAfterAnswer: boolean
   isShowSpeechText: boolean
+  isShowTextToSpeech: boolean
   isShowCitation: boolean
 }
 const ChatGroup: FC<ChatGroupProps> = ({
@@ -26,6 +28,7 @@
   openingStatementConfig,
   isShowSuggestedQuestionsAfterAnswer,
   isShowSpeechText,
+  isShowTextToSpeech,
   isShowCitation,
 }) => {
   const { t } = useTranslation()
@@ -40,6 +43,11 @@
       {isShowSuggestedQuestionsAfterAnswer && (
         <SuggestedQuestionsAfterAnswer />
       )}
+      {
+        isShowTextToSpeech && (
+          <TextToSpeech />
+        )
+      }
       {
         isShowSpeechText && (
           <SpeechToText />
         )
diff --git a/web/app/components/app/configuration/features/chat-group/speech-to-text/index.tsx b/web/app/components/app/configuration/features/chat-group/speech-to-text/index.tsx
index 913765bce3..e452b38971 100644
--- a/web/app/components/app/configuration/features/chat-group/speech-to-text/index.tsx
+++ b/web/app/components/app/configuration/features/chat-group/speech-to-text/index.tsx
@@ -4,7 +4,7 @@ import { useTranslation } from 'react-i18next'
 import Panel from '@/app/components/app/configuration/base/feature-panel'
 import { Microphone01 } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
 
-const SuggestedQuestionsAfterAnswer: FC = () => {
+const SpeechToTextConfig: FC = () => {
   const { t } = useTranslation()
 
   return (
@@ -22,4 +22,4 @@
     />
   )
 }
-export default React.memo(SuggestedQuestionsAfterAnswer)
+export default React.memo(SpeechToTextConfig)
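As the debug panel above shows, the web app now always posts a `text_to_speech` block with the model config, and the backend validator (see the `app_model_config_service.py` hunk earlier) backfills it when a stored config predates the migration. A standalone helper mirroring that normalization rule — hypothetical, for illustration only:

```python
def normalize_text_to_speech(config: dict) -> dict:
    # Mirrors the checks added in app_model_config_service.py:
    # a missing or falsy block becomes {'enabled': False}, and
    # 'enabled' must be a boolean when present.
    tts = config.get('text_to_speech') or {'enabled': False}
    if not isinstance(tts, dict):
        raise ValueError('text_to_speech must be of dict type')
    if not isinstance(tts.get('enabled', False), bool):
        raise ValueError('enabled in text_to_speech must be of boolean type')
    tts.setdefault('enabled', False)
    config['text_to_speech'] = tts
    return config


assert normalize_text_to_speech({})['text_to_speech'] == {'enabled': False}
assert normalize_text_to_speech({'text_to_speech': {'enabled': True}})['text_to_speech'] == {'enabled': True}
```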
= () => { + const { t } = useTranslation() + + return ( + +
{t('appDebug.feature.textToSpeech.title')}
+ + } + headerIcon={} + headerRight={ +
{t('appDebug.feature.textToSpeech.resDes')}
+ } + noBodySpacing + /> + ) +} +export default React.memo(TextToSpeech) diff --git a/web/app/components/app/configuration/features/experience-enchance-group/index.tsx b/web/app/components/app/configuration/features/experience-enchance-group/index.tsx index ee3f3b9ff6..6902a17468 100644 --- a/web/app/components/app/configuration/features/experience-enchance-group/index.tsx +++ b/web/app/components/app/configuration/features/experience-enchance-group/index.tsx @@ -3,19 +3,40 @@ import type { FC } from 'react' import React from 'react' import { useTranslation } from 'react-i18next' import GroupName from '../../base/group-name' +import TextToSpeech from '../chat-group/text-to-speech' import MoreLikeThis from './more-like-this' /* * Include * 1. More like this */ -const ExperienceEnchanceGroup: FC = () => { + +type ExperienceGroupProps = { + isShowTextToSpeech: boolean + isShowMoreLike: boolean +} + +const ExperienceEnchanceGroup: FC = ({ + isShowTextToSpeech, + isShowMoreLike, +}) => { const { t } = useTranslation() return (
- - + +
+ { + isShowMoreLike && ( + + ) + } + { + isShowTextToSpeech && ( + + ) + } +
) } diff --git a/web/app/components/app/configuration/index.tsx b/web/app/components/app/configuration/index.tsx index fb1e31e083..8b8bdd7c3c 100644 --- a/web/app/components/app/configuration/index.tsx +++ b/web/app/components/app/configuration/index.tsx @@ -91,6 +91,9 @@ const Configuration: FC = () => { const [speechToTextConfig, setSpeechToTextConfig] = useState({ enabled: false, }) + const [textToSpeechConfig, setTextToSpeechConfig] = useState({ + enabled: false, + }) const [citationConfig, setCitationConfig] = useState({ enabled: false, }) @@ -140,6 +143,7 @@ const Configuration: FC = () => { more_like_this: null, suggested_questions_after_answer: null, speech_to_text: null, + text_to_speech: null, retriever_resource: null, sensitive_word_avoidance: null, dataSets: [], @@ -232,6 +236,9 @@ const Configuration: FC = () => { setSpeechToTextConfig(modelConfig.speech_to_text || { enabled: false, }) + setTextToSpeechConfig(modelConfig.text_to_speech || { + enabled: false, + }) setCitationConfig(modelConfig.retriever_resource || { enabled: false, }) @@ -396,6 +403,9 @@ const Configuration: FC = () => { if (modelConfig.speech_to_text) setSpeechToTextConfig(modelConfig.speech_to_text) + if (modelConfig.text_to_speech) + setTextToSpeechConfig(modelConfig.text_to_speech) + if (modelConfig.retriever_resource) setCitationConfig(modelConfig.retriever_resource) @@ -444,6 +454,7 @@ const Configuration: FC = () => { more_like_this: modelConfig.more_like_this, suggested_questions_after_answer: modelConfig.suggested_questions_after_answer, speech_to_text: modelConfig.speech_to_text, + text_to_speech: modelConfig.text_to_speech, retriever_resource: modelConfig.retriever_resource, sensitive_word_avoidance: modelConfig.sensitive_word_avoidance, external_data_tools: modelConfig.external_data_tools, @@ -559,6 +570,7 @@ const Configuration: FC = () => { more_like_this: moreLikeThisConfig, suggested_questions_after_answer: suggestedQuestionsAfterAnswerConfig, speech_to_text: speechToTextConfig, + text_to_speech: textToSpeechConfig, retriever_resource: citationConfig, sensitive_word_avoidance: moderationConfig, agent_mode: { @@ -593,6 +605,7 @@ const Configuration: FC = () => { draft.more_like_this = moreLikeThisConfig draft.suggested_questions_after_answer = suggestedQuestionsAfterAnswerConfig draft.speech_to_text = speechToTextConfig + draft.text_to_speech = textToSpeechConfig draft.retriever_resource = citationConfig draft.dataSets = dataSets }) @@ -662,6 +675,8 @@ const Configuration: FC = () => { setSuggestedQuestionsAfterAnswerConfig, speechToTextConfig, setSpeechToTextConfig, + textToSpeechConfig, + setTextToSpeechConfig, citationConfig, setCitationConfig, annotationConfig, diff --git a/web/app/components/app/log/list.tsx b/web/app/components/app/log/list.tsx index 6e3ba82982..0346d4d0ce 100644 --- a/web/app/components/app/log/list.tsx +++ b/web/app/components/app/log/list.tsx @@ -297,6 +297,7 @@ function DetailPanel item.from_source === 'admin')} onFeedback={feedback => onFeedback(detail.message.id, feedback)} supportAnnotation + isShowTextToSpeech appId={appDetail?.id} varList={varList} /> @@ -310,6 +311,7 @@ function DetailPanel diff --git a/web/app/components/app/text-generate/item/index.tsx b/web/app/components/app/text-generate/item/index.tsx index d8967d2992..cd3e7cd331 100644 --- a/web/app/components/app/text-generate/item/index.tsx +++ b/web/app/components/app/text-generate/item/index.tsx @@ -12,6 +12,7 @@ import PromptLog from '@/app/components/app/chat/log' import { Markdown } from 
'@/app/components/base/markdown' import Loading from '@/app/components/base/loading' import Toast from '@/app/components/base/toast' +import AudioBtn from '@/app/components/base/audio-btn' import type { Feedbacktype } from '@/app/components/app/chat/type' import { fetchMoreLikeThis, updateFeedback } from '@/service/share' import { Clipboard, File02 } from '@/app/components/base/icons/src/vender/line/files' @@ -45,6 +46,7 @@ export type IGenerationItemProps = { controlClearMoreLikeThis?: number supportFeedback?: boolean supportAnnotation?: boolean + isShowTextToSpeech?: boolean appId?: string varList?: { label: string; value: string | number | object }[] } @@ -90,6 +92,7 @@ const GenerationItem: FC = ({ controlClearMoreLikeThis, supportFeedback, supportAnnotation, + isShowTextToSpeech, appId, varList, }) => { @@ -124,6 +127,7 @@ const GenerationItem: FC = ({ isLoading: isQuerying, feedback: childFeedback, onSave, + isShowTextToSpeech, isMobile, isInstalledApp, installedAppId, @@ -366,8 +370,17 @@ const GenerationItem: FC = ({
{ratingContent}
- ) - } + )} + + {isShowTextToSpeech && ( + <> +
+ + + )}
{content?.length} {t('common.unit.char')}
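A note for reviewers on the prop threading above: none of the item components read the app config directly; each surface (debug panel, logs, text-generate items, saved items) is handed the same precomputed boolean. A minimal sketch of the gating rule, with the two parameter types as simplified stand-ins for the real config objects in this PR:

```tsx
// Sketch, not code from this PR: the visibility rule used by callers such as
// the Debug component, which passes
//   isShowTextToSpeech={textToSpeechConfig.enabled && !!text2speechDefaultModel}
type TTSFeatureConfig = { enabled: boolean }
type DefaultModel = { provider: string; model: string }

const showTextToSpeech = (
  textToSpeechConfig: TTSFeatureConfig,
  text2speechDefaultModel?: DefaultModel,
): boolean => textToSpeechConfig.enabled && !!text2speechDefaultModel

// Usage: <GenerationItem isShowTextToSpeech={showTextToSpeech(cfg, model)} ... />
```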
diff --git a/web/app/components/app/text-generate/saved-items/index.tsx b/web/app/components/app/text-generate/saved-items/index.tsx index e2cc0e8396..f63ebdfbc0 100644 --- a/web/app/components/app/text-generate/saved-items/index.tsx +++ b/web/app/components/app/text-generate/saved-items/index.tsx @@ -9,9 +9,11 @@ import type { SavedMessage } from '@/models/debug' import { Markdown } from '@/app/components/base/markdown' import { SimpleBtn, copyIcon } from '@/app/components/app/text-generate/item' import Toast from '@/app/components/base/toast' +import AudioBtn from '@/app/components/base/audio-btn' export type ISavedItemsProps = { className?: string + isShowTextToSpeech?: boolean list: SavedMessage[] onRemove: (id: string) => void onStartCreateContent: () => void @@ -25,6 +27,7 @@ const removeIcon = ( const SavedItems: FC = ({ className, + isShowTextToSpeech, list, onRemove, onStartCreateContent, @@ -69,6 +72,16 @@ const SavedItems: FC = ({ {removeIcon}
{t('common.operation.remove')}
+ + {isShowTextToSpeech && ( + <> +
+ + + )}
{answer?.length} {t('common.unit.char')}
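The `AudioBtn` component introduced below imports a `textToAudio` helper from `@/service/share` that is not part of this diff. Judging from the call site (a relative URL, a public-API flag, and a `FormData` body in; an object whose `data` field is a latin1-encoded string out), a plausible sketch looks like the following — the base-URL prefixes and error handling here are assumptions, not the PR's actual service code:

```ts
// Hypothetical sketch of the textToAudio helper consumed by AudioBtn below.
export const textToAudio = async (
  url: string,
  isPublicAPI: boolean,
  body: FormData,
): Promise<{ data: string }> => {
  // Assumed prefixes: public share API vs. authenticated console API.
  const baseUrl = isPublicAPI ? '/api' : '/console/api'
  const res = await fetch(`${baseUrl}${url}`, {
    method: 'POST',
    body, // the browser sets the multipart boundary itself
    credentials: 'include',
  })
  if (!res.ok)
    throw new Error(`text-to-audio request failed: ${res.status}`)
  // The backend ships the WAV bytes as a latin1-decoded JSON string; the
  // caller re-encodes them with Buffer.from(data, 'latin1').
  return res.json()
}
```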
diff --git a/web/app/components/base/audio-btn/index.tsx b/web/app/components/base/audio-btn/index.tsx
new file mode 100644
index 0000000000..f64993669a
--- /dev/null
+++ b/web/app/components/base/audio-btn/index.tsx
@@ -0,0 +1,110 @@
+'use client'
+import { useRef, useState } from 'react'
+import { t } from 'i18next'
+import { useParams, usePathname } from 'next/navigation'
+import s from './style.module.css'
+import Tooltip from '@/app/components/base/tooltip'
+import { randomString } from '@/utils'
+import { textToAudio } from '@/service/share'
+
+type AudioBtnProps = {
+  value: string
+  className?: string
+}
+
+const AudioBtn = ({
+  value,
+  className,
+}: AudioBtnProps) => {
+  const audioRef = useRef<HTMLAudioElement | null>(null)
+  const [isPlaying, setIsPlaying] = useState(false)
+  const [isPause, setPause] = useState(false)
+  const [hasEnded, setHasEnded] = useState(false)
+  const selector = useRef(`play-tooltip-${randomString(4)}`)
+  const params = useParams()
+  const pathname = usePathname()
+  // Drop fenced code blocks before synthesis; reading code aloud adds noise.
+  const removeCodeBlocks = (inputText: string) => {
+    const codeBlockRegex = /```[\s\S]*?```/g
+    return inputText.replace(codeBlockRegex, '')
+  }
+
+  const playAudio = async () => {
+    const formData = new FormData()
+    if (value !== '') {
+      formData.append('text', removeCodeBlocks(value))
+
+      // Pick the endpoint that matches the current surface: shared (public)
+      // app, installed app in explore, app debug, or universal chat.
+      let url = '/universal-chat/text-to-audio'
+      let isPublic = false
+
+      if (params.token) {
+        url = '/text-to-audio'
+        isPublic = true
+      }
+      else if (params.appId) {
+        if (pathname.search('explore/installed') > -1)
+          url = `/installed-apps/${params.appId}/text-to-audio`
+        else
+          url = `/apps/${params.appId}/text-to-audio`
+      }
+
+      try {
+        const audioResponse = await textToAudio(url, isPublic, formData)
+        // The API returns the WAV bytes latin1-decoded into a JSON string.
+        const blob_bytes = Buffer.from(audioResponse.data, 'latin1')
+        const blob = new Blob([blob_bytes], { type: 'audio/wav' })
+        const audioUrl = URL.createObjectURL(blob)
+        const audio = new Audio(audioUrl)
+        audioRef.current = audio
+        audio.play().then(() => {
+          setIsPlaying(true)
+          setPause(false)
+          setHasEnded(false)
+        }).catch(() => {
+          setIsPlaying(false)
+          URL.revokeObjectURL(audioUrl)
+        })
+        audio.onended = () => {
+          setHasEnded(true)
+          setIsPlaying(false)
+        }
+      }
+      catch (error) {
+        setIsPlaying(false)
+        console.error('Error playing audio:', error)
+      }
+    }
+  }
+
+  const togglePlayPause = () => {
+    // No audio fetched yet, or the last playback finished: (re)fetch and play.
+    if (!audioRef.current || hasEnded) {
+      playAudio()
+      return
+    }
+    if (isPlaying) {
+      setPause(true)
+      setIsPlaying(false)
+      audioRef.current.pause()
+    }
+    else {
+      setPause(false)
+      setIsPlaying(true)
+      audioRef.current.play()
+    }
+  }
+
+  return (
+ +
+
+
+
+
+ ) +} + +export default AudioBtn diff --git a/web/app/components/base/audio-btn/style.module.css b/web/app/components/base/audio-btn/style.module.css new file mode 100644 index 0000000000..7c05003b04 --- /dev/null +++ b/web/app/components/base/audio-btn/style.module.css @@ -0,0 +1,16 @@ +.playIcon { + background-image: url(~@/app/components/develop/secret-key/assets/play.svg); + background-position: center; + background-repeat: no-repeat; +} +.pauseIcon { + background-image: url(~@/app/components/develop/secret-key/assets/pause.svg); + background-position: center; + background-repeat: no-repeat; +} + +.stopIcon { + background-position: center; + background-repeat: no-repeat; + background-image: url(~@/app/components/develop/secret-key/assets/stop.svg); +} \ No newline at end of file diff --git a/web/app/components/base/icons/assets/vender/line/mediaAndDevices/speaker.svg b/web/app/components/base/icons/assets/vender/line/mediaAndDevices/speaker.svg new file mode 100644 index 0000000000..f769c7e830 --- /dev/null +++ b/web/app/components/base/icons/assets/vender/line/mediaAndDevices/speaker.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/web/app/components/base/icons/assets/vender/solid/mediaAndDevices/speaker.svg b/web/app/components/base/icons/assets/vender/solid/mediaAndDevices/speaker.svg new file mode 100644 index 0000000000..f769c7e830 --- /dev/null +++ b/web/app/components/base/icons/assets/vender/solid/mediaAndDevices/speaker.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.json b/web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.json new file mode 100644 index 0000000000..3e5cbe171b --- /dev/null +++ b/web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.json @@ -0,0 +1,112 @@ +{ + "icon": { + "type": "element", + "isRootNode": true, + "name": "svg", + "attributes": { + "width": "16", + "height": "16", + "viewBox": "0 0 16 16", + "fill": "none", + "xmlns": "http://www.w3.org/2000/svg" + }, + "children": [ + { + "type": "element", + "name": "g", + "attributes": { + "clip-path": "url(#clip0_109_6694)" + }, + "children": [ + { + "type": "element", + "name": "path", + "attributes": { + "fill-rule": "evenodd", + "clip-rule": "evenodd", + "d": "M0 2.86666C0 2.05664 0.656649 1.39999 1.46667 1.39999H5.86667C6.67668 1.39999 7.33333 2.05664 7.33333 2.86666C7.33333 3.27167 7.00501 3.59999 6.6 3.59999C6.19499 3.59999 5.86667 3.27167 5.86667 2.86666H4.4V7.99999C4.80501 7.99999 5.13333 8.32831 5.13333 8.73332C5.13333 9.13833 4.80501 9.46666 4.4 9.46666H2.93333C2.52832 9.46666 2.2 9.13833 2.2 8.73332C2.2 8.32831 2.52832 7.99999 2.93333 7.99999V2.86666H1.46667C1.46667 3.27167 1.13834 3.59999 0.733333 3.59999C0.328324 3.59999 0 3.27167 0 2.86666Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "d": "M13.8205 0.782296C13.7434 0.62811 13.5233 0.62811 13.4462 0.782296C12.9664 1.74206 12.8754 1.83302 11.9156 2.3129C11.7615 2.39 11.7615 2.61003 11.9156 2.68712C12.8754 3.167 12.9664 3.25797 13.4462 4.21773C13.5233 4.37191 13.7434 4.37191 13.8205 4.21773C14.3003 3.25797 14.3913 3.167 15.3511 2.68712C15.5053 2.61003 15.5053 2.39 15.3511 2.3129C14.3913 1.83302 14.3003 1.74206 13.8205 0.782296Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "d": "M9.79394 2.25319C9.71404 2.09337 9.48596 2.09337 9.40605 2.25319C9.04994 2.96543 8.96544 
3.04993 8.2532 3.40605C8.09338 3.48595 8.09338 3.71402 8.2532 3.79393C8.96544 4.15005 9.04994 4.23455 9.40606 4.94679C9.48596 5.10661 9.71404 5.10661 9.79394 4.94679C10.1501 4.23455 10.2346 4.15005 10.9468 3.79393C11.1066 3.71402 11.1066 3.48595 10.9468 3.40605C10.2346 3.04993 10.1501 2.96543 9.79394 2.25319Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "d": "M2.75377 11.049C2.67668 10.8948 2.45665 10.8948 2.37956 11.049C1.89969 12.0087 1.80872 12.0997 0.848971 12.5796C0.694788 12.6566 0.694787 12.8767 0.848971 12.9538C1.80872 13.4336 1.89969 13.5246 2.37956 14.4844C2.45665 14.6385 2.67668 14.6385 2.75377 14.4844C3.23365 13.5246 3.32461 13.4336 4.28436 12.9538C4.43855 12.8767 4.43855 12.6566 4.28436 12.5796C3.32461 12.0997 3.23365 12.0087 2.75377 11.049Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "fill-rule": "evenodd", + "clip-rule": "evenodd", + "d": "M14.6741 8.65106C14.8886 8.50146 15.1837 8.55405 15.3333 8.76853C15.7614 9.38226 16.0125 10.1292 16.0125 10.9333C16.0125 11.7375 15.7614 12.4844 15.3333 13.0981C15.1837 13.3126 14.8886 13.3652 14.6741 13.2156C14.4596 13.066 14.407 12.7708 14.5567 12.5564C14.8775 12.0964 15.0656 11.5375 15.0656 10.9333C15.0656 10.3291 14.8775 9.77025 14.5567 9.31028C14.407 9.09581 14.4596 8.80066 14.6741 8.65106Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "fill-rule": "evenodd", + "clip-rule": "evenodd", + "d": "M12.5674 6.53771C12.794 6.51987 13.0155 6.61161 13.1632 6.78449C13.2954 6.93929 13.3164 7.12549 13.3244 7.21587C13.3334 7.31718 13.3334 7.44301 13.3333 7.57103C13.3333 7.57691 13.3333 7.58278 13.3333 7.58866L13.3333 14.3C13.3334 14.428 13.3334 14.5539 13.3244 14.6552C13.3164 14.7455 13.2954 14.9317 13.1632 15.0865C13.0155 15.2594 12.794 15.3512 12.5674 15.3333C12.3644 15.3173 12.2179 15.2005 12.1484 15.1423C12.0704 15.077 11.9814 14.988 11.8909 14.8975L10.3795 13.3861C10.3357 13.3423 10.3137 13.3205 10.2971 13.3053L10.2958 13.3041L10.2941 13.3041C10.2716 13.303 10.2407 13.3029 10.1787 13.3029L9.34101 13.3029C9.22151 13.3029 9.10513 13.3029 9.00657 13.2949C8.89833 13.286 8.77062 13.2652 8.6421 13.1997C8.46392 13.1089 8.31906 12.964 8.22827 12.7859C8.16279 12.6574 8.14192 12.5296 8.13308 12.4214C8.12503 12.3228 8.12504 12.2065 8.12505 12.087V9.79916C8.12505 9.79413 8.12505 9.78909 8.12505 9.78406C8.12504 9.66456 8.12503 9.54819 8.13308 9.44963C8.14192 9.34139 8.16279 9.21368 8.22827 9.08517C8.31906 8.90699 8.46392 8.76212 8.6421 8.67133C8.77062 8.60585 8.89833 8.58498 9.00657 8.57614C9.10512 8.56809 9.2215 8.5681 9.341 8.56812C9.34603 8.56812 9.35106 8.56812 9.3561 8.56812H10.1787C10.2407 8.56812 10.2716 8.56801 10.2941 8.56698L10.2958 8.5669L10.2971 8.56575C10.3137 8.55058 10.3357 8.52877 10.3795 8.48491L11.8784 6.98602C11.8826 6.98186 11.8867 6.97771 11.8909 6.97355C11.9814 6.88302 12.0704 6.79403 12.1484 6.72874C12.2179 6.67049 12.3644 6.55368 12.5674 6.53771Z", + "fill": "currentColor" + }, + "children": [] + } + ] + }, + { + "type": "element", + "name": "defs", + "attributes": {}, + "children": [ + { + "type": "element", + "name": "clipPath", + "attributes": { + "id": "clip0_109_6694" + }, + "children": [ + { + "type": "element", + "name": "rect", + "attributes": { + "width": "16", + "height": "16", + "fill": "white" + }, + "children": [] + } + ] + } + ] + } + ] + }, + "name": "Speaker" +} \ No newline at end of file diff --git 
a/web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.tsx b/web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.tsx new file mode 100644 index 0000000000..a33b9ebcfd --- /dev/null +++ b/web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.tsx @@ -0,0 +1,16 @@ +// GENERATE BY script +// DON NOT EDIT IT MANUALLY + +import * as React from 'react' +import data from './Speaker.json' +import IconBase from '@/app/components/base/icons/IconBase' +import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase' + +const Icon = React.forwardRef, Omit>(( + props, + ref, +) => ) + +Icon.displayName = 'Speaker' + +export default Icon diff --git a/web/app/components/base/icons/src/vender/line/mediaAndDevices/index.ts b/web/app/components/base/icons/src/vender/line/mediaAndDevices/index.ts index 6e90c11609..ba693b054e 100644 --- a/web/app/components/base/icons/src/vender/line/mediaAndDevices/index.ts +++ b/web/app/components/base/icons/src/vender/line/mediaAndDevices/index.ts @@ -1,2 +1,3 @@ export { default as Microphone01 } from './Microphone01' export { default as SlidersH } from './SlidersH' +export { default as Speaker } from './Speaker' diff --git a/web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.json b/web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.json new file mode 100644 index 0000000000..3e5cbe171b --- /dev/null +++ b/web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.json @@ -0,0 +1,112 @@ +{ + "icon": { + "type": "element", + "isRootNode": true, + "name": "svg", + "attributes": { + "width": "16", + "height": "16", + "viewBox": "0 0 16 16", + "fill": "none", + "xmlns": "http://www.w3.org/2000/svg" + }, + "children": [ + { + "type": "element", + "name": "g", + "attributes": { + "clip-path": "url(#clip0_109_6694)" + }, + "children": [ + { + "type": "element", + "name": "path", + "attributes": { + "fill-rule": "evenodd", + "clip-rule": "evenodd", + "d": "M0 2.86666C0 2.05664 0.656649 1.39999 1.46667 1.39999H5.86667C6.67668 1.39999 7.33333 2.05664 7.33333 2.86666C7.33333 3.27167 7.00501 3.59999 6.6 3.59999C6.19499 3.59999 5.86667 3.27167 5.86667 2.86666H4.4V7.99999C4.80501 7.99999 5.13333 8.32831 5.13333 8.73332C5.13333 9.13833 4.80501 9.46666 4.4 9.46666H2.93333C2.52832 9.46666 2.2 9.13833 2.2 8.73332C2.2 8.32831 2.52832 7.99999 2.93333 7.99999V2.86666H1.46667C1.46667 3.27167 1.13834 3.59999 0.733333 3.59999C0.328324 3.59999 0 3.27167 0 2.86666Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "d": "M13.8205 0.782296C13.7434 0.62811 13.5233 0.62811 13.4462 0.782296C12.9664 1.74206 12.8754 1.83302 11.9156 2.3129C11.7615 2.39 11.7615 2.61003 11.9156 2.68712C12.8754 3.167 12.9664 3.25797 13.4462 4.21773C13.5233 4.37191 13.7434 4.37191 13.8205 4.21773C14.3003 3.25797 14.3913 3.167 15.3511 2.68712C15.5053 2.61003 15.5053 2.39 15.3511 2.3129C14.3913 1.83302 14.3003 1.74206 13.8205 0.782296Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "d": "M9.79394 2.25319C9.71404 2.09337 9.48596 2.09337 9.40605 2.25319C9.04994 2.96543 8.96544 3.04993 8.2532 3.40605C8.09338 3.48595 8.09338 3.71402 8.2532 3.79393C8.96544 4.15005 9.04994 4.23455 9.40606 4.94679C9.48596 5.10661 9.71404 5.10661 9.79394 4.94679C10.1501 4.23455 10.2346 4.15005 10.9468 3.79393C11.1066 3.71402 11.1066 3.48595 10.9468 3.40605C10.2346 3.04993 10.1501 2.96543 9.79394 
2.25319Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "d": "M2.75377 11.049C2.67668 10.8948 2.45665 10.8948 2.37956 11.049C1.89969 12.0087 1.80872 12.0997 0.848971 12.5796C0.694788 12.6566 0.694787 12.8767 0.848971 12.9538C1.80872 13.4336 1.89969 13.5246 2.37956 14.4844C2.45665 14.6385 2.67668 14.6385 2.75377 14.4844C3.23365 13.5246 3.32461 13.4336 4.28436 12.9538C4.43855 12.8767 4.43855 12.6566 4.28436 12.5796C3.32461 12.0997 3.23365 12.0087 2.75377 11.049Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "fill-rule": "evenodd", + "clip-rule": "evenodd", + "d": "M14.6741 8.65106C14.8886 8.50146 15.1837 8.55405 15.3333 8.76853C15.7614 9.38226 16.0125 10.1292 16.0125 10.9333C16.0125 11.7375 15.7614 12.4844 15.3333 13.0981C15.1837 13.3126 14.8886 13.3652 14.6741 13.2156C14.4596 13.066 14.407 12.7708 14.5567 12.5564C14.8775 12.0964 15.0656 11.5375 15.0656 10.9333C15.0656 10.3291 14.8775 9.77025 14.5567 9.31028C14.407 9.09581 14.4596 8.80066 14.6741 8.65106Z", + "fill": "currentColor" + }, + "children": [] + }, + { + "type": "element", + "name": "path", + "attributes": { + "fill-rule": "evenodd", + "clip-rule": "evenodd", + "d": "M12.5674 6.53771C12.794 6.51987 13.0155 6.61161 13.1632 6.78449C13.2954 6.93929 13.3164 7.12549 13.3244 7.21587C13.3334 7.31718 13.3334 7.44301 13.3333 7.57103C13.3333 7.57691 13.3333 7.58278 13.3333 7.58866L13.3333 14.3C13.3334 14.428 13.3334 14.5539 13.3244 14.6552C13.3164 14.7455 13.2954 14.9317 13.1632 15.0865C13.0155 15.2594 12.794 15.3512 12.5674 15.3333C12.3644 15.3173 12.2179 15.2005 12.1484 15.1423C12.0704 15.077 11.9814 14.988 11.8909 14.8975L10.3795 13.3861C10.3357 13.3423 10.3137 13.3205 10.2971 13.3053L10.2958 13.3041L10.2941 13.3041C10.2716 13.303 10.2407 13.3029 10.1787 13.3029L9.34101 13.3029C9.22151 13.3029 9.10513 13.3029 9.00657 13.2949C8.89833 13.286 8.77062 13.2652 8.6421 13.1997C8.46392 13.1089 8.31906 12.964 8.22827 12.7859C8.16279 12.6574 8.14192 12.5296 8.13308 12.4214C8.12503 12.3228 8.12504 12.2065 8.12505 12.087V9.79916C8.12505 9.79413 8.12505 9.78909 8.12505 9.78406C8.12504 9.66456 8.12503 9.54819 8.13308 9.44963C8.14192 9.34139 8.16279 9.21368 8.22827 9.08517C8.31906 8.90699 8.46392 8.76212 8.6421 8.67133C8.77062 8.60585 8.89833 8.58498 9.00657 8.57614C9.10512 8.56809 9.2215 8.5681 9.341 8.56812C9.34603 8.56812 9.35106 8.56812 9.3561 8.56812H10.1787C10.2407 8.56812 10.2716 8.56801 10.2941 8.56698L10.2958 8.5669L10.2971 8.56575C10.3137 8.55058 10.3357 8.52877 10.3795 8.48491L11.8784 6.98602C11.8826 6.98186 11.8867 6.97771 11.8909 6.97355C11.9814 6.88302 12.0704 6.79403 12.1484 6.72874C12.2179 6.67049 12.3644 6.55368 12.5674 6.53771Z", + "fill": "currentColor" + }, + "children": [] + } + ] + }, + { + "type": "element", + "name": "defs", + "attributes": {}, + "children": [ + { + "type": "element", + "name": "clipPath", + "attributes": { + "id": "clip0_109_6694" + }, + "children": [ + { + "type": "element", + "name": "rect", + "attributes": { + "width": "16", + "height": "16", + "fill": "white" + }, + "children": [] + } + ] + } + ] + } + ] + }, + "name": "Speaker" +} \ No newline at end of file diff --git a/web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.tsx b/web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.tsx new file mode 100644 index 0000000000..a33b9ebcfd --- /dev/null +++ b/web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.tsx @@ 
-0,0 +1,16 @@ +// GENERATE BY script +// DON NOT EDIT IT MANUALLY + +import * as React from 'react' +import data from './Speaker.json' +import IconBase from '@/app/components/base/icons/IconBase' +import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase' + +const Icon = React.forwardRef, Omit>(( + props, + ref, +) => ) + +Icon.displayName = 'Speaker' + +export default Icon diff --git a/web/app/components/base/icons/src/vender/solid/mediaAndDevices/index.ts b/web/app/components/base/icons/src/vender/solid/mediaAndDevices/index.ts index 12538b9aaf..37d5b3a2d9 100644 --- a/web/app/components/base/icons/src/vender/solid/mediaAndDevices/index.ts +++ b/web/app/components/base/icons/src/vender/solid/mediaAndDevices/index.ts @@ -4,4 +4,5 @@ export { default as MagicWand } from './MagicWand' export { default as Microphone01 } from './Microphone01' export { default as Robot } from './Robot' export { default as Sliders02 } from './Sliders02' +export { default as Speaker } from './Speaker' export { default as StopCircle } from './StopCircle' diff --git a/web/app/components/develop/secret-key/assets/pause.svg b/web/app/components/develop/secret-key/assets/pause.svg new file mode 100644 index 0000000000..a204b179d2 --- /dev/null +++ b/web/app/components/develop/secret-key/assets/pause.svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/web/app/components/develop/secret-key/assets/play.svg b/web/app/components/develop/secret-key/assets/play.svg new file mode 100644 index 0000000000..0ab33af6c6 --- /dev/null +++ b/web/app/components/develop/secret-key/assets/play.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/web/app/components/develop/secret-key/assets/stop.svg b/web/app/components/develop/secret-key/assets/stop.svg new file mode 100644 index 0000000000..b423e98ce2 --- /dev/null +++ b/web/app/components/develop/secret-key/assets/stop.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/web/app/components/develop/template/template.en.mdx b/web/app/components/develop/template/template.en.mdx index 28d92e98f8..2ac9aecdea 100644 --- a/web/app/components/develop/template/template.en.mdx +++ b/web/app/components/develop/template/template.en.mdx @@ -6,7 +6,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from The text generation application offers non-session support and is ideal for translation, article writing, summarization AI, and more.
- ### Base URL + ### Base URL ```javascript ``` @@ -14,10 +14,10 @@ The text generation application offers non-session support and is ideal for tran ### Authentication - The Service API uses `API-Key` authentication. + The Service API uses `API-Key` authentication. **Strongly recommend storing your API Key on the server-side, not shared or stored on the client-side, to avoid possible API-Key leakage that can lead to serious consequences.** - For all API requests, include your API Key in the `Authorization` HTTP Header, as shown below: + For all API requests, include your API Key in the `Authorization` HTTP Header, as shown below: ```javascript @@ -46,18 +46,18 @@ The text generation application offers non-session support and is ideal for tran User Input/Question content - Allows the entry of various variable values defined by the App. + Allows the entry of various variable values defined by the App. The `inputs` parameter contains multiple key/value pairs, with each key corresponding to a specific variable and each value being the specific value for that variable. The text generation application requires at least one key/value pair to be inputted. The mode of response return, supporting: - `streaming` Streaming mode (recommended), implements a typewriter-like output through SSE ([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)). - - `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long) + - `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long) Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds. - User identifier, used to define the identity of the end-user for retrieval and statistics. + User identifier, used to define the identity of the end-user for retrieval and statistics. Should be uniquely defined by the developer within the application. @@ -71,9 +71,9 @@ The text generation application offers non-session support and is ideal for tran - `upload_file_id` (string) Uploaded file ID, which must be obtained by uploading through the File Upload API in advance (when the transfer method is `local_file`) - + ### Response - When `response_mode` is `blocking`, return a CompletionResponse object. + When `response_mode` is `blocking`, return a CompletionResponse object. When `response_mode` is `streaming`, return a ChunkCompletionResponse stream. ### ChatCompletionResponse @@ -205,7 +205,7 @@ The text generation application offers non-session support and is ideal for tran Upload a file (currently only images are supported) for use when sending messages, enabling multimodal understanding of images and text. - Supports png, jpg, jpeg, webp, gif formats. + Supports png, jpg, jpeg, webp, gif formats. Uploaded files are for use by the current end-user only. ### Request Body @@ -214,7 +214,7 @@ The text generation application offers non-session support and is ideal for tran The file to be uploaded. - `user` (string) Required User identifier, defined by the developer's rules, must be unique within the application. - + ### Response After a successful upload, the server will return the file's ID and related information. 
- `id` (uuid) ID @@ -236,7 +236,7 @@ The text generation application offers non-session support and is ideal for tran - 503, `s3_permission_denied`, no permission to upload files to S3 - 503, `s3_file_too_large`, file exceeds S3 size limit - 500, internal server error - + @@ -256,12 +256,12 @@ The text generation application offers non-session support and is ideal for tran ```json {{ title: 'Response' }} { - "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67", + "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67", "name": "example.png", "size": 1024, "extension": "png", "mime_type": "image/png", - "created_by": "6ad1ab0a-73ff-4ac1-b9e4-cdb312f71f13", + "created_by": "6ad1ab0a-73ff-4ac1-b9e4-cdb312f71f13", "created_at": 1577836800, } ``` @@ -292,8 +292,8 @@ The text generation application offers non-session support and is ideal for tran ```bash {{ title: 'cURL' }} curl -X POST 'https://cloud.dify.ai/v1/chat-messages/:task_id/stop' \ - -H 'Authorization: Bearer {api_key}' \ - -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer {api_key}' \ + -H 'Content-Type: application/json' \ --data-raw '{ "user": "abc-123" }' @@ -484,3 +484,51 @@ The text generation application offers non-session support and is ideal for tran + +--- + + + + + Text to speech, only supports openai model. + + ### Request Body + + + + Speech generated content。 + + + The user identifier, defined by the developer, must ensure uniqueness within the app. + + + Whether to enable streaming output, true、false。 + + + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \ + --header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \ + --form 'file=Hello Dify;user=abc-123;streaming=false' + ``` + + + + + ```json {{ title: 'headers' }} + { + "Content-Type": "audio/wav" + } + ``` + + + \ No newline at end of file diff --git a/web/app/components/develop/template/template.zh.mdx b/web/app/components/develop/template/template.zh.mdx index 4748ec41dd..2c5322c922 100644 --- a/web/app/components/develop/template/template.zh.mdx +++ b/web/app/components/develop/template/template.zh.mdx @@ -14,9 +14,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' ### 鉴权 - - Dify Service API 使用 `API-Key` 进行鉴权。 - **强烈建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。** + + Dify Service API 使用 `API-Key` 进行鉴权。 + **强烈建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。** 所有 API 请求都应在 **`Authorization`** HTTP Header 中包含您的 `API-Key`,如下所示: @@ -46,16 +46,16 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' (选填)允许传入 App 定义的各变量值。 - inputs 参数包含了多组键值对(Key/Value pairs),每组的键对应一个特定变量,每组的值则是该变量的具体值。 + inputs 参数包含了多组键值对(Key/Value pairs),每组的键对应一个特定变量,每组的值则是该变量的具体值。 文本生成型应用要求至少传入一组键值对。 - `streaming` 流式模式(推荐)。基于 SSE(**[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)**)实现类似打字机输出方式的流式返回。 - - `blocking` 阻塞模式,等待执行完毕后返回结果。(请求若流程较长可能会被中断)。 + - `blocking` 阻塞模式,等待执行完毕后返回结果。(请求若流程较长可能会被中断)。 由于 Cloudflare 限制,请求会在 100 秒超时无返回后中断。 - 用户标识,用于定义终端用户的身份,方便检索、统计。 + 用户标识,用于定义终端用户的身份,方便检索、统计。 由开发者定义规则,需保证用户标识在应用内唯一。 @@ -74,9 +74,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' ### Response - 当 `response_mode` 为 `blocking` 时,返回 ChatCompletionResponse object。 + 当 `response_mode` 为 `blocking` 时,返回 ChatCompletionResponse object。 当 `response_mode` 为 `streaming`时,返回 ChunkChatCompletionResponse object 流式序列。 - + ### ChatCompletionResponse 返回完整的 App 
结果,`Content-Type` 为 `application/json`。 - `message_id` (string) 消息唯一 ID @@ -184,7 +184,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' 上传文件(目前仅支持图片)并在发送消息时使用,可实现图文多模态理解。 - 支持 png, jpg, jpeg, webp, gif 格式。 + 支持 png, jpg, jpeg, webp, gif 格式。 上传的文件仅供当前终端用户使用。 ### Request Body @@ -234,12 +234,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' ```json {{ title: 'Response' }} { - "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67", + "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67", "name": "example.png", "size": 1024, "extension": "png", "mime_type": "image/png", - "created_by": 123, + "created_by": 123, "created_at": 1577836800, } ``` @@ -258,7 +258,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' 仅支持流式模式。 ### Path - `task_id` (string) 任务 ID,可在流式返回 Chunk 中获取 - + ### Request Body - `user` (string) Required 用户标识,用于定义终端用户的身份,必须和发送消息接口传入 user 保持一致。 @@ -378,7 +378,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' - `annotation_reply` (object) 标记回复 - `enabled` (bool) 是否开启 - `user_input_form` (array[object]) 用户输入表单配置 - - `text-input` (object) 文本输入控件 + - `text-input` (object) 文本输入控件 - `label` (string) 控件展示标签名 - `variable` (string) 控件 ID - `required` (bool) 是否必填 @@ -388,7 +388,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' - `variable` (string) 控件 ID - `required` (bool) 是否必填 - `default` (string) 默认值 - - `select` (object) 下拉控件 + - `select` (object) 下拉控件 - `label` (string) 控件展示标签名 - `variable` (string) 控件 ID - `required` (bool) 是否必填 @@ -447,3 +447,51 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' + +--- + + + + + 文字转语音,仅支持 openai 模型。 + + ### Request Body + + + + 语音生成内容。 + + + 用户标识,由开发者定义规则,需保证用户标识在应用内唯一。 + + + 是否启用流式输出true、false。 + + + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \ + --header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \ + --form 'file=你好Dify;user=abc-123;streaming=false' + ``` + + + + + ```json {{ title: 'headers' }} + { + "Content-Type": "audio/wav" + } + ``` + + + diff --git a/web/app/components/develop/template/template_chat.en.mdx b/web/app/components/develop/template/template_chat.en.mdx index 3a0171fb51..f6532510c7 100644 --- a/web/app/components/develop/template/template_chat.en.mdx +++ b/web/app/components/develop/template/template_chat.en.mdx @@ -6,7 +6,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Chat applications support session persistence, allowing previous chat history to be used as context for responses. This can be applicable for chatbots, customer service AI, etc.
- ### Base URL + ### Base URL ```javascript ``` @@ -14,10 +14,10 @@ Chat applications support session persistence, allowing previous chat history to ### Authentication - The Service API uses `API-Key` authentication. + The Service API uses `API-Key` authentication. **Strongly recommend storing your API Key on the server-side, not shared or stored on the client-side, to avoid possible API-Key leakage that can lead to serious consequences.** - For all API requests, include your API Key in the `Authorization`HTTP Header, as shown below: + For all API requests, include your API Key in the `Authorization`HTTP Header, as shown below: ```javascript @@ -46,18 +46,18 @@ Chat applications support session persistence, allowing previous chat history to User Input/Question content - Allows the entry of various variable values defined by the App. + Allows the entry of various variable values defined by the App. The `inputs` parameter contains multiple key/value pairs, with each key corresponding to a specific variable and each value being the specific value for that variable. The mode of response return, supporting: - `streaming` Streaming mode (recommended), implements a typewriter-like output through SSE ([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)). - - `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long) - Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds. + - `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long) + Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds. Note: blocking mode is not supported in Agent Assistant mode - User identifier, used to define the identity of the end-user for retrieval and statistics. + User identifier, used to define the identity of the end-user for retrieval and statistics. Should be uniquely defined by the developer within the application. @@ -75,9 +75,9 @@ Chat applications support session persistence, allowing previous chat history to Can achieve async title generation by calling the conversation rename API and setting `auto_generate` to true. - + ### Response - When response_mode is blocking, return a CompletionResponse object. + When response_mode is blocking, return a CompletionResponse object. When response_mode is streaming, return a ChunkCompletionResponse stream. ### ChatCompletionResponse @@ -122,7 +122,7 @@ Chat applications support session persistence, allowing previous chat history to - `tool` (string) A list of tools represents which tools are called,split by ; - `tool_input` (string) Input of tools in JSON format. Like: `{"dalle3": {"prompt": "a cute cat"}}`. - `created_at` (int) Creation timestamp, e.g., 1705395332 - - `message_files` (array[string]) Refer to message_file event + - `message_files` (array[string]) Refer to message_file event - `file_id` (string) File ID - `conversation_id` (string) Conversation ID - `event: message_file` Message file event, a new file has created by tool @@ -260,7 +260,7 @@ Chat applications support session persistence, allowing previous chat history to Upload a file (currently only images are supported) for use when sending messages, enabling multimodal understanding of images and text. - Supports png, jpg, jpeg, webp, gif formats. + Supports png, jpg, jpeg, webp, gif formats. 
Uploaded files are for use by the current end-user only. ### Request Body @@ -269,7 +269,7 @@ Chat applications support session persistence, allowing previous chat history to The file to be uploaded. - `user` (string) Required User identifier, defined by the developer's rules, must be unique within the application. - + ### Response After a successful upload, the server will return the file's ID and related information. - `id` (uuid) ID @@ -291,7 +291,7 @@ Chat applications support session persistence, allowing previous chat history to - 503, `s3_permission_denied`, no permission to upload files to S3 - 503, `s3_file_too_large`, file exceeds S3 size limit - 500, internal server error - + @@ -311,12 +311,12 @@ Chat applications support session persistence, allowing previous chat history to ```json {{ title: 'Response' }} { - "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67", + "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67", "name": "example.png", "size": 1024, "extension": "png", "mime_type": "image/png", - "created_by": "6ad1ab0a-73ff-4ac1-b9e4-cdb312f71f13", + "created_by": "6ad1ab0a-73ff-4ac1-b9e4-cdb312f71f13", "created_at": 1577836800, } ``` @@ -347,8 +347,8 @@ Chat applications support session persistence, allowing previous chat history to ```bash {{ title: 'cURL' }} curl -X POST 'https://cloud.dify.ai/v1/chat-messages/:task_id/stop' \ - -H 'Authorization: Bearer {api_key}' \ - -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer {api_key}' \ + -H 'Content-Type: application/json' \ --data-raw '{ "user": "abc-123" }' @@ -444,7 +444,7 @@ Chat applications support session persistence, allowing previous chat history to Conversation ID - User identifier, used to define the identity of the end-user for retrieval and statistics. + User identifier, used to define the identity of the end-user for retrieval and statistics. Should be uniquely defined by the developer within the application. @@ -475,7 +475,7 @@ Chat applications support session persistence, allowing previous chat history to - `tool` (string) A list of tools represents which tools are called,split by ; - `tool_input` (string) Input of tools in JSON format. Like: `{"dalle3": {"prompt": "a cute cat"}}`. - `created_at` (int) Creation timestamp, e.g., 1705395332 - - `message_files` (array[string]) Refer to message_file event + - `message_files` (array[string]) Refer to message_file event - `file_id` (string) File ID - `answer` (string) Response message content - `created_at` (timestamp) Creation timestamp, e.g., 1705395332 @@ -609,7 +609,7 @@ Chat applications support session persistence, allowing previous chat history to - User identifier, used to define the identity of the end-user for retrieval and statistics. + User identifier, used to define the identity of the end-user for retrieval and statistics. Should be uniquely defined by the developer within the application. @@ -800,8 +800,8 @@ Chat applications support session persistence, allowing previous chat history to - Audio file. - Supported formats: `['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']` + Audio file. + Supported formats: `['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']` File size limit: 15MB @@ -837,6 +837,54 @@ Chat applications support session persistence, allowing previous chat history to --- + + + + Text to speech, only supports openai model. + + ### Request Body + + + + Speech generated content。 + + + The user identifier, defined by the developer, must ensure uniqueness within the app. 
+ + + Whether to enable streaming output, true、false。 + + + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \ + --header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \ + --form 'file=Hello Dify;user=abc-123;streaming=false' + ``` + + + + + ```json {{ title: 'headers' }} + { + "Content-Type": "audio/wav" + } + ``` + + + + +--- + ### Response - `tool_icons`(object[string]) tool icons - - `tool_name` (string) + - `tool_name` (string) - `icon` (object|string) - (object) icon object - `background` (string) background color in hex format diff --git a/web/app/components/develop/template/template_chat.zh.mdx b/web/app/components/develop/template/template_chat.zh.mdx index 9c4e3c7c3c..b87a89b825 100644 --- a/web/app/components/develop/template/template_chat.zh.mdx +++ b/web/app/components/develop/template/template_chat.zh.mdx @@ -14,8 +14,8 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' ### 鉴权 - Service API 使用 `API-Key` 进行鉴权。 - **强烈建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。** + Service API 使用 `API-Key` 进行鉴权。 + **强烈建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。** 所有 API 请求都应在 **`Authorization`** HTTP Header 中包含您的 `API-Key`,如下所示: @@ -44,14 +44,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' 用户输入/提问内容。 - (选填)允许传入 App 定义的各变量值。 + (选填)允许传入 App 定义的各变量值。 inputs 参数包含了多组键值对(Key/Value pairs),每组的键对应一个特定变量,每组的值则是该变量的具体值。 - `streaming` 流式模式(推荐)。基于 SSE(**[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)**)实现类似打字机输出方式的流式返回。 - - `blocking` 阻塞模式,等待执行完毕后返回结果。(请求若流程较长可能会被中断)。 - 由于 Cloudflare 限制,请求会在 100 秒超时无返回后中断。 + - `blocking` 阻塞模式,等待执行完毕后返回结果。(请求若流程较长可能会被中断)。 + 由于 Cloudflare 限制,请求会在 100 秒超时无返回后中断。 注:Agent模式下不允许blocking。 @@ -77,9 +77,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' ### Response - 当 `response_mode` 为 `blocking` 时,返回 ChatCompletionResponse object。 + 当 `response_mode` 为 `blocking` 时,返回 ChatCompletionResponse object。 当 `response_mode` 为 `streaming`时,返回 ChunkChatCompletionResponse object 流式序列。 - + ### ChatCompletionResponse 返回完整的 App 结果,`Content-Type` 为 `application/json`。 @@ -147,7 +147,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' - `conversation_id` (string) 会话 ID - `answer` (string) 替换内容(直接替换 LLM 所有回复文本) - `created_at` (int) 创建时间戳,如:1705395332 - - `event: error` + - `event: error` 流式输出过程中出现的异常会以 stream event 形式输出,收到异常事件后即结束。 - `task_id` (string) 任务 ID,用于请求跟踪和下方的停止响应接口 - `message_id` (string) 消息唯一 ID @@ -278,7 +278,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' 上传文件(目前仅支持图片)并在发送消息时使用,可实现图文多模态理解。 - 支持 png, jpg, jpeg, webp, gif 格式。 + 支持 png, jpg, jpeg, webp, gif 格式。 上传的文件仅供当前终端用户使用。 ### Request Body @@ -328,12 +328,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' ```json {{ title: 'Response' }} { - "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67", + "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67", "name": "example.png", "size": 1024, "extension": "png", "mime_type": "image/png", - "created_by": 123, + "created_by": 123, "created_at": 1577836800, } ``` @@ -352,7 +352,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' 仅支持流式模式。 ### Path - `task_id` (string) 任务 ID,可在流式返回 Chunk 中获取 - + ### Request Body - `user` (string) Required 用户标识,用于定义终端用户的身份,必须和发送消息接口传入 user 保持一致。 @@ -772,7 +772,7 @@ 
import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' ### Response - - `result` (string) 固定返回 success + - `result` (string) 固定返回 success @@ -874,7 +874,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' 语音文件。 - 支持格式:`['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']` + 支持格式:`['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']` 文件大小限制:15MB @@ -909,6 +909,54 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' --- + + + + 文字转语音,仅支持 openai 模型。 + + ### Request Body + + + + 语音生成内容。 + + + 用户标识,由开发者定义规则,需保证用户标识在应用内唯一。 + + + 是否启用流式输出true、false。 + + + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \ + --header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \ + --form 'file=你好Dify;user=abc-123;streaming=false' + ``` + + + + + ```json {{ title: 'headers' }} + { + "Content-Type": "audio/wav" + } + ``` + + + + +--- + ### Response - `tool_icons`(object[string]) 工具图标 - - `工具名称` (string) + - `工具名称` (string) - `icon` (object|string) - (object) 图标 - `background` (string) hex格式的背景色 diff --git a/web/app/components/header/account-setting/model-provider-page/declarations.ts b/web/app/components/header/account-setting/model-provider-page/declarations.ts index e8cb079a14..d8eaf5f625 100644 --- a/web/app/components/header/account-setting/model-provider-page/declarations.ts +++ b/web/app/components/header/account-setting/model-provider-page/declarations.ts @@ -26,6 +26,7 @@ export enum ModelTypeEnum { rerank = 'rerank', speech2text = 'speech2text', moderation = 'moderation', + tts = 'tts', } export const MODEL_TYPE_TEXT = { @@ -34,6 +35,7 @@ export const MODEL_TYPE_TEXT = { [ModelTypeEnum.rerank]: 'Rerank', [ModelTypeEnum.speech2text]: 'Speech2text', [ModelTypeEnum.moderation]: 'Moderation', + [ModelTypeEnum.tts]: 'TTS', } export enum ConfigurateMethodEnum { diff --git a/web/app/components/header/account-setting/model-provider-page/hooks.ts b/web/app/components/header/account-setting/model-provider-page/hooks.ts index 064076751d..1cc0e0ae13 100644 --- a/web/app/components/header/account-setting/model-provider-page/hooks.ts +++ b/web/app/components/header/account-setting/model-provider-page/hooks.ts @@ -100,12 +100,13 @@ export const useProviderCrenditialsFormSchemasValue = ( return value } -export type ModelTypeIndex = 1 | 2 | 3 | 4 +export type ModelTypeIndex = 1 | 2 | 3 | 4 | 5 export const MODEL_TYPE_MAPS = { 1: ModelTypeEnum.textGeneration, 2: ModelTypeEnum.textEmbedding, 3: ModelTypeEnum.rerank, 4: ModelTypeEnum.speech2text, + 5: ModelTypeEnum.tts, } export const useModelList = (type: ModelTypeIndex) => { diff --git a/web/app/components/header/account-setting/model-provider-page/index.tsx b/web/app/components/header/account-setting/model-provider-page/index.tsx index b5c7d59013..6cb672673b 100644 --- a/web/app/components/header/account-setting/model-provider-page/index.tsx +++ b/web/app/components/header/account-setting/model-provider-page/index.tsx @@ -30,9 +30,10 @@ const ModelProviderPage = () => { const { data: embeddingsDefaultModel } = useDefaultModel(2) const { data: rerankDefaultModel } = useDefaultModel(3) const { data: speech2textDefaultModel } = useDefaultModel(4) + const { data: ttsDefaultModel } = useDefaultModel(5) const { modelProviders: providers } = useProviderContext() const { setShowModelModal } = useModalContext() - const defaultModelNotConfigured = !textGenerationDefaultModel && !embeddingsDefaultModel && 
!speech2textDefaultModel && !rerankDefaultModel + const defaultModelNotConfigured = !textGenerationDefaultModel && !embeddingsDefaultModel && !speech2textDefaultModel && !rerankDefaultModel && !ttsDefaultModel const [configedProviders, notConfigedProviders] = useMemo(() => { const configedProviders: ModelProvider[] = [] const notConfigedProviders: ModelProvider[] = [] @@ -104,6 +105,7 @@ const ModelProviderPage = () => { embeddingsDefaultModel={embeddingsDefaultModel} rerankDefaultModel={rerankDefaultModel} speech2textDefaultModel={speech2textDefaultModel} + ttsDefaultModel={ttsDefaultModel} />
{ diff --git a/web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx b/web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx index d382562bd4..4215bbfdec 100644 --- a/web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx +++ b/web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx @@ -29,12 +29,14 @@ type SystemModelSelectorProps = { embeddingsDefaultModel: DefaultModelResponse | undefined rerankDefaultModel: DefaultModelResponse | undefined speech2textDefaultModel: DefaultModelResponse | undefined + ttsDefaultModel: DefaultModelResponse | undefined } const SystemModel: FC = ({ textGenerationDefaultModel, embeddingsDefaultModel, rerankDefaultModel, speech2textDefaultModel, + ttsDefaultModel, }) => { const { t } = useTranslation() const { notify } = useToastContext() @@ -43,11 +45,13 @@ const SystemModel: FC = ({ const { data: embeddingModelList } = useModelList(2) const { data: rerankModelList } = useModelList(3) const { data: speech2textModelList } = useModelList(4) + const { data: ttsModelList } = useModelList(5) const [changedModelTypes, setChangedModelTypes] = useState([]) const [currentTextGenerationDefaultModel, changeCurrentTextGenerationDefaultModel] = useSystemDefaultModelAndModelList(textGenerationDefaultModel, textGenerationModelList) const [currentEmbeddingsDefaultModel, changeCurrentEmbeddingsDefaultModel] = useSystemDefaultModelAndModelList(embeddingsDefaultModel, embeddingModelList) const [currentRerankDefaultModel, changeCurrentRerankDefaultModel] = useSystemDefaultModelAndModelList(rerankDefaultModel, rerankModelList) const [currentSpeech2textDefaultModel, changeCurrentSpeech2textDefaultModel] = useSystemDefaultModelAndModelList(speech2textDefaultModel, speech2textModelList) + const [currentTTSDefaultModel, changeCurrentTTSDefaultModel] = useSystemDefaultModelAndModelList(ttsDefaultModel, ttsModelList) const [open, setOpen] = useState(false) const getCurrentDefaultModelByModelType = (modelType: ModelTypeEnum) => { @@ -59,6 +63,8 @@ const SystemModel: FC = ({ return currentRerankDefaultModel else if (modelType === ModelTypeEnum.speech2text) return currentSpeech2textDefaultModel + else if (modelType === ModelTypeEnum.tts) + return currentTTSDefaultModel return undefined } @@ -71,6 +77,8 @@ const SystemModel: FC = ({ changeCurrentRerankDefaultModel(model) else if (modelType === ModelTypeEnum.speech2text) changeCurrentSpeech2textDefaultModel(model) + else if (modelType === ModelTypeEnum.tts) + changeCurrentTTSDefaultModel(model) if (!changedModelTypes.includes(modelType)) setChangedModelTypes([...changedModelTypes, modelType]) @@ -79,7 +87,7 @@ const SystemModel: FC = ({ const res = await updateDefaultModel({ url: '/workspaces/current/default-model', body: { - model_settings: [ModelTypeEnum.textGeneration, ModelTypeEnum.textEmbedding, ModelTypeEnum.rerank, ModelTypeEnum.speech2text].map((modelType) => { + model_settings: [ModelTypeEnum.textGeneration, ModelTypeEnum.textEmbedding, ModelTypeEnum.rerank, ModelTypeEnum.speech2text, ModelTypeEnum.tts].map((modelType) => { return { model_type: modelType, provider: getCurrentDefaultModelByModelType(modelType)?.provider, @@ -101,6 +109,8 @@ const SystemModel: FC = ({ updateModelList(modelType) else if (modelType === ModelTypeEnum.speech2text) updateModelList(modelType) + else if (modelType === ModelTypeEnum.tts) + updateModelList(modelType) }) } } @@ -136,7 +146,7 @@ const 
SystemModel: FC = ({
{t('common.modelProvider.systemReasoningModel.tip')}
} > - +
@@ -156,7 +166,7 @@ const SystemModel: FC = ({
{t('common.modelProvider.embeddingModel.tip')}
} > - +
@@ -176,7 +186,7 @@ const SystemModel: FC = ({
{t('common.modelProvider.rerankModel.tip')}
} > - +
@@ -196,7 +206,7 @@ const SystemModel: FC = ({
{t('common.modelProvider.speechToTextModel.tip')}
} > - +
@@ -207,6 +217,26 @@ const SystemModel: FC = ({ />
+
+
+ {t('common.modelProvider.ttsModel.key')} + {t('common.modelProvider.ttsModel.tip')}
+ } + > + + +
+
+ handleChangeDefaultModel(ModelTypeEnum.tts, model)} + /> +
+
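One last cross-check between client and docs: the web client in this diff posts the content to synthesize as a `text` form field (see `AudioBtn` above), while the cURL samples in the MDX templates send `--form 'file=Hello Dify;user=abc-123;streaming=false'`; the `file=` field name there looks like a slip carried over from the audio-to-text docs, and separate fields need separate `--form` flags. A hedged TypeScript sketch of the call as the client actually makes it (the endpoint URL and key placeholder follow the docs' conventions):

```ts
// Sanity-check sketch for the new Service API endpoint (Node 18+ or browser).
async function synthesize(apiKey: string, text: string): Promise<ArrayBuffer> {
  const form = new FormData()
  form.append('text', text) // matches formData.append('text', ...) in AudioBtn
  form.append('user', 'abc-123')
  form.append('streaming', 'false')

  const res = await fetch('https://cloud.dify.ai/v1/text-to-audio', {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  })
  if (!res.ok)
    throw new Error(`text-to-audio failed: ${res.status}`)
  // Per the docs above, the response Content-Type is audio/wav.
  return res.arrayBuffer()
}
```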