diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index cf4ac10828..c2fa0e5a6e 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -23,6 +23,7 @@
- tongyi
- wenxin
- moonshot
+- tencent
- jina
- chatglm
- yi
diff --git a/api/core/model_runtime/model_providers/tencent/__init__.py b/api/core/model_runtime/model_providers/tencent/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/tencent/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/tencent/_assets/icon_l_en.svg
new file mode 100644
index 0000000000..63c7c8f988
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tencent/_assets/icon_l_en.svg
@@ -0,0 +1,13 @@
+
\ No newline at end of file
diff --git a/api/core/model_runtime/model_providers/tencent/_assets/icon_l_zh.svg b/api/core/model_runtime/model_providers/tencent/_assets/icon_l_zh.svg
new file mode 100644
index 0000000000..63c7c8f988
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tencent/_assets/icon_l_zh.svg
@@ -0,0 +1,13 @@
+
\ No newline at end of file
diff --git a/api/core/model_runtime/model_providers/tencent/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/tencent/_assets/icon_s_en.svg
new file mode 100644
index 0000000000..a3299b9201
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tencent/_assets/icon_s_en.svg
@@ -0,0 +1,11 @@
+
\ No newline at end of file
diff --git a/api/core/model_runtime/model_providers/tencent/speech2text/__init__.py b/api/core/model_runtime/model_providers/tencent/speech2text/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/tencent/speech2text/flash_recognizer.py b/api/core/model_runtime/model_providers/tencent/speech2text/flash_recognizer.py
new file mode 100644
index 0000000000..c3e3b7c258
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tencent/speech2text/flash_recognizer.py
@@ -0,0 +1,156 @@
+import base64
+import hashlib
+import hmac
+import time
+
+import requests
+
+
+class Credential:
+ def __init__(self, secret_id, secret_key):
+ self.secret_id = secret_id
+ self.secret_key = secret_key
+
+
+class FlashRecognitionRequest:
+ def __init__(self, voice_format="mp3", engine_type="16k_zh"):
+ self.engine_type = engine_type
+ self.speaker_diarization = 0
+ self.hotword_id = ""
+ self.customization_id = ""
+ self.filter_dirty = 0
+ self.filter_modal = 0
+ self.filter_punc = 0
+ self.convert_num_mode = 1
+ self.word_info = 0
+ self.voice_format = voice_format
+ self.first_channel_only = 1
+ self.reinforce_hotword = 0
+ self.sentence_max_length = 0
+
+ def set_first_channel_only(self, first_channel_only):
+ self.first_channel_only = first_channel_only
+
+ def set_speaker_diarization(self, speaker_diarization):
+ self.speaker_diarization = speaker_diarization
+
+ def set_filter_dirty(self, filter_dirty):
+ self.filter_dirty = filter_dirty
+
+ def set_filter_modal(self, filter_modal):
+ self.filter_modal = filter_modal
+
+ def set_filter_punc(self, filter_punc):
+ self.filter_punc = filter_punc
+
+ def set_convert_num_mode(self, convert_num_mode):
+ self.convert_num_mode = convert_num_mode
+
+ def set_word_info(self, word_info):
+ self.word_info = word_info
+
+ def set_hotword_id(self, hotword_id):
+ self.hotword_id = hotword_id
+
+ def set_customization_id(self, customization_id):
+ self.customization_id = customization_id
+
+ def set_voice_format(self, voice_format):
+ self.voice_format = voice_format
+
+ def set_sentence_max_length(self, sentence_max_length):
+ self.sentence_max_length = sentence_max_length
+
+ def set_reinforce_hotword(self, reinforce_hotword):
+ self.reinforce_hotword = reinforce_hotword
+
+
+class FlashRecognizer:
+ """
+ reponse:
+ request_id string
+ status Integer
+ message String
+ audio_duration Integer
+ flash_result Result Array
+
+ Result:
+ text String
+ channel_id Integer
+ sentence_list Sentence Array
+
+ Sentence:
+ text String
+ start_time Integer
+ end_time Integer
+ speaker_id Integer
+ word_list Word Array
+
+ Word:
+ word String
+ start_time Integer
+ end_time Integer
+ stable_flag: Integer
+ """
+
+ def __init__(self, appid, credential):
+ self.credential = credential
+ self.appid = appid
+
+ def _format_sign_string(self, param):
+ signstr = "POSTasr.cloud.tencent.com/asr/flash/v1/"
+ for t in param:
+ if 'appid' in t:
+ signstr += str(t[1])
+ break
+ signstr += "?"
+ for x in param:
+ tmp = x
+ if 'appid' in x:
+ continue
+ for t in tmp:
+ signstr += str(t)
+ signstr += "="
+ signstr = signstr[:-1]
+ signstr += "&"
+ signstr = signstr[:-1]
+ return signstr
+
+ def _build_header(self):
+ header = {"Host": "asr.cloud.tencent.com"}
+ return header
+
+ def _sign(self, signstr, secret_key):
+ hmacstr = hmac.new(secret_key.encode('utf-8'),
+ signstr.encode('utf-8'), hashlib.sha1).digest()
+ s = base64.b64encode(hmacstr)
+ s = s.decode('utf-8')
+ return s
+
+ def _build_req_with_signature(self, secret_key, params, header):
+ query = sorted(params.items(), key=lambda d: d[0])
+ signstr = self._format_sign_string(query)
+ signature = self._sign(signstr, secret_key)
+ header["Authorization"] = signature
+ requrl = "https://"
+ requrl += signstr[4::]
+ return requrl
+
+ def _create_query_arr(self, req):
+ return {
+ 'appid': self.appid, 'secretid': self.credential.secret_id, 'timestamp': str(int(time.time())),
+ 'engine_type': req.engine_type, 'voice_format': req.voice_format,
+ 'speaker_diarization': req.speaker_diarization, 'hotword_id': req.hotword_id,
+ 'customization_id': req.customization_id, 'filter_dirty': req.filter_dirty,
+ 'filter_modal': req.filter_modal, 'filter_punc': req.filter_punc,
+ 'convert_num_mode': req.convert_num_mode, 'word_info': req.word_info,
+ 'first_channel_only': req.first_channel_only, 'reinforce_hotword': req.reinforce_hotword,
+ 'sentence_max_length': req.sentence_max_length
+ }
+
+ def recognize(self, req, data):
+ header = self._build_header()
+ query_arr = self._create_query_arr(req)
+ req_url = self._build_req_with_signature(self.credential.secret_key, query_arr, header)
+ r = requests.post(req_url, headers=header, data=data)
+ return r.text
diff --git a/api/core/model_runtime/model_providers/tencent/speech2text/speech2text.py b/api/core/model_runtime/model_providers/tencent/speech2text/speech2text.py
new file mode 100644
index 0000000000..00ec5aa9c8
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tencent/speech2text/speech2text.py
@@ -0,0 +1,92 @@
+import json
+from typing import IO, Optional
+
+import requests
+
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeConnectionError,
+ InvokeError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
+from core.model_runtime.model_providers.tencent.speech2text.flash_recognizer import (
+ Credential,
+ FlashRecognitionRequest,
+ FlashRecognizer,
+)
+
+
+class TencentSpeech2TextModel(Speech2TextModel):
+ def _invoke(self, model: str, credentials: dict,
+ file: IO[bytes], user: Optional[str] = None) \
+ -> str:
+ """
+ Invoke speech2text model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param file: audio file
+ :param user: unique user id
+ :return: text for given audio file
+ """
+ return self._speech2text_invoke(model, credentials, file)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ audio_file_path = self._get_demo_file_path()
+
+ with open(audio_file_path, 'rb') as audio_file:
+ self._speech2text_invoke(model, credentials, audio_file)
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ def _speech2text_invoke(self, model: str, credentials: dict, file: IO[bytes]) -> str:
+ """
+ Invoke speech2text model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param file: audio file
+ :return: text for given audio file
+ """
+ app_id = credentials["app_id"]
+ secret_id = credentials["secret_id"]
+ secret_key = credentials["secret_key"]
+ voice_format = file.voice_format if hasattr(file, "voice_format") else "mp3"
+ tencent_voice_recognizer = FlashRecognizer(app_id, Credential(secret_id, secret_key))
+ resp = tencent_voice_recognizer.recognize(FlashRecognitionRequest(voice_format), file)
+ resp = json.loads(resp)
+ code = resp["code"]
+ message = resp["message"]
+ if code == 4002:
+ raise CredentialsValidateFailedError(str(message))
+ elif code != 0:
+ return f"Tencent ASR Recognition failed with code {code} and message {message}"
+ return "\n".join(item["text"] for item in resp["flash_result"])
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ The key is the error type thrown to the caller
+ The value is the error type thrown by the model,
+ which needs to be converted into a unified error type for the caller.
+
+ :return: Invoke error mapping
+ """
+ return {
+ InvokeConnectionError: [
+ requests.exceptions.ConnectionError
+ ],
+ InvokeAuthorizationError: [
+ CredentialsValidateFailedError
+ ]
+ }
diff --git a/api/core/model_runtime/model_providers/tencent/speech2text/tencent.yaml b/api/core/model_runtime/model_providers/tencent/speech2text/tencent.yaml
new file mode 100644
index 0000000000..618d19ac7c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tencent/speech2text/tencent.yaml
@@ -0,0 +1,5 @@
+model: tencent
+model_type: speech2text
+model_properties:
+ file_upload_limit: 25
+ supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm
diff --git a/api/core/model_runtime/model_providers/tencent/tencent.py b/api/core/model_runtime/model_providers/tencent/tencent.py
new file mode 100644
index 0000000000..dd9f90bb47
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tencent/tencent.py
@@ -0,0 +1,29 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class TencentProvider(ModelProvider):
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+
+ if validate failed, raise exception
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.SPEECH2TEXT)
+ model_instance.validate_credentials(
+ model='tencent',
+ credentials=credentials
+ )
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+ raise ex
diff --git a/api/core/model_runtime/model_providers/tencent/tencent.yaml b/api/core/model_runtime/model_providers/tencent/tencent.yaml
new file mode 100644
index 0000000000..7d8d5a1866
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tencent/tencent.yaml
@@ -0,0 +1,49 @@
+provider: tencent
+label:
+ zh_Hans: 腾讯云
+ en_US: Tencent
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ zh_Hans: icon_l_zh.svg
+ en_US: icon_l_en.svg
+background: "#E5E7EB"
+help:
+ title:
+ en_US: Get your API key from Tencent AI
+ zh_Hans: 从腾讯云获取 API Key
+ url:
+ en_US: https://cloud.tencent.com/product/asr
+supported_model_types:
+ - speech2text
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: app_id
+ label:
+ zh_Hans: APPID
+ en_US: APPID
+ type: text-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的腾讯语音识别服务的 APPID
+ en_US: Enter the APPID of your Tencent Cloud ASR service
+ - variable: secret_id
+ label:
+ zh_Hans: SecretId
+ en_US: SecretId
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的腾讯语音识别服务的 SecretId
+ en_US: Enter the SecretId of your Tencent Cloud ASR service
+ - variable: secret_key
+ label:
+ zh_Hans: SecretKey
+ en_US: SecretKey
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的腾讯语音识别服务的 SecretKey
+ en_US: Enter the SecretKey of your Tencent Cloud ASR service