add support for Tencent Cloud ASR (#2102)

### What problem does this PR solve?

Add support for Tencent Cloud ASR (automatic speech recognition) as a speech-to-text model provider.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
黄腾 2024-08-27 11:47:11 +08:00 committed by GitHub
parent cf038e099f
commit 2da4e7aa46
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 281 additions and 5 deletions

View File

@ -122,6 +122,10 @@ def add_llm():
f'"hunyuan_sk": "{req.get("hunyuan_sk", "")}"' + '}'
req["api_key"] = api_key
return set_api_key()
elif factory == "Tencent Cloud":
api_key = '{' + f'"tencent_cloud_sid": "{req.get("tencent_cloud_sid", "")}", ' \
f'"tencent_cloud_sk": "{req.get("tencent_cloud_sk", "")}"' + '}'
req["api_key"] = api_key
elif factory == "Bedrock":
# For Bedrock, due to its special authentication method
# Assemble bedrock_ak, bedrock_sk, bedrock_region

View File

@ -3233,6 +3233,13 @@
"tags": "TTS",
"status": "1",
"llm": []
},
{
"name": "Tencent Cloud",
"logo": "",
"tags": "SPEECH2TEXT",
"status": "1",
"llm": []
}
]
}

View File

@ -128,7 +128,8 @@ Seq2txtModel = {
"Tongyi-Qianwen": QWenSeq2txt,
"Ollama": OllamaSeq2txt,
"Azure-OpenAI": AzureSeq2txt,
"Xinference": XinferenceSeq2txt
"Xinference": XinferenceSeq2txt,
"Tencent Cloud": TencentCloudSeq2txt
}
TTSModel = {

View File

@ -22,7 +22,8 @@ from openai import OpenAI
import os
import json
from rag.utils import num_tokens_from_string
import base64
import re
class Base(ABC):
def __init__(self, key, model_name):
@ -35,6 +36,13 @@ class Base(ABC):
response_format="text"
)
return transcription.text.strip(), num_tokens_from_string(transcription.text.strip())
def audio2base64(self, audio):
    """Return ``audio`` encoded as a Base64 text string.

    Args:
        audio: Raw audio content, either a ``bytes`` object or an
            ``io.BytesIO`` buffer (its full contents are used).

    Returns:
        The Base64 encoding of the audio bytes, decoded to ``str``.

    Raises:
        TypeError: If ``audio`` is neither ``bytes`` nor ``io.BytesIO``.
    """
    # ``bytes`` is tested first, exactly as before, so plain byte strings
    # never reach the ``io.BytesIO`` check.
    if isinstance(audio, bytes):
        raw = audio
    elif isinstance(audio, io.BytesIO):
        raw = audio.getvalue()
    else:
        raise TypeError("The input audio file should be in binary format.")
    return base64.b64encode(raw).decode("utf-8")
class GPTSeq2txt(Base):
@ -87,3 +95,66 @@ class XinferenceSeq2txt(Base):
def __init__(self, key, model_name="", base_url=""):
self.client = OpenAI(api_key="xxx", base_url=base_url)
self.model_name = model_name
class TencentCloudSeq2txt(Base):
    """Speech-to-text backend built on the Tencent Cloud ASR SDK client.

    A recognition task is created from Base64-encoded audio and then polled
    until it reports ``"success"`` or ``"failed"``, or until the retry budget
    is used up.
    """

    def __init__(
        self, key, model_name="16k_zh", base_url="https://asr.tencentcloudapi.com"
    ):
        """Create the ASR client from a JSON-encoded credential string.

        Args:
            key: JSON object text containing ``tencent_cloud_sid`` and
                ``tencent_cloud_sk``; a missing entry falls back to ``""``.
            model_name: Sent as ``EngineModelType`` with every task.
            base_url: Accepted but not used by this constructor.
        """
        # Function-scope imports: the SDK is loaded only when an instance
        # is created.
        from tencentcloud.common import credential
        from tencentcloud.asr.v20190614 import asr_client

        secrets = json.loads(key)
        secret_id = secrets.get("tencent_cloud_sid", "")
        secret_key = secrets.get("tencent_cloud_sk", "")

        # The second argument (presumably the region -- TODO confirm) is
        # left empty.
        self.client = asr_client.AsrClient(
            credential.Credential(secret_id, secret_key), ""
        )
        self.model_name = model_name

    def transcription(self, audio, max_retries=60, retry_interval=5):
        """Transcribe ``audio`` by creating a recognition task and polling it.

        Args:
            audio: Audio content; it is handed to ``self.audio2base64``, and
                any exception raised there propagates to the caller.
            max_retries: Maximum number of status queries before giving up.
            retry_interval: Seconds to sleep after each query whose status is
                neither ``"success"`` nor ``"failed"``.

        Returns:
            ``(text, token_count)`` on success. On failure, timeout or any
            exception raised while talking to the service, a tuple of an
            ``"**ERROR**: ..."`` message and ``0``.
        """
        from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
            TencentCloudSDKException,
        )
        from tencentcloud.asr.v20190614 import models
        import time

        encoded_audio = self.audio2base64(audio)

        try:
            # Submit the recognition task with the audio embedded in the
            # request body.
            create_request = models.CreateRecTaskRequest()
            create_request.from_json_string(
                json.dumps(
                    {
                        "EngineModelType": self.model_name,
                        "ChannelNum": 1,
                        "ResTextFormat": 0,
                        "SourceType": 1,
                        "Data": encoded_audio,
                    }
                )
            )
            created = self.client.CreateRecTask(create_request)

            # Poll the task status until it reaches a terminal state.
            status_request = models.DescribeTaskStatusRequest()
            status_request.from_json_string(
                json.dumps({"TaskId": created.Data.TaskId})
            )

            attempts = 0
            while attempts < max_retries:
                task = self.client.DescribeTaskStatus(status_request).Data
                status = task.StatusStr

                if status == "success":
                    # Strip "[m:s.ms,m:s.ms]" time-range markers from the
                    # returned text.
                    transcript = re.sub(
                        r"\[\d+:\d+\.\d+,\d+:\d+\.\d+\]\s*", "", task.Result
                    ).strip()
                    return transcript, num_tokens_from_string(transcript)

                if status == "failed":
                    return (
                        "**ERROR**: Failed to retrieve speech recognition results.",
                        0,
                    )

                # Any other status: wait, then query again.
                time.sleep(retry_interval)
                attempts += 1

            return "**ERROR**: Max retries exceeded. Task may still be processing.", 0
        except TencentCloudSDKException as sdk_error:
            return "**ERROR**: " + str(sdk_error), 0
        except Exception as error:
            return "**ERROR**: " + str(error), 0

View File

@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1724663790857" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="4238" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M330.24 820.224l471.04-13.824c16.384-5.12 48.64-17.92 76.8-48.128 11.776-12.288 47.616-52.736 46.08-113.664-0.512-18.432-5.12-65.536-46.08-104.448-33.792-32.256-72.192-40.448-87.04-42.496-58.368-8.192-101.888 19.456-112.64 26.624l-199.68 291.84c-49.664 1.536-98.816 3.584-148.48 5.12l291.84-413.696c21.504-11.776 80.384-39.936 158.72-33.28 104.448 9.216 165.888 74.752 179.2 90.112 38.4 43.52 50.176 88.064 56.32 113.664 13.824 55.808 11.264 143.36-35.84 222.72-10.752 17.408-27.648 46.08-61.44 71.68-44.032 33.792-89.088 43.008-112.64 46.592l-552.96 4.608c25.6-34.304 51.2-69.12 76.8-103.424z" fill="#00A3FF" p-id="4239"></path><path d="M219.648 391.68c-45.056 13.824-90.112 27.136-135.168 40.96 14.848-207.36 180.736-367.104 372.736-373.76 165.376-5.632 322.56 103.936 379.904 271.36h-133.12c-42.496-102.4-144.896-166.4-252.416-158.72-116.736 8.704-214.528 100.352-231.936 220.16z" fill="#026FFF" p-id="4240"></path><path d="M519.68 458.24c-24.576-23.552-75.776-66.56-153.6-87.04-29.696-7.68-95.744-24.576-174.08 0-24.064 7.168-85.504 27.136-133.12 87.04C5.12 525.824 3.072 600.064 2.56 632.32c-0.512 28.672-1.536 129.536 76.8 209.92 71.168 72.704 160.768 80.896 184.32 81.92 24.064-35.84 47.616-71.68 71.68-107.52-17.92 4.096-52.736 8.704-93.696-2.048-17.92-5.12-55.296-15.36-85.504-49.152-39.936-44.032-40.448-96.768-40.96-117.76-0.512-19.456-1.024-72.192 35.84-117.76 47.616-58.88 121.344-60.928 138.24-61.44 96.256-3.072 159.232 65.536 168.96 76.8 20.48-29.184 40.96-57.856 61.44-87.04z" fill="#05C8DB" p-id="4241"></path></svg>

After

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -506,6 +506,7 @@ The above is the content you need to summarize.`,
vision: 'Does it support Vision?',
ollamaLink: 'How to integrate {{name}}',
FishAudioLink: 'How to use FishAudio',
TencentCloudLink: 'How to use TencentCloud ASR',
volcModelNameMessage: 'Please input your model name!',
addEndpointID: 'EndpointID of the model',
endpointIDMessage: 'Please input your EndpointID of the model',
@ -529,6 +530,10 @@ The above is the content you need to summarize.`,
HunyuanSIDMessage: 'Please input your Secret ID',
addHunyuanSK: 'Hunyuan Secret Key',
HunyuanSKMessage: 'Please input your Secret Key',
addTencentCloudSID: 'TencentCloud Secret ID',
TencentCloudSIDMessage: 'Please input your Secret ID',
addTencentCloudSK: 'TencentCloud Secret Key',
TencentCloudSKMessage: 'Please input your Secret Key',
SparkModelNameMessage: 'Please select Spark model',
addSparkAPIPassword: 'Spark APIPassword',
SparkAPIPasswordMessage: 'please input your APIPassword',

View File

@ -468,6 +468,7 @@ export default {
baseUrlNameMessage: '請輸入基礎 Url',
ollamaLink: '如何集成 {{name}}',
FishAudioLink: '如何使用Fish Audio',
TencentCloudLink: '如何使用騰訊雲語音識別',
volcModelNameMessage: '請輸入模型名稱!',
addEndpointID: '模型 EndpointID',
endpointIDMessage: '請輸入模型對應的EndpointID',
@ -491,6 +492,10 @@ export default {
HunyuanSIDMessage: '請輸入 Secret ID',
addHunyuanSK: '混元 Secret Key',
HunyuanSKMessage: '請輸入 Secret Key',
addTencentCloudSID: '騰訊雲 Secret ID',
TencentCloudSIDMessage: '請輸入 Secret ID',
addTencentCloudSK: '騰訊雲 Secret Key',
TencentCloudSKMessage: '請輸入 Secret Key',
SparkModelNameMessage: '請選擇星火模型!',
addSparkAPIPassword: '星火 APIPassword',
SparkAPIPasswordMessage: '請輸入 APIPassword',

View File

@ -485,6 +485,7 @@ export default {
baseUrlNameMessage: '请输入基础 Url',
ollamaLink: '如何集成 {{name}}',
FishAudioLink: '如何使用Fish Audio',
TencentCloudLink: '如何使用腾讯云语音识别',
volcModelNameMessage: '请输入模型名称!',
addEndpointID: '模型 EndpointID',
endpointIDMessage: '请输入模型对应的EndpointID',
@ -508,6 +509,10 @@ export default {
HunyuanSIDMessage: '请输入 Secret ID',
addHunyuanSK: '混元 Secret Key',
HunyuanSKMessage: '请输入 Secret Key',
addTencentCloudSID: '腾讯云 Secret ID',
TencentCloudSIDMessage: '请输入 Secret ID',
addTencentCloudSK: '腾讯云 Secret Key',
TencentCloudSKMessage: '请输入 Secret Key',
SparkModelNameMessage: '请选择星火模型!',
addSparkAPIPassword: '星火 APIPassword',
SparkAPIPasswordMessage: '请输入 APIPassword',

View File

@ -0,0 +1,129 @@
import { useTranslate } from '@/hooks/common-hooks';
import { IModalProps } from '@/interfaces/common';
import { IAddLlmRequestBody } from '@/interfaces/request/llm';
import { Flex, Form, Input, Modal, Select, Space } from 'antd';
import omit from 'lodash/omit';
// Form values: the generic add-LLM request body plus the two Tencent Cloud
// credentials. The credential field names must be `tencent_cloud_sid` and
// `tencent_cloud_sk`, because those are the keys the server-side `add_llm`
// handler reads from the request when the factory is "Tencent Cloud".
type FieldType = IAddLlmRequestBody & {
  tencent_cloud_sid: string;
  tencent_cloud_sk: string;
};

const { Option } = Select;

/**
 * Modal dialog for registering a Tencent Cloud speech-to-text model.
 *
 * Collects the model type, the engine model name and the Secret ID /
 * Secret Key, then passes them to `onOk` together with `llmFactory`.
 */
const TencentCloudModal = ({
  visible,
  hideModal,
  onOk,
  loading,
  llmFactory,
}: IModalProps<IAddLlmRequestBody> & { llmFactory: string }) => {
  const [form] = Form.useForm<FieldType>();

  const { t } = useTranslate('setting');

  // Validate the form and submit its values. The payload carries the
  // Secret ID / Secret Key, so it is deliberately not written to the console.
  const handleOk = async () => {
    const values = await form.validateFields();
    const modelType = values.model_type;

    const data = {
      ...omit(values),
      model_type: modelType,
      llm_factory: llmFactory,
    };

    onOk?.(data);
  };

  return (
    <Modal
      title={t('addLlmTitle', { name: llmFactory })}
      open={visible}
      onOk={handleOk}
      onCancel={hideModal}
      okButtonProps={{ loading }}
      footer={(originNode: React.ReactNode) => {
        // Custom footer: documentation link on the left, default buttons on the right.
        return (
          <Flex justify={'space-between'}>
            <a
              href={`https://cloud.tencent.com/document/api/1093/37823`}
              target="_blank"
              rel="noreferrer"
            >
              {t('TencentCloudLink')}
            </a>
            <Space>{originNode}</Space>
          </Flex>
        );
      }}
      confirmLoading={loading}
    >
      <Form
        name="basic"
        style={{ maxWidth: 600 }}
        autoComplete="off"
        layout={'vertical'}
        form={form}
      >
        <Form.Item<FieldType>
          label={t('modelType')}
          name="model_type"
          initialValue={'speech2text'}
          rules={[{ required: true, message: t('modelTypeMessage') }]}
        >
          <Select placeholder={t('modelTypeMessage')}>
            <Option value="speech2text">speech2text</Option>
          </Select>
        </Form.Item>
        {/* NOTE(review): the validation message and placeholder below reuse
            the Spark / model-type strings; confirm whether a dedicated
            Tencent Cloud model-name message should be added. */}
        <Form.Item<FieldType>
          label={t('modelName')}
          name="llm_name"
          initialValue={'16k_zh'}
          rules={[{ required: true, message: t('SparkModelNameMessage') }]}
        >
          <Select placeholder={t('modelTypeMessage')}>
            <Option value="16k_zh">16k_zh</Option>
            <Option value="16k_zh_large">16k_zh_large</Option>
            <Option value="16k_multi_lang">16k_multi_lang</Option>
            <Option value="16k_zh_dialect">16k_zh_dialect</Option>
            <Option value="16k_en">16k_en</Option>
            <Option value="16k_yue">16k_yue</Option>
            <Option value="16k_zh-PY">16k_zh-PY</Option>
            <Option value="16k_ja">16k_ja</Option>
            <Option value="16k_ko">16k_ko</Option>
            <Option value="16k_vi">16k_vi</Option>
            <Option value="16k_ms">16k_ms</Option>
            <Option value="16k_id">16k_id</Option>
            <Option value="16k_fil">16k_fil</Option>
            <Option value="16k_th">16k_th</Option>
            <Option value="16k_pt">16k_pt</Option>
            <Option value="16k_tr">16k_tr</Option>
            <Option value="16k_ar">16k_ar</Option>
            <Option value="16k_es">16k_es</Option>
            <Option value="16k_hi">16k_hi</Option>
            <Option value="16k_fr">16k_fr</Option>
            <Option value="16k_zh_medical">16k_zh_medical</Option>
            <Option value="16k_de">16k_de</Option>
          </Select>
        </Form.Item>
        <Form.Item<FieldType>
          label={t('addTencentCloudSID')}
          name="tencent_cloud_sid"
          rules={[{ required: true, message: t('TencentCloudSIDMessage') }]}
        >
          <Input placeholder={t('TencentCloudSIDMessage')} />
        </Form.Item>
        <Form.Item<FieldType>
          label={t('addTencentCloudSK')}
          name="tencent_cloud_sk"
          rules={[{ required: true, message: t('TencentCloudSKMessage') }]}
        >
          <Input placeholder={t('TencentCloudSKMessage')} />
        </Form.Item>
      </Form>
    </Modal>
  );
};

export default TencentCloudModal;

View File

@ -36,6 +36,7 @@ export const IconMap = {
'XunFei Spark': 'spark',
BaiduYiyan: 'yiyan',
'Fish Audio': 'fish-audio',
'Tencent Cloud': 'tencent-cloud',
};
export const BedrockRegionList = [

View File

@ -81,14 +81,14 @@ const FishAudioModal = ({
</Form.Item>
<Form.Item<FieldType>
label={t('addFishAudioAK')}
name="FishAudio_ak"
name="fish_audio_ak"
rules={[{ required: true, message: t('FishAudioAKMessage') }]}
>
<Input placeholder={t('FishAudioAKMessage')} />
</Form.Item>
<Form.Item<FieldType>
label={t('addFishAudioRefID')}
name="FishAudio_refid"
name="fish_audio_refid"
rules={[{ required: false, message: t('FishAudioRefIDMessage') }]}
>
<Input placeholder={t('FishAudioRefIDMessage')} />

View File

@ -190,6 +190,33 @@ export const useSubmitHunyuan = () => {
};
};
/**
 * Hook bundling the visibility state and the submit handler of the
 * Tencent Cloud "add model" modal.
 *
 * Returns the loading flag from `useAddLlm`, the modal's visible flag with
 * its show/hide callbacks, and an OK handler that submits the payload.
 */
export const useSubmitTencentCloud = () => {
  const { addLlm, loading: TencentCloudAddingLoading } = useAddLlm();
  const {
    visible: TencentCloudAddingVisible,
    hideModal: hideTencentCloudAddingModal,
    showModal: showTencentCloudAddingModal,
  } = useSetModalState();

  // Submit the payload; the modal is closed only when `addLlm` resolves to 0.
  const onTencentCloudAddingOk = useCallback(
    async (payload: IAddLlmRequestBody) => {
      const retcode = await addLlm(payload);
      if (retcode !== 0) {
        return;
      }
      hideTencentCloudAddingModal();
    },
    [hideTencentCloudAddingModal, addLlm],
  );

  return {
    TencentCloudAddingLoading,
    onTencentCloudAddingOk,
    TencentCloudAddingVisible,
    hideTencentCloudAddingModal,
    showTencentCloudAddingModal,
  };
};
export const useSubmitSpark = () => {
const { addLlm, loading } = useAddLlm();
const {

View File

@ -27,6 +27,7 @@ import {
import { useCallback, useMemo } from 'react';
import SettingTitle from '../components/setting-title';
import { isLocalLlmFactory } from '../utils';
import TencentCloudModal from './Tencent-modal';
import ApiKeyModal from './api-key-modal';
import BedrockModal from './bedrock-modal';
import { IconMap } from './constant';
@ -40,6 +41,7 @@ import {
useSubmitOllama,
useSubmitSpark,
useSubmitSystemModelSetting,
useSubmitTencentCloud,
useSubmitVolcEngine,
useSubmityiyan,
} from './hooks';
@ -101,7 +103,8 @@ const ModelCard = ({ item, clickApiKey }: IModelCardProps) => {
item.name === 'Tencent Hunyuan' ||
item.name === 'XunFei Spark' ||
item.name === 'BaiduYiyan' ||
item.name === 'Fish Audio'
item.name === 'Fish Audio' ||
item.name === 'Tencent Cloud'
? t('addTheModel')
: 'API-Key'}
<SettingOutlined />
@ -183,6 +186,14 @@ const UserSettingModel = () => {
HunyuanAddingLoading,
} = useSubmitHunyuan();
const {
TencentCloudAddingVisible,
hideTencentCloudAddingModal,
showTencentCloudAddingModal,
onTencentCloudAddingOk,
TencentCloudAddingLoading,
} = useSubmitTencentCloud();
const {
SparkAddingVisible,
hideSparkAddingModal,
@ -223,11 +234,13 @@ const UserSettingModel = () => {
'XunFei Spark': showSparkAddingModal,
BaiduYiyan: showyiyanAddingModal,
'Fish Audio': showFishAudioAddingModal,
'Tencent Cloud': showTencentCloudAddingModal,
}),
[
showBedrockAddingModal,
showVolcAddingModal,
showHunyuanAddingModal,
showTencentCloudAddingModal,
showSparkAddingModal,
showyiyanAddingModal,
showFishAudioAddingModal,
@ -349,6 +362,13 @@ const UserSettingModel = () => {
loading={HunyuanAddingLoading}
llmFactory={'Tencent Hunyuan'}
></HunyuanModal>
{/* llmFactory must be exactly 'Tencent Cloud': it is sent as `llm_factory`
    and has to match the factory name used by the icon map, the modal
    dispatch table and the server-side factory check. */}
<TencentCloudModal
  visible={TencentCloudAddingVisible}
  hideModal={hideTencentCloudAddingModal}
  onOk={onTencentCloudAddingOk}
  loading={TencentCloudAddingLoading}
  llmFactory={'Tencent Cloud'}
></TencentCloudModal>
<SparkModal
visible={SparkAddingVisible}
hideModal={hideSparkAddingModal}