mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-16 19:06:00 +08:00
add support for TongyiQwen tts (#2311)
### What problem does this PR solve?

Add support for Tongyi-Qianwen (Qwen) TTS. #1853

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
This commit is contained in:
parent
2ac72899ef
commit
cb69c742b0
@ -104,18 +104,24 @@
|
|||||||
"max_tokens": 2048,
|
"max_tokens": 2048,
|
||||||
"model_type": "embedding"
|
"model_type": "embedding"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "sambert-zhide-v1",
|
||||||
|
"tags": "TTS",
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"model_type": "tts"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"llm_name": "sambert-zhiru-v1",
|
||||||
|
"tags": "TTS",
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"model_type": "tts"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"llm_name": "text-embedding-v3",
|
"llm_name": "text-embedding-v3",
|
||||||
"tags": "TEXT EMBEDDING,8K",
|
"tags": "TEXT EMBEDDING,8K",
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
"model_type": "embedding"
|
"model_type": "embedding"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"llm_name": "paraformer-realtime-8k-v1",
|
|
||||||
"tags": "SPEECH2TEXT",
|
|
||||||
"max_tokens": 26214400,
|
|
||||||
"model_type": "speech2text"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"llm_name": "qwen-vl-max",
|
"llm_name": "qwen-vl-max",
|
||||||
"tags": "LLM,CHAT,IMAGE2TEXT",
|
"tags": "LLM,CHAT,IMAGE2TEXT",
|
||||||
|
@ -137,5 +137,6 @@ Seq2txtModel = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TTSModel = {
|
TTSModel = {
|
||||||
"Fish Audio": FishAudioTTS
|
"Fish Audio": FishAudioTTS,
|
||||||
|
"Tongyi-Qianwen": QwenTTS
|
||||||
}
|
}
|
@ -22,7 +22,7 @@ from pydantic import BaseModel, conint
|
|||||||
from rag.utils import num_tokens_from_string
|
from rag.utils import num_tokens_from_string
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
class ServeReferenceAudio(BaseModel):
|
class ServeReferenceAudio(BaseModel):
|
||||||
audio: bytes
|
audio: bytes
|
||||||
text: str
|
text: str
|
||||||
@ -96,3 +96,61 @@ class FishAudioTTS(Base):
|
|||||||
|
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
raise RuntimeError(f"**ERROR**: {e}")
|
raise RuntimeError(f"**ERROR**: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
class QwenTTS(Base):
    """Text-to-speech client that synthesizes audio through the DashScope SDK."""

    def __init__(self, key, model_name, base_url=""):
        """Remember the model name and hand the API key to DashScope.

        Args:
            key: API key; assigned to ``dashscope.api_key``.
            model_name: Value later passed as ``model`` to
                ``SpeechSynthesizer.call``.
            base_url: Accepted but not used by this class.
        """
        import dashscope

        self.model_name = model_name
        # NOTE: this sets an attribute on the dashscope module itself, so the
        # key is shared by every user of the module, not just this instance.
        dashscope.api_key = key

    def tts(self, text):
        """Synthesize ``text`` and stream the result.

        Args:
            text: Text to speak. It is passed through ``self.normalize_text``
                (provided outside this class) before synthesis.

        Yields:
            Every non-None audio frame reported by DashScope (mp3 format is
            requested), in arrival order, and finally the value of
            ``num_tokens_from_string`` for the normalized text.

        Raises:
            RuntimeError: If synthesis reports an error or any other
                exception occurs; the message is prefixed with ``**ERROR**``
                and the original exception is chained as the cause.
        """
        from queue import Queue

        from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse
        from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthesisResult

        class Callback(ResultCallback):
            """Collects audio frames from DashScope callbacks into a queue."""

            def __init__(self) -> None:
                super().__init__()
                # A blocking queue lets the consumer wait without spinning.
                self.frames = Queue()
                self.error = None

            def _run(self):
                """Yield queued frames until the ``None`` end marker arrives."""
                while True:
                    frame = self.frames.get()
                    # Compare against the sentinel explicitly: an empty
                    # ``b""`` frame is falsy but must not end the stream.
                    if frame is None:
                        break
                    yield frame
                if self.error is not None:
                    raise self.error

            def on_open(self):
                pass

            def on_complete(self):
                self.frames.put(None)

            def on_error(self, response: SpeechSynthesisResponse):
                self.error = RuntimeError(str(response))
                # Enqueue the end marker before raising so a consumer that is
                # waiting on the queue can never be left blocked by a failure.
                self.frames.put(None)
                raise self.error

            def on_close(self):
                pass

            def on_event(self, result: SpeechSynthesisResult):
                frame = result.get_audio_frame()
                if frame is not None:
                    self.frames.put(frame)

        text = self.normalize_text(text)
        callback = Callback()
        try:
            # The call lives inside the try block so that failures raised
            # while synthesizing get the same "**ERROR**" wrapping as
            # failures raised while streaming.
            SpeechSynthesizer.call(
                model=self.model_name,
                text=text,
                callback=callback,
                format="mp3",
            )
            for data in callback._run():
                yield data
            yield num_tokens_from_string(text)
        except Exception as e:
            raise RuntimeError(f"**ERROR**: {e}") from e
|
Loading…
x
Reference in New Issue
Block a user