From c6e1a2ca8a1a60c83d86bc4d5739f4a902f14b9e Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Wed, 19 Mar 2025 12:52:12 +0800
Subject: [PATCH] Feat: add TTS support for SILICONFLOW. (#6264)

### What problem does this PR solve?

#6244

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
Review notes:
- tts() now sends the request with stream=True and reads 1 KiB chunks.
- "sample_rate" changed from 123 to 44100.
- Hunk and diffstat counts were updated to match; the post-image blob id
  on the rag/llm/tts_model.py "index" line predates these changes.

 rag/llm/__init__.py  |  2 ++
 rag/llm/tts_model.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index 5db8970b5..649599f10 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -143,6 +143,7 @@ from .tts_model import (
     SparkTTS,
     XinferenceTTS,
     GPUStackTTS,
+    SILICONFLOWTTS
 )
 
 EmbeddingModel = {
@@ -278,4 +279,5 @@ TTSModel = {
     "XunFei Spark": SparkTTS,
     "Xinference": XinferenceTTS,
     "GPUStack": GPUStackTTS,
+    "SILICONFLOW": SILICONFLOWTTS,
 }
diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py
index 71fb93b74..ebcdca5e4 100644
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@@ -356,6 +356,7 @@ class OllamaTTS(Base):
             if chunk:
                 yield chunk
 
+
 class GPUStackTTS:
     def __init__(self, key, model_name, **kwargs):
         self.base_url = kwargs.get("base_url", None)
@@ -386,4 +387,53 @@ class GPUStackTTS:
 
         for chunk in response.iter_content(chunk_size=1024):
             if chunk:
-                yield chunk
\ No newline at end of file
+                yield chunk
+
+
+class SILICONFLOWTTS(Base):
+    """Text-to-speech client for the SiliconFlow ``/audio/speech`` endpoint."""
+
+    def __init__(self, key, model_name="FunAudioLLM/CosyVoice2-0.5B", base_url="https://api.siliconflow.cn/v1"):
+        """Store the credentials, model name and endpoint root.
+
+        A falsy ``base_url`` (``None`` or ``""``) falls back to the public
+        SiliconFlow endpoint.
+        """
+        if not base_url:
+            base_url = "https://api.siliconflow.cn/v1"
+        self.api_key = key
+        self.model_name = model_name
+        self.base_url = base_url
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json"
+        }
+
+    def tts(self, text, voice="anna"):
+        """Yield the synthesized speech for ``text`` as chunks of MP3 bytes.
+
+        This is a generator: the request is sent on first iteration, and an
+        ``Exception`` is raised there if the response status is not 200.
+        """
+        text = self.normalize_text(text)
+        payload = {
+            "model": self.model_name,
+            "input": text,
+            "voice": f"{self.model_name}:{voice}",
+            "response_format": "mp3",
+            # 44100 Hz is a rate the API accepts for mp3; 123 is not a valid rate.
+            "sample_rate": 44100,
+            "stream": True,
+            "speed": 1,
+            "gain": 0
+        }
+
+        # stream=True so audio is forwarded as it arrives, not buffered whole.
+        response = requests.post(f"{self.base_url}/audio/speech", headers=self.headers, json=payload, stream=True)
+
+        if response.status_code != 200:
+            raise Exception(f"**Error**: {response.status_code}, {response.text}")
+        # Read in 1 KiB chunks, as GPUStackTTS does, rather than byte by byte.
+        for chunk in response.iter_content(chunk_size=1024):
+            if chunk:
+                yield chunk