From c6e1a2ca8a1a60c83d86bc4d5739f4a902f14b9e Mon Sep 17 00:00:00 2001
From: Kevin Hu <kevinhu.sh@gmail.com>
Date: Wed, 19 Mar 2025 12:52:12 +0800
Subject: [PATCH] Feat: add TTS support for SILICONFLOW. (#6264)

### What problem does this PR solve?

#6244

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 rag/llm/__init__.py  |  2 ++
 rag/llm/tts_model.py | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index 5db8970b5..649599f10 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -143,6 +143,7 @@ from .tts_model import (
     SparkTTS,
     XinferenceTTS,
     GPUStackTTS,
+    SILICONFLOWTTS
 )
 
 EmbeddingModel = {
@@ -278,4 +279,5 @@ TTSModel = {
     "XunFei Spark": SparkTTS,
     "Xinference": XinferenceTTS,
     "GPUStack": GPUStackTTS,
+    "SILICONFLOW": SILICONFLOWTTS,
 }
diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py
index 71fb93b74..ebcdca5e4 100644
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@@ -356,6 +356,7 @@ class OllamaTTS(Base):
             if chunk:
                 yield chunk
 
+
 class GPUStackTTS:
     def __init__(self, key, model_name, **kwargs):
         self.base_url = kwargs.get("base_url", None)
@@ -386,4 +387,38 @@ class GPUStackTTS:
 
         for chunk in response.iter_content(chunk_size=1024):
             if chunk:
-                yield chunk
\ No newline at end of file
+                yield chunk
+
+
+class SILICONFLOWTTS(Base):
+    """Streaming text-to-speech client that POSTs to SiliconFlow's `/audio/speech` endpoint."""
+    def __init__(self, key, model_name="FunAudioLLM/CosyVoice2-0.5B", base_url="https://api.siliconflow.cn/v1"):
+        # A falsy base_url (None or "") falls back to the public endpoint.
+        self.base_url = base_url or "https://api.siliconflow.cn/v1"
+        self.api_key = key
+        self.model_name = model_name
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json"
+        }
+
+    def tts(self, text, voice="anna"):
+        """Yield the response body (mp3 requested) in chunks for `text`; raise Exception on a non-200 reply."""
+        payload = {
+            "model": self.model_name,
+            "input": self.normalize_text(text),
+            "voice": f"{self.model_name}:{voice}",  # sent as "<model_name>:<voice>"
+            "response_format": "mp3",
+            "sample_rate": 44100,  # Hz; replaces the placeholder 123 (SiliconFlow docs list 32000/44100 for mp3)
+            "stream": True,
+            "speed": 1,
+            "gain": 0
+        }
+        # stream=True stops requests from buffering the whole body before the first chunk is yielded.
+        response = requests.post(f"{self.base_url}/audio/speech", headers=self.headers, json=payload, stream=True)
+
+        if response.status_code != 200:
+            raise Exception(f"**Error**: {response.status_code}, {response.text}")
+        for chunk in response.iter_content(chunk_size=1024):  # default chunk_size=1 would yield single bytes
+            if chunk:
+                yield chunk