From a7162240e6de0eb70a6914a3f9929b69057a4630 Mon Sep 17 00:00:00 2001
From: shAlfred
Date: Thu, 8 Aug 2024 17:08:28 +0800
Subject: [PATCH] feat: add text-embedding function and LLM models to Siliconflow (#7090)

---
 .../siliconflow/llm/_position.yaml            | 28 ++++++---
 .../siliconflow/llm/deepseek-v2-chat.yaml     |  2 +-
 .../siliconflow/llm/gemma-2-27b-it.yaml       | 30 +++++++++
 .../siliconflow/llm/gemma-2-9b-it.yaml        | 30 +++++++++
 .../siliconflow/llm/glm4-9b-chat.yaml         |  6 +-
 .../siliconflow/llm/internlm2_5-7b-chat.yaml  | 30 +++++++++
 .../llm/meta-mlama-3-70b-instruct.yaml        | 30 +++++++++
 .../llm/meta-mlama-3-8b-instruct.yaml         | 30 +++++++++
 .../llm/meta-mlama-3.1-405b-instruct.yaml     | 30 +++++++++
 .../llm/meta-mlama-3.1-70b-instruct.yaml      | 30 +++++++++
 .../llm/meta-mlama-3.1-8b-instruct.yaml       | 30 +++++++++
 .../llm/mistral-7b-instruct-v0.2.yaml         | 30 +++++++++
 .../llm/mistral-8x7b-instruct-v0.1.yaml       | 30 +++++++++
 .../siliconflow/llm/qwen2-1.5b-instruct.yaml  | 30 +++++++++
 .../llm/qwen2-57b-a14b-instruct.yaml          |  2 +-
 .../siliconflow/llm/qwen2-72b-instruct.yaml   |  2 +-
 .../siliconflow/llm/qwen2-7b-instruct.yaml    |  6 +-
 .../siliconflow/llm/yi-1.5-6b-chat.yaml       |  4 +-
 .../siliconflow/llm/yi-1.5-9b-chat.yaml       |  6 +-
 .../siliconflow/siliconflow.yaml              |  1 +
 .../text_embedding/bge-large-en-v1.5.yaml     |  5 ++
 .../text_embedding/bge-large-zh-v1.5.yaml     |  5 ++
 .../text_embedding/text_embedding.py          | 29 +++++++++
 .../siliconflow/test_text_embedding.py        | 62 +++++++++++++++++++
 24 files changed, 466 insertions(+), 22 deletions(-)
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-en-v1.5.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-zh-v1.5.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
 create mode 100644 api/tests/integration_tests/model_runtime/siliconflow/test_text_embedding.py

diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index 20bb0790c2..c2f0eb0536 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,8 +1,20 @@
-- deepseek-v2-chat
-- qwen2-72b-instruct
-- qwen2-57b-a14b-instruct
-- qwen2-7b-instruct
-- yi-1.5-34b-chat
-- yi-1.5-9b-chat
-- yi-1.5-6b-chat
-- glm4-9B-chat
+- Qwen/Qwen2-72B-Instruct
+- Qwen/Qwen2-57B-A14B-Instruct
+- Qwen/Qwen2-7B-Instruct
+- Qwen/Qwen2-1.5B-Instruct
+- 01-ai/Yi-1.5-34B-Chat
+- 01-ai/Yi-1.5-9B-Chat-16K
+- 01-ai/Yi-1.5-6B-Chat
+- THUDM/glm-4-9b-chat
+- deepseek-ai/DeepSeek-V2-Chat
+- deepseek-ai/DeepSeek-Coder-V2-Instruct
+- internlm/internlm2_5-7b-chat
+- google/gemma-2-27b-it
+- google/gemma-2-9b-it
+- meta-llama/Meta-Llama-3-70B-Instruct
+- meta-llama/Meta-Llama-3-8B-Instruct
+- meta-llama/Meta-Llama-3.1-405B-Instruct
+- meta-llama/Meta-Llama-3.1-70B-Instruct
+- meta-llama/Meta-Llama-3.1-8B-Instruct
+- mistralai/Mixtral-8x7B-Instruct-v0.1
+- mistralai/Mistral-7B-Instruct-v0.2
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
index 3926568db6..caa6508b5e 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
@@ -1,4 +1,4 @@
-model: deepseek-ai/deepseek-v2-chat
+model: deepseek-ai/DeepSeek-V2-Chat
 label:
   en_US: deepseek-ai/DeepSeek-V2-Chat
 model_type: llm
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml
new file mode 100644
index 0000000000..2840e3dcf4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-2-27b-it
+label:
+  en_US: google/gemma-2-27b-it
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8196
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml
new file mode 100644
index 0000000000..d7e19b46f6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-2-9b-it
+label:
+  en_US: google/gemma-2-9b-it
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8196
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
index d6a4b21b66..9b32a02477 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
@@ -1,4 +1,4 @@
-model: zhipuai/glm4-9B-chat
+model: THUDM/glm-4-9b-chat
 label:
   en_US: THUDM/glm-4-9b-chat
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.6'
-  output: '0.6'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml
new file mode 100644
index 0000000000..73ad4480aa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml
@@ -0,0 +1,30 @@
+model: internlm/internlm2_5-7b-chat
+label:
+  en_US: internlm/internlm2_5-7b-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
new file mode 100644
index 0000000000..9993d781ac
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-70B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
new file mode 100644
index 0000000000..60e3764789
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-8B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3-8B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
new file mode 100644
index 0000000000..f992660aa2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-405B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-405B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
new file mode 100644
index 0000000000..1c69d63a40
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-70B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
new file mode 100644
index 0000000000..a97002a5ca
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-8B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-8B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
new file mode 100644
index 0000000000..27664eab6c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@@ -0,0 +1,30 @@
+model: mistralai/Mistral-7B-Instruct-v0.2
+label:
+  en_US: mistralai/Mistral-7B-Instruct-v0.2
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 0000000000..fd7aada428
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,30 @@
+model: mistralai/Mixtral-8x7B-Instruct-v0.1
+label:
+  en_US: mistralai/Mixtral-8x7B-Instruct-v0.1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
new file mode 100644
index 0000000000..f6c976af8e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2-1.5B-Instruct
+label:
+  en_US: Qwen/Qwen2-1.5B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
index 39624dc5b9..a996e919ea 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-57B-A14B-Instruct
+model: Qwen/Qwen2-57B-A14B-Instruct
 label:
   en_US: Qwen/Qwen2-57B-A14B-Instruct
 model_type: llm
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
index fb7ff6cb14..a6e2c22dac 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-72B-Instruct
+model: Qwen/Qwen2-72B-Instruct
 label:
   en_US: Qwen/Qwen2-72B-Instruct
 model_type: llm
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
index efda4abbd9..d8bea5e129 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-7B-Instruct
+model: Qwen/Qwen2-7B-Instruct
 label:
   en_US: Qwen/Qwen2-7B-Instruct
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.35'
-  output: '0.35'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
index 38cd4197d4..fe4c8b4b3e 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.35'
-  output: '0.35'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml
index 042eeea81a..c61f0dc53f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml
@@ -1,4 +1,4 @@
-model: 01-ai/Yi-1.5-9B-Chat
+model: 01-ai/Yi-1.5-9B-Chat-16K
 label:
   en_US: 01-ai/Yi-1.5-9B-Chat-16K
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.42'
-  output: '0.42'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml b/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml
index cf44c185d5..3084d3edcd 100644
--- a/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml
@@ -15,6 +15,7 @@ help:
   en_US: https://cloud.siliconflow.cn/keys
 supported_model_types:
   - llm
+  - text-embedding
 configurate_methods:
   - predefined-model
 provider_credential_schema:
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-en-v1.5.yaml b/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-en-v1.5.yaml
new file mode 100644
index 0000000000..84f69b41a0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-en-v1.5.yaml
@@ -0,0 +1,5 @@
+model: BAAI/bge-large-en-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-zh-v1.5.yaml b/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-zh-v1.5.yaml
new file mode 100644
index 0000000000..5248375d0b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-zh-v1.5.yaml
@@ -0,0 +1,5 @@
+model: BAAI/bge-large-zh-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..c58765cecb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
@@ -0,0 +1,29 @@
+from typing import Optional
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
+    OAICompatEmbeddingModel,
+)
+
+
+class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
+    """
+    Model class for Siliconflow text embedding model.
+ """ + def validate_credentials(self, model: str, credentials: dict) -> None: + self._add_custom_parameters(credentials) + super().validate_credentials(model, credentials) + + def _invoke(self, model: str, credentials: dict, + texts: list[str], user: Optional[str] = None) \ + -> TextEmbeddingResult: + self._add_custom_parameters(credentials) + return super()._invoke(model, credentials, texts, user) + + def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int: + self._add_custom_parameters(credentials) + return super().get_num_tokens(model, credentials, texts) + + @classmethod + def _add_custom_parameters(cls, credentials: dict) -> None: + credentials['endpoint_url'] = 'https://api.siliconflow.cn/v1' \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/siliconflow/test_text_embedding.py b/api/tests/integration_tests/model_runtime/siliconflow/test_text_embedding.py new file mode 100644 index 0000000000..18bd2e893a --- /dev/null +++ b/api/tests/integration_tests/model_runtime/siliconflow/test_text_embedding.py @@ -0,0 +1,62 @@ +import os + +import pytest + +from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.siliconflow.text_embedding.text_embedding import ( + SiliconflowTextEmbeddingModel, +) + + +def test_validate_credentials(): + model = SiliconflowTextEmbeddingModel() + + with pytest.raises(CredentialsValidateFailedError): + model.validate_credentials( + model="BAAI/bge-large-zh-v1.5", + credentials={ + "api_key": "invalid_key" + }, + ) + + model.validate_credentials( + model="BAAI/bge-large-zh-v1.5", + credentials={ + "api_key": os.environ.get("API_KEY"), + }, + ) + + +def test_invoke_model(): + model = SiliconflowTextEmbeddingModel() + + result = model.invoke( + model="BAAI/bge-large-zh-v1.5", + credentials={ + "api_key": os.environ.get("API_KEY"), + }, + texts=[ + "hello", + "world", + ], + user="abc-123", + ) + + assert isinstance(result, TextEmbeddingResult) + assert len(result.embeddings) == 2 + assert result.usage.total_tokens == 6 + + +def test_get_num_tokens(): + model = SiliconflowTextEmbeddingModel() + + num_tokens = model.get_num_tokens( + model="BAAI/bge-large-zh-v1.5", + credentials={ + "api_key": os.environ.get("API_KEY"), + }, + texts=["hello", "world"], + ) + + assert num_tokens == 2