From a7162240e6de0eb70a6914a3f9929b69057a4630 Mon Sep 17 00:00:00 2001
From: shAlfred
Date: Thu, 8 Aug 2024 17:08:28 +0800
Subject: [PATCH] feat: add text-embedding function and LLM models to Siliconflow (#7090)

---
 .../siliconflow/llm/_position.yaml            | 28 ++++++---
 .../siliconflow/llm/deepseek-v2-chat.yaml     |  2 +-
 .../siliconflow/llm/gemma-2-27b-it.yaml       | 30 +++++++++
 .../siliconflow/llm/gemma-2-9b-it.yaml        | 30 +++++++++
 .../siliconflow/llm/glm4-9b-chat.yaml         |  6 +-
 .../siliconflow/llm/internlm2_5-7b-chat.yaml  | 30 +++++++++
 .../llm/meta-mlama-3-70b-instruct.yaml        | 30 +++++++++
 .../llm/meta-mlama-3-8b-instruct.yaml         | 30 +++++++++
 .../llm/meta-mlama-3.1-405b-instruct.yaml     | 30 +++++++++
 .../llm/meta-mlama-3.1-70b-instruct.yaml      | 30 +++++++++
 .../llm/meta-mlama-3.1-8b-instruct.yaml       | 30 +++++++++
 .../llm/mistral-7b-instruct-v0.2.yaml         | 30 +++++++++
 .../llm/mistral-8x7b-instruct-v0.1.yaml       | 30 +++++++++
 .../siliconflow/llm/qwen2-1.5b-instruct.yaml  | 30 +++++++++
 .../llm/qwen2-57b-a14b-instruct.yaml          |  2 +-
 .../siliconflow/llm/qwen2-72b-instruct.yaml   |  2 +-
 .../siliconflow/llm/qwen2-7b-instruct.yaml    |  6 +-
 .../siliconflow/llm/yi-1.5-6b-chat.yaml       |  4 +-
 .../siliconflow/llm/yi-1.5-9b-chat.yaml       |  6 +-
 .../siliconflow/siliconflow.yaml              |  1 +
 .../text_embedding/bge-large-en-v1.5.yaml     |  5 ++
 .../text_embedding/bge-large-zh-v1.5.yaml     |  5 ++
 .../text_embedding/text_embedding.py          | 29 +++++++++
 .../siliconflow/test_text_embedding.py        | 62 +++++++++++++++++++
 24 files changed, 466 insertions(+), 22 deletions(-)
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-en-v1.5.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-zh-v1.5.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
 create mode 100644 api/tests/integration_tests/model_runtime/siliconflow/test_text_embedding.py

diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index 20bb0790c2..c2f0eb0536 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,8 +1,20 @@
-- deepseek-v2-chat
-- qwen2-72b-instruct
-- qwen2-57b-a14b-instruct
-- qwen2-7b-instruct
-- yi-1.5-34b-chat
-- yi-1.5-9b-chat
-- yi-1.5-6b-chat
-- glm4-9B-chat
+- Qwen/Qwen2-72B-Instruct
+- Qwen/Qwen2-57B-A14B-Instruct
+- Qwen/Qwen2-7B-Instruct
+- Qwen/Qwen2-1.5B-Instruct
+- 01-ai/Yi-1.5-34B-Chat
+- 01-ai/Yi-1.5-9B-Chat-16K
+- 01-ai/Yi-1.5-6B-Chat
+- THUDM/glm-4-9b-chat
+- deepseek-ai/DeepSeek-V2-Chat
+- deepseek-ai/DeepSeek-Coder-V2-Instruct
+- internlm/internlm2_5-7b-chat
+- google/gemma-2-27b-it
+- google/gemma-2-9b-it
+- meta-llama/Meta-Llama-3-70B-Instruct
+- meta-llama/Meta-Llama-3-8B-Instruct
+- meta-llama/Meta-Llama-3.1-405B-Instruct
+- meta-llama/Meta-Llama-3.1-70B-Instruct
+- meta-llama/Meta-Llama-3.1-8B-Instruct
+- mistralai/Mixtral-8x7B-Instruct-v0.1
+- mistralai/Mistral-7B-Instruct-v0.2
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
index 3926568db6..caa6508b5e 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
@@ -1,4 +1,4 @@
-model: deepseek-ai/deepseek-v2-chat
+model: deepseek-ai/DeepSeek-V2-Chat
 label:
   en_US: deepseek-ai/DeepSeek-V2-Chat
 model_type: llm
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml
new file mode 100644
index 0000000000..2840e3dcf4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-2-27b-it
+label:
+  en_US: google/gemma-2-27b-it
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8196
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml
new file mode 100644
index 0000000000..d7e19b46f6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-2-9b-it
+label:
+  en_US: google/gemma-2-9b-it
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8196
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
index d6a4b21b66..9b32a02477 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
@@ -1,4 +1,4 @@
-model: zhipuai/glm4-9B-chat
+model: THUDM/glm-4-9b-chat
 label:
   en_US: THUDM/glm-4-9b-chat
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.6'
-  output: '0.6'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml
new file mode 100644
index 0000000000..73ad4480aa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml
@@ -0,0 +1,30 @@
+model: internlm/internlm2_5-7b-chat
+label:
+  en_US: internlm/internlm2_5-7b-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
new file mode 100644
index 0000000000..9993d781ac
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-70B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
new file mode 100644
index 0000000000..60e3764789
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-8B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3-8B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
new file mode 100644
index 0000000000..f992660aa2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-405B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-405B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
new file mode 100644
index 0000000000..1c69d63a40
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-70B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
new file mode 100644
index 0000000000..a97002a5ca
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-8B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-8B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
new file mode 100644
index 0000000000..27664eab6c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@@ -0,0 +1,30 @@
+model: mistralai/Mistral-7B-Instruct-v0.2
+label:
+  en_US: mistralai/Mistral-7B-Instruct-v0.2
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 0000000000..fd7aada428
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,30 @@
+model: mistralai/Mixtral-8x7B-Instruct-v0.1
+label:
+  en_US: mistralai/Mixtral-8x7B-Instruct-v0.1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
new file mode 100644
index 0000000000..f6c976af8e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2-1.5B-Instruct
+label:
+  en_US: Qwen/Qwen2-1.5B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
index 39624dc5b9..a996e919ea 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-57B-A14B-Instruct
+model: Qwen/Qwen2-57B-A14B-Instruct
 label:
   en_US: Qwen/Qwen2-57B-A14B-Instruct
 model_type: llm
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
index fb7ff6cb14..a6e2c22dac 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-72B-Instruct
+model: Qwen/Qwen2-72B-Instruct
 label:
   en_US: Qwen/Qwen2-72B-Instruct
 model_type: llm
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
index efda4abbd9..d8bea5e129 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-7B-Instruct
+model: Qwen/Qwen2-7B-Instruct
 label:
   en_US: Qwen/Qwen2-7B-Instruct
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.35'
-  output: '0.35'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
index 38cd4197d4..fe4c8b4b3e 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.35'
-  output: '0.35'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml
index 042eeea81a..c61f0dc53f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml
@@ -1,4 +1,4 @@
-model: 01-ai/Yi-1.5-9B-Chat
+model: 01-ai/Yi-1.5-9B-Chat-16K
 label:
   en_US: 01-ai/Yi-1.5-9B-Chat-16K
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.42'
-  output: '0.42'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml b/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml
index cf44c185d5..3084d3edcd 100644
--- a/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml
@@ -15,6 +15,7 @@ help:
   en_US: https://cloud.siliconflow.cn/keys
 supported_model_types:
   - llm
+  - text-embedding
 configurate_methods:
   - predefined-model
 provider_credential_schema:
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-en-v1.5.yaml b/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-en-v1.5.yaml
new file mode 100644
index 0000000000..84f69b41a0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-en-v1.5.yaml
@@ -0,0 +1,5 @@
+model: BAAI/bge-large-en-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-zh-v1.5.yaml b/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-zh-v1.5.yaml
new file mode 100644
index 0000000000..5248375d0b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/bge-large-zh-v1.5.yaml
@@ -0,0 +1,5 @@
+model: BAAI/bge-large-zh-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..c58765cecb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
@@ -0,0 +1,29 @@
+from typing import Optional
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
+    OAICompatEmbeddingModel,
+)
+
+
+class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
+    """
+    Model class for Siliconflow text embedding model.
+ """ + def validate_credentials(self, model: str, credentials: dict) -> None: + self._add_custom_parameters(credentials) + super().validate_credentials(model, credentials) + + def _invoke(self, model: str, credentials: dict, + texts: list[str], user: Optional[str] = None) \ + -> TextEmbeddingResult: + self._add_custom_parameters(credentials) + return super()._invoke(model, credentials, texts, user) + + def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int: + self._add_custom_parameters(credentials) + return super().get_num_tokens(model, credentials, texts) + + @classmethod + def _add_custom_parameters(cls, credentials: dict) -> None: + credentials['endpoint_url'] = 'https://api.siliconflow.cn/v1' \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/siliconflow/test_text_embedding.py b/api/tests/integration_tests/model_runtime/siliconflow/test_text_embedding.py new file mode 100644 index 0000000000..18bd2e893a --- /dev/null +++ b/api/tests/integration_tests/model_runtime/siliconflow/test_text_embedding.py @@ -0,0 +1,62 @@ +import os + +import pytest + +from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.siliconflow.text_embedding.text_embedding import ( + SiliconflowTextEmbeddingModel, +) + + +def test_validate_credentials(): + model = SiliconflowTextEmbeddingModel() + + with pytest.raises(CredentialsValidateFailedError): + model.validate_credentials( + model="BAAI/bge-large-zh-v1.5", + credentials={ + "api_key": "invalid_key" + }, + ) + + model.validate_credentials( + model="BAAI/bge-large-zh-v1.5", + credentials={ + "api_key": os.environ.get("API_KEY"), + }, + ) + + +def test_invoke_model(): + model = SiliconflowTextEmbeddingModel() + + result = model.invoke( + model="BAAI/bge-large-zh-v1.5", + credentials={ + "api_key": os.environ.get("API_KEY"), + }, + texts=[ + "hello", + "world", + ], + user="abc-123", + ) + + assert isinstance(result, TextEmbeddingResult) + assert len(result.embeddings) == 2 + assert result.usage.total_tokens == 6 + + +def test_get_num_tokens(): + model = SiliconflowTextEmbeddingModel() + + num_tokens = model.get_num_tokens( + model="BAAI/bge-large-zh-v1.5", + credentials={ + "api_key": os.environ.get("API_KEY"), + }, + texts=["hello", "world"], + ) + + assert num_tokens == 2