diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index 21eea7ef3b..da654d2174 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -32,3 +32,4 @@
 - openai_api_compatible
 - deepseek
 - hunyuan
+- siliconflow
diff --git a/api/core/model_runtime/model_providers/siliconflow/_assets/siliconflow.svg b/api/core/model_runtime/model_providers/siliconflow/_assets/siliconflow.svg
new file mode 100644
index 0000000000..16e406f030
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/_assets/siliconflow.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/api/core/model_runtime/model_providers/siliconflow/_assets/siliconflow_square.svg b/api/core/model_runtime/model_providers/siliconflow/_assets/siliconflow_square.svg
new file mode 100644
index 0000000000..ad6b384f7a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/_assets/siliconflow_square.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
new file mode 100644
index 0000000000..20bb0790c2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -0,0 +1,8 @@
+- deepseek-ai/deepseek-v2-chat
+- alibaba/Qwen2-72B-Instruct
+- alibaba/Qwen2-57B-A14B-Instruct
+- alibaba/Qwen2-7B-Instruct
+- 01-ai/Yi-1.5-34B-Chat
+- 01-ai/Yi-1.5-9B-Chat
+- 01-ai/Yi-1.5-6B-Chat
+- zhipuai/glm4-9B-chat
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
new file mode 100644
index 0000000000..da58e822f9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
@@ -0,0 +1,32 @@
+model: deepseek-ai/deepseek-v2-chat
+label:
+  en_US: deepseek-ai/deepseek-v2-chat
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.33'
+  output: '1.33'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
new file mode 100644
index 0000000000..115fc50b94
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
@@ -0,0 +1,32 @@
+model: zhipuai/glm4-9B-chat
+label:
+  en_US: zhipuai/glm4-9B-chat
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0.6'
+  output: '0.6'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/llm.py b/api/core/model_runtime/model_providers/siliconflow/llm/llm.py
new file mode 100644
index 0000000000..a9ce7b98c3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/llm.py
@@ -0,0 +1,25 @@
+from collections.abc import Generator
+from typing import Optional, Union
+
+from core.model_runtime.entities.llm_entities import LLMResult
+from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
+from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
+
+
+class SiliconflowLargeLanguageModel(OAIAPICompatLargeLanguageModel):
+    def _invoke(self, model: str, credentials: dict,
+                prompt_messages: list[PromptMessage], model_parameters: dict,
+                tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+                stream: bool = True, user: Optional[str] = None) \
+            -> Union[LLMResult, Generator]:
+        self._add_custom_parameters(credentials)
+        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        self._add_custom_parameters(credentials)
+        super().validate_credentials(model, credentials)
+
+    @classmethod
+    def _add_custom_parameters(cls, credentials: dict) -> None:
+        credentials['mode'] = 'chat'
+        credentials['endpoint_url'] = 'https://api.siliconflow.cn/v1'
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
new file mode 100644
index 0000000000..75eca7720c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
@@ -0,0 +1,32 @@
+model: alibaba/Qwen2-57B-A14B-Instruct
+label:
+  en_US: alibaba/Qwen2-57B-A14B-Instruct
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
new file mode 100644
index 0000000000..fcbc9e0b68
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
@@ -0,0 +1,32 @@
+model: alibaba/Qwen2-72B-Instruct
+label:
+  en_US: alibaba/Qwen2-72B-Instruct
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
new file mode 100644
index 0000000000..eda1d40642
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
@@ -0,0 +1,32 @@
+model: alibaba/Qwen2-7B-Instruct
+label:
+  en_US: alibaba/Qwen2-7B-Instruct
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0.35'
+  output: '0.35'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-34b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-34b-chat.yaml
new file mode 100644
index 0000000000..6656e663e9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-34b-chat.yaml
@@ -0,0 +1,32 @@
+model: 01-ai/Yi-1.5-34B-Chat
+label:
+  en_US: 01-ai/Yi-1.5-34B-Chat
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 16384
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
new file mode 100644
index 0000000000..ba6e0c5113
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
@@ -0,0 +1,32 @@
+model: 01-ai/Yi-1.5-6B-Chat
+label:
+  en_US: 01-ai/Yi-1.5-6B-Chat
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0.35'
+  output: '0.35'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml
new file mode 100644
index 0000000000..64be8998c5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-9b-chat.yaml
@@ -0,0 +1,32 @@
+model: 01-ai/Yi-1.5-9B-Chat
+label:
+  en_US: 01-ai/Yi-1.5-9B-Chat
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 16384
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0.42'
+  output: '0.42'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/siliconflow.py b/api/core/model_runtime/model_providers/siliconflow/siliconflow.py
new file mode 100644
index 0000000000..63f76fa8b5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/siliconflow.py
@@ -0,0 +1,29 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+class SiliconflowProvider(ModelProvider):
+
+    def validate_provider_credentials(self, credentials: dict) -> None:
+        """
+        Validate provider credentials
+        if validate failed, raise exception
+
+        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """ + try: + model_instance = self.get_model_instance(ModelType.LLM) + + model_instance.validate_credentials( + model='deepseek-ai/deepseek-v2-chat', + credentials=credentials + ) + except CredentialsValidateFailedError as ex: + raise ex + except Exception as ex: + logger.exception(f'{self.get_provider_schema().provider} credentials validate failed') + raise ex diff --git a/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml b/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml new file mode 100644 index 0000000000..cf44c185d5 --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml @@ -0,0 +1,29 @@ +provider: siliconflow +label: + zh_Hans: 硅基流动 + en_US: SiliconFlow +icon_small: + en_US: siliconflow_square.svg +icon_large: + en_US: siliconflow.svg +background: "#ffecff" +help: + title: + en_US: Get your API Key from SiliconFlow + zh_Hans: 从 SiliconFlow 获取 API Key + url: + en_US: https://cloud.siliconflow.cn/keys +supported_model_types: + - llm +configurate_methods: + - predefined-model +provider_credential_schema: + credential_form_schemas: + - variable: api_key + label: + en_US: API Key + type: secret-input + required: true + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key