Mirror of https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git (synced 2025-08-12 20:59:00 +08:00)
feat: add text-embedding function and LLM models to Siliconflow (#7090)
parent 12095f8cd6
commit a7162240e6
@@ -1,8 +1,20 @@
-- deepseek-v2-chat
-- qwen2-72b-instruct
-- qwen2-57b-a14b-instruct
-- qwen2-7b-instruct
-- yi-1.5-34b-chat
-- yi-1.5-9b-chat
-- yi-1.5-6b-chat
-- glm4-9B-chat
+- Qwen/Qwen2-72B-Instruct
+- Qwen/Qwen2-57B-A14B-Instruct
+- Qwen/Qwen2-7B-Instruct
+- Qwen/Qwen2-1.5B-Instruct
+- 01-ai/Yi-1.5-34B-Chat
+- 01-ai/Yi-1.5-9B-Chat-16K
+- 01-ai/Yi-1.5-6B-Chat
+- THUDM/glm-4-9b-chat
+- deepseek-ai/DeepSeek-V2-Chat
+- deepseek-ai/DeepSeek-Coder-V2-Instruct
+- internlm/internlm2_5-7b-chat
+- google/gemma-2-27b-it
+- google/gemma-2-9b-it
+- meta-llama/Meta-Llama-3-70B-Instruct
+- meta-llama/Meta-Llama-3-8B-Instruct
+- meta-llama/Meta-Llama-3.1-405B-Instruct
+- meta-llama/Meta-Llama-3.1-70B-Instruct
+- meta-llama/Meta-Llama-3.1-8B-Instruct
+- mistralai/Mixtral-8x7B-Instruct-v0.1
+- mistralai/Mistral-7B-Instruct-v0.2
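Every entry above is now the fully qualified model id that SiliconFlow serves. A minimal sketch of calling one of the newly listed chat models directly, outside Dify: the base URL is the one this commit hard-codes for embeddings, while reusing the `openai` client for chat completions is an assumption for illustration, not something this commit contains.

import os

from openai import OpenAI  # pip install openai

# Assumption: SiliconFlow serves chat completions at the same
# OpenAI-compatible base URL this commit uses for embeddings.
client = OpenAI(
    base_url="https://api.siliconflow.cn/v1",
    api_key=os.environ["API_KEY"],  # same env var the new tests read
)

# Model ids must match the qualified names added above.
resp = client.chat.completions.create(
    model="Qwen/Qwen2-7B-Instruct",
    messages=[{"role": "user", "content": "Say hello in one word."}],
    max_tokens=16,
)
print(resp.choices[0].message.content)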
@@ -1,4 +1,4 @@
-model: deepseek-ai/deepseek-v2-chat
+model: deepseek-ai/DeepSeek-V2-Chat
 label:
   en_US: deepseek-ai/DeepSeek-V2-Chat
 model_type: llm
@@ -0,0 +1,30 @@
+model: google/gemma-2-27b-it
+label:
+  en_US: google/gemma-2-27b-it
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8196
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
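The pricing block reads as price-per-token times `unit`: with `input: '1.26'` and `unit: '0.000001'`, one prompt token costs 1.26 × 0.000001 RMB, i.e. 1.26 RMB per million tokens. A hedged sketch of that arithmetic; the formula is read off the field names here, not quoted from Dify's billing code:

from decimal import Decimal

# Values from google/gemma-2-27b-it above. The cost formula
# (tokens * price * unit) is an assumption based on the field names.
input_price = Decimal("1.26")   # RMB
unit = Decimal("0.000001")      # per-token scaling of the listed price

prompt_tokens = 10_000
cost_rmb = prompt_tokens * input_price * unit
print(cost_rmb)  # ~0.0126 RMB, i.e. 1.26 RMB per million prompt tokens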
@@ -0,0 +1,30 @@
+model: google/gemma-2-9b-it
+label:
+  en_US: google/gemma-2-9b-it
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8196
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -1,4 +1,4 @@
-model: zhipuai/glm4-9B-chat
+model: THUDM/glm-4-9b-chat
 label:
   en_US: THUDM/glm-4-9b-chat
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.6'
-  output: '0.6'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@@ -0,0 +1,30 @@
+model: internlm/internlm2_5-7b-chat
+label:
+  en_US: internlm/internlm2_5-7b-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-70B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-8B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3-8B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-405B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-405B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-70B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-8B-Instruct
+label:
+  en_US: meta-llama/Meta-Llama-3.1-8B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: mistralai/Mistral-7B-Instruct-v0.2
+label:
+  en_US: mistralai/Mistral-7B-Instruct-v0.2
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: mistralai/Mixtral-8x7B-Instruct-v0.1
+label:
+  en_US: mistralai/Mixtral-8x7B-Instruct-v0.1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2-1.5B-Instruct
+label:
+  en_US: Qwen/Qwen2-1.5B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: frequency_penalty
+    use_template: frequency_penalty
+pricing:
+  input: '0'
+  output: '0'
+  unit: '0.000001'
+  currency: RMB
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-57B-A14B-Instruct
+model: Qwen/Qwen2-57B-A14B-Instruct
 label:
   en_US: Qwen/Qwen2-57B-A14B-Instruct
 model_type: llm
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-72B-Instruct
+model: Qwen/Qwen2-72B-Instruct
 label:
   en_US: Qwen/Qwen2-72B-Instruct
 model_type: llm
@@ -1,4 +1,4 @@
-model: alibaba/Qwen2-7B-Instruct
+model: Qwen/Qwen2-7B-Instruct
 label:
   en_US: Qwen/Qwen2-7B-Instruct
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.35'
-  output: '0.35'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.35'
-  output: '0.35'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@@ -1,4 +1,4 @@
-model: 01-ai/Yi-1.5-9B-Chat
+model: 01-ai/Yi-1.5-9B-Chat-16K
 label:
   en_US: 01-ai/Yi-1.5-9B-Chat-16K
 model_type: llm
@@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.42'
-  output: '0.42'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@@ -15,6 +15,7 @@ help:
     en_US: https://cloud.siliconflow.cn/keys
 supported_model_types:
   - llm
+  - text-embedding
 configurate_methods:
   - predefined-model
 provider_credential_schema:
@@ -0,0 +1,5 @@
+model: BAAI/bge-large-en-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
@@ -0,0 +1,5 @@
+model: BAAI/bge-large-zh-v1.5
+model_type: text-embedding
+model_properties:
+  context_size: 512
+  max_chunks: 1
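Both embedding definitions cap `context_size` at 512 tokens and `max_chunks` at 1. On the usual reading of these fields in Dify providers, at most one text goes into a single embeddings request, so a list of inputs gets split client-side. A sketch under that assumption:

# Sketch, assuming max_chunks caps how many input texts are sent in one
# embeddings request (an assumption about the field, not quoted semantics).
def batches(texts: list[str], max_chunks: int = 1):
    """Yield successive slices of at most max_chunks texts."""
    for i in range(0, len(texts), max_chunks):
        yield texts[i:i + max_chunks]

# With max_chunks: 1, ["hello", "world"] becomes two single-text requests.
print(list(batches(["hello", "world"])))  # [['hello'], ['world']]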
@@ -0,0 +1,29 @@
+from typing import Optional
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
+    OAICompatEmbeddingModel,
+)
+
+
+class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
+    """
+    Model class for Siliconflow text embedding model.
+    """
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        self._add_custom_parameters(credentials)
+        super().validate_credentials(model, credentials)
+
+    def _invoke(self, model: str, credentials: dict,
+                texts: list[str], user: Optional[str] = None) \
+            -> TextEmbeddingResult:
+        self._add_custom_parameters(credentials)
+        return super()._invoke(model, credentials, texts, user)
+
+    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+        self._add_custom_parameters(credentials)
+        return super().get_num_tokens(model, credentials, texts)
+
+    @classmethod
+    def _add_custom_parameters(cls, credentials: dict) -> None:
+        credentials['endpoint_url'] = 'https://api.siliconflow.cn/v1'
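The new class is deliberately thin: every override just injects `endpoint_url` into the credentials and defers to `OAICompatEmbeddingModel`, so SiliconFlow embeddings ride the generic OpenAI-compatible path. Roughly what that path boils down to on the wire — a hand-rolled sketch, not code from this commit, with the `/embeddings` route assumed from OpenAI-API compatibility:

import os

import requests

# Standalone sketch of the request the OpenAI-compatible base class
# effectively makes; the /embeddings route is an assumption.
resp = requests.post(
    "https://api.siliconflow.cn/v1/embeddings",
    headers={"Authorization": f"Bearer {os.environ['API_KEY']}"},
    json={"model": "BAAI/bge-large-zh-v1.5", "input": ["hello", "world"]},
    timeout=30,
)
resp.raise_for_status()
vectors = [item["embedding"] for item in resp.json()["data"]]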
@@ -0,0 +1,62 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.siliconflow.text_embedding.text_embedding import (
+    SiliconflowTextEmbeddingModel,
+)
+
+
+def test_validate_credentials():
+    model = SiliconflowTextEmbeddingModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model="BAAI/bge-large-zh-v1.5",
+            credentials={
+                "api_key": "invalid_key"
+            },
+        )
+
+    model.validate_credentials(
+        model="BAAI/bge-large-zh-v1.5",
+        credentials={
+            "api_key": os.environ.get("API_KEY"),
+        },
+    )
+
+
+def test_invoke_model():
+    model = SiliconflowTextEmbeddingModel()
+
+    result = model.invoke(
+        model="BAAI/bge-large-zh-v1.5",
+        credentials={
+            "api_key": os.environ.get("API_KEY"),
+        },
+        texts=[
+            "hello",
+            "world",
+        ],
+        user="abc-123",
+    )
+
+    assert isinstance(result, TextEmbeddingResult)
+    assert len(result.embeddings) == 2
+    assert result.usage.total_tokens == 6
+
+
+def test_get_num_tokens():
+    model = SiliconflowTextEmbeddingModel()
+
+    num_tokens = model.get_num_tokens(
+        model="BAAI/bge-large-zh-v1.5",
+        credentials={
+            "api_key": os.environ.get("API_KEY"),
+        },
+        texts=["hello", "world"],
+    )
+
+    assert num_tokens == 2