From 2a909e634b72880dc7b57ca5886b02e8ccf5bcde Mon Sep 17 00:00:00 2001
From: Kepler
Date: Fri, 27 Dec 2024 20:23:46 +0800
Subject: [PATCH] feat: support Ernie-lite-pro-128k (#12161)

Co-authored-by: bigfish49
---
 .../model_providers/wenxin/_common.py         |  1 +
 .../wenxin/llm/ernie-lite-pro-128k.yaml       | 42 +++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 api/core/model_runtime/model_providers/wenxin/llm/ernie-lite-pro-128k.yaml

diff --git a/api/core/model_runtime/model_providers/wenxin/_common.py b/api/core/model_runtime/model_providers/wenxin/_common.py
index c77a499982..1247a11fe8 100644
--- a/api/core/model_runtime/model_providers/wenxin/_common.py
+++ b/api/core/model_runtime/model_providers/wenxin/_common.py
@@ -122,6 +122,7 @@ class _CommonWenxin:
         "bge-large-zh": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/bge_large_zh",
         "tao-8k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/tao_8k",
         "bce-reranker-base_v1": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/reranker/bce_reranker_base",
+        "ernie-lite-pro-128k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-lite-pro-128k",
     }
 
     function_calling_supports = [
diff --git a/api/core/model_runtime/model_providers/wenxin/llm/ernie-lite-pro-128k.yaml b/api/core/model_runtime/model_providers/wenxin/llm/ernie-lite-pro-128k.yaml
new file mode 100644
index 0000000000..4f5832c859
--- /dev/null
+++ b/api/core/model_runtime/model_providers/wenxin/llm/ernie-lite-pro-128k.yaml
@@ -0,0 +1,42 @@
+model: ernie-lite-pro-128k
+label:
+  en_US: Ernie-Lite-Pro-128K
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0.1
+    max: 1.0
+    default: 0.8
+  - name: top_p
+    use_template: top_p
+  - name: min_output_tokens
+    label:
+      en_US: "Min Output Tokens"
+      zh_Hans: "最小输出Token数"
+    use_template: max_tokens
+    min: 2
+    max: 2048
+    help:
+      zh_Hans: 指定模型最小输出token数
+      en_US: Specifies the lower limit on the length of generated results.
+  - name: max_output_tokens
+    label:
+      en_US: "Max Output Tokens"
+      zh_Hans: "最大输出Token数"
+    use_template: max_tokens
+    min: 2
+    max: 2048
+    default: 2048
+    help:
+      zh_Hans: 指定模型最大输出token数
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty