From a80fe20456c14c741b381a0d95f4341d636d0ee8 Mon Sep 17 00:00:00 2001
From: Joshua <138381132+joshua20231026@users.noreply.github.com>
Date: Sat, 11 May 2024 21:05:31 +0800
Subject: [PATCH] add-some-new-models-hosted-on-nvidia (#4303)

---
 .../model_providers/nvidia/llm/_position.yaml |  4 ++
 .../model_providers/nvidia/llm/arctic.yaml    | 36 ++++++++++++++++++
 .../model_providers/nvidia/llm/llm.py         |  6 ++-
 .../nvidia/llm/mistral-large.yaml             | 36 ++++++++++++++++++
 .../llm/mixtral-8x22b-instruct-v0.1.yaml      | 36 ++++++++++++++++++
 .../nvidia/llm/recurrentgemma-2b.yaml         | 37 +++++++++++++++++++
 6 files changed, 154 insertions(+), 1 deletion(-)
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml

diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
index fc69862722..2401f2a890 100644
--- a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
@@ -1,7 +1,11 @@
 - google/gemma-7b
 - google/codegemma-7b
+- google/recurrentgemma-2b
 - meta/llama2-70b
 - meta/llama3-8b-instruct
 - meta/llama3-70b-instruct
+- mistralai/mistral-large
 - mistralai/mixtral-8x7b-instruct-v0.1
+- mistralai/mixtral-8x22b-instruct-v0.1
 - fuyu-8b
+- snowflake/arctic
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml b/api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml
new file mode 100644
index 0000000000..7f53ae58e6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml
@@ -0,0 +1,36 @@
+model: snowflake/arctic
+label:
+  zh_Hans: snowflake/arctic
+  en_US: snowflake/arctic
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
index 402ffb2cf2..047bbeda63 100644
--- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
@@ -22,12 +22,16 @@ from core.model_runtime.utils import helper
 
 class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
     MODEL_SUFFIX_MAP = {
         'fuyu-8b': 'vlm/adept/fuyu-8b',
+        'mistralai/mistral-large': '',
         'mistralai/mixtral-8x7b-instruct-v0.1': '',
+        'mistralai/mixtral-8x22b-instruct-v0.1': '',
         'google/gemma-7b': '',
         'google/codegemma-7b': '',
+        'snowflake/arctic': '',
         'meta/llama2-70b': '',
         'meta/llama3-8b-instruct': '',
-        'meta/llama3-70b-instruct': ''
+        'meta/llama3-70b-instruct': '',
+        'google/recurrentgemma-2b': ''
     }
 
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml
new file mode 100644
index 0000000000..3e14d22141
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mistral-large
+label:
+  zh_Hans: mistralai/mistral-large
+  en_US: mistralai/mistral-large
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
new file mode 100644
index 0000000000..05500c0336
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mixtral-8x22b-instruct-v0.1
+label:
+  zh_Hans: mistralai/mixtral-8x22b-instruct-v0.1
+  en_US: mistralai/mixtral-8x22b-instruct-v0.1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml
new file mode 100644
index 0000000000..73fcce3930
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml
@@ -0,0 +1,37 @@
+model: google/recurrentgemma-2b
+label:
+  zh_Hans: google/recurrentgemma-2b
+  en_US: google/recurrentgemma-2b
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 2048
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.2
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 0.7
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: random_seed
+    type: int
+    help:
+      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+    label:
+      en_US: Seed
+      zh_Hans: 种子
+    default: 0
+    min: 0
+    max: 2147483647
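
The only behavioral change in this patch is the expanded `MODEL_SUFFIX_MAP` in llm.py: an empty string appears to mean the model is served from the provider's default OpenAI-compatible endpoint, while a non-empty value (as with `vlm/adept/fuyu-8b`) routes the request to a dedicated API path. A minimal sketch of that routing pattern follows; the base URLs and the `endpoint_for` helper are illustrative assumptions, not the actual `NVIDIALargeLanguageModel` implementation.

```python
# Illustrative sketch only: BASE_CHAT_URL, DEDICATED_URL, and endpoint_for
# are assumptions for exposition, not Dify's actual routing code.
BASE_CHAT_URL = 'https://integrate.api.nvidia.com/v1'  # assumed shared endpoint
DEDICATED_URL = 'https://ai.api.nvidia.com/v1'         # assumed per-model host

MODEL_SUFFIX_MAP = {
    'fuyu-8b': 'vlm/adept/fuyu-8b',  # non-empty suffix: dedicated route
    'mistralai/mistral-large': '',   # empty suffix: shared chat endpoint
    'snowflake/arctic': '',
}

def endpoint_for(model: str) -> str:
    """Pick the request URL: a non-empty suffix selects a dedicated path,
    while an empty one falls back to the shared OpenAI-compatible endpoint."""
    suffix = MODEL_SUFFIX_MAP.get(model, '')
    return f'{DEDICATED_URL}/{suffix}' if suffix else BASE_CHAT_URL

print(endpoint_for('fuyu-8b'))           # .../vlm/adept/fuyu-8b
print(endpoint_for('snowflake/arctic'))  # shared endpoint
```

Under this reading, the five new entries with empty suffixes simply register the models against the default endpoint, which is why the patch otherwise only adds per-model YAML parameter definitions.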