add-some-new-models-hosted-on-nvidia (#4303)

This commit is contained in:
Joshua 2024-05-11 21:05:31 +08:00 committed by GitHub
parent f7986805c6
commit a80fe20456
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 154 additions and 1 deletion

View File

@ -1,7 +1,11 @@
- google/gemma-7b
- google/codegemma-7b
- google/recurrentgemma-2b
- meta/llama2-70b
- meta/llama3-8b-instruct
- meta/llama3-70b-instruct
- mistralai/mistral-large
- mistralai/mixtral-8x7b-instruct-v0.1
- mistralai/mixtral-8x22b-instruct-v0.1
- fuyu-8b
- snowflake/arctic

View File

@ -0,0 +1,36 @@
model: snowflake/arctic
label:
zh_Hans: snowflake/arctic
en_US: snowflake/arctic
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 4000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 1024
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -22,12 +22,16 @@ from core.model_runtime.utils import helper
class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
MODEL_SUFFIX_MAP = {
'fuyu-8b': 'vlm/adept/fuyu-8b',
'mistralai/mistral-large': '',
'mistralai/mixtral-8x7b-instruct-v0.1': '',
'mistralai/mixtral-8x22b-instruct-v0.1': '',
'google/gemma-7b': '',
'google/codegemma-7b': '',
'snowflake/arctic':'',
'meta/llama2-70b': '',
'meta/llama3-8b-instruct': '',
'meta/llama3-70b-instruct': ''
'meta/llama3-70b-instruct': '',
'google/recurrentgemma-2b': ''
}

View File

@ -0,0 +1,36 @@
model: mistralai/mistral-large
label:
zh_Hans: mistralai/mistral-large
en_US: mistralai/mistral-large
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 1024
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -0,0 +1,36 @@
model: mistralai/mixtral-8x22b-instruct-v0.1
label:
zh_Hans: mistralai/mixtral-8x22b-instruct-v0.1
en_US: mistralai/mixtral-8x22b-instruct-v0.1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 1024
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -0,0 +1,37 @@
model: google/recurrentgemma-2b
label:
zh_Hans: google/recurrentgemma-2b
en_US: google/recurrentgemma-2b
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 2048
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.2
- name: top_p
use_template: top_p
min: 0
max: 1
default: 0.7
- name: max_tokens
use_template: max_tokens
min: 1
max: 1024
default: 1024
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: Seed
zh_Hans: 种子
default: 0
min: 0
max: 2147483647