From a80fe20456c14c741b381a0d95f4341d636d0ee8 Mon Sep 17 00:00:00 2001
From: Joshua <138381132+joshua20231026@users.noreply.github.com>
Date: Sat, 11 May 2024 21:05:31 +0800
Subject: [PATCH] add-some-new-models-hosted-on-nvidia (#4303)

---
 .../model_providers/nvidia/llm/_position.yaml |  4 ++
 .../model_providers/nvidia/llm/arctic.yaml    | 36 ++++++++++++++++++
 .../model_providers/nvidia/llm/llm.py         |  6 ++-
 .../nvidia/llm/mistral-large.yaml             | 36 ++++++++++++++++++
 .../llm/mixtral-8x22b-instruct-v0.1.yaml      | 36 ++++++++++++++++++
 .../nvidia/llm/recurrentgemma-2b.yaml         | 37 +++++++++++++++++++
 6 files changed, 154 insertions(+), 1 deletion(-)
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml

diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
index fc69862722..2401f2a890 100644
--- a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
@@ -1,7 +1,11 @@
 - google/gemma-7b
 - google/codegemma-7b
+- google/recurrentgemma-2b
 - meta/llama2-70b
 - meta/llama3-8b-instruct
 - meta/llama3-70b-instruct
+- mistralai/mistral-large
 - mistralai/mixtral-8x7b-instruct-v0.1
+- mistralai/mixtral-8x22b-instruct-v0.1
 - fuyu-8b
+- snowflake/arctic
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml b/api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml
new file mode 100644
index 0000000000..7f53ae58e6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml
@@ -0,0 +1,36 @@
+model: snowflake/arctic
+label:
+  zh_Hans: snowflake/arctic
+  en_US: snowflake/arctic
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
index 402ffb2cf2..047bbeda63 100644
--- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
@@ -22,12 +22,16 @@ from core.model_runtime.utils import helper
 
 class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
     MODEL_SUFFIX_MAP = {
         'fuyu-8b': 'vlm/adept/fuyu-8b',
+        'mistralai/mistral-large': '',
         'mistralai/mixtral-8x7b-instruct-v0.1': '',
+        'mistralai/mixtral-8x22b-instruct-v0.1': '',
         'google/gemma-7b': '',
         'google/codegemma-7b': '',
+        'snowflake/arctic': '',
         'meta/llama2-70b': '',
         'meta/llama3-8b-instruct': '',
-        'meta/llama3-70b-instruct': ''
+        'meta/llama3-70b-instruct': '',
+        'google/recurrentgemma-2b': ''
     }
 
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml
new file mode 100644
index 0000000000..3e14d22141
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mistral-large
+label:
+  zh_Hans: mistralai/mistral-large
+  en_US: mistralai/mistral-large
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
new file mode 100644
index 0000000000..05500c0336
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mixtral-8x22b-instruct-v0.1
+label:
+  zh_Hans: mistralai/mixtral-8x22b-instruct-v0.1
+  en_US: mistralai/mixtral-8x22b-instruct-v0.1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml
new file mode 100644
index 0000000000..73fcce3930
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml
@@ -0,0 +1,37 @@
+model: google/recurrentgemma-2b
+label:
+  zh_Hans: google/recurrentgemma-2b
+  en_US: google/recurrentgemma-2b
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 2048
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.2
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 0.7
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: random_seed
+    type: int
+    help:
+      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+    label:
+      en_US: Seed
+      zh_Hans: 种子
+    default: 0
+    min: 0
+    max: 2147483647
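
The only behavioral change in this patch is the expanded `MODEL_SUFFIX_MAP` in llm.py: an empty string appears to mean the model is served from the provider's default OpenAI-compatible endpoint, while a non-empty value (as with `vlm/adept/fuyu-8b`) routes the request to a dedicated API path. A minimal sketch of that routing pattern follows; the base URLs and the `endpoint_for` helper are illustrative assumptions, not the actual `NVIDIALargeLanguageModel` implementation.

```python
# Illustrative sketch only: BASE_CHAT_URL, DEDICATED_URL, and endpoint_for
# are assumptions for exposition, not Dify's actual routing code.
BASE_CHAT_URL = 'https://integrate.api.nvidia.com/v1'  # assumed shared endpoint
DEDICATED_URL = 'https://ai.api.nvidia.com/v1'         # assumed per-model host

MODEL_SUFFIX_MAP = {
    'fuyu-8b': 'vlm/adept/fuyu-8b',  # non-empty suffix: dedicated route
    'mistralai/mistral-large': '',   # empty suffix: shared chat endpoint
    'snowflake/arctic': '',
}

def endpoint_for(model: str) -> str:
    """Pick the request URL: a non-empty suffix selects a dedicated path,
    while an empty one falls back to the shared OpenAI-compatible endpoint."""
    suffix = MODEL_SUFFIX_MAP.get(model, '')
    return f'{DEDICATED_URL}/{suffix}' if suffix else BASE_CHAT_URL

print(endpoint_for('fuyu-8b'))           # .../vlm/adept/fuyu-8b
print(endpoint_for('snowflake/arctic'))  # shared endpoint
```

Under this reading, the five new entries with empty suffixes simply register the models against the default endpoint, which is why the patch otherwise only adds per-model YAML parameter definitions.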