diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml index 51e71920e8..0b622b0600 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml @@ -1,5 +1,7 @@ - google/gemma-7b - google/codegemma-7b - meta/llama2-70b +- meta/llama3-8b +- meta/llama3-70b - mistralai/mixtral-8x7b-instruct-v0.1 - fuyu-8b diff --git a/api/core/model_runtime/model_providers/nvidia/llm/codegemma-7b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/codegemma-7b.yaml index ae94b14220..57446224a8 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/codegemma-7b.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/codegemma-7b.yaml @@ -11,13 +11,19 @@ model_properties: parameter_rules: - name: temperature use_template: temperature + min: 0 + max: 1 + default: 0.5 - name: top_p use_template: top_p + min: 0 + max: 1 + default: 1 - name: max_tokens use_template: max_tokens - default: 1024 min: 1 max: 1024 + default: 1024 - name: frequency_penalty use_template: frequency_penalty min: -2 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml index 49749bba90..6ae524c6d8 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml @@ -22,6 +22,6 @@ parameter_rules: max: 1 - name: max_tokens use_template: max_tokens - default: 512 + default: 1024 min: 1 max: 1024 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml index c50dad4f14..794b820bf4 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml @@ -11,13 +11,19 @@ model_properties: parameter_rules: 
- name: temperature use_template: temperature + min: 0 + max: 1 + default: 0.5 - name: top_p use_template: top_p + min: 0 + max: 1 + default: 1 - name: max_tokens use_template: max_tokens - default: 512 min: 1 max: 1024 + default: 1024 - name: frequency_penalty use_template: frequency_penalty min: -2 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml index 46422cbdb6..9fba816b7f 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml @@ -7,17 +7,23 @@ features: - agent-thought model_properties: mode: chat - context_size: 32768 + context_size: 4096 parameter_rules: - name: temperature use_template: temperature + min: 0 + max: 1 + default: 0.5 - name: top_p use_template: top_p + min: 0 + max: 1 + default: 1 - name: max_tokens use_template: max_tokens - default: 512 min: 1 max: 1024 + default: 1024 - name: frequency_penalty use_template: frequency_penalty min: -2 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama3-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama3-70b.yaml new file mode 100644 index 0000000000..9999ef5a83 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama3-70b.yaml @@ -0,0 +1,36 @@ +model: meta/llama3-70b +label: + zh_Hans: meta/llama3-70b + en_US: meta/llama3-70b +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 1024 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git 
a/api/core/model_runtime/model_providers/nvidia/llm/llama3-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama3-8b.yaml new file mode 100644 index 0000000000..4dd3215d74 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama3-8b.yaml @@ -0,0 +1,36 @@ +model: meta/llama3-8b +label: + zh_Hans: meta/llama3-8b + en_US: meta/llama3-8b +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 1024 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py index b1c2b77358..84f5fc5e1c 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py +++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py @@ -25,7 +25,9 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel): 'mistralai/mixtral-8x7b-instruct-v0.1': '', 'google/gemma-7b': '', 'google/codegemma-7b': '', - 'meta/llama2-70b': '' + 'meta/llama2-70b': '', + 'meta/llama3-8b': '', + 'meta/llama3-70b': '' } def _invoke(self, model: str, credentials: dict, diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml index fbd8cc268e..d2c4dc5d93 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml @@ -11,13 +11,19 @@ model_properties: 
parameter_rules: - name: temperature use_template: temperature + min: 0 + max: 1 + default: 0.5 - name: top_p use_template: top_p + min: 0 + max: 1 + default: 1 - name: max_tokens use_template: max_tokens - default: 512 min: 1 max: 1024 + default: 1024 - name: frequency_penalty use_template: frequency_penalty min: -2 diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.yaml b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml index 4d6da913c1..ce894a3372 100644 --- a/api/core/model_runtime/model_providers/nvidia/nvidia.yaml +++ b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml @@ -1,6 +1,9 @@ provider: nvidia label: en_US: API Catalog +description: + en_US: API Catalog + zh_Hans: API Catalog icon_small: en_US: icon_s_en.svg icon_large: