diff --git a/api/core/model_runtime/model_providers/gitee_ai/llm/InternVL2-8B.yaml b/api/core/model_runtime/model_providers/gitee_ai/llm/InternVL2-8B.yaml
new file mode 100644
index 0000000000..d288c3dd39
--- /dev/null
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/InternVL2-8B.yaml
@@ -0,0 +1,93 @@
+model: InternVL2-8B
+label:
+  en_US: InternVL2-8B
+model_type: llm
+features:
+  - vision
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    label:
+      en_US: "Max Tokens"
+      zh_Hans: "最大Token数"
+    type: int
+    default: 512
+    min: 1
+    required: true
+    help:
+      en_US: "The maximum number of tokens the model can generate. The upper limit varies by model."
+      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
+
+  - name: temperature
+    use_template: temperature
+    label:
+      en_US: "Temperature"
+      zh_Hans: "采样温度"
+    type: float
+    default: 0.7
+    min: 0.0
+    max: 1.0
+    precision: 1
+    required: true
+    help:
+      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature according to your needs, but not both at the same time."
+      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
+
+  - name: top_p
+    use_template: top_p
+    label:
+      en_US: "Top P"
+      zh_Hans: "Top P"
+    type: float
+    default: 0.7
+    min: 0.0
+    max: 1.0
+    precision: 1
+    required: true
+    help:
+      en_US: "The value range of this sampling parameter is [0.0, 1.0]. The top_p value makes the model select tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter has no effect. It is recommended to adjust either top_p or temperature according to your needs, but not both at the same time."
+      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
+
+  - name: top_k
+    use_template: top_k
+    label:
+      en_US: "Top K"
+      zh_Hans: "Top K"
+    type: int
+    default: 50
+    min: 0
+    max: 100
+    required: true
+    help:
+      en_US: "The value range is [0, 100]. This limits the model to selecting only from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse the generated text will be."
+      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
+
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    label:
+      en_US: "Frequency Penalty"
+      zh_Hans: "频率惩罚"
+    type: float
+    default: 0
+    min: -1.0
+    max: 1.0
+    precision: 1
+    required: false
+    help:
+      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
+      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
+
+  - name: user
+    use_template: text
+    label:
+      en_US: "User"
+      zh_Hans: "用户"
+    type: string
+    required: false
+    help:
+      en_US: "Used to track and differentiate conversation requests from different users."
+      zh_Hans: "用于追踪和区分不同用户的对话请求。"
diff --git a/api/core/model_runtime/model_providers/gitee_ai/llm/InternVL2.5-26B.yaml b/api/core/model_runtime/model_providers/gitee_ai/llm/InternVL2.5-26B.yaml
new file mode 100644
index 0000000000..b2dee88c02
--- /dev/null
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/InternVL2.5-26B.yaml
@@ -0,0 +1,93 @@
+model: InternVL2.5-26B
+label:
+  en_US: InternVL2.5-26B
+model_type: llm
+features:
+  - vision
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    label:
+      en_US: "Max Tokens"
+      zh_Hans: "最大Token数"
+    type: int
+    default: 512
+    min: 1
+    required: true
+    help:
+      en_US: "The maximum number of tokens the model can generate. The upper limit varies by model."
+      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
+
+  - name: temperature
+    use_template: temperature
+    label:
+      en_US: "Temperature"
+      zh_Hans: "采样温度"
+    type: float
+    default: 0.7
+    min: 0.0
+    max: 1.0
+    precision: 1
+    required: true
+    help:
+      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature according to your needs, but not both at the same time."
+      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
+
+  - name: top_p
+    use_template: top_p
+    label:
+      en_US: "Top P"
+      zh_Hans: "Top P"
+    type: float
+    default: 0.7
+    min: 0.0
+    max: 1.0
+    precision: 1
+    required: true
+    help:
+      en_US: "The value range of this sampling parameter is [0.0, 1.0]. The top_p value makes the model select tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter has no effect. It is recommended to adjust either top_p or temperature according to your needs, but not both at the same time."
+      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
+
+  - name: top_k
+    use_template: top_k
+    label:
+      en_US: "Top K"
+      zh_Hans: "Top K"
+    type: int
+    default: 50
+    min: 0
+    max: 100
+    required: true
+    help:
+      en_US: "The value range is [0, 100]. This limits the model to selecting only from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse the generated text will be."
+      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
+
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    label:
+      en_US: "Frequency Penalty"
+      zh_Hans: "频率惩罚"
+    type: float
+    default: 0
+    min: -1.0
+    max: 1.0
+    precision: 1
+    required: false
+    help:
+      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
+      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
+
+  - name: user
+    use_template: text
+    label:
+      en_US: "User"
+      zh_Hans: "用户"
+    type: string
+    required: false
+    help:
+      en_US: "Used to track and differentiate conversation requests from different users."
+      zh_Hans: "用于追踪和区分不同用户的对话请求。"
diff --git a/api/core/model_runtime/model_providers/gitee_ai/llm/_position.yaml b/api/core/model_runtime/model_providers/gitee_ai/llm/_position.yaml
index 13c31ad02b..c942cda3b2 100644
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/_position.yaml
@@ -6,3 +6,5 @@
 - deepseek-coder-33B-instruct-chat
 - deepseek-coder-33B-instruct-completions
 - codegeex4-all-9b
+- InternVL2.5-26B
+- InternVL2-8B
diff --git a/api/core/model_runtime/model_providers/gitee_ai/llm/llm.py b/api/core/model_runtime/model_providers/gitee_ai/llm/llm.py
index 0c253a4a0a..68aaad2e3f 100644
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/llm.py
@@ -29,18 +29,26 @@ class GiteeAILargeLanguageModel(OAIAPICompatLargeLanguageModel):
         user: Optional[str] = None,
     ) -> Union[LLMResult, Generator]:
         self._add_custom_parameters(credentials, model, model_parameters)
-        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+        return super()._invoke(
+            GiteeAILargeLanguageModel.MODEL_TO_IDENTITY.get(model, model),
+            credentials,
+            prompt_messages,
+            model_parameters,
+            tools,
+            stop,
+            stream,
+            user,
+        )
 
     def validate_credentials(self, model: str, credentials: dict) -> None:
-        self._add_custom_parameters(credentials, None)
-        super().validate_credentials(model, credentials)
+        self._add_custom_parameters(credentials, model, None)
+        super().validate_credentials(GiteeAILargeLanguageModel.MODEL_TO_IDENTITY.get(model, model), credentials)
 
-    def _add_custom_parameters(self, credentials: dict, model: Optional[str]) -> None:
+    def _add_custom_parameters(self, credentials: dict, model: Optional[str], model_parameters: dict) -> None:
         if model is None:
             model = "Qwen2-72B-Instruct"
 
-        model_identity = GiteeAILargeLanguageModel.MODEL_TO_IDENTITY.get(model, model)
-        credentials["endpoint_url"] = f"https://ai.gitee.com/api/serverless/{model_identity}/"
+        credentials["endpoint_url"] = "https://ai.gitee.com/v1"
         if model.endswith("completions"):
             credentials["mode"] = LLMMode.COMPLETION.value
         else: