merge main

2025-08-18 03:26:00 +08:00 · 2024-09-27 16:27:00 +08:00 · 2024-09-27 16:27:00 +08:00 · 8fd04e5313
commit 8fd04e5313
parent 3904782647 0603359e2d
171 changed files with 1964 additions and 489 deletions
--- a/.github/workflows/web-tests.yml
+++ b/.github/workflows/web-tests.yml
@ -0,0 +1,46 @@
 name: Web Tests
 on:
  pull_request:
    branches:
      - main
    paths:
      - web/**
 concurrency:
  group: web-tests-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
 jobs:
  test:
    name: Web Tests
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./web
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Check changed files
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          files: web/**
      - name: Setup Node.js
        uses: actions/setup-node@v4
        if: steps.changed-files.outputs.any_changed == 'true'
        with:
          node-version: 20
          cache: yarn
          cache-dependency-path: ./web/package.json
      - name: Install dependencies
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn install --frozen-lockfile
      - name: Run tests
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn test
--- a/api/app.py
+++ b/api/app.py
@ -53,11 +53,9 @@ from services.account_service import AccountService
 warnings.simplefilter("ignore", ResourceWarning)
-# fix windows platform
+os.environ["TZ"] = "UTC"
-if os.name == "nt":
+# windows platform not support tzset
-    os.system('tzutil /s "UTC"')
+if hasattr(time, "tzset"):
 else:
    os.environ["TZ"] = "UTC"
    time.tzset()
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@ -309,7 +309,7 @@ class AppRunner:
            if not prompt_messages:
                prompt_messages = result.prompt_messages
-            if not usage and result.delta.usage:
+            if result.delta.usage:
                usage = result.delta.usage
        if not usage:
--- a/api/core/embedding/cached_embedding.py
+++ b/api/core/embedding/cached_embedding.py
@ -5,6 +5,7 @@ from typing import Optional, cast
 import numpy as np
 from sqlalchemy.exc import IntegrityError
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.model_entities import ModelPropertyKey
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
                for i in range(0, len(embedding_queue_texts), max_chunks):
                    batch_texts = embedding_queue_texts[i : i + max_chunks]
-                    embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
+                    embedding_result = self._model_instance.invoke_text_embedding(
                        texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
                    )
                    for vector in embedding_result.embeddings:
                        try:
@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
            redis_client.expire(embedding_cache_key, 600)
            return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
        try:
-            embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
+            embedding_result = self._model_instance.invoke_text_embedding(
                texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
            )
            embedding_results = embedding_result.embeddings[0]
            embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
--- a/api/core/embedding/embedding_constant.py
+++ b/api/core/embedding/embedding_constant.py
@ -0,0 +1,10 @@
 from enum import Enum
 class EmbeddingInputType(Enum):
    """
    Enum for embedding input type.
    """
    DOCUMENT = "document"
    QUERY = "query"
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@ -3,6 +3,7 @@ import os
 from collections.abc import Callable, Generator, Sequence
 from typing import IO, Optional, Union, cast
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
 from core.entities.provider_entities import ModelLoadBalancingConfiguration
 from core.errors.error import ProviderTokenNotInitError
@ -158,12 +159,15 @@ class ModelInstance:
            tools=tools,
        )
-    def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
+    def invoke_text_embedding(
        self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
    ) -> TextEmbeddingResult:
        """
        Invoke large language model
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        if not isinstance(self.model_type_instance, TextEmbeddingModel):
@ -176,6 +180,7 @@ class ModelInstance:
            credentials=self.credentials,
            texts=texts,
            user=user,
            input_type=input_type,
        )
    def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
--- a/api/core/model_runtime/model_providers/__base/text_embedding_model.py
+++ b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
@ -4,6 +4,7 @@ from typing import Optional
 from pydantic import ConfigDict
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
 from core.model_runtime.model_providers.__base.ai_model import AIModel
@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
    model_config = ConfigDict(protected_namespaces=())
    def invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
-        Invoke large language model
+        Invoke text embedding model
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        self.started_at = time.perf_counter()
        try:
-            return self._invoke(model, credentials, texts, user)
+            return self._invoke(model, credentials, texts, user, input_type)
        except Exception as e:
            raise self._transform_invoke_error(e)
    @abstractmethod
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
-        Invoke large language model
+        Invoke text embedding model
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        raise NotImplementedError
--- a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
@ -7,6 +7,7 @@ import numpy as np
 import tiktoken
 from openai import AzureOpenAI
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_
 class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        base_model_name = credentials["base_model_name"]
        credentials_kwargs = self._to_credential_kwargs(credentials)
        client = AzureOpenAI(**credentials_kwargs)
--- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional
 from requests import post
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "http://api.baichuan-ai.com/v1/embeddings"
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
--- a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
@ -13,6 +13,7 @@ from botocore.exceptions import (
    UnknownServiceError,
 )
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)
 class BedrockTextEmbeddingModel(TextEmbeddingModel):
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        client_config = Config(region_name=credentials["aws_region"])
--- a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
@ -5,6 +5,7 @@ import cohere
 import numpy as np
 from cohere.core import RequestOptions
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        # get model properties
--- a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
@ -15,6 +15,7 @@ help:
    en_US: https://fireworks.ai/account/api-keys
 supported_model_types:
  - llm
  - text-embedding
 configurate_methods:
  - predefined-model
 provider_credential_schema:
--- a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
@ -0,0 +1,46 @@
 model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
 label:
  zh_Hans: Llama 3.2 11B Vision Instruct
  en_US: Llama 3.2 11B Vision Instruct
 model_type: llm
 features:
  - agent-thought
  - tool-call
 model_properties:
  mode: chat
  context_size: 131072
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
 pricing:
  input: '0.2'
  output: '0.2'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
@ -0,0 +1,46 @@
 model: accounts/fireworks/models/llama-v3p2-1b-instruct
 label:
  zh_Hans: Llama 3.2 1B Instruct
  en_US: Llama 3.2 1B Instruct
 model_type: llm
 features:
  - agent-thought
  - tool-call
 model_properties:
  mode: chat
  context_size: 131072
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
 pricing:
  input: '0.1'
  output: '0.1'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
@ -0,0 +1,46 @@
 model: accounts/fireworks/models/llama-v3p2-3b-instruct
 label:
  zh_Hans: Llama 3.2 3B Instruct
  en_US: Llama 3.2 3B Instruct
 model_type: llm
 features:
  - agent-thought
  - tool-call
 model_properties:
  mode: chat
  context_size: 131072
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
 pricing:
  input: '0.1'
  output: '0.1'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
@ -0,0 +1,46 @@
 model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
 label:
  zh_Hans: Llama 3.2 90B Vision Instruct
  en_US: Llama 3.2 90B Vision Instruct
 model_type: llm
 features:
  - agent-thought
  - tool-call
 model_properties:
  mode: chat
  context_size: 131072
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
 pricing:
  input: '0.9'
  output: '0.9'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
@ -0,0 +1,12 @@
 model: WhereIsAI/UAE-Large-V1
 label:
  zh_Hans: UAE-Large-V1
  en_US: UAE-Large-V1
 model_type: text-embedding
 model_properties:
  context_size: 512
  max_chunks: 1
 pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/init.py
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/init.py
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
@ -0,0 +1,12 @@
 model: thenlper/gte-base
 label:
  zh_Hans: GTE-base
  en_US: GTE-base
 model_type: text-embedding
 model_properties:
  context_size: 512
  max_chunks: 1
 pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
@ -0,0 +1,12 @@
 model: thenlper/gte-large
 label:
  zh_Hans: GTE-large
  en_US: GTE-large
 model_type: text-embedding
 model_properties:
  context_size: 512
  max_chunks: 1
 pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
@ -0,0 +1,12 @@
 model: nomic-ai/nomic-embed-text-v1.5
 label:
  zh_Hans: nomic-embed-text-v1.5
  en_US: nomic-embed-text-v1.5
 model_type: text-embedding
 model_properties:
  context_size: 8192
  max_chunks: 16
 pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
@ -0,0 +1,12 @@
 model: nomic-ai/nomic-embed-text-v1
 label:
  zh_Hans: nomic-embed-text-v1
  en_US: nomic-embed-text-v1
 model_type: text-embedding
 model_properties:
  context_size: 8192
  max_chunks: 16
 pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
@ -0,0 +1,151 @@
 import time
 from collections.abc import Mapping
 from typing import Optional, Union
 import numpy as np
 from openai import OpenAI
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
 from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
 class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
    """
    Model class for Fireworks text embedding model.
    """
    def _invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        credentials_kwargs = self._to_credential_kwargs(credentials)
        client = OpenAI(**credentials_kwargs)
        extra_model_kwargs = {}
        if user:
            extra_model_kwargs["user"] = user
        extra_model_kwargs["encoding_format"] = "float"
        context_size = self._get_context_size(model, credentials)
        max_chunks = self._get_max_chunks(model, credentials)
        inputs = []
        indices = []
        used_tokens = 0
        for i, text in enumerate(texts):
            # Here token count is only an approximation based on the GPT2 tokenizer
            # TODO: Optimize for better token estimation and chunking
            num_tokens = self._get_num_tokens_by_gpt2(text)
            if num_tokens >= context_size:
                cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
                # if num tokens is larger than context length, only use the start
                inputs.append(text[0:cutoff])
            else:
                inputs.append(text)
            indices += [i]
        batched_embeddings = []
        _iter = range(0, len(inputs), max_chunks)
        for i in _iter:
            embeddings_batch, embedding_used_tokens = self._embedding_invoke(
                model=model,
                client=client,
                texts=inputs[i : i + max_chunks],
                extra_model_kwargs=extra_model_kwargs,
            )
            used_tokens += embedding_used_tokens
            batched_embeddings += embeddings_batch
        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
        return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Get number of tokens for given prompt messages
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return:
        """
        return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
    def validate_credentials(self, model: str, credentials: Mapping) -> None:
        """
        Validate model credentials
        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            # transform credentials to kwargs for model instance
            credentials_kwargs = self._to_credential_kwargs(credentials)
            client = OpenAI(**credentials_kwargs)
            # call embedding model
            self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))
    def _embedding_invoke(
        self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
    ) -> tuple[list[list[float]], int]:
        """
        Invoke embedding model
        :param model: model name
        :param client: model client
        :param texts: texts to embed
        :param extra_model_kwargs: extra model kwargs
        :return: embeddings and used tokens
        """
        response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
        return [data.embedding for data in response.data], response.usage.total_tokens
    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
        """
        Calculate response usage
        :param model: model name
        :param credentials: model credentials
        :param tokens: input tokens
        :return: usage
        """
        input_price_info = self.get_price(
            model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
        )
        usage = EmbeddingUsage(
            tokens=tokens,
            total_tokens=tokens,
            unit_price=input_price_info.unit_price,
            price_unit=input_price_info.unit,
            total_price=input_price_info.total_amount,
            currency=input_price_info.currency,
            latency=time.perf_counter() - self.started_at,
        )
        return usage
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
@ -0,0 +1,48 @@
 model: gemini-1.5-flash-001
 label:
  en_US: Gemini 1.5 Flash 001
 model_type: llm
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 1048576
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
@ -0,0 +1,48 @@
 model: gemini-1.5-flash-002
 label:
  en_US: Gemini 1.5 Flash 002
 model_type: llm
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 1048576
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
@ -0,0 +1,48 @@
 model: gemini-1.5-flash-8b-exp-0924
 label:
  en_US: Gemini 1.5 Flash 8B 0924
 model_type: llm
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 1048576
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
@ -1,6 +1,6 @@
 model: gemini-1.5-flash-latest
 label:
-  en_US: Gemini 1.5 Flash
+  en_US: Gemini 1.5 Flash Latest
 model_type: llm
 features:
  - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
@ -0,0 +1,48 @@
 model: gemini-1.5-flash
 label:
  en_US: Gemini 1.5 Flash
 model_type: llm
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 1048576
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
@ -0,0 +1,48 @@
 model: gemini-1.5-pro-001
 label:
  en_US: Gemini 1.5 Pro 001
 model_type: llm
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 2097152
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
@ -0,0 +1,48 @@
 model: gemini-1.5-pro-002
 label:
  en_US: Gemini 1.5 Pro 002
 model_type: llm
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 2097152
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
@ -1,6 +1,6 @@
 model: gemini-1.5-pro-latest
 label:
-  en_US: Gemini 1.5 Pro
+  en_US: Gemini 1.5 Pro Latest
 model_type: llm
 features:
  - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
@ -0,0 +1,48 @@
 model: gemini-1.5-pro
 label:
  en_US: Gemini 1.5 Pro
 model_type: llm
 features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 2097152
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
@ -27,6 +27,15 @@ parameter_rules:
    default: 4096
    min: 1
    max: 4096
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
@ -31,6 +31,15 @@ parameter_rules:
    max: 2048
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
 pricing:
  input: '0.00'
  output: '0.00'
--- a/api/core/model_runtime/model_providers/google/llm/llm.py
+++ b/api/core/model_runtime/model_providers/google/llm/llm.py
@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
 import google.generativeai as genai
 import requests
 from google.api_core import exceptions
-from google.generativeai import client
+from google.generativeai.client import _ClientManager
-from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
+from google.generativeai.types import ContentType, GenerateContentResponse
 from google.generativeai.types.content_types import to_part
 from PIL import Image
@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
                    history.append(content)
        # Create a new ClientManager with tenant's API key
-        new_client_manager = client._ClientManager()
+        new_client_manager = _ClientManager()
        new_client_manager.configure(api_key=credentials["google_api_key"])
        new_custom_client = new_client_manager.make_client("generative")
        google_model._client = new_custom_client
        safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
        }
        response = google_model.generate_content(
            contents=history,
            generation_config=genai.types.GenerationConfig(**config_kwargs),
            stream=stream,
            safety_settings=safety_settings,
            tools=self._convert_tools_to_glm_tool(tools) if tools else None,
            request_options={"timeout": 600},
        )
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
@ -0,0 +1,25 @@
 model: llama-3.2-11b-text-preview
 label:
  zh_Hans: Llama 3.2 11B Text (Preview)
  en_US: Llama 3.2 11B Text (Preview)
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 131072
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
 pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
@ -0,0 +1,25 @@
 model: llama-3.2-1b-preview
 label:
  zh_Hans: Llama 3.2 1B Text (Preview)
  en_US: Llama 3.2 1B Text (Preview)
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 131072
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
 pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
@ -0,0 +1,25 @@
 model: llama-3.2-3b-preview
 label:
  zh_Hans: Llama 3.2 3B Text (Preview)
  en_US: Llama 3.2 3B Text (Preview)
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 131072
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
 pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
@ -0,0 +1,25 @@
 model: llama-3.2-90b-text-preview
 label:
  zh_Hans: Llama 3.2 90B Text (Preview)
  en_US: Llama 3.2 90B Text (Preview)
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 131072
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
 pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
--- a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
@ -6,6 +6,7 @@ import numpy as np
 import requests
 from huggingface_hub import HfApi, InferenceClient
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/
 class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        client = InferenceClient(token=credentials["huggingfacehub_api_token"])
        execute_model = model
--- a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
@ -1,6 +1,7 @@
 import time
 from typing import Optional
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        server_url = credentials["server_url"]
--- a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
 from tencentcloud.common.profile.http_profile import HttpProfile
 from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
--- a/api/core/model_runtime/model_providers/jina/jina.yaml
+++ b/api/core/model_runtime/model_providers/jina/jina.yaml
@ -67,46 +67,3 @@ model_credential_schema:
      required: false
      type: text-input
      default: '8192'
    - variable: task
      label:
        zh_Hans: 下游任务
        en_US: Downstream task
      placeholder:
        zh_Hans: 选择将使用向量模型的下游任务。模型将返回针对该任务优化的向量。
        en_US: Select the downstream task for which the embeddings will be used. The model will return the optimized embeddings for that task.
      required: false
      type: select
      options:
        - value: retrieval.query
          label:
            en_US: retrieval.query
        - value: retrieval.passage
          label:
            en_US: retrieval.passage
        - value: separation
          label:
            en_US: separation
        - value: classification
          label:
            en_US: classification
        - value: text-matching
          label:
            en_US: text-matching
    - variable: dimensions
      label:
        zh_Hans: 输出维度
        en_US: Output dimensions
      placeholder:
        zh_Hans: 输入您的输出维度
        en_US: Enter output dimensions
      required: false
      type: text-input
    - variable: late_chunking
      label:
        zh_Hans: 后期分块
        en_US: Late chunking
      placeholder:
        zh_Hans: 应用后期分块技术来利用模型的长上下文功能来生成上下文块向量化。
        en_US: Apply the late chunking technique to leverage the model's long-context capabilities for generating contextual chunk embeddings.
      required: false
      type: switch
--- a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional
 from requests import post
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,7 +28,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "https://api.jina.ai/v1"
-    def _to_payload(self, model: str, texts: list[str], credentials: dict) -> dict:
+    def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
        """
        Parse model credentials
@ -44,23 +45,20 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
        data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
-        task = credentials.get("task")
+        # model specific parameters
-        dimensions = credentials.get("dimensions")
+        if model == "jina-embeddings-v3":
-        late_chunking = credentials.get("late_chunking")
+            # set `task` type according to input type for the best performance
-
+            data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
        if task is not None:
            data["task"] = task
        if dimensions is not None:
            data["dimensions"] = int(dimensions)
        if late_chunking is not None:
            data["late_chunking"] = late_chunking
        return data
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -69,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
@ -81,7 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
        url = base_url + "/embeddings"
        headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
-        data = self._to_payload(model=model, texts=texts, credentials=credentials)
+        data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)
        try:
            response = post(url, headers=headers, data=dumps(data))
--- a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
@ -5,6 +5,7 @@ from typing import Optional
 from requests import post
 from yarl import URL
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE
 class LocalAITextEmbeddingModel(TextEmbeddingModel):
    """
-    Model class for Jina text embedding model.
+    Model class for LocalAI text embedding model.
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        if len(texts) != 1:
--- a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional
 from requests import post
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "https://api.minimax.chat/v1/embeddings"
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["minimax_api_key"]
--- a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional
 import requests
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,7 +28,12 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "https://api.mixedbread.ai/v1"
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -36,6 +42,7 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
--- a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
@ -5,6 +5,7 @@ from typing import Optional
 from nomic import embed
 from nomic import login as nomic_login
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import (
    EmbeddingUsage,
@ -46,6 +47,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -54,6 +56,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        embeddings, prompt_tokens, total_tokens = self.embed_text(
--- a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional
 from requests import post
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
    models: list[str] = ["NV-Embed-QA"]
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
--- a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
@ -6,6 +6,7 @@ from typing import Optional
 import numpy as np
 import oci
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        # get model properties
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
            if chunk_json["done"]:
                # calculate num tokens
-                if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
+                if "prompt_eval_count" in chunk_json:
                    # transform usage
                    prompt_tokens = chunk_json["prompt_eval_count"]
                    completion_tokens = chunk_json["eval_count"]
                else:
-                    # calculate num tokens
+                    prompt_message_content = prompt_messages[0].content
-                    prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
+                    if isinstance(prompt_message_content, str):
-                    completion_tokens = self._get_num_tokens_by_gpt2(full_text)
+                        prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
                    else:
                        content_text = ""
                        for message_content in prompt_message_content:
                            if message_content.type == PromptMessageContentType.TEXT:
                                message_content = cast(TextPromptMessageContent, message_content)
                                content_text += message_content.data
                        prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
                completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
                # transform usage
                usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
--- a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
@ -8,6 +8,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
--- a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
@ -6,6 +6,7 @@ import numpy as np
 import tiktoken
 from openai import OpenAI
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -19,7 +20,12 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        # transform credentials to kwargs for model instance
--- a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
@ -7,6 +7,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
--- a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
@ -5,6 +5,7 @@ from typing import Optional
 from requests import post
 from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        server_url = credentials["server_url"]
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@ -7,6 +7,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
    """
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
--- a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
@ -4,6 +4,7 @@ from typing import Optional
 from replicate import Client as ReplicateClient
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat
 class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
        if "model_version" in credentials:
--- a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
+++ b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
@ -84,8 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
    Model class for Cohere large language model.
    """
-    sagemaker_client: Any = None
+    sagemaker_session: Any = None
    predictor: Any = None
    sagemaker_endpoint: str = None
    def _handle_chat_generate_response(
        self,
@ -211,7 +212,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """
-        if not self.sagemaker_client:
+        if not self.sagemaker_session:
            access_key = credentials.get("aws_access_key_id")
            secret_key = credentials.get("aws_secret_access_key")
            aws_region = credentials.get("aws_region")
@ -226,11 +227,14 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
            else:
                boto_session = boto3.Session()
-            self.sagemaker_client = boto_session.client("sagemaker")
+            sagemaker_client = boto_session.client("sagemaker")
-            sagemaker_session = Session(boto_session=boto_session, sagemaker_client=self.sagemaker_client)
+            self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
        if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
            self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
            self.predictor = Predictor(
-                endpoint_name=credentials.get("sagemaker_endpoint"),
+                endpoint_name=self.sagemaker_endpoint,
-                sagemaker_session=sagemaker_session,
+                sagemaker_session=self.sagemaker_session,
                serializer=serializers.JSONSerializer(),
            )
--- a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
@ -6,6 +6,7 @@ from typing import Any, Optional
 import boto3
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
        return embeddings
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        # get model properties
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@ -1,25 +1,38 @@
 - Qwen/Qwen2.5-7B-Instruct
 - Qwen/Qwen2.5-14B-Instruct
 - Qwen/Qwen2.5-32B-Instruct
 - Qwen/Qwen2.5-72B-Instruct
 - Qwen/Qwen2.5-Math-72B-Instruct
 - Qwen/Qwen2.5-32B-Instruct
 - Qwen/Qwen2.5-14B-Instruct
 - Qwen/Qwen2.5-7B-Instruct
 - Qwen/Qwen2.5-Coder-7B-Instruct
 - deepseek-ai/DeepSeek-V2.5
 - Qwen/Qwen2-72B-Instruct
 - Qwen/Qwen2-57B-A14B-Instruct
 - Qwen/Qwen2-7B-Instruct
 - Qwen/Qwen2-1.5B-Instruct
 - 01-ai/Yi-1.5-34B-Chat
 - 01-ai/Yi-1.5-9B-Chat-16K
 - 01-ai/Yi-1.5-6B-Chat
 - THUDM/glm-4-9b-chat
 - deepseek-ai/DeepSeek-V2.5
 - deepseek-ai/DeepSeek-V2-Chat
 - deepseek-ai/DeepSeek-Coder-V2-Instruct
 - THUDM/glm-4-9b-chat
 - THUDM/chatglm3-6b
 - 01-ai/Yi-1.5-34B-Chat-16K
 - 01-ai/Yi-1.5-9B-Chat-16K
 - 01-ai/Yi-1.5-6B-Chat
 - internlm/internlm2_5-20b-chat
 - internlm/internlm2_5-7b-chat
 - google/gemma-2-27b-it
 - google/gemma-2-9b-it
 - meta-llama/Meta-Llama-3-70B-Instruct
 - meta-llama/Meta-Llama-3-8B-Instruct
 - meta-llama/Meta-Llama-3.1-405B-Instruct
 - meta-llama/Meta-Llama-3.1-70B-Instruct
 - meta-llama/Meta-Llama-3.1-8B-Instruct
- mistralai/Mixtral-8x7B-Instruct-v0.1
+- meta-llama/Meta-Llama-3-70B-Instruct
 - meta-llama/Meta-Llama-3-8B-Instruct
 - google/gemma-2-27b-it
 - google/gemma-2-9b-it
 - mistralai/Mistral-7B-Instruct-v0.2
 - Pro/Qwen/Qwen2-7B-Instruct
 - Pro/Qwen/Qwen2-1.5B-Instruct
 - Pro/THUDM/glm-4-9b-chat
 - Pro/THUDM/chatglm3-6b
 - Pro/01-ai/Yi-1.5-9B-Chat-16K
 - Pro/01-ai/Yi-1.5-6B-Chat
 - Pro/internlm/internlm2_5-7b-chat
 - Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
 - Pro/meta-llama/Meta-Llama-3-8B-Instruct
 - Pro/google/gemma-2-9b-it
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@ -28,3 +28,4 @@ pricing:
  output: '0'
  unit: '0.000001'
  currency: RMB
 deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@ -28,3 +28,4 @@ pricing:
  output: '1.26'
  unit: '0.000001'
  currency: RMB
 deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
@ -1,5 +1,6 @@
 from typing import Optional
 from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
 from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
    OAICompatEmbeddingModel,
@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
        super().validate_credentials(model, credentials)
    def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        self._add_custom_parameters(credentials)
        return super()._invoke(model, credentials, texts, user)
--- a/api/core/model_runtime/model_providers/spark/llm/llm.py
+++ b/api/core/model_runtime/model_providers/spark/llm/llm.py
@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
        :param prompt_messages: prompt messages
        :return: llm response chunk generator result
        """
        completion = ""
        for index, content in enumerate(client.subscribe()):
            if isinstance(content, dict):
                delta = content["data"]
            else:
                delta = content
-
+            completion += delta
            assistant_prompt_message = AssistantPromptMessage(
                content=delta or "",
            )
-
+            temp_assistant_prompt_message = AssistantPromptMessage(
                content=completion,
            )
            prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
-            completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+            completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
            # transform usage
            usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
--- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: farui-plus
 label:
  en_US: farui-plus
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@ -18,7 +18,7 @@ from dashscope.common.error import (
    UnsupportedModel,
 )
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    ImagePromptMessageContent,
@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
    FetchFrom,
    I18nObject,
    ModelFeature,
    ModelPropertyKey,
    ModelType,
    ParameterRule,
    ParameterType,
@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
        :param tools: tools for tool calling
        :return:
        """
        # Check if the model was added via get_customizable_model_schema
        if self.get_customizable_model_schema(model, credentials) is not None:
            # For custom models, tokens are not calculated.
            return 0
        if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
            model = model.replace("-chat", "")
        if model == "farui-plus":
@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
        :param credentials: model credentials
        :return: AIModelEntity or None
        """
-        rules = [
+        return AIModelEntity(
            model=model,
            label=I18nObject(en_US=model, zh_Hans=model),
            model_type=ModelType.LLM,
            features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
            if credentials.get("function_calling_type") == "tool_call"
            else [],
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
                ModelPropertyKey.MODE: LLMMode.CHAT.value,
            },
            parameter_rules=[
                ParameterRule(
                    name="temperature",
                type=ParameterType.FLOAT,
                    use_template="temperature",
-                label=I18nObject(zh_Hans="温度", en_US="Temperature"),
+                    label=I18nObject(en_US="Temperature", zh_Hans="温度"),
            ),
            ParameterRule(
                name="top_p",
                    type=ParameterType.FLOAT,
                use_template="top_p",
                label=I18nObject(zh_Hans="Top P", en_US="Top P"),
            ),
            ParameterRule(
                name="top_k",
                type=ParameterType.INT,
                min=0,
                max=99,
                label=I18nObject(zh_Hans="top_k", en_US="top_k"),
                ),
                ParameterRule(
                    name="max_tokens",
-                type=ParameterType.INT,
+                    use_template="max_tokens",
                    default=512,
                    min=1,
-                max=128000,
+                    max=int(credentials.get("max_tokens", 1024)),
-                default=1024,
+                    label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
                label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
            ),
            ParameterRule(
                name="seed",
                    type=ParameterType.INT,
                default=1234,
                label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
                ),
                ParameterRule(
-                name="repetition_penalty",
+                    name="top_p",
                    use_template="top_p",
                    label=I18nObject(en_US="Top P", zh_Hans="Top P"),
                    type=ParameterType.FLOAT,
                default=1.1,
                label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
                ),
-        ]
+                ParameterRule(
-
+                    name="top_k",
-        entity = AIModelEntity(
+                    use_template="top_k",
-            model=model,
+                    label=I18nObject(en_US="Top K", zh_Hans="Top K"),
-            label=I18nObject(en_US=model),
+                    type=ParameterType.FLOAT,
-            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+                ),
-            model_type=ModelType.LLM,
+                ParameterRule(
-            model_properties={},
+                    name="frequency_penalty",
-            parameter_rules=rules,
+                    use_template="frequency_penalty",
                    label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
                    type=ParameterType.FLOAT,
                ),
            ],
        )
        return entity
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-coder-turbo-0919
 label:
  en_US: qwen-coder-turbo-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-coder-turbo-latest
 label:
  en_US: qwen-coder-turbo-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-coder-turbo
 label:
  en_US: qwen-coder-turbo
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
@ -1,4 +1,4 @@
-# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-long
 label:
  en_US: qwen-long
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-plus-0816
 label:
  en_US: qwen-math-plus-0816
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-plus-0919
 label:
  en_US: qwen-math-plus-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-plus-latest
 label:
  en_US: qwen-math-plus-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-plus
 label:
  en_US: qwen-math-plus
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-turbo-0919
 label:
  en_US: qwen-math-turbo-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-turbo-latest
 label:
  en_US: qwen-math-turbo-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
@ -1,3 +1,4 @@
 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
 model: qwen-math-turbo
 label:
  en_US: qwen-math-turbo
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-max, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-0107
 label:
  en_US: qwen-max-0107
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-max-0403, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-0403
 label:
  en_US: qwen-max-0403
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-max-0428, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-0428
 label:
  en_US: qwen-max-0428
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-max-0919, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-0919
 label:
  en_US: qwen-max-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-max, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-1201
 label:
  en_US: qwen-max-1201
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-max, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-latest
 label:
  en_US: qwen-max-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-max, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max-longcontext
 label:
  en_US: qwen-max-longcontext
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-max, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
 model: qwen-max
 label:
  en_US: qwen-max
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-plus-0206, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0206
 label:
  en_US: qwen-plus-0206
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-plus-0624, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0624
 label:
  en_US: qwen-plus-0624
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-plus-0723, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0723
 label:
  en_US: qwen-plus-0723
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-plus-0806, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0806
 label:
  en_US: qwen-plus-0806
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-plus-0919, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-0919
 label:
  en_US: qwen-plus-0919
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-plus, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-chat
 label:
  en_US: qwen-plus-chat
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-plus-latest, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus-latest
 label:
  en_US: qwen-plus-latest
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-plus, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
 model: qwen-plus
 label:
  en_US: qwen-plus
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
@ -1,3 +1,6 @@
 # this model corresponds to qwen-turbo-0206, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
 model: qwen-turbo-0206
 label:
  en_US: qwen-turbo-0206
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-turbo-0624, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
 model: qwen-turbo-0624
 label:
  en_US: qwen-turbo-0624
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
@ -1,3 +1,5 @@
 # this model corresponds to qwen-turbo-0919, for more details
 # please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
 model: qwen-turbo-0919
 label:
  en_US: qwen-turbo-0919
--- a/Show More
+++ b/Show More