diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml
new file mode 100644
index 0000000000..5aee64b8e6
--- /dev/null
+++ b/.github/workflows/web-tests.yml
@@ -0,0 +1,46 @@
+name: Web Tests
+
+on:
+ pull_request:
+ branches:
+ - main
+ paths:
+ - web/**
+
+concurrency:
+ group: web-tests-${{ github.head_ref || github.run_id }}
+ cancel-in-progress: true
+
+jobs:
+ test:
+ name: Web Tests
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ working-directory: ./web
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Check changed files
+ id: changed-files
+ uses: tj-actions/changed-files@v45
+ with:
+ files: web/**
+
+ - name: Setup Node.js
+ uses: actions/setup-node@v4
+ if: steps.changed-files.outputs.any_changed == 'true'
+ with:
+ node-version: 20
+ cache: yarn
+ cache-dependency-path: ./web/package.json
+
+ - name: Install dependencies
+ if: steps.changed-files.outputs.any_changed == 'true'
+ run: yarn install --frozen-lockfile
+
+ - name: Run tests
+ if: steps.changed-files.outputs.any_changed == 'true'
+ run: yarn test
diff --git a/api/app.py b/api/app.py
index 91a49337fc..1b58beee15 100644
--- a/api/app.py
+++ b/api/app.py
@@ -53,11 +53,9 @@ from services.account_service import AccountService
warnings.simplefilter("ignore", ResourceWarning)
-# fix windows platform
-if os.name == "nt":
- os.system('tzutil /s "UTC"')
-else:
- os.environ["TZ"] = "UTC"
+os.environ["TZ"] = "UTC"
+# Windows does not support time.tzset()
+if hasattr(time, "tzset"):
time.tzset()
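A minimal sketch (not part of the patch) of why the hasattr guard works: setting the TZ environment variable alone does not affect a running CPython process on POSIX until time.tzset() re-reads it, and Windows builds of CPython do not expose time.tzset at all.

    import os
    import time

    os.environ["TZ"] = "UTC"
    if hasattr(time, "tzset"):  # absent on Windows builds of CPython
        time.tzset()            # re-read TZ so localtime()/strftime() use UTC
    print(time.strftime("%Z"))  # on POSIX this now prints "UTC"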
diff --git a/api/commands.py b/api/commands.py
index b8fc81af67..7ef4aed7f7 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -652,7 +652,7 @@ where sites.id is null limit 1000"""
app_was_created.send(app, account=account)
except Exception as e:
failed_app_ids.append(app_id)
- click.echo(click.style("FFailed to fix missing site for app {}".format(app_id), fg="red"))
+ click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
logging.exception(f"Fix app related site missing issue failed, error: {e}")
continue
diff --git a/api/core/app/apps/base_app_generate_response_converter.py b/api/core/app/apps/base_app_generate_response_converter.py
index c6855ac854..62e79ec444 100644
--- a/api/core/app/apps/base_app_generate_response_converter.py
+++ b/api/core/app/apps/base_app_generate_response_converter.py
@@ -75,10 +75,10 @@ class AppGenerateResponseConverter(ABC):
:return:
"""
# show_retrieve_source
+ updated_resources = []
if "retriever_resources" in metadata:
- metadata["retriever_resources"] = []
for resource in metadata["retriever_resources"]:
- metadata["retriever_resources"].append(
+ updated_resources.append(
{
"segment_id": resource["segment_id"],
"position": resource["position"],
@@ -87,6 +87,7 @@ class AppGenerateResponseConverter(ABC):
"content": resource["content"],
}
)
+ metadata["retriever_resources"] = updated_resources
# show annotation reply
if "annotation_reply" in metadata:
diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py
index 1b412b8639..203aca3384 100644
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@@ -309,7 +309,7 @@ class AppRunner:
if not prompt_messages:
prompt_messages = result.prompt_messages
- if not usage and result.delta.usage:
+ if result.delta.usage:
usage = result.delta.usage
if not usage:
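A hedged sketch of the streaming behavior this hunk changes (the stream variable and chunk shape are illustrative): providers typically attach token usage to the final delta of a stream, so adopting the latest non-empty usage, rather than locking in the first one seen, avoids reporting a stale value.

    usage = None
    for result in stream:           # hypothetical iterator of result chunks
        if result.delta.usage:      # new guard: always adopt the latest usage
            usage = result.delta.usage
    # the old guard `if not usage and result.delta.usage` froze the first value seen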
diff --git a/api/core/embedding/cached_embedding.py b/api/core/embedding/cached_embedding.py
index 8ce12fd59f..75219051cd 100644
--- a/api/core/embedding/cached_embedding.py
+++ b/api/core/embedding/cached_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional, cast
import numpy as np
from sqlalchemy.exc import IntegrityError
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
for i in range(0, len(embedding_queue_texts), max_chunks):
batch_texts = embedding_queue_texts[i : i + max_chunks]
- embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
+ embedding_result = self._model_instance.invoke_text_embedding(
+ texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
+ )
for vector in embedding_result.embeddings:
try:
@@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
redis_client.expire(embedding_cache_key, 600)
return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
try:
- embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
+ embedding_result = self._model_instance.invoke_text_embedding(
+ texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
+ )
embedding_results = embedding_result.embeddings[0]
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
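A usage sketch (model_instance is assumed to be an already-configured ModelInstance) of the asymmetric split these two hunks introduce: stored chunks are embedded as DOCUMENT and search text as QUERY, so models that distinguish passage and query tasks receive the right hint.

    from core.embedding.embedding_constant import EmbeddingInputType

    doc_result = model_instance.invoke_text_embedding(
        texts=["chunk to index"], user="user-1",
        input_type=EmbeddingInputType.DOCUMENT,
    )
    query_result = model_instance.invoke_text_embedding(
        texts=["what does the chunk say?"], user="user-1",
        input_type=EmbeddingInputType.QUERY,
    )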
diff --git a/api/core/embedding/embedding_constant.py b/api/core/embedding/embedding_constant.py
new file mode 100644
index 0000000000..9b4934646b
--- /dev/null
+++ b/api/core/embedding/embedding_constant.py
@@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class EmbeddingInputType(Enum):
+ """
+ Enum for embedding input type.
+ """
+
+ DOCUMENT = "document"
+ QUERY = "query"
diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py
index c40b6d1808..e5b6784516 100644
--- a/api/core/llm_generator/prompts.py
+++ b/api/core/llm_generator/prompts.py
@@ -65,7 +65,6 @@ SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
"Please help me predict the three most likely questions that human would ask, "
"and keeping each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response"
- "(if the main response is written in Chinese, then the language of your output must be using Chinese.)!\n"
"The output must be an array in JSON format following the specified schema:\n"
'["question1","question2","question3"]\n'
)
diff --git a/api/core/model_manager.py b/api/core/model_manager.py
index 990efd36c6..74b4452362 100644
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@@ -3,6 +3,7 @@ import os
from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast
+from core.embedding.embedding_constant import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError
@@ -158,12 +159,15 @@ class ModelInstance:
tools=tools,
)
- def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
+ def invoke_text_embedding(
+ self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
+ ) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
if not isinstance(self.model_type_instance, TextEmbeddingModel):
@@ -176,6 +180,7 @@ class ModelInstance:
credentials=self.credentials,
texts=texts,
user=user,
+ input_type=input_type,
)
def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
diff --git a/api/core/model_runtime/model_providers/__base/text_embedding_model.py b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
index 54a4486023..a948dca20d 100644
--- a/api/core/model_runtime/model_providers/__base/text_embedding_model.py
+++ b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
@@ -4,6 +4,7 @@ from typing import Optional
from pydantic import ConfigDict
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel
@@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
model_config = ConfigDict(protected_namespaces=())
def invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
- Invoke large language model
+ Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
self.started_at = time.perf_counter()
try:
- return self._invoke(model, credentials, texts, user)
+ return self._invoke(model, credentials, texts, user, input_type)
except Exception as e:
raise self._transform_invoke_error(e)
@abstractmethod
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
- Invoke large language model
+ Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
raise NotImplementedError
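A minimal sketch of what a provider now implements against the widened signature (the class name and body are illustrative only; the module paths are taken from this diff):

    from typing import Optional

    from core.embedding.embedding_constant import EmbeddingInputType
    from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
    from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel

    class ExampleTextEmbeddingModel(TextEmbeddingModel):
        def _invoke(
            self,
            model: str,
            credentials: dict,
            texts: list[str],
            user: Optional[str] = None,
            input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
        ) -> TextEmbeddingResult:
            # a provider may ignore input_type or map it to a task parameter
            ...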
diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index 1f5f64019a..80db22ea84 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -38,3 +38,5 @@
- perfxcloud
- zhinao
- fireworks
+- mixedbread
+- nomic
diff --git a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
index d9cff8ecbb..8701a38050 100644
--- a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ import numpy as np
import tiktoken
from openai import AzureOpenAI
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_
class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
base_model_name = credentials["base_model_name"]
credentials_kwargs = self._to_credential_kwargs(credentials)
client = AzureOpenAI(**credentials_kwargs)
diff --git a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
index 779dfbb608..56b9be1c36 100644
--- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "http://api.baichuan-ai.com/v1/embeddings"
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
diff --git a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
index 251170d1ae..d9c5726592 100644
--- a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
@@ -13,6 +13,7 @@ from botocore.exceptions import (
UnknownServiceError,
)
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)
class BedrockTextEmbeddingModel(TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
client_config = Config(region_name=credentials["aws_region"])
diff --git a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
index a1c5e98118..4da2080690 100644
--- a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ import cohere
import numpy as np
from cohere.core import RequestOptions
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
index f886fa23b5..cdb87a55e9 100644
--- a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
@@ -15,6 +15,7 @@ help:
en_US: https://fireworks.ai/account/api-keys
supported_model_types:
- llm
+ - text-embedding
configurate_methods:
- predefined-model
provider_credential_schema:
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
new file mode 100644
index 0000000000..31415a24fa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+label:
+ zh_Hans: Llama 3.2 11B Vision Instruct
+ en_US: Llama 3.2 11B Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
new file mode 100644
index 0000000000..c2fd77d256
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-1b-instruct
+label:
+ zh_Hans: Llama 3.2 1B Instruct
+ en_US: Llama 3.2 1B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
new file mode 100644
index 0000000000..4b3c459c7b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-3b-instruct
+label:
+ zh_Hans: Llama 3.2 3B Instruct
+ en_US: Llama 3.2 3B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
new file mode 100644
index 0000000000..0aece7455d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+label:
+ zh_Hans: Llama 3.2 90B Vision Instruct
+ en_US: Llama 3.2 90B Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
new file mode 100644
index 0000000000..d7c11691cf
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
@@ -0,0 +1,12 @@
+model: WhereIsAI/UAE-Large-V1
+label:
+ zh_Hans: UAE-Large-V1
+ en_US: UAE-Large-V1
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/__init__.py b/api/core/model_runtime/model_providers/fireworks/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
new file mode 100644
index 0000000000..d09bafb4d3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
@@ -0,0 +1,12 @@
+model: thenlper/gte-base
+label:
+ zh_Hans: GTE-base
+ en_US: GTE-base
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
new file mode 100644
index 0000000000..c41fa2f9d3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
@@ -0,0 +1,12 @@
+model: thenlper/gte-large
+label:
+ zh_Hans: GTE-large
+ en_US: GTE-large
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
new file mode 100644
index 0000000000..c9098503d9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
@@ -0,0 +1,12 @@
+model: nomic-ai/nomic-embed-text-v1.5
+label:
+ zh_Hans: nomic-embed-text-v1.5
+ en_US: nomic-embed-text-v1.5
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+ max_chunks: 16
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
new file mode 100644
index 0000000000..89078d3ff6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
@@ -0,0 +1,12 @@
+model: nomic-ai/nomic-embed-text-v1
+label:
+ zh_Hans: nomic-embed-text-v1
+ en_US: nomic-embed-text-v1
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+ max_chunks: 16
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..cdce69ff38
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
@@ -0,0 +1,151 @@
+import time
+from collections.abc import Mapping
+from typing import Optional, Union
+
+import numpy as np
+from openai import OpenAI
+
+from core.embedding.embedding_constant import EmbeddingInputType
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
+
+
+class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
+ """
+ Model class for Fireworks text embedding model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
+
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ extra_model_kwargs = {}
+ if user:
+ extra_model_kwargs["user"] = user
+
+ extra_model_kwargs["encoding_format"] = "float"
+
+ context_size = self._get_context_size(model, credentials)
+ max_chunks = self._get_max_chunks(model, credentials)
+
+ inputs = []
+ indices = []
+ used_tokens = 0
+
+ for i, text in enumerate(texts):
+ # Here token count is only an approximation based on the GPT2 tokenizer
+ # TODO: Optimize for better token estimation and chunking
+ num_tokens = self._get_num_tokens_by_gpt2(text)
+
+ if num_tokens >= context_size:
+ cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
+ # if num tokens is larger than context length, only use the start
+ inputs.append(text[0:cutoff])
+ else:
+ inputs.append(text)
+ indices += [i]
+
+ batched_embeddings = []
+ _iter = range(0, len(inputs), max_chunks)
+
+ for i in _iter:
+ embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+ model=model,
+ client=client,
+ texts=inputs[i : i + max_chunks],
+ extra_model_kwargs=extra_model_kwargs,
+ )
+ used_tokens += embedding_used_tokens
+ batched_embeddings += embeddings_batch
+
+ usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
+ return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+ def validate_credentials(self, model: str, credentials: Mapping) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ # transform credentials to kwargs for model instance
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ # call embedding model
+ self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ def _embedding_invoke(
+ self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
+ ) -> tuple[list[list[float]], int]:
+ """
+ Invoke embedding model
+ :param model: model name
+ :param client: model client
+ :param texts: texts to embed
+ :param extra_model_kwargs: extra model kwargs
+ :return: embeddings and used tokens
+ """
+ response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
+ return [data.embedding for data in response.data], response.usage.total_tokens
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ input_price_info = self.get_price(
+ model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
+ )
+
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at,
+ )
+
+ return usage
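A worked example of the proportional truncation in _invoke above (numbers are illustrative, and the GPT-2 count is only an approximation): a 1,000-character text estimated at 640 tokens against a 512-token context keeps roughly the first 512/640 of its characters.

    import numpy as np

    text_len, num_tokens, context_size = 1000, 640, 512
    cutoff = int(np.floor(text_len * (context_size / num_tokens)))
    assert cutoff == 800  # only text[:800] is sent for embedding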
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
new file mode 100644
index 0000000000..d84e9937e0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-001
+label:
+ en_US: Gemini 1.5 Flash 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
new file mode 100644
index 0000000000..2ff70564b2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-002
+label:
+ en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
index bbc697e934..4e0209890a 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
new file mode 100644
index 0000000000..2aea8149f4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-8b-exp-0924
+label:
+ en_US: Gemini 1.5 Flash 8B 0924
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
index c5695e5dda..faabc5e4d1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
index 24b1c5af8a..a22fcca941 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-flash-latest
label:
- en_US: Gemini 1.5 Flash
+ en_US: Gemini 1.5 Flash Latest
model_type: llm
features:
- agent-thought
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
new file mode 100644
index 0000000000..dfd55c3a94
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash
+label:
+ en_US: Gemini 1.5 Flash
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
new file mode 100644
index 0000000000..a1feff171d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro-001
+label:
+ en_US: Gemini 1.5 Pro 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
new file mode 100644
index 0000000000..9ae07a06c5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro-002
+label:
+ en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
index 0a918e0d7b..97c68f7a18 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
index 7452ce46e7..860e4816a1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
index b3e1ecf3af..d1bf7d269d 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-pro-latest
label:
- en_US: Gemini 1.5 Pro
+ en_US: Gemini 1.5 Pro Latest
model_type: llm
features:
- agent-thought
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
new file mode 100644
index 0000000000..bdd70b34a2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro
+label:
+ en_US: Gemini 1.5 Pro
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
index 075e484e46..2d213d56ad 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
@@ -27,6 +27,15 @@ parameter_rules:
default: 4096
min: 1
max: 4096
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
index 4e9f59e7da..e2f487c1ee 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
@@ -31,6 +31,15 @@ parameter_rules:
max: 2048
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/llm.py b/api/core/model_runtime/model_providers/google/llm/llm.py
index 3fc6787a44..e686ad08d9 100644
--- a/api/core/model_runtime/model_providers/google/llm/llm.py
+++ b/api/core/model_runtime/model_providers/google/llm/llm.py
@@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
import google.generativeai as genai
import requests
from google.api_core import exceptions
-from google.generativeai import client
-from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
+from google.generativeai.client import _ClientManager
+from google.generativeai.types import ContentType, GenerateContentResponse
from google.generativeai.types.content_types import to_part
from PIL import Image
@@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
history.append(content)
# Create a new ClientManager with tenant's API key
- new_client_manager = client._ClientManager()
+ new_client_manager = _ClientManager()
new_client_manager.configure(api_key=credentials["google_api_key"])
new_custom_client = new_client_manager.make_client("generative")
google_model._client = new_custom_client
- safety_settings = {
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
- }
-
response = google_model.generate_content(
contents=history,
generation_config=genai.types.GenerationConfig(**config_kwargs),
stream=stream,
- safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
request_options={"timeout": 600},
)
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
new file mode 100644
index 0000000000..019d453723
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-11b-text-preview
+label:
+ zh_Hans: Llama 3.2 11B Text (Preview)
+ en_US: Llama 3.2 11B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
new file mode 100644
index 0000000000..a44e4ff508
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-1b-preview
+label:
+ zh_Hans: Llama 3.2 1B Text (Preview)
+ en_US: Llama 3.2 1B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
new file mode 100644
index 0000000000..f2fdd0a05e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-3b-preview
+label:
+ zh_Hans: Llama 3.2 3B Text (Preview)
+ en_US: Llama 3.2 3B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
new file mode 100644
index 0000000000..3b34e7c079
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-90b-text-preview
+label:
+ zh_Hans: Llama 3.2 90B Text (Preview)
+ en_US: Llama 3.2 90B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
index 4ad96c4233..b2e6d1b652 100644
--- a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ import numpy as np
import requests
from huggingface_hub import HfApi, InferenceClient
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/
class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
client = InferenceClient(token=credentials["huggingfacehub_api_token"])
execute_model = model
diff --git a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
index 55f3c25804..b8ff3ca549 100644
--- a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
@@ -1,6 +1,7 @@
import time
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
index 1396e59e18..75701ebc54 100644
--- a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
@@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/jina/jina.yaml b/api/core/model_runtime/model_providers/jina/jina.yaml
index 23e18ad75f..970b22965b 100644
--- a/api/core/model_runtime/model_providers/jina/jina.yaml
+++ b/api/core/model_runtime/model_providers/jina/jina.yaml
@@ -1,6 +1,6 @@
provider: jina
label:
- en_US: Jina
+ en_US: Jina AI
description:
en_US: Embedding and Rerank Model Supported
icon_small:
@@ -11,7 +11,7 @@ background: "#EFFDFD"
help:
title:
en_US: Get your API key from Jina AI
- zh_Hans: 从 Jina 获取 API Key
+ zh_Hans: 从 Jina AI 获取 API Key
url:
en_US: https://jina.ai/
supported_model_types:
diff --git a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
index ceb79567d5..b397129512 100644
--- a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -27,8 +28,37 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.jina.ai/v1"
+ def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
+ """
+ Build the request payload for the embeddings endpoint, applying model-specific input formats
+
+ :param model: model name
+ :param texts: texts to embed
+ :param credentials: model credentials
+ :param input_type: input type (query or document)
+ :return: request payload dict
+ """
+
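+ # jina-clip-v1 is multimodal, so plain strings must be wrapped as {"text": ...} objects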
+ def transform_jina_input_text(model, text):
+ if model == "jina-clip-v1":
+ return {"text": text}
+ return text
+
+ data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
+
+ # model specific parameters
+ if model == "jina-embeddings-v3":
+ # set `task` type according to input type for the best performance
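+ # jina-embeddings-v3 embeds queries and passages asymmetrically via task-specific adapters, so pick the matching task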
+ data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
+
+ return data
+
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -37,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
@@ -49,15 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
url = base_url + "/embeddings"
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
- def transform_jina_input_text(model, text):
- if model == "jina-clip-v1":
- return {"text": text}
- return text
-
- data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
-
- if model == "jina-embeddings-v3":
- data["task"] = "text-matching"
+ data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)
try:
response = post(url, headers=headers, data=dumps(data))
diff --git a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
index 7d258be81e..ab8ca76c2f 100644
--- a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional
from requests import post
from yarl import URL
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE
class LocalAITextEmbeddingModel(TextEmbeddingModel):
"""
- Model class for Jina text embedding model.
+ Model class for LocalAI text embedding model.
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
if len(texts) != 1:
diff --git a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
index 76fd1342bd..74d2a221d1 100644
--- a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.minimax.chat/v1/embeddings"
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["minimax_api_key"]
diff --git a/api/core/model_runtime/model_providers/mixedbread/__init__.py b/api/core/model_runtime/model_providers/mixedbread/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/mixedbread/_assets/icon_l_en.png b/api/core/model_runtime/model_providers/mixedbread/_assets/icon_l_en.png
new file mode 100644
index 0000000000..2027611bd5
Binary files /dev/null and b/api/core/model_runtime/model_providers/mixedbread/_assets/icon_l_en.png differ
diff --git a/api/core/model_runtime/model_providers/mixedbread/_assets/icon_s_en.png b/api/core/model_runtime/model_providers/mixedbread/_assets/icon_s_en.png
new file mode 100644
index 0000000000..5c357bddbd
Binary files /dev/null and b/api/core/model_runtime/model_providers/mixedbread/_assets/icon_s_en.png differ
diff --git a/api/core/model_runtime/model_providers/mixedbread/mixedbread.py b/api/core/model_runtime/model_providers/mixedbread/mixedbread.py
new file mode 100644
index 0000000000..3c78150e6f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/mixedbread.py
@@ -0,0 +1,27 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class MixedBreadProvider(ModelProvider):
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+ if validate failed, raise exception
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
+
+ # Use the `mxbai-embed-large-v1` model for validation
+ model_instance.validate_credentials(model="mxbai-embed-large-v1", credentials=credentials)
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f"{self.get_provider_schema().provider} credentials validation failed")
+ raise ex
diff --git a/api/core/model_runtime/model_providers/mixedbread/mixedbread.yaml b/api/core/model_runtime/model_providers/mixedbread/mixedbread.yaml
new file mode 100644
index 0000000000..2f43aea6ad
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/mixedbread.yaml
@@ -0,0 +1,31 @@
+provider: mixedbread
+label:
+ en_US: MixedBread
+description:
+ en_US: Embedding and Rerank Model Supported
+icon_small:
+ en_US: icon_s_en.png
+icon_large:
+ en_US: icon_l_en.png
+background: "#EFFDFD"
+help:
+ title:
+ en_US: Get your API key from MixedBread AI
+ zh_Hans: 从 MixedBread AI 获取 API Key
+ url:
+ en_US: https://www.mixedbread.ai/
+supported_model_types:
+ - text-embedding
+ - rerank
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/mixedbread/rerank/__init__.py b/api/core/model_runtime/model_providers/mixedbread/rerank/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/mixedbread/rerank/mxbai-rerank-large-v1-en.yaml b/api/core/model_runtime/model_providers/mixedbread/rerank/mxbai-rerank-large-v1-en.yaml
new file mode 100644
index 0000000000..beda219953
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/rerank/mxbai-rerank-large-v1-en.yaml
@@ -0,0 +1,4 @@
+model: mxbai-rerank-large-v1
+model_type: rerank
+model_properties:
+ context_size: 512
diff --git a/api/core/model_runtime/model_providers/mixedbread/rerank/rerank.py b/api/core/model_runtime/model_providers/mixedbread/rerank/rerank.py
new file mode 100644
index 0000000000..bf3c12fd86
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/rerank/rerank.py
@@ -0,0 +1,125 @@
+from typing import Optional
+
+import httpx
+
+from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
+from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.rerank_model import RerankModel
+
+
+class MixedBreadRerankModel(RerankModel):
+ """
+ Model class for MixedBread rerank model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ query: str,
+ docs: list[str],
+ score_threshold: Optional[float] = None,
+ top_n: Optional[int] = None,
+ user: Optional[str] = None,
+ ) -> RerankResult:
+ """
+ Invoke rerank model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param query: search query
+ :param docs: docs for reranking
+ :param score_threshold: score threshold
+ :param top_n: top n documents to return
+ :param user: unique user id
+ :return: rerank result
+ """
+ if len(docs) == 0:
+ return RerankResult(model=model, docs=[])
+
+ base_url = credentials.get("base_url", "https://api.mixedbread.ai/v1")
+ base_url = base_url.removesuffix("/")
+
+ try:
+ response = httpx.post(
+ base_url + "/reranking",
+ json={"model": model, "query": query, "input": docs, "top_k": top_n, "return_input": True},
+ headers={"Authorization": f"Bearer {credentials.get('api_key')}", "Content-Type": "application/json"},
+ )
+ response.raise_for_status()
+ results = response.json()
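+ # return_input=True makes the API echo each document back, so result["input"] below carries the original text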
+
+ rerank_documents = []
+ for result in results["data"]:
+ rerank_document = RerankDocument(
+ index=result["index"],
+ text=result["input"],
+ score=result["score"],
+ )
+ if score_threshold is None or result["score"] >= score_threshold:
+ rerank_documents.append(rerank_document)
+
+ return RerankResult(model=model, docs=rerank_documents)
+ except httpx.HTTPStatusError as e:
+ raise InvokeServerUnavailableError(str(e))
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(
+ model=model,
+ credentials=credentials,
+ query="What is the capital of the United States?",
+ docs=[
+ "Carson City is the capital city of the American state of Nevada. At the 2010 United States "
+ "Census, Carson City had a population of 55,274.",
+ "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
+ "are a political division controlled by the United States. Its capital is Saipan.",
+ ],
+ score_threshold=0.8,
+ )
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ """
+ return {
+ InvokeConnectionError: [httpx.ConnectError],
+ InvokeServerUnavailableError: [httpx.RemoteProtocolError],
+ InvokeRateLimitError: [],
+ InvokeAuthorizationError: [httpx.HTTPStatusError],
+ InvokeBadRequestError: [httpx.RequestError],
+ }
+
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
+ """
+ generate custom model entities from credentials
+ """
+ entity = AIModelEntity(
+ model=model,
+ label=I18nObject(en_US=model),
+ model_type=ModelType.RERANK,
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
+ )
+
+ return entity
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/__init__.py b/api/core/model_runtime/model_providers/mixedbread/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-2d-large-v1-en.yaml b/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-2d-large-v1-en.yaml
new file mode 100644
index 0000000000..0c3c863d06
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-2d-large-v1-en.yaml
@@ -0,0 +1,8 @@
+model: mxbai-embed-2d-large-v1
+model_type: text-embedding
+model_properties:
+ context_size: 512
+pricing:
+ input: '0.0001'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-large-v1-en.yaml b/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-large-v1-en.yaml
new file mode 100644
index 0000000000..0c5cda2a72
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-large-v1-en.yaml
@@ -0,0 +1,8 @@
+model: mxbai-embed-large-v1
+model_type: text-embedding
+model_properties:
+ context_size: 512
+pricing:
+ input: '0.0001'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..68b7b448bf
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
@@ -0,0 +1,170 @@
+import time
+from json import JSONDecodeError, dumps
+from typing import Optional
+
+import requests
+
+from core.embedding.embedding_constant import EmbeddingInputType
+from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+
+
+class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
+ """
+ Model class for MixedBread text embedding model.
+ """
+
+ api_base: str = "https://api.mixedbread.ai/v1"
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
+ api_key = credentials.get("api_key")
+ if not api_key:
+ raise CredentialsValidateFailedError("api_key is required")
+
+ base_url = credentials.get("base_url", self.api_base)
+ base_url = base_url.removesuffix("/")
+
+ url = base_url + "/embeddings"
+ headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
+
+ data = {"model": model, "input": texts}
+
+ try:
+ response = requests.post(url, headers=headers, data=dumps(data))
+ except Exception as e:
+ raise InvokeConnectionError(str(e))
+
+ if response.status_code != 200:
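+ # map HTTP error codes onto the unified Invoke* error hierarchy; the error body carries a "detail" field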
+ try:
+ resp = response.json()
+ msg = resp["detail"]
+ if response.status_code == 401:
+ raise InvokeAuthorizationError(msg)
+ elif response.status_code == 429:
+ raise InvokeRateLimitError(msg)
+ elif response.status_code == 500:
+ raise InvokeServerUnavailableError(msg)
+ else:
+ raise InvokeBadRequestError(msg)
+ except JSONDecodeError as e:
+ raise InvokeServerUnavailableError(
+ f"Failed to convert response to json: {e} with text: {response.text}"
+ )
+
+ try:
+ resp = response.json()
+ embeddings = resp["data"]
+ usage = resp["usage"]
+ except Exception as e:
+ raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
+
+ usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
+
+ result = TextEmbeddingResult(
+ model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
+ )
+
+ return result
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(model=model, credentials=credentials, texts=["ping"])
+ except Exception as e:
+ raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ return {
+ InvokeConnectionError: [InvokeConnectionError],
+ InvokeServerUnavailableError: [InvokeServerUnavailableError],
+ InvokeRateLimitError: [InvokeRateLimitError],
+ InvokeAuthorizationError: [InvokeAuthorizationError],
+ InvokeBadRequestError: [KeyError, InvokeBadRequestError],
+ }
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ # get input price info
+ input_price_info = self.get_price(
+ model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
+ )
+
+ # transform usage
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at,
+ )
+
+ return usage
+
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
+ """
+ generate custom model entities from credentials
+ """
+ entity = AIModelEntity(
+ model=model,
+ label=I18nObject(en_US=model),
+ model_type=ModelType.TEXT_EMBEDDING,
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
+ )
+
+ return entity
diff --git a/api/core/model_runtime/model_providers/nomic/__init__.py b/api/core/model_runtime/model_providers/nomic/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg
new file mode 100644
index 0000000000..6c4a1058ab
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg
@@ -0,0 +1,13 @@
+
diff --git a/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png b/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png
new file mode 100644
index 0000000000..3eba3b82bc
Binary files /dev/null and b/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png differ
diff --git a/api/core/model_runtime/model_providers/nomic/_common.py b/api/core/model_runtime/model_providers/nomic/_common.py
new file mode 100644
index 0000000000..406577dcd7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/_common.py
@@ -0,0 +1,28 @@
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+
+
+class _CommonNomic:
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ The key is the error type thrown to the caller
+ The value is the error type thrown by the model,
+ which needs to be converted into a unified error type for the caller.
+
+ :return: Invoke error mapping
+ """
+ return {
+ InvokeConnectionError: [InvokeConnectionError],
+ InvokeServerUnavailableError: [InvokeServerUnavailableError],
+ InvokeRateLimitError: [InvokeRateLimitError],
+ InvokeAuthorizationError: [InvokeAuthorizationError],
+ InvokeBadRequestError: [KeyError, InvokeBadRequestError],
+ }
diff --git a/api/core/model_runtime/model_providers/nomic/nomic.py b/api/core/model_runtime/model_providers/nomic/nomic.py
new file mode 100644
index 0000000000..d4e5da2e98
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/nomic.py
@@ -0,0 +1,26 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class NomicAtlasProvider(ModelProvider):
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+
+ if validate failed, raise exception
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
+ model_instance.validate_credentials(model="nomic-embed-text-v1.5", credentials=credentials)
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f"{self.get_provider_schema().provider} credentials validation failed")
+ raise ex
diff --git a/api/core/model_runtime/model_providers/nomic/nomic.yaml b/api/core/model_runtime/model_providers/nomic/nomic.yaml
new file mode 100644
index 0000000000..60dcf1facb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/nomic.yaml
@@ -0,0 +1,29 @@
+provider: nomic
+label:
+ zh_Hans: Nomic Atlas
+ en_US: Nomic Atlas
+icon_small:
+ en_US: icon_s_en.png
+icon_large:
+ en_US: icon_l_en.svg
+background: "#EFF1FE"
+help:
+ title:
+ en_US: Get your API key from Nomic Atlas
+ zh_Hans: 从 Nomic Atlas 获取 API Key
+ url:
+ en_US: https://atlas.nomic.ai/data
+supported_model_types:
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: nomic_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py b/api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml
new file mode 100644
index 0000000000..111452df57
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml
@@ -0,0 +1,8 @@
+model: nomic-embed-text-v1.5
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+pricing:
+ input: "0.1"
+ unit: "0.000001"
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml
new file mode 100644
index 0000000000..ac59f106ed
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml
@@ -0,0 +1,8 @@
+model: nomic-embed-text-v1
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+pricing:
+ input: "0.1"
+ unit: "0.000001"
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..857dfb5f41
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
@@ -0,0 +1,165 @@
+import time
+from functools import wraps
+from typing import Optional
+
+from nomic import embed
+from nomic import login as nomic_login
+
+from core.embedding.embedding_constant import EmbeddingInputType
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import (
+ EmbeddingUsage,
+ TextEmbeddingResult,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import (
+ TextEmbeddingModel,
+)
+from core.model_runtime.model_providers.nomic._common import _CommonNomic
+
+
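+# decorator that authenticates with the Nomic API (via nomic.login) before the wrapped call runs, failing fast on bad credentials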
+def nomic_login_required(func):
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ try:
+ if not kwargs.get("credentials"):
+ raise ValueError("missing credentials parameters")
+ credentials = kwargs.get("credentials")
+ if "nomic_api_key" not in credentials:
+ raise ValueError("missing nomic_api_key in credentials parameters")
+ # nomic login
+ nomic_login(credentials["nomic_api_key"])
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+ return func(*args, **kwargs)
+
+ return wrapper
+
+
+class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
+ """
+ Model class for nomic text embedding model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
+ embeddings, prompt_tokens, total_tokens = self.embed_text(
+ model=model,
+ credentials=credentials,
+ texts=texts,
+ )
+
+ # calc usage
+ usage = self._calc_response_usage(
+ model=model, credentials=credentials, tokens=prompt_tokens, total_tokens=total_tokens
+ )
+ return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ # call embedding model
+ self.embed_text(model=model, credentials=credentials, texts=["ping"])
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ @nomic_login_required
+ def embed_text(self, model: str, credentials: dict, texts: list[str]) -> tuple[list[list[float]], int, int]:
+ """Call out to Nomic's embedding endpoint.
+
+ Args:
+ model: The model to use for embedding.
+ texts: The list of texts to embed.
+
+ Returns:
+ List of embeddings, one for each text, and tokens usage.
+ """
+ embeddings: list[list[float]] = []
+ prompt_tokens = 0
+ total_tokens = 0
+
+ response = embed.text(
+ model=model,
+ texts=texts,
+ )
+
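+ # validate the response shape before extracting embeddings and token usage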
+ if not (response and "embeddings" in response):
+ raise ValueError("Embedding data is missing in the response.")
+
+ if not (response and "usage" in response):
+ raise ValueError("Response usage is missing.")
+
+ if "prompt_tokens" not in response["usage"]:
+ raise ValueError("Response usage does not contain prompt tokens.")
+
+ if "total_tokens" not in response["usage"]:
+ raise ValueError("Response usage does not contain total tokens.")
+
+ embeddings = [list(map(float, e)) for e in response["embeddings"]]
+ total_tokens = response["usage"]["total_tokens"]
+ prompt_tokens = response["usage"]["prompt_tokens"]
+ return embeddings, prompt_tokens, total_tokens
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int, total_tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: prompt tokens
+ :param total_tokens: total tokens
+ :return: usage
+ """
+ # get input price info
+ input_price_info = self.get_price(
+ model=model,
+ credentials=credentials,
+ price_type=PriceType.INPUT,
+ tokens=tokens,
+ )
+
+ # transform usage
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=total_tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at,
+ )
+
+ return usage
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
index 00cec265d5..936ceb8dd2 100644
--- a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
models: list[str] = ["NV-Embed-QA"]
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
diff --git a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
index 80ad2be9f5..4de9296cca 100644
--- a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ from typing import Optional
import numpy as np
import oci
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index ff732e6925..a7ea53e0e9 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
if chunk_json["done"]:
# calculate num tokens
- if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
- # transform usage
+ if "prompt_eval_count" in chunk_json:
prompt_tokens = chunk_json["prompt_eval_count"]
- completion_tokens = chunk_json["eval_count"]
else:
- # calculate num tokens
- prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
- completion_tokens = self._get_num_tokens_by_gpt2(full_text)
+ prompt_message_content = prompt_messages[0].content
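+ # content may be a plain string or a list of typed parts (e.g. text and image) for multimodal prompts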
+ if isinstance(prompt_message_content, str):
+ prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
+ else:
+ content_text = ""
+ for message_content in prompt_message_content:
+ if message_content.type == PromptMessageContentType.TEXT:
+ message_content = cast(TextPromptMessageContent, message_content)
+ content_text += message_content.data
+ prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
+
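+ # fall back to a GPT-2 tokenizer estimate when Ollama omits eval_count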
+ completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
diff --git a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
index b4c61d8a6d..5cf3f1c6fa 100644
--- a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
@@ -8,6 +8,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
index 535d8388bc..16f1a0cfa1 100644
--- a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ import numpy as np
import tiktoken
from openai import OpenAI
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -19,7 +20,12 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# transform credentials to kwargs for model instance
diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
index e83cfdf873..64fa6aaa3c 100644
--- a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
index 00e583cc79..c5d4330912 100644
--- a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional
from requests import post
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
index b62a2d2aaf..1e86f351c8 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
index 71b6fb99c4..9f724a77ac 100644
--- a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from replicate import Client as ReplicateClient
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat
class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
if "model_version" in credentials:
diff --git a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
index 2edd13d56d..97b7692044 100644
--- a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
+++ b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
@@ -84,9 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
Model class for Cohere large language model.
"""
- sagemaker_client: Any = None
- sagemaker_sess: Any = None
+ sagemaker_session: Any = None
predictor: Any = None
+ sagemaker_endpoint: Optional[str] = None
def _handle_chat_generate_response(
self,
@@ -212,27 +212,29 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
:param user: unique user id
:return: full response or stream response chunk generator result
"""
- if not self.sagemaker_client:
- access_key = credentials.get("access_key")
- secret_key = credentials.get("secret_key")
+ if not self.sagemaker_session:
+ access_key = credentials.get("aws_access_key_id")
+ secret_key = credentials.get("aws_secret_access_key")
aws_region = credentials.get("aws_region")
+ boto_session = None
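+ # build a boto3 Session from explicit keys when provided; otherwise fall back to the default credential chain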
if aws_region:
if access_key and secret_key:
- self.sagemaker_client = boto3.client(
- "sagemaker-runtime",
- aws_access_key_id=access_key,
- aws_secret_access_key=secret_key,
- region_name=aws_region,
+ boto_session = boto3.Session(
+ aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=aws_region
)
else:
- self.sagemaker_client = boto3.client("sagemaker-runtime", region_name=aws_region)
+ boto_session = boto3.Session(region_name=aws_region)
else:
- self.sagemaker_client = boto3.client("sagemaker-runtime")
+ boto_session = boto3.Session()
- sagemaker_session = Session(sagemaker_runtime_client=self.sagemaker_client)
+ sagemaker_client = boto_session.client("sagemaker")
+ self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
+
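+ # cache the Predictor per endpoint and rebuild it only when the configured endpoint changes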
+ if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
+ self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
self.predictor = Predictor(
- endpoint_name=credentials.get("sagemaker_endpoint"),
- sagemaker_session=sagemaker_session,
+ endpoint_name=self.sagemaker_endpoint,
+ sagemaker_session=self.sagemaker_session,
serializer=serializers.JSONSerializer(),
)
diff --git a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
index d55144f8a7..8f993ce672 100644
--- a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ from typing import Any, Optional
import boto3
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
return embeddings
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index 43db4aed11..a3e5d0981f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,25 +1,38 @@
-- Qwen/Qwen2.5-7B-Instruct
-- Qwen/Qwen2.5-14B-Instruct
-- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-72B-Instruct
+- Qwen/Qwen2.5-Math-72B-Instruct
+- Qwen/Qwen2.5-32B-Instruct
+- Qwen/Qwen2.5-14B-Instruct
+- Qwen/Qwen2.5-7B-Instruct
+- Qwen/Qwen2.5-Coder-7B-Instruct
+- deepseek-ai/DeepSeek-V2.5
- Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct
-- 01-ai/Yi-1.5-34B-Chat
-- 01-ai/Yi-1.5-9B-Chat-16K
-- 01-ai/Yi-1.5-6B-Chat
-- THUDM/glm-4-9b-chat
-- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct
+- THUDM/glm-4-9b-chat
+- THUDM/chatglm3-6b
+- 01-ai/Yi-1.5-34B-Chat-16K
+- 01-ai/Yi-1.5-9B-Chat-16K
+- 01-ai/Yi-1.5-6B-Chat
+- internlm/internlm2_5-20b-chat
- internlm/internlm2_5-7b-chat
-- google/gemma-2-27b-it
-- google/gemma-2-9b-it
-- meta-llama/Meta-Llama-3-70B-Instruct
-- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3.1-405B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct
-- mistralai/Mixtral-8x7B-Instruct-v0.1
+- meta-llama/Meta-Llama-3-70B-Instruct
+- meta-llama/Meta-Llama-3-8B-Instruct
+- google/gemma-2-27b-it
+- google/gemma-2-9b-it
- mistralai/Mistral-7B-Instruct-v0.2
+- Pro/Qwen/Qwen2-7B-Instruct
+- Pro/Qwen/Qwen2-1.5B-Instruct
+- Pro/THUDM/glm-4-9b-chat
+- Pro/THUDM/chatglm3-6b
+- Pro/01-ai/Yi-1.5-9B-Chat-16K
+- Pro/01-ai/Yi-1.5-6B-Chat
+- Pro/internlm/internlm2_5-7b-chat
+- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
+- Pro/meta-llama/Meta-Llama-3-8B-Instruct
+- Pro/google/gemma-2-9b-it
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
index 27664eab6c..89fb153ba0 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@@ -28,3 +28,4 @@ pricing:
output: '0'
unit: '0.000001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
index fd7aada428..2785e7496f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@@ -28,3 +28,4 @@ pricing:
output: '1.26'
unit: '0.000001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
index 6cdf4933b4..c5dcc12610 100644
--- a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
@@ -1,5 +1,6 @@
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
OAICompatEmbeddingModel,
@@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
super().validate_credentials(model, credentials)
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
self._add_custom_parameters(credentials)
-        return super()._invoke(model, credentials, texts, user)
+        return super()._invoke(model, credentials, texts, user, input_type)
diff --git a/api/core/model_runtime/model_providers/spark/llm/_client.py b/api/core/model_runtime/model_providers/spark/llm/_client.py
index b99a657e71..48911f657a 100644
--- a/api/core/model_runtime/model_providers/spark/llm/_client.py
+++ b/api/core/model_runtime/model_providers/spark/llm/_client.py
@@ -25,6 +25,7 @@ class SparkLLMClient:
"spark-pro": {"version": "v3.1", "chat_domain": "generalv3"},
"spark-pro-128k": {"version": "pro-128k", "chat_domain": "pro-128k"},
"spark-max": {"version": "v3.5", "chat_domain": "generalv3.5"},
+ "spark-max-32k": {"version": "max-32k", "chat_domain": "max-32k"},
"spark-4.0-ultra": {"version": "v4.0", "chat_domain": "4.0Ultra"},
}
@@ -32,7 +33,7 @@ class SparkLLMClient:
self.chat_domain = model_api_configs[model]["chat_domain"]
- if model == "spark-pro-128k":
+ if model in ["spark-pro-128k", "spark-max-32k"]:
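+ # pro-128k and max-32k use a /{endpoint}/{version} path, the reverse of the other models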
self.api_base = f"wss://{domain}/{endpoint}/{api_version}"
else:
self.api_base = f"wss://{domain}/{api_version}/{endpoint}"
diff --git a/api/core/model_runtime/model_providers/spark/llm/_position.yaml b/api/core/model_runtime/model_providers/spark/llm/_position.yaml
index 458397f2aa..73f39cb119 100644
--- a/api/core/model_runtime/model_providers/spark/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/spark/llm/_position.yaml
@@ -1,3 +1,4 @@
+- spark-max-32k
- spark-4.0-ultra
- spark-max
- spark-pro-128k
diff --git a/api/core/model_runtime/model_providers/spark/llm/llm.py b/api/core/model_runtime/model_providers/spark/llm/llm.py
index 57193dc031..1181ba699a 100644
--- a/api/core/model_runtime/model_providers/spark/llm/llm.py
+++ b/api/core/model_runtime/model_providers/spark/llm/llm.py
@@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
:param prompt_messages: prompt messages
:return: llm response chunk generator result
"""
+ completion = ""
for index, content in enumerate(client.subscribe()):
if isinstance(content, dict):
delta = content["data"]
else:
delta = content
-
+ completion += delta
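+ # accumulate the full completion so token usage is counted over everything generated so far, not just the latest delta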
assistant_prompt_message = AssistantPromptMessage(
content=delta or "",
)
-
+ temp_assistant_prompt_message = AssistantPromptMessage(
+ content=completion,
+ )
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
- completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+ completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml b/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml
new file mode 100644
index 0000000000..1a1ab6844c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml
@@ -0,0 +1,33 @@
+model: spark-max-32k
+label:
+ en_US: Spark Max-32K
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Nucleus sampling threshold. Controls the randomness of the output: the higher the value, the more random the results, and the more likely the same question yields different answers.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
index aad07f5673..34a57d1fc0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: farui-plus
label:
en_US: farui-plus
@@ -62,16 +63,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index f90c7f075f..3e3585b30a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -18,7 +18,7 @@ from dashscope.common.error import (
UnsupportedModel,
)
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
ImagePromptMessageContent,
@@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
FetchFrom,
I18nObject,
ModelFeature,
+ ModelPropertyKey,
ModelType,
ParameterRule,
ParameterType,
@@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param tools: tools for tool calling
:return:
"""
+ # Check if the model was added via get_customizable_model_schema
+ if self.get_customizable_model_schema(model, credentials) is not None:
+ # For custom models, tokens are not calculated.
+ return 0
+
if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
model = model.replace("-chat", "")
if model == "farui-plus":
@@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param credentials: model credentials
:return: AIModelEntity or None
"""
- rules = [
- ParameterRule(
- name="temperature",
- type=ParameterType.FLOAT,
- use_template="temperature",
- label=I18nObject(zh_Hans="温度", en_US="Temperature"),
- ),
- ParameterRule(
- name="top_p",
- type=ParameterType.FLOAT,
- use_template="top_p",
- label=I18nObject(zh_Hans="Top P", en_US="Top P"),
- ),
- ParameterRule(
- name="top_k",
- type=ParameterType.INT,
- min=0,
- max=99,
- label=I18nObject(zh_Hans="top_k", en_US="top_k"),
- ),
- ParameterRule(
- name="max_tokens",
- type=ParameterType.INT,
- min=1,
- max=128000,
- default=1024,
- label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
- ),
- ParameterRule(
- name="seed",
- type=ParameterType.INT,
- default=1234,
- label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
- ),
- ParameterRule(
- name="repetition_penalty",
- type=ParameterType.FLOAT,
- default=1.1,
- label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
- ),
- ]
-
- entity = AIModelEntity(
+ return AIModelEntity(
model=model,
- label=I18nObject(en_US=model),
- fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ label=I18nObject(en_US=model, zh_Hans=model),
model_type=ModelType.LLM,
- model_properties={},
- parameter_rules=rules,
+ features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
+ if credentials.get("function_calling_type") == "tool_call"
+ else [],
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={
+ ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
+ ModelPropertyKey.MODE: LLMMode.CHAT.value,
+ },
+ parameter_rules=[
+ ParameterRule(
+ name="temperature",
+ use_template="temperature",
+ label=I18nObject(en_US="Temperature", zh_Hans="温度"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="max_tokens",
+ use_template="max_tokens",
+ default=512,
+ min=1,
+ max=int(credentials.get("max_tokens", 1024)),
+ label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
+ type=ParameterType.INT,
+ ),
+ ParameterRule(
+ name="top_p",
+ use_template="top_p",
+ label=I18nObject(en_US="Top P", zh_Hans="Top P"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="top_k",
+ use_template="top_k",
+ label=I18nObject(en_US="Top K", zh_Hans="Top K"),
+ type=ParameterType.INT,
+ ),
+ ParameterRule(
+ name="frequency_penalty",
+ use_template="frequency_penalty",
+ label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
+ type=ParameterType.FLOAT,
+ ),
+ ],
)
-
- return entity
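A short sketch of how the credential fields consumed above could shape a custom model's schema; it mirrors the diff's logic with plain dicts, so everything beyond the keys context_size, max_tokens, and function_calling_type is illustrative:

    # derive schema fragments from user-supplied credentials,
    # defaulting as the diff does (context_size 8000, max_tokens 1024)
    def schema_hints(credentials: dict) -> dict:
        features = (
            ["tool-call", "multi-tool-call", "stream-tool-call"]
            if credentials.get("function_calling_type") == "tool_call"
            else []
        )
        return {
            "context_size": int(credentials.get("context_size", 8000)),
            "max_tokens_cap": int(credentials.get("max_tokens", 1024)),
            "features": features,
        }

    print(schema_hints({"function_calling_type": "tool_call", "context_size": "32000"}))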
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
index ebba565d57..64a3f33133 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-0919
label:
en_US: qwen-coder-turbo-0919
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
index 361e2c2373..a4c93f7047 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-latest
label:
en_US: qwen-coder-turbo-latest
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
index f4032a4dd3..ff68faed80 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo
label:
en_US: qwen-coder-turbo
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
index dbe7d024a5..c3dbb3616f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
@@ -1,4 +1,4 @@
-# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-long
label:
en_US: qwen-long
@@ -63,16 +63,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
index 89d1302abe..42fe1f6862 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0816
label:
en_US: qwen-math-plus-0816
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
index 032b3c970d..9b6567b8cd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0919
label:
en_US: qwen-math-plus-0919
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
index 31dd9f6972..b2a2393b36 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-latest
label:
en_US: qwen-math-plus-latest
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
index 1a51d57f78..63f4b7ff0a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus
label:
en_US: qwen-math-plus
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
index 1894eea417..4da90eec3e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-0919
label:
en_US: qwen-math-turbo-0919
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
index b8365618b0..d29f8851dd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-latest
label:
en_US: qwen-math-turbo-latest
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
index 8d346d691e..2a8f7f725e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo
label:
en_US: qwen-math-turbo
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
index c0ad12b85e..ef1841b517 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0107
label:
en_US: qwen-max-0107
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
index b00fb44d29..a2ea5df130 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0403, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0403
label:
en_US: qwen-max-0403
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
index 1848dcc07d..a467665f11 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0428, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0428
label:
en_US: qwen-max-0428
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
index 238882bb12..78661eaea0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0919
label:
en_US: qwen-max-0919
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
index dc234783cd..6f4674576b 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-1201
label:
en_US: qwen-max-1201
@@ -66,12 +68,6 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
index 9d7d3c2fcb..8b5f005473 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-latest
label:
en_US: qwen-max-latest
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
index a7bdc42f73..098494ff95 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-longcontext
label:
en_US: qwen-max-longcontext
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
index 57888406af..9d0d3f8db3 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max
label:
en_US: qwen-max
@@ -62,6 +64,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +72,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
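A hedged usage sketch: with DashScope's Python SDK, enable_search is passed through as a generation parameter; the call shape below assumes the common Generation.call interface and a configured API key:

    import dashscope

    # toggle the built-in web search; per the help text above, the model
    # still decides internally whether to use the retrieved results
    response = dashscope.Generation.call(
        model="qwen-max",
        messages=[{"role": "user", "content": "What changed in Qwen recently?"}],
        enable_search=True,
        result_format="message",
    )
    print(response)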
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
index 1e0b816617..0b1a6f81df 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0206, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0206
label:
en_US: qwen-plus-0206
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
index f70c373922..7706005bb5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0624, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0624
label:
en_US: qwen-plus-0624
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
index c6007e9164..348276fc08 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0723, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0723
label:
en_US: qwen-plus-0723
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
index 2f53c43336..29f125135e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0806, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0806
label:
en_US: qwen-plus-0806
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
index 90b54ca52e..905fa1e102 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0919
label:
en_US: qwen-plus-0919
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
index 59e8851240..c7a3549727 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-chat
label:
en_US: qwen-plus-chat
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
index 2a821dbcfe..608f52c296 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-latest, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-latest
label:
en_US: qwen-plus-latest
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
index 626884f4b2..9089e57255 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus
label:
en_US: qwen-plus
@@ -62,6 +64,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +72,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
index 844fced77a..7ee0d44f2f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
@@ -1,3 +1,6 @@
+# this model corresponds to qwen-turbo-0206, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
+
model: qwen-turbo-0206
label:
en_US: qwen-turbo-0206
@@ -60,16 +63,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
index 0152f75579..20a3f7eb64 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0624, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0624
label:
en_US: qwen-turbo-0624
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
index 19c6c8d293..ba73dec363 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0919
label:
en_US: qwen-turbo-0919
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
index f557f311ef..d785b7fe85 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-chat
label:
en_US: qwen-turbo-chat
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
index be2475847e..fe38a4283c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-latest, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-latest
label:
en_US: qwen-turbo-latest
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
index 90f13dc19f..215c9ec5fc 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo
label:
en_US: qwen-turbo
@@ -62,6 +64,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +72,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
index 63b6074d0d..d80168ffc3 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max-0201
label:
en_US: qwen-vl-max-0201
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
index 41d45966e9..50e10226a5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max-0809
label:
en_US: qwen-vl-max-0809
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 32000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse output, while a lower temperature sharpens the distribution, making high-probability words more likely and the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
pricing:
input: '0.02'
output: '0.02'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
index 78d0509374..21b127f56c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max
label:
en_US: qwen-vl-max
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 32000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how strongly the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, letting more low-probability tokens through and making the output more diverse; a lower temperature sharpens the distribution, favoring high-probability tokens and making the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output; 1.0 means no penalty.
pricing:
input: '0.02'
output: '0.02'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
index 8944388b1e..03cb039d15 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus-0201
label:
en_US: qwen-vl-plus-0201
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 8000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how strongly the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, letting more low-probability tokens through and making the output more diverse; a lower temperature sharpens the distribution, favoring high-probability tokens and making the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output; 1.0 means no penalty.
pricing:
input: '0.02'
output: '0.02'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
index 869e0ea71c..67b2b2ebdd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus-0809
label:
en_US: qwen-vl-plus-0809
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 32768
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how strongly the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, letting more low-probability tokens through and making the output more diverse; a lower temperature sharpens the distribution, favoring high-probability tokens and making the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output; 1.0 means no penalty.
pricing:
input: '0.008'
output: '0.008'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
index da11bacc64..f55764c6c0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus
label:
en_US: qwen-vl-plus
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 8000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how strongly the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, letting more low-probability tokens through and making the output more diverse; a lower temperature sharpens the distribution, favoring high-probability tokens and making the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output; 1.0 means no penalty.
pricing:
input: '0.008'
output: '0.008'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
index cfe4b5a666..ea157f42de 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-1.5b-instruct
label:
en_US: qwen2-math-1.5b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
index e541c197b0..37052a9233 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-72b-instruct
label:
en_US: qwen2-math-72b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
index ba4514e3d6..e182f1c27f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-7b-instruct
label:
en_US: qwen2-math-7b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
index e5596041af..9e75ccc1f2 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-0.5b-instruct
label:
en_US: qwen2.5-0.5b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
index 4004c59417..67c9d31243 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-1.5b-instruct
label:
en_US: qwen2.5-1.5b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
index d8f53666ce..2a38be921c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-14b-instruct
label:
en_US: qwen2.5-14b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
index 890f7e6e4e..e6e4fbf978 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-32b-instruct
label:
en_US: qwen2.5-32b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
index 6d3d2dd5bb..8f250379a7 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-3b-instruct
label:
en_US: qwen2.5-3b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
index 17d0eb5b35..bb3cdd6141 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-72b-instruct
label:
en_US: qwen2.5-72b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
index 435b3f90a2..fdcd3d4275 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-7b-instruct
label:
en_US: qwen2.5-7b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
index 435b3f90a2..fdcd3d4275 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-7b-instruct
label:
en_US: qwen2.5-7b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
index f4303c53d3..52e35d8b50 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v1
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
index f6be3544ed..5bb6a8f424 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v2
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
index 171a379ee2..d8af0e2b63 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v3
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
index 5783d2e383..736cd44df8 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
import dashscope
import numpy as np
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage,
@@ -27,6 +28,7 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
credentials: dict,
texts: list[str],
user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +37,7 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
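Across the embedding providers touched in this diff, `_invoke` gains an `input_type` keyword defaulting to `EmbeddingInputType.DOCUMENT`. A minimal sketch of the resulting calling convention, assuming (as the import path `core.embedding.embedding_constant` suggests) that the enum distinguishes `DOCUMENT` from `QUERY` inputs; the enum and function below are illustrative stand-ins, not code from the repository:

```python
from enum import Enum
from typing import Optional


class EmbeddingInputType(Enum):
    """Stand-in for core.embedding.embedding_constant.EmbeddingInputType (assumed members)."""

    DOCUMENT = "document"
    QUERY = "query"


def invoke_embedding(
    texts: list[str],
    user: Optional[str] = None,
    input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> dict:
    # Providers that support asymmetric embeddings can branch on input_type;
    # the DOCUMENT default keeps existing indexing callers unchanged.
    return {"texts": texts, "input_type": input_type.value, "user": user}


# Indexing uses the default; retrieval opts into QUERY explicitly.
print(invoke_embedding(["some document chunk"]))
print(invoke_embedding(["what is dify?"], input_type=EmbeddingInputType.QUERY))
```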
diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
index fabe6d90e6..1a09c20fd9 100644
--- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
@@ -37,14 +37,51 @@ model_credential_schema:
en_US: Model Name
zh_Hans: 模型名称
placeholder:
- en_US: Enter full model name
- zh_Hans: 输入模型全称
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
credential_form_schemas:
- variable: dashscope_api_key
- required: true
label:
en_US: API Key
type: secret-input
+ required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
+ - variable: context_size
+ label:
+ zh_Hans: 模型上下文长度
+ en_US: Model context size
+ required: true
+ type: text-input
+ default: '4096'
+ placeholder:
+ zh_Hans: 在此输入您的模型上下文长度
+ en_US: Enter your model context size
+ - variable: max_tokens
+ label:
+ zh_Hans: 最大 token 上限
+ en_US: Upper bound for max tokens
+ default: '4096'
+ type: text-input
+ show_on:
+ - variable: __model_type
+ value: llm
+ - variable: function_calling_type
+ label:
+ en_US: Function calling
+ type: select
+ required: false
+ default: no_call
+ options:
+ - value: no_call
+ label:
+ en_US: Not Supported
+ zh_Hans: 不支持
+ - value: function_call
+ label:
+ en_US: Supported
+ zh_Hans: 支持
+ show_on:
+ - variable: __model_type
+ value: llm
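The new customizable-model fields above are text inputs, so values such as context_size and max_tokens arrive as strings like '4096'. A hedged sketch of how consuming code might coerce them to integers; `int_credential` is a hypothetical helper for illustration, not part of the codebase:

```python
def int_credential(credentials: dict, key: str, default: int) -> int:
    """Hypothetical helper: text-input credentials arrive as strings like '4096'."""
    raw = credentials.get(key, default)
    try:
        return int(raw)
    except (TypeError, ValueError):
        return default


credentials = {"context_size": "4096", "max_tokens": "4096", "function_calling_type": "no_call"}
context_size = int_credential(credentials, "context_size", 4096)
max_tokens = int_credential(credentials, "max_tokens", 4096)
# The select field gates tool use on the 'function_call' option defined above.
supports_tools = credentials.get("function_calling_type") == "function_call"
print(context_size, max_tokens, supports_tools)
```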
diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
index edd4a36d98..b6509cd26c 100644
--- a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ import numpy as np
from openai import OpenAI
from tokenizers import Tokenizer
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -22,7 +23,14 @@ class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
def _get_tokenizer(self) -> Tokenizer:
return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
- def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult:
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: str | None = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -30,6 +38,7 @@ class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
similarity index 96%
rename from api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml
rename to api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
index c308f0a322..f5386be06d 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-flash-001
label:
- en_US: Gemini 1.5 Flash
+ en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml
new file mode 100644
index 0000000000..97bd44f06b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-flash-002
+label:
+ en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
similarity index 96%
rename from api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml
rename to api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
index 744863e773..5e08f2294e 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-pro-001
label:
- en_US: Gemini 1.5 Pro
+ en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml
new file mode 100644
index 0000000000..8f327ea2f3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-pro-002
+label:
+ en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml
new file mode 100644
index 0000000000..0f5eb34c0c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml
@@ -0,0 +1,37 @@
+model: gemini-flash-experimental
+label:
+ en_US: Gemini Flash Experimental
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml
new file mode 100644
index 0000000000..fa31cabb85
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml
@@ -0,0 +1,37 @@
+model: gemini-pro-experimental
+label:
+ en_US: Gemini Pro Experimental
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
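The new Gemini entries price both input and output at 0.00 with unit '0.000001'. Assuming the usual convention that cost = tokens x price x unit (which makes price a per-million-token rate), a quick arithmetic check that the experimental models cost nothing:

```python
from decimal import Decimal


def token_cost(tokens: int, price: str, unit: str) -> Decimal:
    # Assumed convention: `unit` scales `price` down to a per-token rate,
    # so unit '0.000001' means `price` is quoted per million tokens.
    return Decimal(tokens) * Decimal(price) * Decimal(unit)


# Filling the full 1,048,576-token context of the entries above costs 0.00 USD.
print(token_cost(1_048_576, "0.00", "0.000001"))
```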
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py b/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
index da69b7cdf3..1dd785d545 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
@@ -2,6 +2,7 @@ import base64
import io
import json
import logging
+import time
from collections.abc import Generator
from typing import Optional, Union, cast
@@ -20,7 +21,6 @@ from google.api_core import exceptions
from google.cloud import aiplatform
from google.oauth2 import service_account
from PIL import Image
-from vertexai.generative_models import HarmBlockThreshold, HarmCategory
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
@@ -34,6 +34,7 @@ from core.model_runtime.entities.message_entities import (
ToolPromptMessage,
UserPromptMessage,
)
+from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
@@ -503,20 +504,12 @@ class VertexAiLargeLanguageModel(LargeLanguageModel):
else:
history.append(content)
- safety_settings = {
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
- }
-
google_model = glm.GenerativeModel(model_name=model, system_instruction=system_instruction)
response = google_model.generate_content(
contents=history,
generation_config=glm.GenerationConfig(**config_kwargs),
stream=stream,
- safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
)
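With the BLOCK_NONE safety_settings removed, generation requests fall back to the Vertex AI service's default safety thresholds rather than disabling them. A minimal sketch of the resulting call shape, assuming the generative-models module is imported as glm as in the surrounding file:

```python
import vertexai.generative_models as glm


def generate(model: str, history: list, config_kwargs: dict, stream: bool = True):
    # No explicit safety_settings any more: the request now relies on the
    # Vertex AI service defaults instead of forcing BLOCK_NONE everywhere.
    google_model = glm.GenerativeModel(model_name=model)
    return google_model.generate_content(
        contents=history,
        generation_config=glm.GenerationConfig(**config_kwargs),
        stream=stream,
    )
```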
diff --git a/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
index 519373a7f3..fce9544df0 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
@@ -9,6 +9,7 @@ from google.cloud import aiplatform
from google.oauth2 import service_account
from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -30,7 +31,12 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -38,6 +44,8 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
diff --git a/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
index 9cba2cb879..0dd4037c95 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
@@ -2,6 +2,7 @@ import time
from decimal import Decimal
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -41,7 +42,12 @@ class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -50,6 +56,7 @@ class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
if ArkClientV3.is_legacy(credentials):
diff --git a/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
index 4d6f6dccd0..c21d0c0552 100644
--- a/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from typing import Any, Optional
import numpy as np
from requests import Response, post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import InvokeError
@@ -70,7 +71,12 @@ class WenxinTextEmbeddingModel(TextEmbeddingModel):
return WenxinTextEmbedding(api_key, secret_key)
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -79,6 +85,7 @@ class WenxinTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
index 8043af1d6c..1627239132 100644
--- a/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
@@ -3,6 +3,7 @@ from typing import Optional
from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -25,7 +26,12 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -40,6 +46,7 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
index b1f9b7485c..7fcf692202 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.1'
output: '0.1'
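The same web_search boolean is added to each GLM model below; at invocation time it has to be translated into something the ZhipuAI API understands. One plausible mapping, assuming ZhipuAI's web_search tool shape rather than a raw request kwarg; treat the exact payload as an assumption:

```python
def build_tools(model_parameters: dict) -> "list[dict] | None":
    # Assumption: the boolean `web_search` parameter rule is forwarded to
    # ZhipuAI as a web_search tool entry, not passed through verbatim.
    if model_parameters.pop("web_search", False):
        return [{"type": "web_search", "web_search": {"enable": True}}]
    return None


print(build_tools({"temperature": 0.7, "web_search": True}))
```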
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
index 4e7d5fd3cc..fcd7c7768c 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
index 14f17db5d6..c9ae5abf19 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.01'
output: '0.01'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
index 3361474d73..98c4f72c72 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0'
output: '0'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
index bf0135d198..0b5391ce2f 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 8192
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
index ab4b32dd82..62f453fb77 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.1'
output: '0.1'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
index d1b01731f5..350b080c3f 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
@@ -49,6 +49,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
index 9ede308f18..2d7ebd71cf 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.05'
output: '0.05'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
index 28286580a7..3a1120ff37 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
@@ -44,6 +44,15 @@ parameter_rules:
default: 1024
min: 1
max: 1024
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.05'
output: '0.05'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
index 4c5fa24034..14b9623e5a 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
@@ -44,6 +44,15 @@ parameter_rules:
default: 1024
min: 1
max: 1024
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.01'
output: '0.01'
diff --git a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
index ee20954381..14a529dddf 100644
--- a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
@@ -1,6 +1,7 @@
import time
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -15,7 +16,12 @@ class ZhipuAITextEmbeddingModel(_CommonZhipuaiAI, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -24,6 +30,7 @@ class ZhipuAITextEmbeddingModel(_CommonZhipuaiAI, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
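A short sketch of what the widened signature enables: callers can distinguish query-time from document-time embeddings. This assumes the `EmbeddingInputType` enum also defines `QUERY` alongside the `DOCUMENT` default, and that `model_instance` and `credentials` are configured elsewhere; `_invoke` is called directly only for illustration.

```python
# Hypothetical usage of the extended signature; model_instance, credentials,
# and EmbeddingInputType.QUERY are assumptions for illustration.
from core.embedding.embedding_constant import EmbeddingInputType

result = model_instance._invoke(
    model="embedding-2",
    credentials=credentials,
    texts=["what is dify?"],
    input_type=EmbeddingInputType.QUERY,  # defaults to DOCUMENT when omitted
)
```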
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
index d8ecc31064..05510a3ec4 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
@@ -57,7 +57,7 @@ class AsyncCompletions(BaseAPI):
if temperature <= 0:
do_sample = False
temperature = 0.01
- # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperture不生效)") # noqa: E501
+ # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperature不生效)") # noqa: E501
if temperature >= 1:
temperature = 0.99
# logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间")
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
index 1c23473a03..8e5bb454e6 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
@@ -60,7 +60,7 @@ class Completions(BaseAPI):
if temperature <= 0:
do_sample = False
temperature = 0.01
- # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperture不生效)") # noqa: E501
+ # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperature不生效)") # noqa: E501
if temperature >= 1:
temperature = 0.99
# logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间")
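Beyond the typo fix, the clamping these commented-out warnings describe is identical in the sync and async paths: ZhipuAI accepts `temperature` only in the open interval (0.0, 1.0), so out-of-range values are coerced. A minimal reproduction:

```python
# Minimal reproduction of the temperature clamping in completions.py and
# async_completions.py: values outside (0.0, 1.0) are coerced inward.
def clamp_temperature(temperature: float) -> tuple[bool, float]:
    do_sample = True
    if temperature <= 0:
        do_sample = False  # sampling disabled; top_p/temperature take no effect
        temperature = 0.01
    if temperature >= 1:
        temperature = 0.99
    return do_sample, temperature

assert clamp_temperature(0.0) == (False, 0.01)
assert clamp_temperature(1.5) == (True, 0.99)
```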
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py
index 5e9a7e0a98..6d8ba700b7 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py
@@ -630,8 +630,7 @@ def validate_type(*, type_: type[_T], value: object) -> _T:
return cast(_T, _validate_non_model_type(type_=type_, value=value))
-# our use of subclasssing here causes weirdness for type checkers,
-# so we just pretend that we don't subclass
+# Subclassing here confuses type checkers, so we treat this class as non-inheriting.
if TYPE_CHECKING:
GenericModel = BaseModel
else:
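The reworded comment describes a common workaround: at type-check time the subclass is presented as a plain alias of its base so checkers treat the two identically, while at runtime the real subclass (with its extra behavior) is used. A generic, self-contained sketch with illustrative names:

```python
from typing import TYPE_CHECKING

class BaseThing:
    pass

if TYPE_CHECKING:
    # Type checkers see a plain alias, sidestepping subclassing quirks.
    DerivedThing = BaseThing
else:
    # At runtime the real subclass, with extra behavior, is used.
    class DerivedThing(BaseThing):
        runtime_only = True
```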
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
index d0f933d814..ffdafb85d5 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
@@ -169,7 +169,7 @@ class BaseSyncPage(BasePage[_T], Generic[_T]):
# Pydantic uses a custom `__iter__` method to support casting BaseModels
# to dictionaries. e.g. dict(model).
# As we want to support `for item in page`, this is inherently incompatible
- # with the default pydantic behaviour. It is not possible to support both
+ # with the default pydantic behavior. It is not possible to support both
# use cases at once. Fortunately, this is not a big deal as all other pydantic
# methods should continue to work as expected as there is an alternative method
# to cast a model to a dictionary, model.dict(), which is used internally
@@ -356,16 +356,16 @@ class HttpClient:
**kwargs,
)
- def _object_to_formfata(self, key: str, value: Data | Mapping[object, object]) -> list[tuple[str, str]]:
+ def _object_to_formdata(self, key: str, value: Data | Mapping[object, object]) -> list[tuple[str, str]]:
items = []
if isinstance(value, Mapping):
for k, v in value.items():
- items.extend(self._object_to_formfata(f"{key}[{k}]", v))
+ items.extend(self._object_to_formdata(f"{key}[{k}]", v))
return items
if isinstance(value, list | tuple):
for v in value:
- items.extend(self._object_to_formfata(key + "[]", v))
+ items.extend(self._object_to_formdata(key + "[]", v))
return items
def _primitive_value_to_str(val) -> str:
@@ -385,7 +385,7 @@ class HttpClient:
return [(key, str_data)]
def _make_multipartform(self, data: Mapping[object, object]) -> dict[str, object]:
- items = flatten(list(starmap(self._object_to_formfata, data.items())))
+ items = flatten(list(starmap(self._object_to_formdata, data.items())))
serialized: dict[str, object] = {}
for key, value in items:
@@ -620,7 +620,7 @@ class HttpClient:
stream: bool,
stream_cls: type[StreamResponse] | None,
) -> ResponseT:
- # _legacy_response with raw_response_header to paser method
+ # requests flagged with RAW_RESPONSE_HEADER are routed to the _legacy_response parser
if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
return cast(
ResponseT,
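The renamed `_object_to_formdata` encodes nested payloads using bracketed form keys: mappings become `key[subkey]` and sequences become `key[]`. A simplified standalone sketch of the convention (the real method also routes primitives through `_primitive_value_to_str`):

```python
# Simplified stand-in for _object_to_formdata's key convention.
from collections.abc import Mapping

def to_formdata(key, value):
    if isinstance(value, Mapping):
        items = []
        for k, v in value.items():
            items.extend(to_formdata(f"{key}[{k}]", v))  # key[subkey]
        return items
    if isinstance(value, (list, tuple)):
        items = []
        for v in value:
            items.extend(to_formdata(key + "[]", v))  # key[]
        return items
    return [(key, str(value))]

assert to_formdata("meta", {"tags": ["a", "b"]}) == [
    ("meta[tags][]", "a"),
    ("meta[tags][]", "b"),
]
```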
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py
index 47183b9eee..51bf21bcdc 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py
@@ -87,7 +87,7 @@ class LegacyAPIResponse(Generic[R]):
For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
- You can customise the type that the response is parsed into through
+ You can customize the type that the response is parsed into through
the `to` argument, e.g.
```py
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
index 45443da662..92e6018055 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
@@ -252,7 +252,7 @@ class APIResponse(BaseAPIResponse[R]):
For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
- You can customise the type that the response is parsed into through
+ You can customize the type that the response is parsed into through
the `to` argument, e.g.
```py
@@ -363,7 +363,7 @@ class StreamAlreadyConsumed(ZhipuAIError): # noqa: N818
# ^ error
```
- If you want this behaviour you'll need to either manually accumulate the response
+ If you want this behavior you'll need to either manually accumulate the response
content or call `await response.read()` before iterating over the stream.
"""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py
index 32e23e6dab..59cb41d712 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py
@@ -1,8 +1,8 @@
-from .document import DocumentData, DocumentFailedInfo, DocumentObject, DocumentSuccessinfo
+from .document import DocumentData, DocumentFailedInfo, DocumentObject, DocumentSuccessInfo
__all__ = [
"DocumentData",
"DocumentObject",
- "DocumentSuccessinfo",
+ "DocumentSuccessInfo",
"DocumentFailedInfo",
]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py
index b9a1646391..980bc6f4a7 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py
@@ -2,10 +2,10 @@ from typing import Optional
from ....core import BaseModel
-__all__ = ["DocumentData", "DocumentObject", "DocumentSuccessinfo", "DocumentFailedInfo"]
+__all__ = ["DocumentData", "DocumentObject", "DocumentSuccessInfo", "DocumentFailedInfo"]
-class DocumentSuccessinfo(BaseModel):
+class DocumentSuccessInfo(BaseModel):
documentId: Optional[str] = None
"""文件id"""
filename: Optional[str] = None
@@ -24,7 +24,7 @@ class DocumentFailedInfo(BaseModel):
class DocumentObject(BaseModel):
"""文档信息"""
- successInfos: Optional[list[DocumentSuccessinfo]] = None
+ successInfos: Optional[list[DocumentSuccessInfo]] = None
"""上传成功的文件信息"""
failedInfos: Optional[list[DocumentFailedInfo]] = None
"""上传失败的文件信息"""
diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
index 612542dab1..6dcd98dcfd 100644
--- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
+++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
@@ -40,19 +40,8 @@ class AnalyticdbConfig(BaseModel):
class AnalyticdbVector(BaseVector):
- _instance = None
- _init = False
-
- def __new__(cls, *args, **kwargs):
- if cls._instance is None:
- cls._instance = super().__new__(cls)
- return cls._instance
-
def __init__(self, collection_name: str, config: AnalyticdbConfig):
- # collection_name must be updated every time
self._collection_name = collection_name.lower()
- if AnalyticdbVector._init:
- return
try:
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_tea_openapi import models as open_api_models
@@ -62,7 +51,6 @@ class AnalyticdbVector(BaseVector):
self._client_config = open_api_models.Config(user_agent="dify", **config.to_analyticdb_client_params())
self._client = Client(self._client_config)
self._initialize()
- AnalyticdbVector._init = True
def _initialize(self) -> None:
cache_key = f"vector_indexing_{self.config.instance_id}"
@@ -257,11 +245,14 @@ class AnalyticdbVector(BaseVector):
documents = []
for match in response.body.matches.match:
if match.score > score_threshold:
+ metadata = json.loads(match.metadata.get("metadata_"))
+ metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
- metadata=json.loads(match.metadata.get("metadata_")),
+ metadata=metadata,
)
documents.append(doc)
+ documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
@@ -286,12 +277,14 @@ class AnalyticdbVector(BaseVector):
for match in response.body.matches.match:
if match.score > score_threshold:
metadata = json.loads(match.metadata.get("metadata_"))
+ metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
vector=match.metadata.get("vector"),
metadata=metadata,
)
documents.append(doc)
+ documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents
def delete(self) -> None:
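The retrieval changes copy each match's score into its metadata and return results ordered best-first. A minimal reproduction of the new ordering, with a stand-in for `core.rag`'s `Document`:

```python
# Scores are written into metadata (so downstream consumers can read them),
# then documents are sorted descending by score.
from dataclasses import dataclass, field

@dataclass
class Document:  # stand-in for core.rag.models.document.Document
    page_content: str
    metadata: dict = field(default_factory=dict)

matches = [("foo", 0.42), ("bar", 0.91), ("baz", 0.77)]
documents = [Document(text, {"score": score}) for text, score in matches]
documents = sorted(documents, key=lambda d: d.metadata["score"], reverse=True)
assert [d.metadata["score"] for d in documents] == [0.91, 0.77, 0.42]
```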
diff --git a/api/core/rag/datasource/vdb/vector_base.py b/api/core/rag/datasource/vdb/vector_base.py
index 1a0dc7f48b..22e191340d 100644
--- a/api/core/rag/datasource/vdb/vector_base.py
+++ b/api/core/rag/datasource/vdb/vector_base.py
@@ -45,6 +45,7 @@ class BaseVector(ABC):
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
raise NotImplementedError
+ @abstractmethod
def delete(self) -> None:
raise NotImplementedError
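Marking `delete` abstract changes when the error surfaces: a vector-store subclass that forgets to implement it now fails at instantiation instead of at first call. A small sketch with a simplified stand-in base class:

```python
from abc import ABC, abstractmethod

class BaseVector(ABC):  # simplified stand-in for the real class
    @abstractmethod
    def delete(self) -> None:
        raise NotImplementedError

class BrokenVector(BaseVector):
    pass  # forgot delete()

try:
    BrokenVector()
except TypeError as e:
    print(e)  # Can't instantiate abstract class BrokenVector ...
```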
diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py
index fe7eaa32e6..0ffc89b214 100644
--- a/api/core/rag/extractor/extract_processor.py
+++ b/api/core/rag/extractor/extract_processor.py
@@ -124,7 +124,7 @@ class ExtractProcessor:
extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url)
elif file_extension == ".xml":
extractor = UnstructuredXmlExtractor(file_path, unstructured_api_url)
- elif file_extension == "epub":
+ elif file_extension == ".epub":
extractor = UnstructuredEpubExtractor(file_path, unstructured_api_url)
else:
# txt
@@ -146,7 +146,7 @@ class ExtractProcessor:
extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
elif file_extension == ".csv":
extractor = CSVExtractor(file_path, autodetect_encoding=True)
- elif file_extension == "epub":
+ elif file_extension == ".epub":
extractor = UnstructuredEpubExtractor(file_path)
else:
# txt
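The `.epub` fix matters because these branches compare extensions that keep their leading dot (as `pathlib.Path.suffix` produces them), so the bare `"epub"` literal could never match and EPUB files silently fell through to the plain-text branch:

```python
# Path.suffix keeps the leading dot, so "epub" never matched.
from pathlib import Path

file_extension = Path("book.epub").suffix
assert file_extension == ".epub"
assert file_extension != "epub"
```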
diff --git a/api/core/tools/entities/common_entities.py b/api/core/tools/entities/common_entities.py
index 37a926697b..924e6fc0cf 100644
--- a/api/core/tools/entities/common_entities.py
+++ b/api/core/tools/entities/common_entities.py
@@ -1,6 +1,6 @@
from typing import Optional
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
class I18nObject(BaseModel):
@@ -8,16 +8,16 @@ class I18nObject(BaseModel):
Model class for i18n object.
"""
- zh_Hans: Optional[str] = None
- pt_BR: Optional[str] = None
en_US: str
+ zh_Hans: Optional[str] = Field(default=None)
+ pt_BR: Optional[str] = Field(default=None)
+ ja_JP: Optional[str] = Field(default=None)
def __init__(self, **data):
super().__init__(**data)
- if not self.zh_Hans:
- self.zh_Hans = self.en_US
- if not self.pt_BR:
- self.pt_BR = self.en_US
+ self.zh_Hans = self.zh_Hans or self.en_US
+ self.pt_BR = self.pt_BR or self.en_US
+ self.ja_JP = self.ja_JP or self.en_US
def to_dict(self) -> dict:
- return {"zh_Hans": self.zh_Hans, "en_US": self.en_US, "pt_BR": self.pt_BR}
+ return {"zh_Hans": self.zh_Hans, "en_US": self.en_US, "pt_BR": self.pt_BR, "ja_JP": self.ja_JP}
diff --git a/api/core/tools/provider/builtin/arxiv/arxiv.yaml b/api/core/tools/provider/builtin/arxiv/arxiv.yaml
index d26993b336..25aec97bb7 100644
--- a/api/core/tools/provider/builtin/arxiv/arxiv.yaml
+++ b/api/core/tools/provider/builtin/arxiv/arxiv.yaml
@@ -4,9 +4,11 @@ identity:
label:
en_US: ArXiv
zh_Hans: ArXiv
+ ja_JP: ArXiv
description:
en_US: Access to a vast repository of scientific papers and articles in various fields of research.
zh_Hans: 访问各个研究领域大量科学论文和文章的存储库。
+ ja_JP: 多様な研究分野の科学論文や記事の膨大なリポジトリへのアクセス。
icon: icon.svg
tags:
- search
diff --git a/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml b/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
index 7439a48658..afc1925df3 100644
--- a/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
+++ b/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
@@ -4,10 +4,12 @@ identity:
label:
en_US: Arxiv Search
zh_Hans: Arxiv 搜索
+ ja_JP: Arxiv 検索
description:
human:
en_US: A tool for searching scientific papers and articles from the Arxiv repository. Input can be an Arxiv ID or an author's name.
zh_Hans: 一个用于从Arxiv存储库搜索科学论文和文章的工具。 输入可以是Arxiv ID或作者姓名。
+ ja_JP: Arxivリポジトリから科学論文や記事を検索するためのツールです。入力はArxiv IDまたは著者名にすることができます。
llm: A tool for searching scientific papers and articles from the Arxiv repository. Input can be an Arxiv ID or an author's name.
parameters:
- name: query
@@ -16,8 +18,10 @@ parameters:
label:
en_US: Query string
zh_Hans: 查询字符串
+ ja_JP: クエリ文字列
human_description:
en_US: The Arxiv ID or author's name used for searching.
zh_Hans: 用于搜索的Arxiv ID或作者姓名。
+ ja_JP: 検索に使用されるArxiv IDまたは著者名。
llm_description: The Arxiv ID or author's name used for searching.
form: llm
diff --git a/api/core/tools/provider/builtin/comfyui/comfyui.yaml b/api/core/tools/provider/builtin/comfyui/comfyui.yaml
index 066fd85308..3891eebf3a 100644
--- a/api/core/tools/provider/builtin/comfyui/comfyui.yaml
+++ b/api/core/tools/provider/builtin/comfyui/comfyui.yaml
@@ -39,4 +39,4 @@ credentials_for_provider:
en_US: The checkpoint name of the ComfyUI server, e.g. xxx.safetensors
zh_Hans: ComfyUI服务器的模型名称, 比如 xxx.safetensors
pt_BR: The checkpoint name of the ComfyUI server, e.g. xxx.safetensors
- url: https://docs.dify.ai/tutorials/tool-configuration/comfyui
+ url: https://github.com/comfyanonymous/ComfyUI#installing
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/crawl.py b/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
index 9675b8eb91..15ab510c6c 100644
--- a/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
+++ b/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
@@ -35,10 +35,10 @@ class CrawlTool(BuiltinTool):
scrapeOptions["excludeTags"] = get_array_params(tool_parameters, "excludeTags")
scrapeOptions["onlyMainContent"] = tool_parameters.get("onlyMainContent", False)
scrapeOptions["waitFor"] = tool_parameters.get("waitFor", 0)
- scrapeOptions = {k: v for k, v in scrapeOptions.items() if v not in {None, ""}}
+ scrapeOptions = {k: v for k, v in scrapeOptions.items() if v not in (None, "")}
payload["scrapeOptions"] = scrapeOptions or None
- payload = {k: v for k, v in payload.items() if v not in {None, ""}}
+ payload = {k: v for k, v in payload.items() if v not in (None, "")}
crawl_result = app.crawl_url(url=tool_parameters["url"], wait=wait_for_results, **payload)
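Switching the membership test from a set literal to a tuple (here and in scrape.py below) is a correctness fix, not style: these payload values can be unhashable (lists such as `excludeTags`, dicts such as `scrapeOptions`), and `v in {None, ""}` must hash `v`, raising `TypeError` for them, while a tuple compares by equality:

```python
# A set membership test hashes the candidate; unhashable values blow up.
payload = {"excludeTags": ["nav", "footer"], "waitFor": 0, "prompt": None}

cleaned = {k: v for k, v in payload.items() if v not in (None, "")}
assert cleaned == {"excludeTags": ["nav", "footer"], "waitFor": 0}

try:
    {k: v for k, v in payload.items() if v not in {None, ""}}
except TypeError as e:
    print(e)  # unhashable type: 'list'
```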
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/scrape.py b/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
index 538b4a1fcb..f00a9b31ce 100644
--- a/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
+++ b/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
@@ -29,10 +29,10 @@ class ScrapeTool(BuiltinTool):
extract["schema"] = get_json_params(tool_parameters, "schema")
extract["systemPrompt"] = tool_parameters.get("systemPrompt")
extract["prompt"] = tool_parameters.get("prompt")
- extract = {k: v for k, v in extract.items() if v not in {None, ""}}
+ extract = {k: v for k, v in extract.items() if v not in (None, "")}
payload["extract"] = extract or None
- payload = {k: v for k, v in payload.items() if v not in {None, ""}}
+ payload = {k: v for k, v in payload.items() if v not in (None, "")}
crawl_result = app.scrape_url(url=tool_parameters["url"], **payload)
markdown_result = crawl_result.get("data", {}).get("markdown", "")
diff --git a/api/core/tools/provider/builtin/jina/jina.yaml b/api/core/tools/provider/builtin/jina/jina.yaml
index 06f23382d9..346175c41f 100644
--- a/api/core/tools/provider/builtin/jina/jina.yaml
+++ b/api/core/tools/provider/builtin/jina/jina.yaml
@@ -2,9 +2,9 @@ identity:
author: Dify
name: jina
label:
- en_US: Jina
- zh_Hans: Jina
- pt_BR: Jina
+ en_US: Jina AI
+ zh_Hans: Jina AI
+ pt_BR: Jina AI
description:
en_US: Convert any URL to an LLM-friendly input or perform searches on the web for grounding information. Experience improved output for your agent and RAG systems at no cost.
zh_Hans: 将任何URL转换为LLM易读的输入或在网页上搜索引擎上搜索引擎。
@@ -22,11 +22,11 @@ credentials_for_provider:
zh_Hans: API 密钥(可留空)
pt_BR: Chave API (deixe vazio se você não tiver uma)
placeholder:
- en_US: Please enter your Jina API key
- zh_Hans: 请输入你的 Jina API 密钥
- pt_BR: Por favor, insira sua chave de API do Jina
+ en_US: Please enter your Jina AI API key
+ zh_Hans: 请输入你的 Jina AI API 密钥
+ pt_BR: Por favor, insira sua chave de API do Jina AI
help:
- en_US: Get your Jina API key from Jina (optional, but you can get a higher rate)
- zh_Hans: 从 Jina 获取您的 Jina API 密钥(非必须,能得到更高的速率)
- pt_BR: Obtenha sua chave de API do Jina na Jina (opcional, mas você pode obter uma taxa mais alta)
+ en_US: Get your Jina AI API key from Jina AI (optional, but you can get a higher rate)
+ zh_Hans: 从 Jina AI 获取您的 Jina AI API 密钥(非必须,能得到更高的速率)
+ pt_BR: Obtenha sua chave de API do Jina AI na Jina AI (opcional, mas você pode obter uma taxa mais alta)
url: https://jina.ai
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
index 58ad6d8694..589bc3433d 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
@@ -2,14 +2,14 @@ identity:
name: jina_reader
author: Dify
label:
- en_US: JinaReader
- zh_Hans: JinaReader
- pt_BR: JinaReader
+ en_US: Fetch Single Page
+ zh_Hans: 获取单页面
+ pt_BR: Fetch Single Page
description:
human:
- en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost.
- zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。
- pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo.
+ en_US: Fetch the target URL (can be a PDF) and convert it into LLM-friendly markdown.
+ zh_Hans: 获取目标网址(可以是 PDF),并将其转换为适合大模型处理的 Markdown 格式。
+ pt_BR: Busque a URL de destino (que pode ser um PDF) e converta em um Markdown LLM-friendly.
llm: A tool for scraping webpages. Input should be a URL.
parameters:
- name: url
@@ -17,13 +17,13 @@ parameters:
required: true
label:
en_US: URL
- zh_Hans: 网页链接
+ zh_Hans: 网址
pt_BR: URL
human_description:
- en_US: used for linking to webpages
- zh_Hans: 用于链接到网页
- pt_BR: used for linking to webpages
- llm_description: url for scraping
+ en_US: Web link
+ zh_Hans: 网页链接
+ pt_BR: URL da web
+ llm_description: URL of the webpage to scrape
form: llm
- name: request_params
type: string
@@ -31,14 +31,14 @@ parameters:
label:
en_US: Request params
zh_Hans: 请求参数
- pt_BR: Request params
+ pt_BR: Parâmetros de solicitação
human_description:
en_US: |
request parameters, format: {"key1": "value1", "key2": "value2"}
zh_Hans: |
请求参数,格式:{"key1": "value1", "key2": "value2"}
pt_BR: |
- request parameters, format: {"key1": "value1", "key2": "value2"}
+ parâmetros de solicitação, formato: {"key1": "value1", "key2": "value2"}
llm_description: request parameters
form: llm
- name: target_selector
@@ -51,7 +51,7 @@ parameters:
human_description:
en_US: css selector for scraping specific elements
zh_Hans: css 选择器用于抓取特定元素
- pt_BR: css selector for scraping specific elements
+ pt_BR: css selector para scraping de elementos específicos
llm_description: css selector of the target element to scrape
form: form
- name: wait_for_selector
@@ -64,7 +64,7 @@ parameters:
human_description:
en_US: css selector for waiting for specific elements
zh_Hans: css 选择器用于等待特定元素
- pt_BR: css selector for waiting for specific elements
+ pt_BR: css selector para aguardar elementos específicos
llm_description: css selector of the target element to wait for
form: form
- name: image_caption
@@ -77,8 +77,8 @@ parameters:
pt_BR: Legenda da imagem
human_description:
en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
- zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
- pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
+ zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签,以支持下游模型的图像交互。"
+ pt_BR: "Adiciona legendas a todas as imagens na URL especificada, adicionando 'Imagem [idx]: [legenda]' como uma tag alt para aquelas que não têm uma. Isso permite que os modelos LLM inferiores interajam com as imagens em atividades como raciocínio e resumo."
llm_description: Captions all images at the specified URL
form: form
- name: gather_all_links_at_the_end
@@ -91,8 +91,8 @@ parameters:
pt_BR: Coletar todos os links ao final
human_description:
en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
- zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
- pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
+ zh_Hans: 末尾将添加“按钮和链接”部分,方便下游模型或网络代理做页面导航或执行进一步操作。
+ pt_BR: Um "Botões & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
llm_description: Gather all links at the end
form: form
- name: gather_all_images_at_the_end
@@ -105,8 +105,8 @@ parameters:
pt_BR: Coletar todas as imagens ao final
human_description:
en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
- zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。
- pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+ zh_Hans: 末尾会新增“图片”部分,方便下游模型全面了解页面的视觉内容,提升推理效果。
+ pt_BR: Um "Imagens" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
llm_description: Gather all images at the end
form: form
- name: proxy_server
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
index 2bc70e1be1..e58c639e56 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
@@ -2,13 +2,14 @@ identity:
name: jina_search
author: Dify
label:
- en_US: JinaSearch
- zh_Hans: JinaSearch
- pt_BR: JinaSearch
+ en_US: Search the web
+ zh_Hans: 联网搜索
+ pt_BR: Search the web
description:
human:
- en_US: Search on the web and get the top 5 results. Useful for grounding using information from the web.
- zh_Hans: 在网络上搜索返回前 5 个结果。
+ en_US: Search the public web for a given query and return the top results as LLM-friendly markdown.
+ zh_Hans: 针对给定的查询在互联网上进行搜索,并以适合大模型处理的 Markdown 格式返回最相关的结果。
+ pt_BR: Pesquisa na web pública por uma consulta fornecida e retorna os melhores resultados em markdown amigável para LLMs.
llm: A tool for searching results on the web for grounding. Input should be a simple question.
parameters:
- name: query
@@ -16,11 +17,13 @@ parameters:
required: true
label:
en_US: Question (Query)
- zh_Hans: 信息查询
+ zh_Hans: 查询
+ pt_BR: Pergunta (Consulta)
human_description:
en_US: used to find information on the web
zh_Hans: 在网络上搜索信息
- llm_description: simple question to ask on the web
+ pt_BR: Usado para encontrar informações na web
+ llm_description: A simple question to ask on the web
form: llm
- name: image_caption
type: boolean
@@ -32,7 +35,7 @@ parameters:
pt_BR: Legenda da imagem
human_description:
en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
- zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
+ zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签,以支持下游模型的图像交互。"
pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
llm_description: Captions all images at the specified URL
form: form
@@ -46,8 +49,8 @@ parameters:
pt_BR: Coletar todos os links ao final
human_description:
en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
- zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
- pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
+ zh_Hans: 末尾将添加“按钮和链接”部分,汇总页面上的所有链接。方便下游模型或网络代理做页面导航或执行进一步操作。
+ pt_BR: Um "Botão & Links" seção será criada no final. Isso ajuda os LLMs ou agentes da web navegando pela página ou executar ações adicionais.
llm_description: Gather all links at the end
form: form
- name: gather_all_images_at_the_end
@@ -60,8 +63,8 @@ parameters:
pt_BR: Coletar todas as imagens ao final
human_description:
en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
- zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。
- pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+ zh_Hans: 末尾会新增“图片”部分,汇总页面上的所有图片。方便下游模型概览页面的视觉内容,提升推理效果。
+ pt_BR: Um "Imagens" seção será criada no final. Isso fornece uma visão geral de todas as imagens na página para os LLMs, que pode melhorar a razão.
llm_description: Gather all images at the end
form: form
- name: proxy_server
@@ -74,7 +77,7 @@ parameters:
human_description:
en_US: Use proxy to access URLs
zh_Hans: 利用代理访问 URL
- pt_BR: Use proxy to access URLs
+ pt_BR: Usar proxy para acessar URLs
llm_description: Use proxy to access URLs
form: form
- name: no_cache
@@ -83,7 +86,7 @@ parameters:
default: false
label:
en_US: Bypass the Cache
- zh_Hans: 绕过缓存
+ zh_Hans: 是否绕过缓存
pt_BR: Ignorar o cache
human_description:
en_US: Bypass the Cache
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml b/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
index 62a5c7e7ba..74885cdf9a 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
@@ -2,11 +2,14 @@ identity:
name: jina_tokenizer
author: hjlarry
label:
- en_US: JinaTokenizer
+ en_US: Segment
+ zh_Hans: 切分器
+ pt_BR: Segment
description:
human:
- en_US: Free API to tokenize text and segment long text into chunks.
- zh_Hans: 免费的API可以将文本tokenize,也可以将长文本分割成多个部分。
+ en_US: Split long text into chunks and do tokenization.
+ zh_Hans: 将长文本拆分成小段落,并做分词处理。
+ pt_BR: Dividir o texto longo em pedaços e fazer tokenização.
llm: Free API to tokenize text and segment long text into chunks.
parameters:
- name: content
@@ -15,6 +18,7 @@ parameters:
label:
en_US: Content
zh_Hans: 内容
+ pt_BR: Conteúdo
llm_description: the content which need to tokenize or segment
form: llm
- name: return_tokens
@@ -23,18 +27,22 @@ parameters:
label:
en_US: Return the tokens
zh_Hans: 是否返回tokens
+ pt_BR: Retornar os tokens
human_description:
en_US: Return the tokens and their corresponding ids in the response.
zh_Hans: 返回tokens及其对应的ids。
+ pt_BR: Retornar os tokens e seus respectivos ids na resposta.
form: form
- name: return_chunks
type: boolean
label:
en_US: Return the chunks
zh_Hans: 是否分块
+ pt_BR: Retornar os chunks
human_description:
en_US: Chunking the input into semantically meaningful segments while handling a wide variety of text types and edge cases based on common structural cues.
- zh_Hans: 将输入分块为具有语义意义的片段,同时根据常见的结构线索处理各种文本类型和边缘情况。
+ zh_Hans: 将输入文本分块为语义有意义的片段,同时基于常见的结构线索处理各种文本类型和特殊情况。
+ pt_BR: Dividir o texto de entrada em segmentos semanticamente significativos, enquanto lida com uma ampla variedade de tipos de texto e casos de borda com base em pistas estruturais comuns.
form: form
- name: tokenizer
type: select
diff --git a/api/core/tools/provider/builtin/stepfun/stepfun.py b/api/core/tools/provider/builtin/stepfun/stepfun.py
index b24f730c95..239db85b11 100644
--- a/api/core/tools/provider/builtin/stepfun/stepfun.py
+++ b/api/core/tools/provider/builtin/stepfun/stepfun.py
@@ -16,7 +16,7 @@ class StepfunProvider(BuiltinToolProviderController):
user_id="",
tool_parameters={
"prompt": "cute girl, blue eyes, white hair, anime style",
- "size": "1024x1024",
+ "size": "256x256",
"n": 1,
},
)
diff --git a/api/core/tools/provider/builtin/stepfun/stepfun.yaml b/api/core/tools/provider/builtin/stepfun/stepfun.yaml
index 1f841ec369..e8139a4d7d 100644
--- a/api/core/tools/provider/builtin/stepfun/stepfun.yaml
+++ b/api/core/tools/provider/builtin/stepfun/stepfun.yaml
@@ -4,11 +4,9 @@ identity:
label:
en_US: Image-1X
zh_Hans: 阶跃星辰绘画
- pt_BR: Image-1X
description:
en_US: Image-1X
zh_Hans: 阶跃星辰绘画
- pt_BR: Image-1X
icon: icon.png
tags:
- image
@@ -20,27 +18,16 @@ credentials_for_provider:
label:
en_US: Stepfun API key
zh_Hans: 阶跃星辰API key
- pt_BR: Stepfun API key
- help:
- en_US: Please input your stepfun API key
- zh_Hans: 请输入你的阶跃星辰 API key
- pt_BR: Please input your stepfun API key
placeholder:
- en_US: Please input your stepfun API key
+ en_US: Please input your Stepfun API key
zh_Hans: 请输入你的阶跃星辰 API key
- pt_BR: Please input your stepfun API key
+ url: https://platform.stepfun.com/interface-key
stepfun_base_url:
type: text-input
required: false
label:
en_US: Stepfun base URL
zh_Hans: 阶跃星辰 base URL
- pt_BR: Stepfun base URL
- help:
- en_US: Please input your Stepfun base URL
- zh_Hans: 请输入你的阶跃星辰 base URL
- pt_BR: Please input your Stepfun base URL
placeholder:
en_US: Please input your Stepfun base URL
zh_Hans: 请输入你的阶跃星辰 base URL
- pt_BR: Please input your Stepfun base URL
diff --git a/api/core/tools/provider/builtin/stepfun/tools/image.py b/api/core/tools/provider/builtin/stepfun/tools/image.py
index 0b92b122bf..eb55dae518 100644
--- a/api/core/tools/provider/builtin/stepfun/tools/image.py
+++ b/api/core/tools/provider/builtin/stepfun/tools/image.py
@@ -1,4 +1,3 @@
-import random
from typing import Any, Union
from openai import OpenAI
@@ -19,7 +18,7 @@ class StepfunTool(BuiltinTool):
"""
invoke tools
"""
- base_url = self.runtime.credentials.get("stepfun_base_url", "https://api.stepfun.com")
+ base_url = self.runtime.credentials.get("stepfun_base_url") or "https://api.stepfun.com"
base_url = str(URL(base_url) / "v1")
client = OpenAI(
@@ -28,9 +27,7 @@ class StepfunTool(BuiltinTool):
)
extra_body = {}
- model = tool_parameters.get("model", "step-1x-medium")
- if not model:
- return self.create_text_message("Please input model name")
+ model = "step-1x-medium"
# prompt
prompt = tool_parameters.get("prompt", "")
if not prompt:
@@ -67,9 +64,3 @@ class StepfunTool(BuiltinTool):
)
)
return result
-
- @staticmethod
- def _generate_random_id(length=8):
- characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
- random_id = "".join(random.choices(characters, k=length))
- return random_id
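The `or` fallback for `stepfun_base_url` earlier in this file is deliberate: `dict.get(key, default)` only falls back when the key is missing, while credentials saved from a form can be present but empty. A minimal illustration:

```python
# dict.get's default only covers a missing key; an empty string slips through.
credentials = {"stepfun_base_url": ""}

assert credentials.get("stepfun_base_url", "https://api.stepfun.com") == ""
assert (credentials.get("stepfun_base_url") or "https://api.stepfun.com") == "https://api.stepfun.com"
```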
diff --git a/api/core/tools/provider/builtin/stepfun/tools/image.yaml b/api/core/tools/provider/builtin/stepfun/tools/image.yaml
index dcc5bd2db2..8d7c9b6586 100644
--- a/api/core/tools/provider/builtin/stepfun/tools/image.yaml
+++ b/api/core/tools/provider/builtin/stepfun/tools/image.yaml
@@ -29,35 +29,6 @@ parameters:
pt_BR: Image prompt, you can check the official documentation of step-1x
llm_description: Image prompt of step-1x you should describe the image you want to generate as a list of words as possible as detailed
form: llm
- - name: model
- type: select
- required: false
- human_description:
- en_US: used for selecting the model name
- zh_Hans: 用于选择模型的名字
- pt_BR: used for selecting the model name
- label:
- en_US: Model Name
- zh_Hans: 模型名字
- pt_BR: Model Name
- form: form
- options:
- - value: step-1x-turbo
- label:
- en_US: turbo
- zh_Hans: turbo
- pt_BR: turbo
- - value: step-1x-medium
- label:
- en_US: medium
- zh_Hans: medium
- pt_BR: medium
- - value: step-1x-large
- label:
- en_US: large
- zh_Hans: large
- pt_BR: large
- default: step-1x-medium
- name: size
type: select
required: false
diff --git a/api/core/tools/provider/builtin/tavily/tavily.yaml b/api/core/tools/provider/builtin/tavily/tavily.yaml
index 7b25a81848..95820f4d18 100644
--- a/api/core/tools/provider/builtin/tavily/tavily.yaml
+++ b/api/core/tools/provider/builtin/tavily/tavily.yaml
@@ -28,4 +28,4 @@ credentials_for_provider:
en_US: Get your Tavily API key from Tavily
zh_Hans: 从 TavilyApi 获取您的 Tavily API key
pt_BR: Get your Tavily API key from Tavily
- url: https://docs.tavily.com/docs/tavily-api/introduction
+ url: https://docs.tavily.com/docs/welcome
diff --git a/api/core/tools/provider/builtin/xinference/_assets/icon.png b/api/core/tools/provider/builtin/xinference/_assets/icon.png
new file mode 100644
index 0000000000..e58cacbd12
Binary files /dev/null and b/api/core/tools/provider/builtin/xinference/_assets/icon.png differ
diff --git a/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py
new file mode 100644
index 0000000000..847f2730f2
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py
@@ -0,0 +1,412 @@
+import io
+import json
+from base64 import b64decode, b64encode
+from copy import deepcopy
+from typing import Any, Union
+
+from httpx import get, post
+from PIL import Image
+from yarl import URL
+
+from core.tools.entities.common_entities import I18nObject
+from core.tools.entities.tool_entities import (
+ ToolInvokeMessage,
+ ToolParameter,
+ ToolParameterOption,
+)
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.tool.builtin_tool import BuiltinTool
+
+# All commented out parameters default to null
+DRAW_TEXT_OPTIONS = {
+ # Prompts
+ "prompt": "",
+ "negative_prompt": "",
+ # "styles": [],
+ # Seeds
+ "seed": -1,
+ "subseed": -1,
+ "subseed_strength": 0,
+ "seed_resize_from_h": -1,
+ "seed_resize_from_w": -1,
+ # Samplers
+ "sampler_name": "DPM++ 2M",
+ # "scheduler": "",
+ # "sampler_index": "Automatic",
+ # Latent Space Options
+ "batch_size": 1,
+ "n_iter": 1,
+ "steps": 10,
+ "cfg_scale": 7,
+ "width": 512,
+ "height": 512,
+ # "restore_faces": True,
+ # "tiling": True,
+ "do_not_save_samples": False,
+ "do_not_save_grid": False,
+ # "eta": 0,
+ # "denoising_strength": 0.75,
+ # "s_min_uncond": 0,
+ # "s_churn": 0,
+ # "s_tmax": 0,
+ # "s_tmin": 0,
+ # "s_noise": 0,
+ "override_settings": {},
+ "override_settings_restore_afterwards": True,
+ # Refinement Options
+ "refiner_checkpoint": "",
+ "refiner_switch_at": 0,
+ "disable_extra_networks": False,
+ # "firstpass_image": "",
+ # "comments": "",
+ # High-Resolution Options
+ "enable_hr": False,
+ "firstphase_width": 0,
+ "firstphase_height": 0,
+ "hr_scale": 2,
+ # "hr_upscaler": "",
+ "hr_second_pass_steps": 0,
+ "hr_resize_x": 0,
+ "hr_resize_y": 0,
+ # "hr_checkpoint_name": "",
+ # "hr_sampler_name": "",
+ # "hr_scheduler": "",
+ "hr_prompt": "",
+ "hr_negative_prompt": "",
+ # Task Options
+ # "force_task_id": "",
+ # Script Options
+ # "script_name": "",
+ "script_args": [],
+ # Output Options
+ "send_images": True,
+ "save_images": False,
+ "alwayson_scripts": {},
+ # "infotext": "",
+}
+
+
+class StableDiffusionTool(BuiltinTool):
+ def _invoke(
+ self, user_id: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ invoke tools
+ """
+ # base url
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return self.create_text_message("Please input base_url")
+
+ if tool_parameters.get("model"):
+ self.runtime.credentials["model"] = tool_parameters["model"]
+
+ model = self.runtime.credentials.get("model", None)
+ if not model:
+ return self.create_text_message("Please input model")
+
+ # set model
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "options")
+ response = post(
+ url,
+ json={"sd_model_checkpoint": model},
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+ raise ToolProviderCredentialValidationError("Failed to set model; please ask the user to configure a valid model")
+ except Exception as e:
+ raise ToolProviderCredentialValidationError(f"Failed to set model: {e}") from e
+
+ # get image id and image variable
+ image_id = tool_parameters.get("image_id", "")
+ image_variable = self.get_default_image_variable()
+ # Return text2img if there's no image ID or no image variable
+ if not image_id or not image_variable:
+ return self.text2img(base_url=base_url, tool_parameters=tool_parameters)
+
+ # Proceed with image-to-image generation
+ return self.img2img(base_url=base_url, tool_parameters=tool_parameters)
+
+ def validate_models(self):
+ """
+ validate models
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ raise ToolProviderCredentialValidationError("Please input base_url")
+ model = self.runtime.credentials.get("model", None)
+ if not model:
+ raise ToolProviderCredentialValidationError("Please input model")
+
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "sd-models")
+ response = get(url=api_url, timeout=10)
+ if response.status_code == 404:
+ # try draw a picture
+ self._invoke(
+ user_id="test",
+ tool_parameters={
+ "prompt": "a cat",
+ "width": 1024,
+ "height": 1024,
+ "steps": 1,
+ "lora": "",
+ },
+ )
+ elif response.status_code != 200:
+ raise ToolProviderCredentialValidationError("Failed to get models")
+ else:
+ models = [d["model_name"] for d in response.json()]
+ if len([d for d in models if d == model]) > 0:
+ return self.create_text_message(json.dumps(models))
+ else:
+ raise ToolProviderCredentialValidationError(f"model {model} does not exist")
+ except Exception as e:
+ raise ToolProviderCredentialValidationError(f"Failed to get models, {e}")
+
+ def get_sd_models(self) -> list[str]:
+ """
+ get sd models
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return []
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "sd-models")
+ response = get(url=api_url, timeout=120)
+ if response.status_code != 200:
+ return []
+ else:
+ return [d["model_name"] for d in response.json()]
+ except Exception:
+ return []
+
+ def get_sample_methods(self) -> list[str]:
+ """
+ get sample method
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return []
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "samplers")
+ response = get(url=api_url, timeout=120)
+ if response.status_code != 200:
+ return []
+ else:
+ return [d["name"] for d in response.json()]
+ except Exception:
+ return []
+
+ def img2img(
+ self, base_url: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ generate image
+ """
+
+ # Fetch the binary data of the image
+ image_variable = self.get_default_image_variable()
+ image_binary = self.get_variable_file(image_variable.name)
+ if not image_binary:
+ return self.create_text_message("Image not found, please request user to generate image firstly.")
+
+ # Convert image to RGB and save as PNG
+ try:
+ with Image.open(io.BytesIO(image_binary)) as image, io.BytesIO() as buffer:
+ image.convert("RGB").save(buffer, format="PNG")
+ image_binary = buffer.getvalue()
+ except Exception as e:
+ return self.create_text_message(f"Failed to process the image: {str(e)}")
+
+ # copy draw options
+ draw_options = deepcopy(DRAW_TEXT_OPTIONS)
+ # set image options
+ model = tool_parameters.get("model", "")
+ draw_options_image = {
+ "init_images": [b64encode(image_binary).decode("utf-8")],
+ "denoising_strength": 0.9,
+ "restore_faces": False,
+ "script_args": [],
+ "override_settings": {"sd_model_checkpoint": model},
+ "resize_mode": 0,
+ "image_cfg_scale": 0,
+ # "mask": None,
+ "mask_blur_x": 4,
+ "mask_blur_y": 4,
+ "mask_blur": 0,
+ "mask_round": True,
+ "inpainting_fill": 0,
+ "inpaint_full_res": True,
+ "inpaint_full_res_padding": 0,
+ "inpainting_mask_invert": 0,
+ "initial_noise_multiplier": 0,
+ # "latent_mask": None,
+ "include_init_images": True,
+ }
+ # update key and values
+ draw_options.update(draw_options_image)
+ draw_options.update(tool_parameters)
+
+ # get prompt lora model
+ prompt = tool_parameters.get("prompt", "")
+ lora = tool_parameters.get("lora", "")
+ model = tool_parameters.get("model", "")
+ if lora:
+ draw_options["prompt"] = f"{lora},{prompt}"
+ else:
+ draw_options["prompt"] = prompt
+
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "img2img")
+ response = post(
+ url,
+ json=draw_options,
+ timeout=120,
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+ return self.create_text_message("Failed to generate image")
+
+ image = response.json()["images"][0]
+
+ return self.create_blob_message(
+ blob=b64decode(image),
+ meta={"mime_type": "image/png"},
+ save_as=self.VariableKey.IMAGE.value,
+ )
+
+ except Exception as e:
+ return self.create_text_message("Failed to generate image")
+
+ def text2img(
+ self, base_url: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ generate image
+ """
+ # copy draw options
+ draw_options = deepcopy(DRAW_TEXT_OPTIONS)
+ draw_options.update(tool_parameters)
+ # get prompt lora model
+ prompt = tool_parameters.get("prompt", "")
+ lora = tool_parameters.get("lora", "")
+ model = tool_parameters.get("model", "")
+ if lora:
+ draw_options["prompt"] = f"{lora},{prompt}"
+ else:
+ draw_options["prompt"] = prompt
+ draw_options["override_settings"]["sd_model_checkpoint"] = model
+
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "txt2img")
+ response = post(
+ url,
+ json=draw_options,
+ timeout=120,
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+ return self.create_text_message("Failed to generate image")
+
+ image = response.json()["images"][0]
+
+ return self.create_blob_message(
+ blob=b64decode(image),
+ meta={"mime_type": "image/png"},
+ save_as=self.VariableKey.IMAGE.value,
+ )
+
+ except Exception as e:
+ return self.create_text_message("Failed to generate image")
+
+ def get_runtime_parameters(self) -> list[ToolParameter]:
+ parameters = [
+ ToolParameter(
+ name="prompt",
+ label=I18nObject(en_US="Prompt", zh_Hans="Prompt"),
+ human_description=I18nObject(
+ en_US="Image prompt, you can check the official documentation of Stable Diffusion",
+ zh_Hans="图像提示词,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.STRING,
+ form=ToolParameter.ToolParameterForm.LLM,
+ llm_description="Image prompt of Stable Diffusion, you should describe the image you want to generate"
+ " as a list of words as possible as detailed, the prompt must be written in English.",
+ required=True,
+ ),
+ ]
+ if len(self.list_default_image_variables()) != 0:
+ parameters.append(
+ ToolParameter(
+ name="image_id",
+ label=I18nObject(en_US="image_id", zh_Hans="image_id"),
+ human_description=I18nObject(
+ en_US="Image id of the image you want to generate based on, if you want to generate image based"
+ " on the default image, you can leave this field empty.",
+ zh_Hans="您想要生成的图像的图像 ID,如果您想要基于默认图像生成图像,则可以将此字段留空。",
+ ),
+ type=ToolParameter.ToolParameterType.STRING,
+ form=ToolParameter.ToolParameterForm.LLM,
+ llm_description="Image id of the original image, you can leave this field empty if you want to"
+ " generate a new image.",
+ required=True,
+ options=[
+ ToolParameterOption(value=i.name, label=I18nObject(en_US=i.name, zh_Hans=i.name))
+ for i in self.list_default_image_variables()
+ ],
+ )
+ )
+
+ if self.runtime.credentials:
+ try:
+ models = self.get_sd_models()
+ if len(models) != 0:
+ parameters.append(
+ ToolParameter(
+ name="model",
+ label=I18nObject(en_US="Model", zh_Hans="Model"),
+ human_description=I18nObject(
+ en_US="Model of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ zh_Hans="Stable Diffusion 的模型,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.SELECT,
+ form=ToolParameter.ToolParameterForm.FORM,
+ llm_description="Model of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ required=True,
+ default=models[0],
+ options=[
+ ToolParameterOption(value=i, label=I18nObject(en_US=i, zh_Hans=i)) for i in models
+ ],
+ )
+ )
+
+ except Exception:
+ pass
+
+ sample_methods = self.get_sample_methods()
+ if len(sample_methods) != 0:
+ parameters.append(
+ ToolParameter(
+ name="sampler_name",
+ label=I18nObject(en_US="Sampling method", zh_Hans="Sampling method"),
+ human_description=I18nObject(
+ en_US="Sampling method of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ zh_Hans="Stable Diffusion 的Sampling method,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.SELECT,
+ form=ToolParameter.ToolParameterForm.FORM,
+ llm_description="Sampling method of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ required=True,
+ default=sample_methods[0],
+ options=[
+ ToolParameterOption(value=i, label=I18nObject(en_US=i, zh_Hans=i)) for i in sample_methods
+ ],
+ )
+ )
+ return parameters
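For reference, the tool above talks to a Stable Diffusion web-ui-compatible API served by Xinference. A standalone sketch of the same `txt2img` request it issues; the host, API key, and prompt are placeholders:

```python
# Standalone version of the txt2img request issued by StableDiffusionTool.
# base_url and the bearer token are placeholders for a real deployment.
from base64 import b64decode

from httpx import post
from yarl import URL

base_url = "http://localhost:9997"
url = str(URL(base_url) / "sdapi" / "v1" / "txt2img")
response = post(
    url,
    json={"prompt": "a cat", "steps": 10, "width": 512, "height": 512},
    timeout=120,
    headers={"Authorization": "Bearer sk-placeholder"},
)
if response.status_code == 200:
    png_bytes = b64decode(response.json()["images"][0])  # first image as PNG bytes
```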
diff --git a/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml
new file mode 100644
index 0000000000..4f1d17f175
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml
@@ -0,0 +1,87 @@
+identity:
+ name: stable_diffusion
+ author: xinference
+ label:
+ en_US: Stable Diffusion
+ zh_Hans: Stable Diffusion
+description:
+ human:
+ en_US: Generate images using Stable Diffusion models.
+ zh_Hans: 使用 Stable Diffusion 模型生成图片。
+ llm: draw the image you want based on your prompt.
+parameters:
+ - name: prompt
+ type: string
+ required: true
+ label:
+ en_US: Prompt
+ zh_Hans: 提示词
+ human_description:
+ en_US: Image prompt
+ zh_Hans: 图像提示词
+ llm_description: Image prompt of Stable Diffusion, you should describe the image you want to generate as a detailed list of words; the prompt must be written in English.
+ form: llm
+ - name: model
+ type: string
+ required: false
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ human_description:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ form: form
+ - name: lora
+ type: string
+ required: false
+ label:
+ en_US: Lora
+ zh_Hans: Lora
+ human_description:
+ en_US: Lora
+ zh_Hans: Lora
+ form: form
+ - name: steps
+ type: number
+ required: false
+ label:
+ en_US: Steps
+ zh_Hans: 步数
+ human_description:
+ en_US: Steps
+ zh_Hans: 步数
+ form: form
+ default: 10
+ - name: width
+ type: number
+ required: false
+ label:
+ en_US: Width
+ zh_Hans: 宽度
+ human_description:
+ en_US: Width
+ zh_Hans: 宽度
+ form: form
+ default: 1024
+ - name: height
+ type: number
+ required: false
+ label:
+ en_US: Height
+ zh_Hans: 高度
+ human_description:
+ en_US: Height
+ zh_Hans: 高度
+ form: form
+ default: 1024
+ - name: negative_prompt
+ type: string
+ required: false
+ label:
+ en_US: Negative prompt
+ zh_Hans: 负面提示词
+ human_description:
+ en_US: Negative prompt
+ zh_Hans: 负面提示词
+ form: form
+ default: bad art, ugly, deformed, watermark, duplicated, discontinuous lines
diff --git a/api/core/tools/provider/builtin/xinference/xinference.py b/api/core/tools/provider/builtin/xinference/xinference.py
new file mode 100644
index 0000000000..7c2428cc00
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/xinference.py
@@ -0,0 +1,18 @@
+import requests
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class XinferenceProvider(BuiltinToolProviderController):
+ def _validate_credentials(self, credentials: dict) -> None:
+ base_url = credentials.get("base_url")
+ api_key = credentials.get("api_key")
+ model = credentials.get("model")
+ res = requests.post(
+ f"{base_url}/sdapi/v1/options",
+ headers={"Authorization": f"Bearer {api_key}"},
+ json={"sd_model_checkpoint": model},
+ )
+ if res.status_code != 200:
+ raise ToolProviderCredentialValidationError("Xinference API key is invalid")
diff --git a/api/core/tools/provider/builtin/xinference/xinference.yaml b/api/core/tools/provider/builtin/xinference/xinference.yaml
new file mode 100644
index 0000000000..19aaf5cbd1
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/xinference.yaml
@@ -0,0 +1,40 @@
+identity:
+ author: xinference
+ name: xinference
+ label:
+ en_US: Xinference
+ zh_Hans: Xinference
+ description:
+ zh_Hans: Xinference 提供的兼容 Stable Diffusion web ui 的图片生成 API。
+ en_US: Stable Diffusion web ui compatible API provided by Xinference.
+ icon: icon.png
+ tags:
+ - image
+credentials_for_provider:
+ base_url:
+ type: secret-input
+ required: true
+ label:
+ en_US: Base URL
+ zh_Hans: Xinference 服务器的 Base URL
+ placeholder:
+ en_US: Please input Xinference server's Base URL
+ zh_Hans: 请输入 Xinference 服务器的 Base URL
+ model:
+ type: text-input
+ required: true
+ label:
+ en_US: Model
+ zh_Hans: 模型
+ placeholder:
+ en_US: Please input your model name
+ zh_Hans: 请输入你的模型名称
+ api_key:
+ type: secret-input
+ required: true
+ label:
+ en_US: API Key
+ zh_Hans: Xinference 服务器的 API Key
+ placeholder:
+ en_US: Please input Xinference server's API Key
+ zh_Hans: 请输入 Xinference 服务器的 API Key
diff --git a/api/core/tools/provider/builtin/youtube/youtube.py b/api/core/tools/provider/builtin/youtube/youtube.py
index aad876491c..07e430bcbf 100644
--- a/api/core/tools/provider/builtin/youtube/youtube.py
+++ b/api/core/tools/provider/builtin/youtube/youtube.py
@@ -13,7 +13,7 @@ class YahooFinanceProvider(BuiltinToolProviderController):
).invoke(
user_id="",
tool_parameters={
- "channel": "TOKYO GIRLS COLLECTION",
+ "channel": "UC2JZCsZSOudXA08cMMRCL9g",
"start_date": "2020-01-01",
"end_date": "2024-12-31",
},
diff --git a/api/core/tools/provider/tool_provider.py b/api/core/tools/provider/tool_provider.py
index 05c88b904e..321b212014 100644
--- a/api/core/tools/provider/tool_provider.py
+++ b/api/core/tools/provider/tool_provider.py
@@ -153,6 +153,9 @@ class ToolProviderController(BaseModel, ABC):
# check type
credential_schema = credentials_need_to_validate[credential_name]
+ if not credential_schema.required and credentials[credential_name] is None:
+ continue
+
if credential_schema.type in {
ToolProviderCredentials.CredentialsType.SECRET_INPUT,
ToolProviderCredentials.CredentialsType.TEXT_INPUT,
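The added guard lets optional credentials be omitted (left as `None`) without tripping the type check that follows. A condensed sketch of the control flow, using a stand-in schema object:

```python
# Optional credentials explicitly set to None are skipped before validation.
from dataclasses import dataclass

@dataclass
class Schema:  # stand-in for the real credential schema
    required: bool

schemas = {"api_key": Schema(required=True), "base_url": Schema(required=False)}
credentials = {"api_key": "sk-xxx", "base_url": None}

for name, value in credentials.items():
    if not schemas[name].required and value is None:
        continue  # optional and unset: nothing to validate
    assert isinstance(value, str)  # placeholder for the real type check
```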
diff --git a/api/core/workflow/nodes/end/end_stream_processor.py b/api/core/workflow/nodes/end/end_stream_processor.py
index 0366d7965d..1aecf863ac 100644
--- a/api/core/workflow/nodes/end/end_stream_processor.py
+++ b/api/core/workflow/nodes/end/end_stream_processor.py
@@ -22,8 +22,8 @@ class EndStreamProcessor(StreamProcessor):
for end_node_id, _ in self.end_stream_param.end_stream_variable_selector_mapping.items():
self.route_position[end_node_id] = 0
self.current_stream_chunk_generating_node_ids: dict[str, list[str]] = {}
- self.has_outputed = False
- self.outputed_node_ids = set()
+ self.has_output = False
+ self.output_node_ids = set()
def process(self, generator: Generator[GraphEngineEvent, None, None]) -> Generator[GraphEngineEvent, None, None]:
for event in generator:
@@ -34,11 +34,11 @@ class EndStreamProcessor(StreamProcessor):
yield event
elif isinstance(event, NodeRunStreamChunkEvent):
if event.in_iteration_id:
- if self.has_outputed and event.node_id not in self.outputed_node_ids:
+ if self.has_output and event.node_id not in self.output_node_ids:
event.chunk_content = "\n" + event.chunk_content
- self.outputed_node_ids.add(event.node_id)
- self.has_outputed = True
+ self.output_node_ids.add(event.node_id)
+ self.has_output = True
yield event
continue
@@ -53,11 +53,11 @@ class EndStreamProcessor(StreamProcessor):
)
if stream_out_end_node_ids:
- if self.has_outputed and event.node_id not in self.outputed_node_ids:
+ if self.has_output and event.node_id not in self.output_node_ids:
event.chunk_content = "\n" + event.chunk_content
- self.outputed_node_ids.add(event.node_id)
- self.has_outputed = True
+ self.output_node_ids.add(event.node_id)
+ self.has_output = True
yield event
elif isinstance(event, NodeRunSucceededEvent):
yield event
@@ -124,11 +124,11 @@ class EndStreamProcessor(StreamProcessor):
if text:
current_node_id = value_selector[0]
- if self.has_outputed and current_node_id not in self.outputed_node_ids:
+ if self.has_output and current_node_id not in self.output_node_ids:
text = "\n" + text
- self.outputed_node_ids.add(current_node_id)
- self.has_outputed = True
+ self.output_node_ids.add(current_node_id)
+ self.has_output = True
yield NodeRunStreamChunkEvent(
id=event.id,
node_id=event.node_id,
diff --git a/api/poetry.lock b/api/poetry.lock
index 78816683d8..bce21fb547 100644
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -2333,13 +2333,13 @@ develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk",
[[package]]
name = "elasticsearch"
-version = "8.14.0"
+version = "8.15.1"
description = "Python client for Elasticsearch"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "elasticsearch-8.14.0-py3-none-any.whl", hash = "sha256:cef8ef70a81af027f3da74a4f7d9296b390c636903088439087b8262a468c130"},
- {file = "elasticsearch-8.14.0.tar.gz", hash = "sha256:aa2490029dd96f4015b333c1827aa21fd6c0a4d223b00dfb0fe933b8d09a511b"},
+ {file = "elasticsearch-8.15.1-py3-none-any.whl", hash = "sha256:02a0476e98768a30d7926335fc0d305c04fdb928eea1354c6e6040d8c2814569"},
+ {file = "elasticsearch-8.15.1.tar.gz", hash = "sha256:40c0d312f8adf8bdc81795bc16a0b546ddf544cb1f90e829a244e4780c4dbfd8"},
]
[package.dependencies]
@@ -2347,7 +2347,10 @@ elastic-transport = ">=8.13,<9"
[package.extras]
async = ["aiohttp (>=3,<4)"]
+dev = ["aiohttp", "black", "build", "coverage", "isort", "jinja2", "mapbox-vector-tile", "nox", "numpy", "orjson", "pandas", "pyarrow", "pytest", "pytest-asyncio", "pytest-cov", "python-dateutil", "pyyaml (>=5.4)", "requests (>=2,<3)", "simsimd", "twine", "unasync"]
+docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=2.0)"]
orjson = ["orjson (>=3)"]
+pyarrow = ["pyarrow (>=1)"]
requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"]
vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"]
@@ -4135,6 +4138,20 @@ files = [
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
]
+[[package]]
+name = "jsonlines"
+version = "4.0.0"
+description = "Library with helpers for the jsonlines file format"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55"},
+ {file = "jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74"},
+]
+
+[package.dependencies]
+attrs = ">=19.2.0"
+
[[package]]
name = "jsonpath-ng"
version = "1.6.1"
@@ -4469,6 +4486,24 @@ files = [
{file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"},
]
+[[package]]
+name = "loguru"
+version = "0.7.2"
+description = "Python logging made (stupidly) simple"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"},
+ {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"},
+]
+
+[package.dependencies]
+colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""}
+win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
+
+[package.extras]
+dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"]
+
[[package]]
name = "lxml"
version = "5.3.0"
@@ -5320,6 +5355,36 @@ plot = ["matplotlib"]
tgrep = ["pyparsing"]
twitter = ["twython"]
+[[package]]
+name = "nomic"
+version = "3.1.2"
+description = "The official Nomic python client."
+optional = false
+python-versions = "*"
+files = [
+ {file = "nomic-3.1.2.tar.gz", hash = "sha256:2de1ab1dcf2429011c92987bb2f1eafe1a3a4901c3185b18f994bf89616f606d"},
+]
+
+[package.dependencies]
+click = "*"
+jsonlines = "*"
+loguru = "*"
+numpy = "*"
+pandas = "*"
+pillow = "*"
+pyarrow = "*"
+pydantic = "*"
+pyjwt = "*"
+requests = "*"
+rich = "*"
+tqdm = "*"
+
+[package.extras]
+all = ["nomic[aws,local]"]
+aws = ["boto3", "sagemaker"]
+dev = ["black (==24.3.0)", "cairosvg", "coverage", "isort", "mkautodoc", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]", "myst-parser", "nomic[all]", "pandas", "pillow", "pylint", "pyright", "pytest", "pytorch-lightning", "twine"]
+local = ["gpt4all (>=2.5.0,<3)"]
+
[[package]]
name = "novita-client"
version = "0.5.7"
@@ -9919,6 +9984,20 @@ files = [
beautifulsoup4 = "*"
requests = ">=2.0.0,<3.0.0"
+[[package]]
+name = "win32-setctime"
+version = "1.1.0"
+description = "A small Python utility to set file creation time on Windows"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
+ {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
+]
+
+[package.extras]
+dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
+
[[package]]
name = "wrapt"
version = "1.16.0"
@@ -10422,4 +10501,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
-content-hash = "eb7ef7be5c7790e214f37f17f92b69407ad557cb80055ef7e49e36eb51b3fca6"
+content-hash = "69b42bb1ff033f14e199fee8335356275099421d72bbd7037b7a991ea65cae08"
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 506f379aaf..f004865d5f 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -28,7 +28,6 @@ select = [
"PLR0402", # manual-from-import
"PLR1711", # useless-return
"PLR1714", # repeated-equality-comparison
- "PLR6201", # literal-membership
"RUF019", # unnecessary-key-check
"RUF100", # unused-noqa
"RUF101", # redirected-noqa
@@ -101,6 +100,7 @@ exclude = [
OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa"
FIREWORKS_API_KEY = "fw_aaaaaaaaaaaaaaaaaaaa"
+NOMIC_API_KEY = "nk-aaaaaaaaaaaaaaaaaaaa"
AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com"
AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94"
ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz"
@@ -122,6 +122,7 @@ CODE_EXECUTION_API_KEY = "dify-sandbox"
FIRECRAWL_API_KEY = "fc-"
TEI_EMBEDDING_SERVER_URL = "http://a.abc.com:11451"
TEI_RERANK_SERVER_URL = "http://a.abc.com:11451"
+MIXEDBREAD_API_KEY = "mk-aaaaaaaaaaaaaaaaaaaa"
[tool.poetry]
name = "dify-api"
@@ -218,6 +219,7 @@ azure-ai-inference = "^1.0.0b3"
volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"}
oci = "^2.133.0"
tos = "^2.7.1"
+nomic = "^3.1.2"
[tool.poetry.group.indriect.dependencies]
kaleido = "0.2.1"
rank-bm25 = "~0.2.2"
@@ -251,7 +253,7 @@ alibabacloud_gpdb20160503 = "~3.8.0"
alibabacloud_tea_openapi = "~0.3.9"
chromadb = "0.5.1"
clickhouse-connect = "~0.7.16"
-elasticsearch = "8.14.0"
+elasticsearch = "~8.15.1"
oracledb = "~2.2.1"
pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] }
pgvector = "0.2.5"
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index 30c010ef29..e96f06ed40 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -1100,8 +1100,8 @@ class DocumentService:
DocumentService.data_source_args_validate(args)
DocumentService.process_rule_args_validate(args)
else:
- if ("data_source" not in args and not args["data_source"]) and (
- "process_rule" not in args and not args["process_rule"]
+ if ("data_source" not in args or not args["data_source"]) and (
+ "process_rule" not in args or not args["process_rule"]
):
raise ValueError("Data source or Process rule is required")
else:
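
The `and`-to-`or` change above is a genuine logic fix: with `and`, a payload missing the key short-circuits into `args["data_source"]` and raises `KeyError`, while a present-but-empty value never satisfies the first clause, so the guard could never fire correctly. A minimal sketch of the corrected predicate, using a made-up `args` payload rather than the real validator input:

```python
# Sketch of the dataset_service guard after the fix; `args` is a hypothetical
# request payload, not the actual validated arguments object.

def needs_source_or_rule(args: dict) -> bool:
    # Old check used `and`: a missing key still evaluated args["data_source"]
    # and raised KeyError, and a present-but-empty value was never caught.
    # New check uses `or`: it short-circuits safely on a missing key and also
    # catches the present-but-falsy case.
    data_source_missing = "data_source" not in args or not args["data_source"]
    process_rule_missing = "process_rule" not in args or not args["process_rule"]
    return data_source_missing and process_rule_missing

assert needs_source_or_rule({})                   # both keys absent
assert needs_source_or_rule({"data_source": {}})  # present but empty
assert not needs_source_or_rule(
    {"data_source": {"type": "upload_file"}, "process_rule": {"mode": "automatic"}}
)
```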
diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py
index 7ae1b9f231..2bc48c4185 100644
--- a/api/services/tools/tools_transform_service.py
+++ b/api/services/tools/tools_transform_service.py
@@ -74,12 +74,14 @@ class ToolTransformService:
en_US=provider_controller.identity.description.en_US,
zh_Hans=provider_controller.identity.description.zh_Hans,
pt_BR=provider_controller.identity.description.pt_BR,
+ ja_JP=provider_controller.identity.description.ja_JP,
),
icon=provider_controller.identity.icon,
label=I18nObject(
en_US=provider_controller.identity.label.en_US,
zh_Hans=provider_controller.identity.label.zh_Hans,
pt_BR=provider_controller.identity.label.pt_BR,
+ ja_JP=provider_controller.identity.label.ja_JP,
),
type=ToolProviderType.BUILT_IN,
masked_credentials={},
diff --git a/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py b/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py
new file mode 100644
index 0000000000..281e866e45
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py
@@ -0,0 +1,59 @@
+import os
+from collections.abc import Callable
+from typing import Any, Literal, Union
+
+import pytest
+
+# import monkeypatch
+from _pytest.monkeypatch import MonkeyPatch
+from nomic import embed
+
+
+def create_embedding(texts: list[str], model: str, **kwargs: Any) -> dict:
+ texts_len = len(texts)
+
+ foo_embedding_sample = 0.123456
+
+ combined = {
+ "embeddings": [[foo_embedding_sample for _ in range(768)] for _ in range(texts_len)],
+ "usage": {"prompt_tokens": texts_len, "total_tokens": texts_len},
+ "model": model,
+ "inference_mode": "remote",
+ }
+
+ return combined
+
+
+def mock_nomic(
+ monkeypatch: MonkeyPatch,
+ methods: list[Literal["text_embedding"]],
+) -> Callable[[], None]:
+ """
+ mock nomic module
+
+ :param monkeypatch: pytest monkeypatch fixture
+ :return: unpatch function
+ """
+
+ def unpatch() -> None:
+ monkeypatch.undo()
+
+ if "text_embedding" in methods:
+ monkeypatch.setattr(embed, "text", create_embedding)
+
+ return unpatch
+
+
+MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
+
+
+@pytest.fixture
+def setup_nomic_mock(request, monkeypatch):
+ methods = request.param if hasattr(request, "param") else []
+ if MOCK:
+ unpatch = mock_nomic(monkeypatch, methods=methods)
+
+ yield
+
+ if MOCK:
+ unpatch()
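
The fixture above only patches `nomic.embed` when `MOCK_SWITCH=true`; otherwise calls go to the live Nomic API. A hypothetical test (the model name mirrors the Nomic tests later in this diff) showing how the fixture is requested via indirect parametrization:

```python
# Hypothetical usage of setup_nomic_mock; only meaningful with MOCK_SWITCH=true,
# otherwise embed.text() hits the real Nomic endpoint.
import pytest
from nomic import embed

from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock


@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
def test_embed_is_patched(setup_nomic_mock):
    result = embed.text(texts=["hello"], model="nomic-embed-text-v1.5")
    # The mock returns one 768-dim vector per input and counts one token per text.
    assert len(result["embeddings"]) == 1
    assert result["usage"]["total_tokens"] == 1
```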
diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py b/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py
new file mode 100644
index 0000000000..7bf723b3a9
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py
@@ -0,0 +1,54 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.fireworks.text_embedding.text_embedding import FireworksTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["text_embedding"]], indirect=True)
+def test_validate_credentials(setup_openai_mock):
+ model = FireworksTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model="nomic-ai/nomic-embed-text-v1.5", credentials={"fireworks_api_key": "invalid_key"}
+ )
+
+ model.validate_credentials(
+ model="nomic-ai/nomic-embed-text-v1.5", credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}
+ )
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["text_embedding"]], indirect=True)
+def test_invoke_model(setup_openai_mock):
+ model = FireworksTextEmbeddingModel()
+
+ result = model.invoke(
+ model="nomic-ai/nomic-embed-text-v1.5",
+ credentials={
+ "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY"),
+ },
+ texts=["hello", "world", " ".join(["long_text"] * 100), " ".join(["another_long_text"] * 100)],
+ user="foo",
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert len(result.embeddings) == 4
+ assert result.usage.total_tokens == 2
+
+
+def test_get_num_tokens():
+ model = FireworksTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model="nomic-ai/nomic-embed-text-v1.5",
+ credentials={
+ "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY"),
+ },
+ texts=["hello", "world"],
+ )
+
+ assert num_tokens == 2
diff --git a/api/tests/integration_tests/model_runtime/mixedbread/__init__.py b/api/tests/integration_tests/model_runtime/mixedbread/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/tests/integration_tests/model_runtime/mixedbread/test_provider.py b/api/tests/integration_tests/model_runtime/mixedbread/test_provider.py
new file mode 100644
index 0000000000..25c9f3ce8d
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/mixedbread/test_provider.py
@@ -0,0 +1,28 @@
+import os
+from unittest.mock import Mock, patch
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.mixedbread.mixedbread import MixedBreadProvider
+
+
+def test_validate_provider_credentials():
+ provider = MixedBreadProvider()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ provider.validate_provider_credentials(credentials={"api_key": "hahahaha"})
+ with patch("requests.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 3, "total_tokens": 3},
+ "model": "mixedbread-ai/mxbai-embed-large-v1",
+ "data": [{"embedding": [0.23333 for _ in range(1024)], "index": 0, "object": "embedding"}],
+ "object": "list",
+ "normalized": "true",
+ "encoding_format": "float",
+ "dimensions": 1024,
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ provider.validate_provider_credentials(credentials={"api_key": os.environ.get("MIXEDBREAD_API_KEY")})
diff --git a/api/tests/integration_tests/model_runtime/mixedbread/test_rerank.py b/api/tests/integration_tests/model_runtime/mixedbread/test_rerank.py
new file mode 100644
index 0000000000..b65aab74aa
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/mixedbread/test_rerank.py
@@ -0,0 +1,100 @@
+import os
+from unittest.mock import Mock, patch
+
+import pytest
+
+from core.model_runtime.entities.rerank_entities import RerankResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.mixedbread.rerank.rerank import MixedBreadRerankModel
+
+
+def test_validate_credentials():
+ model = MixedBreadRerankModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model="mxbai-rerank-large-v1",
+ credentials={"api_key": "invalid_key"},
+ )
+ with patch("httpx.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 86, "total_tokens": 86},
+ "model": "mixedbread-ai/mxbai-rerank-large-v1",
+ "data": [
+ {
+ "index": 0,
+ "score": 0.06762695,
+ "input": "Carson City is the capital city of the American state of Nevada. At the 2010 United "
+ "States Census, Carson City had a population of 55,274.",
+ "object": "text_document",
+ },
+ {
+ "index": 1,
+ "score": 0.057403564,
+ "input": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific "
+ "Ocean that are a political division controlled by the United States. Its capital is "
+ "Saipan.",
+ "object": "text_document",
+ },
+ ],
+ "object": "list",
+ "top_k": 2,
+ "return_input": True,
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ model.validate_credentials(
+ model="mxbai-rerank-large-v1",
+ credentials={
+ "api_key": os.environ.get("MIXEDBREAD_API_KEY"),
+ },
+ )
+
+
+def test_invoke_model():
+ model = MixedBreadRerankModel()
+ with patch("httpx.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 56, "total_tokens": 56},
+ "model": "mixedbread-ai/mxbai-rerank-large-v1",
+ "data": [
+ {
+ "index": 0,
+ "score": 0.6044922,
+ "input": "Kasumi is a girl name of Japanese origin meaning mist.",
+ "object": "text_document",
+ },
+ {
+ "index": 1,
+ "score": 0.0703125,
+ "input": "Her music is a kawaii bass, a mix of future bass, pop, and kawaii music and she leads a "
+ "team named PopiParty.",
+ "object": "text_document",
+ },
+ ],
+ "object": "list",
+ "top_k": 2,
+ "return_input": "true",
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ result = model.invoke(
+ model="mxbai-rerank-large-v1",
+ credentials={
+ "api_key": os.environ.get("MIXEDBREAD_API_KEY"),
+ },
+ query="Who is Kasumi?",
+ docs=[
+ "Kasumi is a girl name of Japanese origin meaning mist.",
+ "Her music is a kawaii bass, a mix of future bass, pop, and kawaii music and she leads a team named "
+ "PopiParty.",
+ ],
+ score_threshold=0.5,
+ )
+
+ assert isinstance(result, RerankResult)
+ assert len(result.docs) == 1
+ assert result.docs[0].index == 0
+ assert result.docs[0].score >= 0.5
diff --git a/api/tests/integration_tests/model_runtime/mixedbread/test_text_embedding.py b/api/tests/integration_tests/model_runtime/mixedbread/test_text_embedding.py
new file mode 100644
index 0000000000..ca97a18951
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/mixedbread/test_text_embedding.py
@@ -0,0 +1,78 @@
+import os
+from unittest.mock import Mock, patch
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.mixedbread.text_embedding.text_embedding import MixedBreadTextEmbeddingModel
+
+
+def test_validate_credentials():
+ model = MixedBreadTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(model="mxbai-embed-large-v1", credentials={"api_key": "invalid_key"})
+ with patch("requests.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 3, "total_tokens": 3},
+ "model": "mixedbread-ai/mxbai-embed-large-v1",
+ "data": [{"embedding": [0.23333 for _ in range(1024)], "index": 0, "object": "embedding"}],
+ "object": "list",
+ "normalized": "true",
+ "encoding_format": "float",
+ "dimensions": 1024,
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ model.validate_credentials(
+ model="mxbai-embed-large-v1", credentials={"api_key": os.environ.get("MIXEDBREAD_API_KEY")}
+ )
+
+
+def test_invoke_model():
+ model = MixedBreadTextEmbeddingModel()
+
+ with patch("requests.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 6, "total_tokens": 6},
+ "model": "mixedbread-ai/mxbai-embed-large-v1",
+ "data": [
+ {"embedding": [0.23333 for _ in range(1024)], "index": 0, "object": "embedding"},
+ {"embedding": [0.23333 for _ in range(1024)], "index": 1, "object": "embedding"},
+ ],
+ "object": "list",
+ "normalized": "true",
+ "encoding_format": "float",
+ "dimensions": 1024,
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ result = model.invoke(
+ model="mxbai-embed-large-v1",
+ credentials={
+ "api_key": os.environ.get("MIXEDBREAD_API_KEY"),
+ },
+ texts=["hello", "world"],
+ user="abc-123",
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert len(result.embeddings) == 2
+ assert result.usage.total_tokens == 6
+
+
+def test_get_num_tokens():
+ model = MixedBreadTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model="mxbai-embed-large-v1",
+ credentials={
+ "api_key": os.environ.get("MIXEDBREAD_API_KEY"),
+ },
+ texts=["ping"],
+ )
+
+ assert num_tokens == 1
diff --git a/api/tests/integration_tests/model_runtime/nomic/__init__.py b/api/tests/integration_tests/model_runtime/nomic/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py b/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py
new file mode 100644
index 0000000000..52dc96ee95
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py
@@ -0,0 +1,62 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_validate_credentials(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": "invalid_key",
+ },
+ )
+
+ model.validate_credentials(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ )
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_invoke_model(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ result = model.invoke(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ texts=["hello", "world"],
+ user="foo",
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert result.model == "nomic-embed-text-v1.5"
+ assert len(result.embeddings) == 2
+ assert result.usage.total_tokens == 2
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_get_num_tokens(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ texts=["hello", "world"],
+ )
+
+ assert num_tokens == 2
diff --git a/api/tests/integration_tests/model_runtime/nomic/test_provider.py b/api/tests/integration_tests/model_runtime/nomic/test_provider.py
new file mode 100644
index 0000000000..6cad400c06
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/nomic/test_provider.py
@@ -0,0 +1,22 @@
+import os
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.nomic.nomic import NomicAtlasProvider
+from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_validate_provider_credentials(setup_nomic_mock):
+ provider = NomicAtlasProvider()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ provider.validate_provider_credentials(credentials={})
+
+ provider.validate_provider_credentials(
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ )
diff --git a/dev/pytest/pytest_model_runtime.sh b/dev/pytest/pytest_model_runtime.sh
index 4c1c6bf4f3..b60ff64fdc 100755
--- a/dev/pytest/pytest_model_runtime.sh
+++ b/dev/pytest/pytest_model_runtime.sh
@@ -7,4 +7,6 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
api/tests/integration_tests/model_runtime/upstage \
- api/tests/integration_tests/model_runtime/fireworks
+ api/tests/integration_tests/model_runtime/fireworks \
+ api/tests/integration_tests/model_runtime/nomic \
+ api/tests/integration_tests/model_runtime/mixedbread
diff --git a/docker/.env.example b/docker/.env.example
index c892c15636..d43c3edc7e 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -346,7 +346,7 @@ VOLCENGINE_TOS_REGION=your-region
# ------------------------------
# The type of vector store to use.
-# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`.
+# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`.
VECTOR_STORE=weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
@@ -385,13 +385,30 @@ MYSCALE_PASSWORD=
MYSCALE_DATABASE=dify
MYSCALE_FTS_PARAMS=
-# pgvector configurations, only available when VECTOR_STORE is `pgvecto-rs or pgvector`
+# pgvector configurations, only available when VECTOR_STORE is `pgvector`
PGVECTOR_HOST=pgvector
PGVECTOR_PORT=5432
PGVECTOR_USER=postgres
PGVECTOR_PASSWORD=difyai123456
PGVECTOR_DATABASE=dify
+# pgvecto-rs configurations, only available when VECTOR_STORE is `pgvecto-rs`
+PGVECTO_RS_HOST=pgvecto-rs
+PGVECTO_RS_PORT=5432
+PGVECTO_RS_USER=postgres
+PGVECTO_RS_PASSWORD=difyai123456
+PGVECTO_RS_DATABASE=dify
+
+# analyticdb configurations, only available when VECTOR_STORE is `analyticdb`
+ANALYTICDB_KEY_ID=your-ak
+ANALYTICDB_KEY_SECRET=your-sk
+ANALYTICDB_REGION_ID=cn-hangzhou
+ANALYTICDB_INSTANCE_ID=gp-ab123456
+ANALYTICDB_ACCOUNT=testaccount
+ANALYTICDB_PASSWORD=testpassword
+ANALYTICDB_NAMESPACE=dify
+ANALYTICDB_NAMESPACE_PASSWORD=difypassword
+
# TiDB vector configurations, only available when VECTOR_STORE is `tidb`
TIDB_VECTOR_HOST=tidb
TIDB_VECTOR_PORT=4000
@@ -563,6 +580,15 @@ CODE_MAX_STRING_ARRAY_LENGTH=30
CODE_MAX_OBJECT_ARRAY_LENGTH=30
CODE_MAX_NUMBER_ARRAY_LENGTH=1000
+# Workflow runtime configuration
+WORKFLOW_MAX_EXECUTION_STEPS=500
+WORKFLOW_MAX_EXECUTION_TIME=1200
+WORKFLOW_CALL_MAX_DEPTH=5
+
+# HTTP request node in workflow configuration
+HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760
+HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576
+
# SSRF Proxy server HTTP URL
SSRF_PROXY_HTTP_URL=http://ssrf_proxy:3128
# SSRF Proxy server HTTPS URL
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index e72c3724f9..95e271a0e9 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -202,8 +202,13 @@ x-shared-env: &shared-api-worker-env
CODE_MAX_STRING_ARRAY_LENGTH: ${CODE_MAX_STRING_ARRAY_LENGTH:-30}
CODE_MAX_OBJECT_ARRAY_LENGTH: ${CODE_MAX_OBJECT_ARRAY_LENGTH:-30}
CODE_MAX_NUMBER_ARRAY_LENGTH: ${CODE_MAX_NUMBER_ARRAY_LENGTH:-1000}
+ WORKFLOW_MAX_EXECUTION_STEPS: ${WORKFLOW_MAX_EXECUTION_STEPS:-500}
+ WORKFLOW_MAX_EXECUTION_TIME: ${WORKFLOW_MAX_EXECUTION_TIME:-1200}
+  WORKFLOW_CALL_MAX_DEPTH: ${WORKFLOW_CALL_MAX_DEPTH:-5}
SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128}
SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128}
+ HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760}
+ HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576}
services:
# API service
@@ -625,7 +630,7 @@ services:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/settings.html
# https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#docker-prod-prerequisites
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch:8.14.3
+ image: docker.elastic.co/elasticsearch/elasticsearch:8.15.1
container_name: elasticsearch
profiles:
- elasticsearch
@@ -652,7 +657,7 @@ services:
# https://www.elastic.co/guide/en/kibana/current/docker.html
# https://www.elastic.co/guide/en/kibana/current/settings.html
kibana:
- image: docker.elastic.co/kibana/kibana:8.14.3
+ image: docker.elastic.co/kibana/kibana:8.15.1
container_name: kibana
profiles:
- elasticsearch
diff --git a/sdks/python-client/dify_client/client.py b/sdks/python-client/dify_client/client.py
index 2be079bdf3..5e42507a42 100644
--- a/sdks/python-client/dify_client/client.py
+++ b/sdks/python-client/dify_client/client.py
@@ -1,103 +1,80 @@
import json
+
import requests
class DifyClient:
- def __init__(self, api_key, base_url: str = 'https://api.dify.ai/v1'):
+ def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1"):
self.api_key = api_key
self.base_url = base_url
def _send_request(self, method, endpoint, json=None, params=None, stream=False):
- headers = {
- "Authorization": f"Bearer {self.api_key}",
- "Content-Type": "application/json"
- }
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
url = f"{self.base_url}{endpoint}"
response = requests.request(method, url, json=json, params=params, headers=headers, stream=stream)
return response
-
def _send_request_with_files(self, method, endpoint, data, files):
- headers = {
- "Authorization": f"Bearer {self.api_key}"
- }
+ headers = {"Authorization": f"Bearer {self.api_key}"}
url = f"{self.base_url}{endpoint}"
response = requests.request(method, url, data=data, headers=headers, files=files)
return response
-
+
def message_feedback(self, message_id, rating, user):
- data = {
- "rating": rating,
- "user": user
- }
+ data = {"rating": rating, "user": user}
return self._send_request("POST", f"/messages/{message_id}/feedbacks", data)
-
+
def get_application_parameters(self, user):
params = {"user": user}
return self._send_request("GET", "/parameters", params=params)
-
+
def file_upload(self, user, files):
- data = {
- "user": user
- }
+ data = {"user": user}
return self._send_request_with_files("POST", "/files/upload", data=data, files=files)
- def text_to_audio(self, text:str, user:str, streaming:bool=False):
- data = {
- "text": text,
- "user": user,
- "streaming": streaming
- }
+ def text_to_audio(self, text: str, user: str, streaming: bool = False):
+ data = {"text": text, "user": user, "streaming": streaming}
return self._send_request("POST", "/text-to-audio", data=data)
-
- def get_meta(self,user):
- params = { "user": user}
- return self._send_request("GET", f"/meta", params=params)
+
+ def get_meta(self, user):
+ params = {"user": user}
+ return self._send_request("GET", "/meta", params=params)
class CompletionClient(DifyClient):
def create_completion_message(self, inputs, response_mode, user, files=None):
- data = {
- "inputs": inputs,
- "response_mode": response_mode,
- "user": user,
- "files": files
- }
- return self._send_request("POST", "/completion-messages", data,
- stream=True if response_mode == "streaming" else False)
+ data = {"inputs": inputs, "response_mode": response_mode, "user": user, "files": files}
+ return self._send_request(
+ "POST", "/completion-messages", data, stream=True if response_mode == "streaming" else False
+ )
class ChatClient(DifyClient):
def create_chat_message(self, inputs, query, user, response_mode="blocking", conversation_id=None, files=None):
- data = {
- "inputs": inputs,
- "query": query,
- "user": user,
- "response_mode": response_mode,
- "files": files
- }
+ data = {"inputs": inputs, "query": query, "user": user, "response_mode": response_mode, "files": files}
if conversation_id:
data["conversation_id"] = conversation_id
- return self._send_request("POST", "/chat-messages", data,
- stream=True if response_mode == "streaming" else False)
-
- def get_suggested(self, message_id, user:str):
+ return self._send_request(
+ "POST", "/chat-messages", data, stream=True if response_mode == "streaming" else False
+ )
+
+ def get_suggested(self, message_id, user: str):
params = {"user": user}
return self._send_request("GET", f"/messages/{message_id}/suggested", params=params)
-
+
def stop_message(self, task_id, user):
data = {"user": user}
- return self._send_request("POST", f"/chat-messages/{task_id}/stop", data)
+ return self._send_request("POST", f"/chat-messages/{task_id}/stop", data)
def get_conversations(self, user, last_id=None, limit=None, pinned=None):
params = {"user": user, "last_id": last_id, "limit": limit, "pinned": pinned}
return self._send_request("GET", "/conversations", params=params)
-
+
def get_conversation_messages(self, user, conversation_id=None, first_id=None, limit=None):
params = {"user": user}
@@ -109,15 +86,15 @@ class ChatClient(DifyClient):
params["limit"] = limit
return self._send_request("GET", "/messages", params=params)
-
- def rename_conversation(self, conversation_id, name,auto_generate:bool, user:str):
- data = {"name": name, "auto_generate": auto_generate,"user": user}
+
+ def rename_conversation(self, conversation_id, name, auto_generate: bool, user: str):
+ data = {"name": name, "auto_generate": auto_generate, "user": user}
return self._send_request("POST", f"/conversations/{conversation_id}/name", data)
def delete_conversation(self, conversation_id, user):
data = {"user": user}
return self._send_request("DELETE", f"/conversations/{conversation_id}", data)
-
+
def audio_to_text(self, audio_file, user):
data = {"user": user}
files = {"audio_file": audio_file}
@@ -125,10 +102,10 @@ class ChatClient(DifyClient):
class WorkflowClient(DifyClient):
- def run(self, inputs:dict, response_mode:str="streaming", user:str="abc-123"):
+ def run(self, inputs: dict, response_mode: str = "streaming", user: str = "abc-123"):
data = {"inputs": inputs, "response_mode": response_mode, "user": user}
return self._send_request("POST", "/workflows/run", data)
-
+
def stop(self, task_id, user):
data = {"user": user}
return self._send_request("POST", f"/workflows/tasks/{task_id}/stop", data)
@@ -137,10 +114,8 @@ class WorkflowClient(DifyClient):
return self._send_request("GET", f"/workflows/run/{workflow_run_id}")
-
class KnowledgeBaseClient(DifyClient):
-
- def __init__(self, api_key, base_url: str = 'https://api.dify.ai/v1', dataset_id: str = None):
+ def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1", dataset_id: str = None):
"""
Construct a KnowledgeBaseClient object.
@@ -150,10 +125,7 @@ class KnowledgeBaseClient(DifyClient):
dataset_id (str, optional): ID of the dataset. Defaults to None. You don't need this if you just want to
create a new dataset. or list datasets. otherwise you need to set this.
"""
- super().__init__(
- api_key=api_key,
- base_url=base_url
- )
+ super().__init__(api_key=api_key, base_url=base_url)
self.dataset_id = dataset_id
def _get_dataset_id(self):
@@ -162,10 +134,10 @@ class KnowledgeBaseClient(DifyClient):
return self.dataset_id
def create_dataset(self, name: str, **kwargs):
- return self._send_request('POST', '/datasets', {'name': name}, **kwargs)
+ return self._send_request("POST", "/datasets", {"name": name}, **kwargs)
def list_datasets(self, page: int = 1, page_size: int = 20, **kwargs):
- return self._send_request('GET', f'/datasets?page={page}&limit={page_size}', **kwargs)
+ return self._send_request("GET", f"/datasets?page={page}&limit={page_size}", **kwargs)
def create_document_by_text(self, name, text, extra_params: dict = None, **kwargs):
"""
@@ -193,14 +165,7 @@ class KnowledgeBaseClient(DifyClient):
}
:return: Response from the API
"""
- data = {
- 'indexing_technique': 'high_quality',
- 'process_rule': {
- 'mode': 'automatic'
- },
- 'name': name,
- 'text': text
- }
+ data = {"indexing_technique": "high_quality", "process_rule": {"mode": "automatic"}, "name": name, "text": text}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
url = f"/datasets/{self._get_dataset_id()}/document/create_by_text"
@@ -233,10 +198,7 @@ class KnowledgeBaseClient(DifyClient):
}
:return: Response from the API
"""
- data = {
- 'name': name,
- 'text': text
- }
+ data = {"name": name, "text": text}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/update_by_text"
@@ -269,16 +231,11 @@ class KnowledgeBaseClient(DifyClient):
:return: Response from the API
"""
files = {"file": open(file_path, "rb")}
- data = {
- 'process_rule': {
- 'mode': 'automatic'
- },
- 'indexing_technique': 'high_quality'
- }
+ data = {"process_rule": {"mode": "automatic"}, "indexing_technique": "high_quality"}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
if original_document_id is not None:
- data['original_document_id'] = original_document_id
+ data["original_document_id"] = original_document_id
url = f"/datasets/{self._get_dataset_id()}/document/create_by_file"
return self._send_request_with_files("POST", url, {"data": json.dumps(data)}, files)
@@ -352,11 +309,11 @@ class KnowledgeBaseClient(DifyClient):
"""
params = {}
if page is not None:
- params['page'] = page
+ params["page"] = page
if page_size is not None:
- params['limit'] = page_size
+ params["limit"] = page_size
if keyword is not None:
- params['keyword'] = keyword
+ params["keyword"] = keyword
url = f"/datasets/{self._get_dataset_id()}/documents"
return self._send_request("GET", url, params=params, **kwargs)
@@ -383,9 +340,9 @@ class KnowledgeBaseClient(DifyClient):
url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/segments"
params = {}
if keyword is not None:
- params['keyword'] = keyword
+ params["keyword"] = keyword
if status is not None:
- params['status'] = status
+ params["status"] = status
if "params" in kwargs:
params.update(kwargs["params"])
return self._send_request("GET", url, params=params, **kwargs)
diff --git a/web/app/activate/page.tsx b/web/app/activate/page.tsx
index 90874f50ce..0f18544335 100644
--- a/web/app/activate/page.tsx
+++ b/web/app/activate/page.tsx
@@ -22,7 +22,7 @@ const Activate = () => {
{children}
)
- }, [chartData, children, className, inline, isSVG, language, languageShowName, match, props])
+ }
+ else if (language === 'svg' && isSVG) {
+ return (
+ {children}
+
+ return (
+